trying to fix training

This commit is contained in:
Dobromir Popov
2025-03-29 03:53:38 +02:00
parent 2255a8363a
commit ebbc0ed2d7
7 changed files with 533 additions and 304 deletions

View File

@ -224,12 +224,14 @@ class DataInterface:
for tf in timeframes:
if tf in dfs:
X, y, ts = self._create_features(dfs[tf], window_size)
features.append(X)
if len(targets) == 0: # Only need targets from one timeframe
targets = y
timestamps = ts
if X is not None and y is not None:
features.append(X)
if len(targets) == 0: # Only need targets from one timeframe
targets = y
timestamps = ts
if not features:
logger.error("Failed to create features for any timeframe")
return None, None, None
# Stack features from all timeframes along the time dimension
@ -250,6 +252,9 @@ class DataInterface:
X = np.nan_to_num(X, nan=0.0, posinf=1.0, neginf=-1.0)
X = np.clip(X, -1e6, 1e6) # Clip extreme values
# Log data shapes for debugging
logger.info(f"Prepared input data - X shape: {X.shape}, y shape: {np.array(targets).shape}")
return X, targets, timestamps
def _create_features(self, df, window_size):
@ -304,7 +309,13 @@ class DataInterface:
for i in range(len(ohlcv_scaled) - window_size):
# Input: window_size candles of OHLCV data
X.append(ohlcv_scaled[i:i+window_size])
window = ohlcv_scaled[i:i+window_size]
# Validate window data
if np.any(np.isnan(window)) or np.any(np.isinf(window)):
continue
X.append(window)
# Target: binary classification - price goes up (1) or down (0)
# 1 if close price increases in the next candle, 0 otherwise
@ -314,7 +325,18 @@ class DataInterface:
# Store timestamp for reference
timestamps.append(df['timestamp'].iloc[i+window_size])
return np.array(X), np.array(y), np.array(timestamps)
if not X:
logger.error("No valid windows created")
return None, None, None
X = np.array(X)
y = np.array(y)
timestamps = np.array(timestamps)
# Log shapes for debugging
logger.info(f"Created features - X shape: {X.shape}, y shape: {y.shape}")
return X, y, timestamps
def generate_training_dataset(self, timeframes=None, n_candles=1000, window_size=20):
"""
@ -388,6 +410,95 @@ class DataInterface:
# OHLCV (5 features) per timeframe
return 5 * len(self.timeframes)
def calculate_pnl(self, predictions, actual_prices, position_size=1.0):
    """
    Calculate PnL based on predictions and actual price movements.

    Each prediction ``i`` is evaluated over the move from ``actual_prices[i]``
    to ``actual_prices[i + 1]``, so exactly one more price than predictions
    is required.

    Args:
        predictions (np.array): Model predictions (0: sell, 1: hold, 2: buy)
        actual_prices (np.array): Actual price data
        position_size (float): Size of the position to trade

    Returns:
        tuple: (total_pnl, win_rate, trade_history). ``total_pnl`` is the sum
        of fractional price changes scaled by ``position_size``; ``win_rate``
        is wins / trades (0.0 when there were no trades); ``trade_history``
        is a list of dicts with keys 'type', 'price', 'pnl' and 'timestamp'.
        On a length mismatch ``(0.0, 0.0, [])`` is returned.
    """
    if len(predictions) != len(actual_prices) - 1:
        logger.error("Predictions and prices length mismatch")
        return 0.0, 0.0, []

    pnl = 0.0
    trades = 0
    wins = 0
    trade_history = []

    # Timestamp column of the first configured timeframe; fetched on the
    # first trade only, so a run with no trades never touches the dataframes.
    # NOTE(review): timestamps are read at positional index i, which assumes
    # actual_prices starts at row 0 of that dataframe - confirm for callers
    # that pass a slice (e.g. a validation split).
    timestamps = None

    for i, pred in enumerate(predictions):
        current_price = actual_prices[i]
        next_price = actual_prices[i + 1]

        # A zero price makes the percentage change undefined
        # (ZeroDivisionError for Python floats, inf/nan for numpy floats),
        # which would poison the running total. Skip the candle instead.
        if current_price == 0:
            logger.warning(f"Skipping candle {i}: zero price, cannot compute price change")
            continue

        # Calculate price change percentage
        price_change = (next_price - current_price) / current_price

        # Map the signal to a trade; anything else (e.g. 1: hold) is no trade
        if pred == 2:  # Buy
            trade_type = 'buy'
            trade_pnl = price_change * position_size
        elif pred == 0:  # Sell
            trade_type = 'sell'
            trade_pnl = -price_change * position_size
        else:
            continue

        if timestamps is None:
            timestamps = self.dataframes[self.timeframes[0]]['timestamp']

        trades += 1
        if trade_pnl > 0:
            wins += 1
        trade_history.append({
            'type': trade_type,
            'price': current_price,
            'pnl': trade_pnl,
            'timestamp': timestamps.iloc[i]
        })
        pnl += trade_pnl

    win_rate = wins / trades if trades > 0 else 0.0
    return pnl, win_rate, trade_history
def prepare_training_data(self, refresh=False, refresh_interval=300):
    """
    Prepare training and validation data with optional refresh.

    Historical data is re-fetched for every configured timeframe when
    ``refresh`` is set or when more than ``refresh_interval`` seconds have
    passed since the last refresh recorded on this instance.

    Args:
        refresh (bool): Whether to force refresh data
        refresh_interval (int): Minimum seconds between refreshes

    Returns:
        tuple: (X_train, y_train, X_val, y_val, train_prices, val_prices).
        All six entries are None when no input data could be prepared, so
        callers can always unpack six values.
    """
    current_time = datetime.now()
    last_refresh = getattr(self, 'last_refresh', datetime.min)
    seconds_since_refresh = (current_time - last_refresh).total_seconds()

    if refresh or seconds_since_refresh > refresh_interval:
        logger.info("Refreshing training data...")
        for tf in self.timeframes:
            self.get_historical_data(timeframe=tf, n_candles=1000, use_cache=False)
        self.last_refresh = current_time

    # Get all data
    X, y, _ = self.prepare_nn_input()
    if X is None:
        # Same arity as the success path below
        return None, None, None, None, None, None

    # Get price data for PnL calculation
    # NOTE(review): prices are the raw close column of the first timeframe and
    # are split at the same index as X. If X holds windowed samples there are
    # fewer samples than candles, so the price halves may not line up with the
    # sample halves - confirm against prepare_nn_input and calculate_pnl.
    prices = self.dataframes[self.timeframes[0]]['close'].values

    # Split into train/validation (80/20)
    split_idx = int(len(X) * 0.8)
    return (X[:split_idx], y[:split_idx], X[split_idx:], y[split_idx:],
            prices[:split_idx], prices[split_idx:])
def prepare_realtime_input(self, timeframe='1h', n_candles=30, window_size=20):
"""
Prepare a single input sample from the most recent data for real-time inference.