trying to fix training
This commit is contained in:
@ -224,12 +224,14 @@ class DataInterface:
|
||||
for tf in timeframes:
|
||||
if tf in dfs:
|
||||
X, y, ts = self._create_features(dfs[tf], window_size)
|
||||
features.append(X)
|
||||
if len(targets) == 0: # Only need targets from one timeframe
|
||||
targets = y
|
||||
timestamps = ts
|
||||
if X is not None and y is not None:
|
||||
features.append(X)
|
||||
if len(targets) == 0: # Only need targets from one timeframe
|
||||
targets = y
|
||||
timestamps = ts
|
||||
|
||||
if not features:
|
||||
logger.error("Failed to create features for any timeframe")
|
||||
return None, None, None
|
||||
|
||||
# Stack features from all timeframes along the time dimension
|
||||
@ -250,6 +252,9 @@ class DataInterface:
|
||||
X = np.nan_to_num(X, nan=0.0, posinf=1.0, neginf=-1.0)
|
||||
X = np.clip(X, -1e6, 1e6) # Clip extreme values
|
||||
|
||||
# Log data shapes for debugging
|
||||
logger.info(f"Prepared input data - X shape: {X.shape}, y shape: {np.array(targets).shape}")
|
||||
|
||||
return X, targets, timestamps
|
||||
|
||||
def _create_features(self, df, window_size):
|
||||
@ -304,7 +309,13 @@ class DataInterface:
|
||||
|
||||
for i in range(len(ohlcv_scaled) - window_size):
|
||||
# Input: window_size candles of OHLCV data
|
||||
X.append(ohlcv_scaled[i:i+window_size])
|
||||
window = ohlcv_scaled[i:i+window_size]
|
||||
|
||||
# Validate window data
|
||||
if np.any(np.isnan(window)) or np.any(np.isinf(window)):
|
||||
continue
|
||||
|
||||
X.append(window)
|
||||
|
||||
# Target: binary classification - price goes up (1) or down (0)
|
||||
# 1 if close price increases in the next candle, 0 otherwise
|
||||
@ -314,7 +325,18 @@ class DataInterface:
|
||||
# Store timestamp for reference
|
||||
timestamps.append(df['timestamp'].iloc[i+window_size])
|
||||
|
||||
return np.array(X), np.array(y), np.array(timestamps)
|
||||
if not X:
|
||||
logger.error("No valid windows created")
|
||||
return None, None, None
|
||||
|
||||
X = np.array(X)
|
||||
y = np.array(y)
|
||||
timestamps = np.array(timestamps)
|
||||
|
||||
# Log shapes for debugging
|
||||
logger.info(f"Created features - X shape: {X.shape}, y shape: {y.shape}")
|
||||
|
||||
return X, y, timestamps
|
||||
|
||||
def generate_training_dataset(self, timeframes=None, n_candles=1000, window_size=20):
|
||||
"""
|
||||
@ -388,6 +410,95 @@ class DataInterface:
|
||||
# OHLCV (5 features) per timeframe
|
||||
return 5 * len(self.timeframes)
|
||||
|
||||
def calculate_pnl(self, predictions, actual_prices, position_size=1.0):
    """
    Calculate PnL based on predictions and actual price movements.

    Prediction ``i`` is scored against the move from ``actual_prices[i]`` to
    ``actual_prices[i + 1]``, so exactly one more price than predictions is
    required.

    Args:
        predictions (np.array): Model predictions (0: sell, 1: hold, 2: buy)
        actual_prices (np.array): Actual price data
        position_size (float): Size of the position to trade

    Returns:
        tuple: (total_pnl, win_rate, trade_history). ``trade_history`` is a
            list of dicts with keys 'type', 'price', 'pnl' and 'timestamp',
            one per executed buy/sell. On a length mismatch the result is
            (0.0, 0.0, []).
    """
    if len(predictions) != len(actual_prices) - 1:
        logger.error("Predictions and prices length mismatch")
        return 0.0, 0.0, []

    pnl = 0.0
    wins = 0
    trade_history = []

    for i, pred in enumerate(predictions):
        # Buy profits from a rise, sell profits from a fall; anything else
        # (hold) does not trade.
        if pred == 2:
            side, direction = 'buy', 1.0
        elif pred == 0:
            side, direction = 'sell', -1.0
        else:
            continue

        current_price = actual_prices[i]
        next_price = actual_prices[i + 1]

        # A zero price has no defined percentage change; skip the candle
        # instead of dividing by zero.
        if current_price == 0:
            continue

        # Calculate price change percentage
        price_change = (next_price - current_price) / current_price
        trade_pnl = direction * price_change * position_size

        if trade_pnl > 0:
            wins += 1
        pnl += trade_pnl

        # NOTE(review): the timestamp is read positionally from the first
        # timeframe's dataframe; this assumes row i there corresponds to
        # actual_prices[i] - confirm against the caller.
        trade_history.append({
            'type': side,
            'price': current_price,
            'pnl': trade_pnl,
            'timestamp': self.dataframes[self.timeframes[0]]['timestamp'].iloc[i]
        })

    trades = len(trade_history)
    win_rate = wins / trades if trades > 0 else 0.0
    return pnl, win_rate, trade_history
|
||||
|
||||
def prepare_training_data(self, refresh=False, refresh_interval=300):
    """
    Prepare training and validation data with optional refresh.

    Args:
        refresh (bool): Whether to force refresh data
        refresh_interval (int): Minimum seconds between refreshes

    Returns:
        tuple: (X_train, y_train, X_val, y_val, train_prices, val_prices).
            All six entries are None when prepare_nn_input() produces no
            features, so callers can always unpack six values.
    """
    current_time = datetime.now()
    # A missing last_refresh attribute is treated as "never refreshed".
    last_refresh = getattr(self, 'last_refresh', datetime.min)
    if refresh or (current_time - last_refresh).total_seconds() > refresh_interval:
        logger.info("Refreshing training data...")
        for tf in self.timeframes:
            self.get_historical_data(timeframe=tf, n_candles=1000, use_cache=False)
        self.last_refresh = current_time

    # Get all data
    X, y, _ = self.prepare_nn_input()
    if X is None:
        # Same arity as the success path below.
        return None, None, None, None, None, None

    # Get price data for PnL calculation
    # NOTE(review): prices come straight from the first timeframe's 'close'
    # column and are cut at the same index as X; their length may differ
    # from len(X) - confirm the alignment callers expect.
    prices = self.dataframes[self.timeframes[0]]['close'].values

    # Split into train/validation (80/20)
    split_idx = int(len(X) * 0.8)
    return (X[:split_idx], y[:split_idx], X[split_idx:], y[split_idx:],
            prices[:split_idx], prices[split_idx:])
|
||||
|
||||
def prepare_realtime_input(self, timeframe='1h', n_candles=30, window_size=20):
|
||||
"""
|
||||
Prepare a single input sample from the most recent data for real-time inference.
|
||||
|
Reference in New Issue
Block a user