This commit is contained in:
Dobromir Popov 2025-03-29 04:09:03 +02:00
parent 43803caaf1
commit 8b3db10a85
3 changed files with 307 additions and 267 deletions

View File

@@ -11,6 +11,7 @@ import logging
 import numpy as np
 import matplotlib.pyplot as plt
 from datetime import datetime
+import math
 import torch
 import torch.nn as nn
@@ -24,79 +25,84 @@ logger = logging.getLogger(__name__)
 class CNNPyTorch(nn.Module):
     """PyTorch CNN model for time series analysis"""
-    def __init__(self, input_shape, output_size=5):
+    def __init__(self, input_shape, output_size=3):
         """
-        Initialize the enhanced CNN model.
+        Initialize the CNN model.
         Args:
             input_shape (tuple): Shape of input data (window_size, features)
-            output_size (int): Always 5 for our trading signals
+            output_size (int): Size of the output (3 for BUY/HOLD/SELL)
         """
         super(CNNPyTorch, self).__init__()
         window_size, num_features = input_shape
-        kernel_size = 5
+        kernel_size = min(5, window_size)  # Ensure kernel size doesn't exceed window size
         dropout_rate = 0.3
-        # Enhanced CNN Architecture
+        # Calculate initial channel size based on number of features
+        initial_channels = max(32, num_features * 2)  # Scale channels with features
+        # CNN Architecture
         self.conv_layers = nn.Sequential(
             # Block 1
-            nn.Conv1d(num_features, 64, kernel_size, padding='same'),
-            nn.BatchNorm1d(64),
+            nn.Conv1d(num_features, initial_channels, kernel_size, padding='same'),
+            nn.BatchNorm1d(initial_channels),
             nn.ReLU(),
+            nn.Dropout(dropout_rate),
             # Block 2
-            nn.Conv1d(64, 128, kernel_size, padding='same'),
-            nn.BatchNorm1d(128),
+            nn.Conv1d(initial_channels, initial_channels * 2, kernel_size, padding='same'),
+            nn.BatchNorm1d(initial_channels * 2),
             nn.ReLU(),
             nn.MaxPool1d(2),
+            nn.Dropout(dropout_rate),
             # Block 3
-            nn.Conv1d(128, 256, kernel_size, padding='same'),
-            nn.BatchNorm1d(256),
+            nn.Conv1d(initial_channels * 2, initial_channels * 4, kernel_size, padding='same'),
+            nn.BatchNorm1d(initial_channels * 4),
             nn.ReLU(),
+            nn.Dropout(dropout_rate),
             # Block 4
-            nn.Conv1d(256, 512, kernel_size, padding='same'),
-            nn.BatchNorm1d(512),
+            nn.Conv1d(initial_channels * 4, initial_channels * 8, kernel_size, padding='same'),
+            nn.BatchNorm1d(initial_channels * 8),
             nn.ReLU(),
-            nn.MaxPool1d(2)
+            nn.MaxPool1d(2),
+            nn.Dropout(dropout_rate)
         )
         # Calculate flattened size after conv and pooling
-        conv_output_size = 512 * (window_size // 4)
-        # Enhanced dense layers
+        conv_output_size = (initial_channels * 8) * (window_size // 4)
+        # Dense layers with scaled sizes
+        dense_size = min(2048, conv_output_size)  # Cap dense layer size
         self.dense_block = nn.Sequential(
             nn.Flatten(),
-            nn.Linear(conv_output_size, 512),
-            nn.BatchNorm1d(512),
+            nn.Linear(conv_output_size, dense_size),
+            nn.BatchNorm1d(dense_size),
             nn.ReLU(),
             nn.Dropout(dropout_rate),
-            nn.Linear(512, 256),
-            nn.BatchNorm1d(256),
+            nn.Linear(dense_size, dense_size // 2),
+            nn.BatchNorm1d(dense_size // 2),
             nn.ReLU(),
             nn.Dropout(dropout_rate),
-            nn.Linear(256, 128),
-            nn.BatchNorm1d(128),
+            nn.Linear(dense_size // 2, dense_size // 4),
+            nn.BatchNorm1d(dense_size // 4),
             nn.ReLU(),
-            nn.Dropout(dropout_rate),
-            nn.Linear(128, output_size)
+            nn.Linear(dense_size // 4, output_size)
         )
-        # Activation based on output size
-        if output_size == 1:
-            self.activation = nn.Sigmoid()  # Binary classification or regression
-        elif output_size > 1:
-            self.activation = nn.Softmax(dim=1)  # Multi-class classification
-        else:
-            self.activation = nn.Identity()  # No activation
+        # Activation for output
+        self.activation = nn.Softmax(dim=1)
     def forward(self, x):
         """
-        Forward pass through enhanced network.
+        Forward pass through the network.
         Args:
             x: Input tensor of shape [batch_size, window_size, features]
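For reference (not part of the commit): a minimal sketch of how the scaled architecture above arrives at its flattened size. The window size, feature count, and the omission of the Dropout layers are illustrative assumptions, not values taken from the repository.

import torch
import torch.nn as nn

def build_conv_stack(window_size: int, num_features: int) -> nn.Sequential:
    # Same scaling rules as the diff: clamp the kernel and scale channels with the feature count.
    kernel_size = min(5, window_size)
    c = max(32, num_features * 2)  # initial_channels
    return nn.Sequential(
        nn.Conv1d(num_features, c, kernel_size, padding='same'), nn.BatchNorm1d(c), nn.ReLU(),
        nn.Conv1d(c, c * 2, kernel_size, padding='same'), nn.BatchNorm1d(c * 2), nn.ReLU(), nn.MaxPool1d(2),
        nn.Conv1d(c * 2, c * 4, kernel_size, padding='same'), nn.BatchNorm1d(c * 4), nn.ReLU(),
        nn.Conv1d(c * 4, c * 8, kernel_size, padding='same'), nn.BatchNorm1d(c * 8), nn.ReLU(), nn.MaxPool1d(2),
    )

window_size, num_features = 20, 15  # e.g. 3 timeframes x 5 OHLCV columns (assumed values)
stack = build_conv_stack(window_size, num_features)
x = torch.randn(4, num_features, window_size)  # [batch, features, window], i.e. already transposed
out = stack(x)  # the two MaxPool1d(2) layers halve the window twice
assert out.shape == (4, max(32, num_features * 2) * 8, window_size // 4)
print(out.flatten(1).shape[1])  # matches conv_output_size = (initial_channels * 8) * (window_size // 4)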
@@ -107,14 +113,16 @@ class CNNPyTorch(nn.Module):
         # Transpose for conv1d: [batch, features, window]
         x_t = x.transpose(1, 2)
-        # Process through all CNN layers
+        # Process through CNN layers
         conv_out = self.conv_layers(x_t)
         # Process through dense layers
-        output = self.dense_block(conv_out)
-        return self.activation(output)
+        dense_out = self.dense_block(conv_out)
+        # Apply activation
+        output = self.activation(dense_out)
+        return output
 class CNNModelPyTorch:
     """
@@ -124,14 +132,14 @@ class CNNModelPyTorch:
     predictions with the CNN model.
     """
-    def __init__(self, window_size, num_features, output_size=5, timeframes=None):
+    def __init__(self, window_size, num_features, output_size=3, timeframes=None):
         """
         Initialize the CNN model.
         Args:
             window_size (int): Size of the input window
             num_features (int): Number of features in the input data
-            output_size (int): Size of the output (1 for regression, 3 for classification)
+            output_size (int): Size of the output (default: 3 for BUY/HOLD/SELL)
             timeframes (list): List of timeframes used (for logging)
         """
         # Action tracking
@@ -171,26 +179,22 @@ class CNNModelPyTorch:
             output_size=self.output_size
         ).to(self.device)
-        # Initialize optimizer
+        # Initialize optimizer with learning rate schedule
         self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
+        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
+            self.optimizer, mode='max', factor=0.5, patience=10, verbose=True
+        )
-        # Initialize loss function based on output size
-        if self.output_size == 1:
-            self.criterion = nn.BCELoss()  # Binary classification
-        elif self.output_size > 1:
-            self.criterion = nn.CrossEntropyLoss()  # Multi-class classification
-        else:
-            self.criterion = nn.MSELoss()  # Regression
+        # Initialize loss function with class weights
+        class_weights = torch.tensor([1.0, 0.5, 1.0]).to(self.device)  # Lower weight for HOLD
+        self.criterion = nn.CrossEntropyLoss(weight=class_weights)
         logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")
-    def train_epoch(self, X_train, y_train, batch_size=32):
+    def train_epoch(self, X_train, y_train, future_prices=None, batch_size=32):
         """Train for one epoch and return loss and accuracy"""
         # Convert to PyTorch tensors
         X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)
-        if self.output_size == 1:
-            y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(self.device)
-        else:
         y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)
         # Create DataLoader
@@ -210,18 +214,18 @@ class CNNModelPyTorch:
             outputs = self.model(inputs)
             # Calculate loss
-            if self.output_size == 1:
-                loss = self.criterion(outputs, targets.unsqueeze(1))
-            else:
             loss = self.criterion(outputs, targets)
             # Backward pass and optimize
             loss.backward()
+            # Clip gradients to prevent exploding gradients
+            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
             self.optimizer.step()
             # Statistics
             running_loss += loss.item()
-            if self.output_size > 1:
             _, predicted = torch.max(outputs, 1)
             total += targets.size(0)
             correct += (predicted == targets).sum().item()
@@ -229,21 +233,25 @@ class CNNModelPyTorch:
         epoch_loss = running_loss / len(train_loader)
         epoch_acc = correct / total if total > 0 else 0
-        return epoch_loss, epoch_acc
-    def evaluate(self, X_val, y_val):
+        # Update learning rate scheduler
+        self.scheduler.step(epoch_acc)
+        # To maintain compatibility with the updated training code, we'll return 3 values
+        # But the price_loss will be zero since we're not using that in this model
+        return epoch_loss, 0.0, epoch_acc
+    def evaluate(self, X_val, y_val, future_prices=None):
         """Evaluate on validation data and return loss and accuracy"""
+        # Convert to PyTorch tensors
         X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
-        if self.output_size == 1:
-            y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
-        else:
         y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)
+        # Create DataLoader
         val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
         val_loader = DataLoader(val_dataset, batch_size=32)
         self.model.eval()
-        val_loss = 0.0
+        running_loss = 0.0
         correct = 0
         total = 0
@@ -253,20 +261,20 @@ class CNNModelPyTorch:
                 outputs = self.model(inputs)
                 # Calculate loss
-                if self.output_size == 1:
-                    loss = self.criterion(outputs, targets.unsqueeze(1))
-                else:
                 loss = self.criterion(outputs, targets)
-                val_loss += loss.item()
+                running_loss += loss.item()
                 # Calculate accuracy
-                if self.output_size > 1:
                 _, predicted = torch.max(outputs, 1)
                 total += targets.size(0)
                 correct += (predicted == targets).sum().item()
-        return val_loss / len(val_loader), correct / total if total > 0 else 0
+        val_loss = running_loss / len(val_loader)
+        val_acc = correct / total if total > 0 else 0
+        # To maintain compatibility with the updated training code, we'll return 3 values
+        # But the price_loss will be zero since we're not using that in this model
+        return val_loss, 0.0, val_acc
     def predict(self, X):
         """Make predictions on input data"""
@@ -275,15 +283,13 @@ class CNNModelPyTorch:
         with torch.no_grad():
             outputs = self.model(X_tensor)
-            if self.output_size > 1:
-                _, predicted = torch.max(outputs, 1)
-                return predicted.cpu().numpy()
-            else:
-                return outputs.cpu().numpy()
+            # To maintain compatibility with the transformer model, return the action probs
+            # And a dummy price prediction of zeros
+            return outputs.cpu().numpy(), np.zeros((len(X), 1))
     def predict_next_candles(self, X, n_candles=3):
         """
-        Predict the next n candles for each timeframe.
+        Predict the next n candles.
         Args:
             X: Input data of shape [batch_size, window_size, features]
@@ -296,33 +302,14 @@ class CNNModelPyTorch:
         X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
         with torch.no_grad():
-            # Get the last window of data
-            last_window = X_tensor[-1:]  # [1, window_size, features]
-            # Initialize predictions
+            # Get predictions for the input window
+            action_probs = self.model(X_tensor)
+            # For compatibility, we'll return a dictionary with the timeframes
             predictions = {}
-            # For each timeframe, predict next n candles
             for i, tf in enumerate(self.timeframes):
-                # Extract features for this timeframe
-                tf_features = last_window[:, :, i*5:(i+1)*5]  # [1, window_size, 5]
-                # Predict next n candles
-                tf_predictions = []
-                current_window = tf_features
-                for _ in range(n_candles):
-                    # Get prediction for next candle
-                    output = self.model(current_window)
-                    tf_predictions.append(output.cpu().numpy())
-                    # Update window for next prediction
-                    current_window = torch.cat([
-                        current_window[:, 1:, :],
-                        output.unsqueeze(1)
-                    ], dim=1)
-                predictions[tf] = np.concatenate(tf_predictions, axis=0)
+                # Simple prediction: just repeat the current prediction for next n candles
+                predictions[tf] = np.tile(action_probs.cpu().numpy(), (n_candles, 1))
             return predictions
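For reference (not part of the commit): a standalone sketch of the training-step pattern this file now uses, i.e. a class-weighted CrossEntropyLoss, gradient clipping, and a ReduceLROnPlateau scheduler stepped on accuracy. The toy model and tensor shapes below are illustrative assumptions.

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Sequential(nn.Flatten(), nn.Linear(20 * 5, 3))  # stand-in for the CNN above
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=10)
class_weights = torch.tensor([1.0, 0.5, 1.0])  # SELL, HOLD, BUY; HOLD is down-weighted
criterion = nn.CrossEntropyLoss(weight=class_weights)

inputs = torch.randn(32, 20, 5)        # [batch, window, features]
targets = torch.randint(0, 3, (32,))   # class indices

optimizer.zero_grad()
logits = model(inputs)
loss = criterion(logits, targets)
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # clip before the optimizer step
optimizer.step()

accuracy = (logits.argmax(dim=1) == targets).float().mean().item()
scheduler.step(accuracy)  # mode='max': reduce the LR when accuracy stops improving

One caveat worth noting: nn.CrossEntropyLoss applies log-softmax internally and expects raw logits, whereas the model above applies nn.Softmax in forward() before the loss is computed.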

View File

@@ -148,19 +148,29 @@ def train(data_interface, model, args):
         best_val_acc = 0
         best_val_pnl = float('-inf')
         best_win_rate = 0
+        best_price_mae = float('inf')
         logger.info("Verifying data interface...")
         X_sample, y_sample, _, _, _, _ = data_interface.prepare_training_data(refresh=True)
         logger.info(f"Data validation - X shape: {X_sample.shape}, y shape: {y_sample.shape}")
+        # Calculate refresh intervals based on timeframes
+        min_timeframe = min(args.timeframes)
+        refresh_interval = {
+            '1s': 1,
+            '1m': 60,
+            '5m': 300,
+            '15m': 900,
+            '1h': 3600,
+            '4h': 14400,
+            '1d': 86400
+        }.get(min_timeframe, 60)
+        logger.info(f"Using refresh interval of {refresh_interval} seconds based on {min_timeframe} timeframe")
         for epoch in range(args.epochs):
-            # More frequent refresh for shorter timeframes
-            if '1s' in args.timeframes:
-                refresh = True  # Always refresh for tick data
-                refresh_interval = 30  # 30 seconds for tick data
-            else:
-                refresh = epoch % 1 == 0  # Refresh every epoch
-                refresh_interval = 120  # 2 minutes for other timeframes
+            # Always refresh for tick data or when using multiple timeframes
+            refresh = '1s' in args.timeframes or len(args.timeframes) > 1
             logger.info(f"\nStarting epoch {epoch+1}/{args.epochs}")
             X_train, y_train, X_val, y_val, train_prices, val_prices = data_interface.prepare_training_data(
@@ -170,55 +180,84 @@ def train(data_interface, model, args):
             logger.info(f"Training data - X shape: {X_train.shape}, y shape: {y_train.shape}")
             logger.info(f"Validation data - X shape: {X_val.shape}, y shape: {y_val.shape}")
+            # Get future prices for retrospective training
+            train_future_prices = data_interface.get_future_prices(train_prices, n_candles=3)
+            val_future_prices = data_interface.get_future_prices(val_prices, n_candles=3)
             # Train and validate
             try:
-                train_loss, train_acc = model.train_epoch(X_train, y_train, args.batch_size)
-                val_loss, val_acc = model.evaluate(X_val, y_val)
+                train_action_loss, train_price_loss, train_acc = model.train_epoch(
+                    X_train, y_train, train_future_prices, args.batch_size
+                )
+                val_action_loss, val_price_loss, val_acc = model.evaluate(
+                    X_val, y_val, val_future_prices
+                )
                 # Get predictions for PnL calculation
-                train_preds = model.predict(X_train)
-                val_preds = model.predict(X_val)
+                train_action_probs, train_price_preds = model.predict(X_train)
+                val_action_probs, val_price_preds = model.predict(X_val)
                 # Calculate PnL and win rates
                 train_pnl, train_win_rate, train_trades = data_interface.calculate_pnl(
-                    train_preds, train_prices, position_size=1.0
+                    train_action_probs, train_prices, position_size=1.0
                 )
                 val_pnl, val_win_rate, val_trades = data_interface.calculate_pnl(
-                    val_preds, val_prices, position_size=1.0
+                    val_action_probs, val_prices, position_size=1.0
                 )
+                # Calculate price prediction error
+                train_price_mae = np.mean(np.abs(train_price_preds - train_future_prices))
+                val_price_mae = np.mean(np.abs(val_price_preds - val_future_prices))
                 # Monitor action distribution
-                train_actions = np.bincount(train_preds, minlength=3)
-                val_actions = np.bincount(val_preds, minlength=3)
+                train_actions = np.bincount(np.argmax(train_action_probs, axis=1), minlength=3)
+                val_actions = np.bincount(np.argmax(val_action_probs, axis=1), minlength=3)
                 # Log metrics
-                writer.add_scalar('Loss/train', train_loss, epoch)
+                writer.add_scalar('Loss/action_train', train_action_loss, epoch)
+                writer.add_scalar('Loss/price_train', train_price_loss, epoch)
+                writer.add_scalar('Loss/action_val', val_action_loss, epoch)
+                writer.add_scalar('Loss/price_val', val_price_loss, epoch)
                 writer.add_scalar('Accuracy/train', train_acc, epoch)
-                writer.add_scalar('Loss/val', val_loss, epoch)
                 writer.add_scalar('Accuracy/val', val_acc, epoch)
                 writer.add_scalar('PnL/train', train_pnl, epoch)
                 writer.add_scalar('PnL/val', val_pnl, epoch)
                 writer.add_scalar('WinRate/train', train_win_rate, epoch)
                 writer.add_scalar('WinRate/val', val_win_rate, epoch)
+                writer.add_scalar('PriceMAE/train', train_price_mae, epoch)
+                writer.add_scalar('PriceMAE/val', val_price_mae, epoch)
                 # Log action distribution
                 for i, action in enumerate(['SELL', 'HOLD', 'BUY']):
                     writer.add_scalar(f'Actions/train_{action}', train_actions[i], epoch)
                     writer.add_scalar(f'Actions/val_{action}', val_actions[i], epoch)
-                # Save best model based on validation PnL
-                if val_pnl > best_val_pnl:
+                # Save best model based on validation metrics
+                if val_pnl > best_val_pnl or (val_pnl == best_val_pnl and val_acc > best_val_acc):
                     best_val_pnl = val_pnl
                     best_val_acc = val_acc
                     best_win_rate = val_win_rate
+                    best_price_mae = val_price_mae
                     model.save(f"models/{args.model_type}_best.pt")
+                    logger.info("Saved new best model based on validation metrics")
                 # Log detailed metrics
-                logger.info(f"Epoch {epoch+1}/{args.epochs} - "
-                            f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.2f}, "
-                            f"PnL: {train_pnl:.2%}, Win Rate: {train_win_rate:.2%} - "
-                            f"Val Loss: {val_loss:.4f}, Acc: {val_acc:.2f}, "
-                            f"PnL: {val_pnl:.2%}, Win Rate: {val_win_rate:.2%}")
+                logger.info(f"Epoch {epoch+1}/{args.epochs}")
+                logger.info("Training Metrics:")
+                logger.info(f"  Action Loss: {train_action_loss:.4f}")
+                logger.info(f"  Price Loss: {train_price_loss:.4f}")
+                logger.info(f"  Accuracy: {train_acc:.2f}")
+                logger.info(f"  PnL: {train_pnl:.2%}")
+                logger.info(f"  Win Rate: {train_win_rate:.2%}")
+                logger.info(f"  Price MAE: {train_price_mae:.2f}")
+                logger.info("Validation Metrics:")
+                logger.info(f"  Action Loss: {val_action_loss:.4f}")
+                logger.info(f"  Price Loss: {val_price_loss:.4f}")
+                logger.info(f"  Accuracy: {val_acc:.2f}")
+                logger.info(f"  PnL: {val_pnl:.2%}")
+                logger.info(f"  Win Rate: {val_win_rate:.2%}")
+                logger.info(f"  Price MAE: {val_price_mae:.2f}")
                 # Log action distribution
                 logger.info("Action Distribution:")
@@ -226,41 +265,33 @@ def train(data_interface, model, args):
                     logger.info(f"  {action}: Train={train_actions[i]}, Val={val_actions[i]}")
                 # Log trade statistics
-                if train_trades:
+                logger.info("Trade Statistics:")
                 logger.info(f"  Training trades: {len(train_trades)}")
                 logger.info(f"  Validation trades: {len(val_trades)}")
-                # Retrospective fine-tuning
-                if epoch > 0 and val_pnl > 0:  # Only fine-tune if we're making profit
-                    logger.info("Performing retrospective fine-tuning...")
-                    # Get predictions for next few candles
+                # Log next candle predictions
+                if epoch % 10 == 0:  # Every 10 epochs
+                    logger.info("\nNext Candle Predictions:")
                     next_candles = model.predict_next_candles(X_val[-1:], n_candles=3)
-                    # Log predictions for each timeframe
-                    for tf, preds in next_candles.items():
-                        logger.info(f"Next 3 candles for {tf}:")
-                        for i, pred in enumerate(preds):
+                    for tf in args.timeframes:
+                        if tf in next_candles:
+                            logger.info(f"\n{tf} timeframe predictions:")
+                            for i, pred in enumerate(next_candles[tf]):
                                 action = ['SELL', 'HOLD', 'BUY'][np.argmax(pred)]
                                 confidence = np.max(pred)
                                 logger.info(f"  Candle {i+1}: {action} (confidence: {confidence:.2f})")
-                    # Fine-tune on recent successful trades
-                    successful_trades = [t for t in train_trades if t['pnl'] > 0]
-                    if successful_trades:
-                        logger.info(f"Fine-tuning on {len(successful_trades)} successful trades")
-                        # TODO: Implement fine-tuning logic here
            except Exception as e:
                 logger.error(f"Error during epoch {epoch+1}: {str(e)}")
                 continue
         # Save final model
         model.save(f"models/{args.model_type}_final_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pt")
-        logger.info(f"Training complete. Best validation metrics:")
+        logger.info(f"\nTraining complete. Best validation metrics:")
         logger.info(f"Accuracy: {best_val_acc:.2f}")
         logger.info(f"PnL: {best_val_pnl:.2%}")
         logger.info(f"Win Rate: {best_win_rate:.2%}")
+        logger.info(f"Price MAE: {best_price_mae:.2f}")
     except Exception as e:
         logger.error(f"Error in training: {str(e)}")

View File

@@ -61,6 +61,10 @@ class DataInterface:
         """
         cache_file = os.path.join(self.data_dir, f"{self.symbol.replace('/', '_')}_{timeframe}.csv")
+        # For 1s timeframe, always fetch fresh data
+        if timeframe == '1s':
+            use_cache = False
         # Check if cached data exists and is recent
         if use_cache and os.path.exists(cache_file):
             try:
@@ -74,18 +78,18 @@ class DataInterface:
                 logger.error(f"Error reading cached data: {str(e)}")
         # If we get here, we need to fetch data
-        # For now, we'll use a placeholder for fetching data from an exchange
         try:
-            # In a real implementation, we would fetch data from an exchange or API here
-            # For this example, we'll create dummy data if we can't load from cache
             logger.info(f"Fetching historical data for {self.symbol} {timeframe}")
+            # For 1s timeframe, we need more data points
+            if timeframe == '1s':
+                n_candles = min(n_candles * 60, 10000)  # Up to 10k ticks
             # Placeholder for real data fetching
+            # In a real implementation, this would be replaced with API calls
             self._fetch_data_from_exchange(timeframe, n_candles)
-            # Save to cache
-            if self.dataframes[timeframe] is not None:
+            # Save to cache (except for 1s timeframe)
+            if self.dataframes[timeframe] is not None and timeframe != '1s':
                 self.dataframes[timeframe].to_csv(cache_file, index=False)
                 return self.dataframes[timeframe]
             else:
@@ -122,6 +126,7 @@ class DataInterface:
         """
         # Map timeframe to seconds
         tf_seconds = {
+            '1s': 1,  # Added 1s timeframe
             '1m': 60,
             '5m': 300,
             '15m': 900,
@@ -207,59 +212,62 @@ class DataInterface:
         # Get data for all requested timeframes
         dfs = {}
-        min_length = float('inf')
         for tf in timeframes:
-            df = self.get_historical_data(timeframe=tf, n_candles=n_candles)
+            # For 1s timeframe, we need more data points
+            tf_candles = n_candles * 60 if tf == '1s' else n_candles
+            df = self.get_historical_data(timeframe=tf, n_candles=tf_candles)
             if df is not None and not df.empty:
                 dfs[tf] = df
-                # Keep track of minimum length across all timeframes
-                min_length = min(min_length, len(df))
         if not dfs:
             logger.error("No data available for feature creation")
             return None, None, None
-        # Align all dataframes to the same length
-        for tf in dfs:
-            dfs[tf] = dfs[tf].tail(min_length)
         # Create features for each timeframe
         features = []
-        targets = []
-        timestamps = []
+        targets = None
+        timestamps = None
         for tf in timeframes:
             if tf in dfs:
                 X, y, ts = self._create_features(dfs[tf], window_size)
                 if X is not None and y is not None:
                     features.append(X)
-                    if len(targets) == 0:  # Only need targets from one timeframe
+                    if targets is None:  # Only need targets from one timeframe
                         targets = y
                         timestamps = ts
-        if not features:
+        if not features or targets is None:
             logger.error("Failed to create features for any timeframe")
             return None, None, None
-        # Stack features from all timeframes along the time dimension
-        # Reshape each timeframe's features to [samples, window, 1, features]
-        reshaped_features = [f.reshape(f.shape[0], f.shape[1], 1, f.shape[2])
-                             for f in features]
-        # Concatenate along the channel dimension
-        X = np.concatenate(reshaped_features, axis=2)
-        # Reshape to [samples, window, features*timeframes]
-        X = X.reshape(X.shape[0], X.shape[1], -1)
+        # Ensure all feature arrays have the same length
+        min_samples = min(f.shape[0] for f in features)
+        features = [f[-min_samples:] for f in features]
+        targets = targets[-min_samples:]
+        timestamps = timestamps[-min_samples:]
+        # Stack features from all timeframes
+        X = np.concatenate([f.reshape(min_samples, window_size, -1) for f in features], axis=2)
         # Validate data
         if np.any(np.isnan(X)) or np.any(np.isinf(X)):
             logger.error("Generated features contain NaN or infinite values")
             return None, None, None
-        # Ensure all values are finite and normalized
-        X = np.nan_to_num(X, nan=0.0, posinf=1.0, neginf=-1.0)
-        X = np.clip(X, -1e6, 1e6)  # Clip extreme values
-        # Log data shapes for debugging
-        logger.info(f"Prepared input data - X shape: {X.shape}, y shape: {np.array(targets).shape}")
+        logger.info(f"Prepared input data - X shape: {X.shape}, y shape: {targets.shape}")
         return X, targets, timestamps
     def _create_features(self, df, window_size):
         """
-        Create features from OHLCV data using a sliding window approach.
+        Create features from OHLCV data using a sliding window.
         Args:
             df (pd.DataFrame): DataFrame with OHLCV data
@@ -267,76 +275,34 @@ class DataInterface:
         Returns:
             tuple: (X, y, timestamps) where:
-                X is the input features array
+                X is the feature array
                 y is the target array
-                timestamps is an array of timestamps for each sample
+                timestamps is the array of timestamps
         """
-        # Extract OHLCV columns
-        ohlcv = df[['open', 'high', 'low', 'close', 'volume']].values
-        # Validate data before scaling
-        if np.any(np.isnan(ohlcv)) or np.any(np.isinf(ohlcv)):
-            logger.error("Input data contains NaN or infinite values")
+        if len(df) < window_size + 1:
+            logger.error(f"Not enough data for feature creation (need {window_size + 1}, got {len(df)})")
             return None, None, None
-        # Handle potential constant columns (avoid division by zero in scaler)
-        ohlcv = np.nan_to_num(ohlcv, nan=0.0)
-        ranges = np.ptp(ohlcv, axis=0)
-        for i in range(len(ranges)):
-            if ranges[i] == 0:  # Constant column
-                ohlcv[:, i] = 1 if i == 3 else 0  # Set close to 1, others to 0
-        # Scale the data with safety checks
-        try:
-            scaler = MinMaxScaler()
-            ohlcv_scaled = scaler.fit_transform(ohlcv)
-            if np.any(np.isnan(ohlcv_scaled)) or np.any(np.isinf(ohlcv_scaled)):
-                logger.error("Scaling produced invalid values")
-                return None, None, None
-        except Exception as e:
-            logger.error(f"Scaling failed: {str(e)}")
-            return None, None, None
-        # Store the scaler for later use
-        timeframe = next((tf for tf in self.timeframes if self.dataframes.get(tf) is not None and
-                          self.dataframes[tf].equals(df)), 'unknown')
-        self.scalers[timeframe] = scaler
+        # Extract OHLCV data
+        data = df[['open', 'high', 'low', 'close', 'volume']].values
+        timestamps = df['timestamp'].values
         # Create sliding windows
-        X = []
-        y = []
-        timestamps = []
-        for i in range(len(ohlcv_scaled) - window_size):
-            # Input: window_size candles of OHLCV data
-            window = ohlcv_scaled[i:i+window_size]
-            # Validate window data
-            if np.any(np.isnan(window)) or np.any(np.isinf(window)):
-                continue
-            X.append(window)
-            # Target: binary classification - price goes up (1) or down (0)
-            # 1 if close price increases in the next candle, 0 otherwise
-            price_change = ohlcv[i+window_size, 3] - ohlcv[i+window_size-1, 3]
-            y.append(1 if price_change > 0 else 0)
-            # Store timestamp for reference
-            timestamps.append(df['timestamp'].iloc[i+window_size])
-        if not X:
-            logger.error("No valid windows created")
-            return None, None, None
-        X = np.array(X)
-        y = np.array(y)
-        timestamps = np.array(timestamps)
-        # Log shapes for debugging
+        X = np.array([data[i:i+window_size] for i in range(len(data)-window_size)])
+        # Create targets (next candle's movement: 0=down, 1=neutral, 2=up)
+        next_close = data[window_size:, 3]  # Close prices
+        curr_close = data[window_size-1:-1, 3]
+        price_changes = (next_close - curr_close) / curr_close
+        # Define thresholds for price movement classification
+        threshold = 0.001  # 0.1% threshold
+        y = np.zeros(len(price_changes), dtype=int)
+        y[price_changes > threshold] = 2  # Up
+        y[(price_changes >= -threshold) & (price_changes <= threshold)] = 1  # Neutral
         logger.info(f"Created features - X shape: {X.shape}, y shape: {y.shape}")
-        return X, y, timestamps
+        return X, y, timestamps[window_size:]
     def generate_training_dataset(self, timeframes=None, n_candles=1000, window_size=20):
         """
@@ -406,21 +372,28 @@ class DataInterface:
         return dataset_info
     def get_feature_count(self):
-        """Get the number of features per input sample"""
-        # OHLCV (5 features) per timeframe
-        return 5 * len(self.timeframes)
+        """
+        Calculate total number of features across all timeframes.
+        Returns:
+            int: Total number of features (5 features per timeframe)
+        """
+        return len(self.timeframes) * 5  # OHLCV features for each timeframe
     def calculate_pnl(self, predictions, actual_prices, position_size=1.0):
         """
-        Calculate PnL based on predictions and actual price movements.
+        Calculate PnL and win rates based on predictions and actual price movements.
         Args:
-            predictions (np.array): Model predictions (0: sell, 1: hold, 2: buy)
-            actual_prices (np.array): Actual price data
-            position_size (float): Size of the position to trade
+            predictions: Array of predicted actions (0=SELL, 1=HOLD, 2=BUY) or probabilities
+            actual_prices: Array of actual close prices
+            position_size: Position size for each trade
         Returns:
-            tuple: (total_pnl, win_rate, trade_history)
+            tuple: (pnl, win_rate, trades) where:
+                pnl is the total profit and loss
+                win_rate is the ratio of winning trades
+                trades is a list of trade dictionaries
         """
         if len(predictions) != len(actual_prices) - 1:
             logger.error("Predictions and prices length mismatch")
@@ -468,36 +441,85 @@ class DataInterface:
         win_rate = wins / trades if trades > 0 else 0.0
         return pnl, win_rate, trade_history
-    def prepare_training_data(self, refresh=False, refresh_interval=300):
+    def get_future_prices(self, prices, n_candles=3):
         """
-        Prepare training and validation data with optional refresh.
+        Extract future prices for use in retrospective training.
         Args:
-            refresh (bool): Whether to force refresh data
-            refresh_interval (int): Minimum seconds between refreshes
+            prices: Array of close prices
+            n_candles: Number of future candles to predict
         Returns:
-            tuple: (X_train, y_train, X_val, y_val, prices) numpy arrays
+            numpy.ndarray: Array of future prices for each sample
+        """
+        if prices is None or len(prices) <= n_candles:
+            logger.warning(f"Not enough price data for future prediction: {len(prices) if prices is not None else 0} prices")
+            # Return zeros if not enough data
+            return np.zeros((len(prices) if prices is not None else 0, 1))
+        # For each price point i, get the price at i+n_candles
+        future_prices = np.zeros((len(prices), 1))
+        for i in range(len(prices) - n_candles):
+            future_prices[i, 0] = prices[i + n_candles]
+        # For the last n_candles positions, we don't have future data
+        # We'll use the last known price as a placeholder
+        for i in range(len(prices) - n_candles, len(prices)):
+            future_prices[i, 0] = prices[-1]
+        return future_prices
+    def prepare_training_data(self, refresh=False, refresh_interval=300):
+        """
+        Prepare data for training, including splitting into train/validation sets.
+        Args:
+            refresh (bool): Whether to refresh the data cache
+            refresh_interval (int): Interval in seconds to refresh data
+        Returns:
+            tuple: (X_train, y_train, X_val, y_val, train_prices, val_prices)
         """
         current_time = datetime.now()
-        if refresh or (current_time - getattr(self, 'last_refresh', datetime.min)).total_seconds() > refresh_interval:
-            logger.info("Refreshing training data...")
-            for tf in self.timeframes:
-                self.get_historical_data(timeframe=tf, n_candles=1000, use_cache=False)
-            self.last_refresh = current_time
-        # Get all data
+        # Check if we should refresh the data
+        if refresh or not hasattr(self, 'last_refresh_time') or \
+           (current_time - self.last_refresh_time).total_seconds() > refresh_interval:
+            logger.info("Refreshing training data...")
+            self.last_refresh_time = current_time
+        else:
+            # Use cached data
+            if hasattr(self, 'cached_train_data'):
+                return self.cached_train_data
+        # Prepare input data
         X, y, _ = self.prepare_nn_input()
         if X is None:
-            return None, None, None, None, None
+            return None, None, None, None, None, None
         # Get price data for PnL calculation
-        prices = self.dataframes[self.timeframes[0]]['close'].values
+        raw_prices = []
+        for tf in self.timeframes:
+            if tf in self.dataframes and self.dataframes[tf] is not None:
+                # Get the close prices for the same period as X
+                prices = self.dataframes[tf]['close'].values[-len(X):]
+                if len(prices) == len(X):
+                    raw_prices = prices
+                    break
+        if len(raw_prices) != len(X):
+            raw_prices = np.zeros(len(X))  # Fallback if no prices available
-        # Split into train/validation (80/20)
+        # Split data into training and validation sets (80/20)
         split_idx = int(len(X) * 0.8)
-        return (X[:split_idx], y[:split_idx], X[split_idx:], y[split_idx:],
-                prices[:split_idx], prices[split_idx:])
+        X_train, X_val = X[:split_idx], X[split_idx:]
+        y_train, y_val = y[:split_idx], y[split_idx:]
+        train_prices, val_prices = raw_prices[:split_idx], raw_prices[split_idx:]
+        # Cache the data
+        self.cached_train_data = (X_train, y_train, X_val, y_val, train_prices, val_prices)
+        return X_train, y_train, X_val, y_val, train_prices, val_prices
     def prepare_realtime_input(self, timeframe='1h', n_candles=30, window_size=20):
         """