#!/usr/bin/env python3
"""
CNN Model - PyTorch Implementation (Optimized for Short-Term High-Leverage Trading)

This module implements an enhanced CNN model using PyTorch for time series analysis,
with a focus on detecting short-term high-leverage trading opportunities. Key
improvements include attention mechanisms, rapid pattern detection, and optimized
decision thresholds for trading signals.
"""

import os
import logging
import math
from datetime import datetime

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Configure logging
logger = logging.getLogger(__name__)


class AttentionLayer(nn.Module):
    """Self-attention layer for time series data"""

    def __init__(self, input_dim):
        super(AttentionLayer, self).__init__()
        self.query = nn.Linear(input_dim, input_dim)
        self.key = nn.Linear(input_dim, input_dim)
        self.value = nn.Linear(input_dim, input_dim)
        self.scale = math.sqrt(input_dim)

    def forward(self, x):
        # x shape: [batch, channels, seq_len]
        batch, channels, seq_len = x.size()

        # Reshape for attention computation
        x_reshaped = x.transpose(1, 2)  # [batch, seq_len, channels]

        # Compute query, key, value projections
        q = self.query(x_reshaped)  # [batch, seq_len, channels]
        k = self.key(x_reshaped)    # [batch, seq_len, channels]
        v = self.value(x_reshaped)  # [batch, seq_len, channels]

        # Scaled dot-product attention scores
        attn_scores = torch.bmm(q, k.transpose(1, 2)) / self.scale  # [batch, seq_len, seq_len]
        attn_weights = F.softmax(attn_scores, dim=2)

        # Apply attention
        out = torch.bmm(attn_weights, v)  # [batch, seq_len, channels]
        out = out.transpose(1, 2)         # [batch, channels, seq_len]

        return out
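
# A minimal shape check for AttentionLayer (illustrative only; the batch size,
# channel count, and sequence length below are arbitrary):
#
#     attn = AttentionLayer(input_dim=64)
#     x = torch.randn(8, 64, 32)           # [batch, channels, seq_len]
#     out = attn(x)
#     assert out.shape == (8, 64, 32)      # self-attention preserves the shape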

class CNNPyTorch(nn.Module):
    """
    CNN model for time series analysis using PyTorch.
    """

    def __init__(self, input_shape, output_size=3):
        """
        Initialize the CNN architecture.

        Args:
            input_shape (tuple): Shape of input data (window_size, features)
            output_size (int): Number of output classes
        """
        super(CNNPyTorch, self).__init__()

        window_size, num_features = input_shape
        self.window_size = window_size

        # Increased dropout for better generalization
        dropout_rate = 0.25

        # Convolutional layers with wider kernels for better pattern detection
        self.conv1 = nn.Sequential(
            nn.Conv1d(num_features, 64, kernel_size=5, padding=2),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(0.1),
            nn.Dropout(dropout_rate)
        )

        self.conv2 = nn.Sequential(
            nn.Conv1d(64, 128, kernel_size=5, padding=2),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(0.1),
            nn.Dropout(dropout_rate)
        )

        # Micro-movement detection with smaller kernels
        self.micro_conv = nn.Sequential(
            nn.Conv1d(num_features, 32, kernel_size=3, padding=1),
            nn.BatchNorm1d(32),
            nn.LeakyReLU(0.1),
            nn.Conv1d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(0.1),
            nn.Dropout(dropout_rate)
        )

        # Attention mechanism for pattern importance weighting
        self.attention = nn.Conv1d(64, 1, kernel_size=1)
        self.softmax = nn.Softmax(dim=2)

        # Define a fixed output size for conv features to avoid dimension mismatch
        fixed_conv_size = 10  # This must match the sizes assumed in forward()

        # Use adaptive pooling to get a fixed size regardless of input length
        self.adaptive_pool = nn.AdaptiveAvgPool1d(fixed_conv_size)

        # Calculate the input size for the fully connected layer.
        # After adaptive pooling, dimensions are [batch_size, channels, fixed_conv_size].
        conv2_flat_size = 128 * fixed_conv_size  # From conv2
        micro_flat_size = 64 * fixed_conv_size   # From micro_conv
        fc_input_size = conv2_flat_size + micro_flat_size

        # Shared fully connected layers
        self.shared_fc = nn.Sequential(
            nn.Linear(fc_input_size, 256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(0.1),
            nn.Dropout(dropout_rate)
        )

        # Action prediction head
        self.action_fc = nn.Sequential(
            nn.Linear(256, 64),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(0.1),
            nn.Dropout(dropout_rate),
            nn.Linear(64, output_size)
        )

        # Price prediction head
        self.price_fc = nn.Sequential(
            nn.Linear(256, 64),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(0.1),
            nn.Dropout(dropout_rate),
            nn.Linear(64, 1)  # Predict price change percentage
        )

        # Confidence thresholds for decision making
        self.buy_threshold = 0.55   # Higher threshold for BUY signals
        self.sell_threshold = 0.55  # Higher threshold for SELL signals
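
    # Worked example of the fully connected input size computed above, with the
    # default fixed_conv_size of 10:
    #
    #     conv2:      [batch, 128, 10] -> flattened 128 * 10 = 1280
    #     micro_conv: [batch,  64, 10] -> flattened  64 * 10 =  640
    #     fc_input_size = 1280 + 640 = 1920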

    def forward(self, x):
        """
        Forward pass through the network with enhanced pattern detection.

        Args:
            x: Input tensor of shape [batch_size, window_size, features]

        Returns:
            Tuple of (action_probs, price_pred)
        """
        # Transpose for conv1d: [batch, features, window]
        x = x.transpose(1, 2)

        # Main convolutional layers
        conv1_out = self.conv1(x)
        conv2_out = self.conv2(conv1_out)  # Use conv1_out as input to conv2

        # Micro-movement pattern detection
        micro_out = self.micro_conv(x)

        # Apply adaptive pooling so both paths have the same size at dimension 2
        micro_out = self.adaptive_pool(micro_out)  # Output: [batch, 64, 10]
        conv2_out = self.adaptive_pool(conv2_out)  # Output: [batch, 128, 10]

        # Attention weights over the conv1 output to detect important patterns.
        # NOTE: these weights are computed but not currently applied to the
        # feature maps; only the conv2 and micro_conv paths feed the heads.
        attention = self.attention(conv1_out)
        attention = self.softmax(attention)

        # Flatten and concatenate features
        conv2_flat = conv2_out.reshape(conv2_out.size(0), -1)  # [batch, 128*10]
        micro_flat = micro_out.reshape(micro_out.size(0), -1)  # [batch, 64*10]
        features = torch.cat([conv2_flat, micro_flat], dim=1)

        # Shared layers
        shared_features = self.shared_fc(features)

        # Action head
        action_logits = self.action_fc(shared_features)
        action_probs = F.softmax(action_logits, dim=1)

        # Price prediction head
        price_pred = self.price_fc(shared_features)

        # Bias the output distribution toward decisive trading actions.
        # Applied only at inference time: mutating the probabilities in-place
        # inside no_grad during training would corrupt the autograd graph and
        # break backpropagation.
        if not self.training:
            with torch.no_grad():
                # Reduce HOLD probabilities aggressively for short-term trading
                action_probs[:, 1] *= 0.4  # HOLD is index 1

                # Identify high-confidence signals so they can be boosted further
                sell_mask = action_probs[:, 0] > self.sell_threshold
                buy_mask = action_probs[:, 2] > self.buy_threshold

                # Extra boost for high-confidence signals
                action_probs[sell_mask, 0] *= 1.8  # High-confidence SELL
                action_probs[buy_mask, 2] *= 1.8   # High-confidence BUY

                # Base boost applied to all SELL/BUY probabilities
                # (high-confidence signals therefore receive both factors)
                action_probs[:, 0] *= 1.4  # Boost SELL probabilities
                action_probs[:, 2] *= 1.4  # Boost BUY probabilities

                # Re-normalize to sum to 1
                action_probs = action_probs / action_probs.sum(dim=1, keepdim=True)

        return action_probs, price_pred
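
# Worked example of the inference-time bias above on a single probability row
# [SELL, HOLD, BUY] = [0.30, 0.40, 0.30] (no entry clears the 0.55 thresholds):
#
#     HOLD * 0.4       -> [0.30, 0.16, 0.30]
#     SELL, BUY * 1.4  -> [0.42, 0.16, 0.42]
#     the row already sums to 1.0, so re-normalization leaves it unchanged
#
# The net effect shifts probability mass from HOLD to the two trading actions.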

class CNNModelPyTorch:
    """
    CNN model wrapper class for time series analysis using PyTorch.

    This class provides methods for building, training, evaluating, and making
    predictions with the CNN model, optimized for short-term trading opportunities.
    """

    def __init__(self, window_size, num_features, output_size=3, timeframes=None):
        """
        Initialize the CNN model.

        Args:
            window_size (int): Size of the input window
            num_features (int): Number of features in the input data
            output_size (int): Size of the output (default: 3 for BUY/HOLD/SELL)
            timeframes (list): List of timeframes used (for logging)
        """
        # Action tracking
        self.action_counts = {
            'BUY': 0,
            'SELL': 0,
            'HOLD': 0
        }

        self.window_size = window_size
        self.num_features = num_features
        self.output_size = output_size
        self.timeframes = timeframes or []

        # Determine device (GPU or CPU)
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {self.device}")

        # Initialize model
        self.model = None
        self.build_model()

        # Initialize training history
        self.history = {
            'loss': [],
            'val_loss': [],
            'accuracy': [],
            'val_accuracy': []
        }

        # Sensitivity parameters for high-leverage trading
        self.confidence_threshold = 0.65      # Minimum confidence for trading actions
        self.max_consecutive_same_action = 3  # Limit consecutive identical actions
        self.last_actions = []                # Track recent actions

    def build_model(self):
        """Build the CNN model architecture"""
        logger.info(f"Building PyTorch CNN model with window_size={self.window_size}, "
                    f"num_features={self.num_features}, output_size={self.output_size}")

        # Ensure the window size is not smaller than the supported minimum
        input_window_size = max(self.window_size, 20)  # Use at least 20 as minimum window size

        self.model = CNNPyTorch(
            input_shape=(input_window_size, self.num_features),
            output_size=self.output_size
        ).to(self.device)

        # Initialize optimizer with a higher learning rate for faster adaptation
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.002)

        # Learning rate scheduler with faster decay
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, mode='max', factor=0.6, patience=6, verbose=True
        )

        # Loss function with higher weights for BUY/SELL. The network's forward()
        # returns probabilities rather than logits, so NLLLoss over their log is
        # used instead of CrossEntropyLoss (which would re-apply log_softmax).
        class_weights = torch.tensor([7.0, 1.0, 7.0]).to(self.device)
        self.criterion = nn.NLLLoss(weight=class_weights)

        logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")
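
    # Worked example of the class weighting above: NLLLoss with mean reduction
    # scales each sample's loss by weight[target] and divides by the sum of the
    # weights used. A batch with one true-SELL and one true-HOLD sample, each
    # with per-sample NLL 0.5, averages (7.0 * 0.5 + 1.0 * 0.5) / (7.0 + 1.0)
    # = 0.5, but the gradient from the SELL sample is 7x that of the HOLD one.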

    def compute_trading_loss(self, action_probs, price_pred, targets, future_prices=None):
        """
        Custom loss function that prioritizes profitable trades.

        Args:
            action_probs: Predicted action probabilities [batch_size, 3]
            price_pred: Predicted price changes [batch_size, 1]
            targets: Target actions [batch_size]
            future_prices: Actual future price changes [batch_size]

        Returns:
            Tuple of (total_loss, action_loss, price_loss)
        """
        batch_size = action_probs.size(0)

        # Base classification loss (NLL over log-probabilities, see build_model)
        action_loss = self.criterion(torch.log(action_probs + 1e-8), targets)

        # Initialize price and profitability losses
        price_loss = torch.tensor(0.0, device=self.device)
        profit_loss = torch.tensor(0.0, device=self.device)
        diversity_loss = torch.tensor(0.0, device=self.device)

        # Get predicted actions
        pred_actions = torch.argmax(action_probs, dim=1)

        # Calculate a signal diversity loss to prevent the model from always
        # predicting the same action. Count each action's share of the batch.
        buy_count = (pred_actions == 2).float().sum() / batch_size
        sell_count = (pred_actions == 0).float().sum() / batch_size
        hold_count = (pred_actions == 1).float().sum() / batch_size

        # Enhanced diversity mechanism. For short-term high-leverage trading we
        # want a fairly balanced distribution with a slight preference for
        # actions over holds. The ideal distribution varies with the training
        # phase: start conservative and gradually shift to more aggressive.
        if hasattr(self, 'training_progress'):
            self.training_progress += 1
        else:
            self.training_progress = 0

        if self.training_progress < 500:
            # Early training phase - more balanced, with a higher HOLD share
            ideal_buy = 0.3
            ideal_sell = 0.3
            ideal_hold = 0.4
        elif self.training_progress < 1500:
            # Mid training phase - balanced trading signals
            ideal_buy = 0.35
            ideal_sell = 0.35
            ideal_hold = 0.3
        else:
            # Late training phase - more aggressive, with tactical HOLDs
            ideal_buy = 0.4
            ideal_sell = 0.4
            ideal_hold = 0.2

        # Diversity loss: a Kullback-Leibler divergence approximation plus an
        # additional penalty for extreme imbalance.
        # NOTE: these terms are built from argmax counts, so they are constant
        # with respect to the network parameters and act as a batch-level
        # penalty scale rather than a direct gradient signal.
        actual_dist = torch.stack([sell_count, hold_count, buy_count])
        ideal_dist = torch.tensor([ideal_sell, ideal_hold, ideal_buy], device=self.device)

        # KL divergence component (approximation)
        eps = 1e-8  # Small constant to avoid division by zero
        kl_div = torch.sum(actual_dist * torch.log((actual_dist + eps) / (ideal_dist + eps)))

        # Strong penalty for extreme predictions (nearly all the same class)
        max_ratio = torch.max(actual_dist)
        if max_ratio > 0.9:    # More than 90% of predictions are the same class
            diversity_loss = kl_div + (max_ratio - 0.9) * 5.0  # Stronger penalty
        elif max_ratio > 0.7:  # More than 70% of predictions are the same class
            diversity_loss = kl_div + (max_ratio - 0.7) * 2.0  # Moderate penalty
        else:
            diversity_loss = kl_div

        # Additional penalty if any class has too few predictions. This is
        # critical for avoiding scenarios where the model never predicts a
        # certain class.
        zero_class_penalty = 0.0
        min_class_ratio = 0.1  # We want at least 10% of each class
        if buy_count < min_class_ratio:
            zero_class_penalty += (min_class_ratio - buy_count) * 3.0
        if sell_count < min_class_ratio:
            zero_class_penalty += (min_class_ratio - sell_count) * 3.0
        if hold_count < min_class_ratio:
            zero_class_penalty += (min_class_ratio - hold_count) * 2.0  # Slightly lower penalty for HOLD

        diversity_loss += zero_class_penalty
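
        # Worked example of the diversity penalty: in the mid phase the ideal
        # distribution is [0.35, 0.30, 0.35]. If a batch comes out 80% SELL,
        # 10% HOLD, 10% BUY:
        #
        #     kl_div ~= 0.8*ln(0.8/0.35) + 0.1*ln(0.1/0.3) + 0.1*ln(0.1/0.35)
        #            ~= 0.661 - 0.110 - 0.125 ~= 0.426
        #     max_ratio = 0.8 > 0.7, so diversity_loss ~= 0.426 + 0.1*2.0 = 0.626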

        # If we have future prices, calculate profitability-based losses
        if future_prices is not None and future_prices.numel() > 0:
            # Price direction loss - penalize wrong direction predictions
            if price_pred is not None:
                # Only use samples where a future price is available
                valid_mask = ~torch.isnan(future_prices) & (future_prices != 0)
                if valid_mask.any():
                    valid_future = future_prices[valid_mask]
                    valid_price_pred = price_pred.view(-1)[valid_mask]

                    # Mean squared error for price prediction
                    price_loss = F.mse_loss(valid_price_pred, valid_future)

                    # Direction loss - penalize wrong direction predictions more heavily
                    pred_direction = torch.sign(valid_price_pred)
                    true_direction = torch.sign(valid_future)
                    direction_loss = ((pred_direction != true_direction) & (true_direction != 0)).float().mean()

                    # Add direction loss to price loss with a higher weight
                    price_loss = price_loss + direction_loss * 2.0

            # Trade profitability loss: penalizes unprofitable trades more than
            # just wrong classifications. Like the diversity loss, it depends on
            # argmax actions and therefore carries no gradient of its own.
            profitable_trades = 0
            unprofitable_trades = 0

            for i in range(batch_size):
                if i < future_prices.size(0) and not torch.isnan(future_prices[i]) and future_prices[i] != 0:
                    price_change = future_prices[i].item()

                    # Expected profit/loss given the predicted action
                    if pred_actions[i] == 0:    # SELL
                        expected_pnl = -price_change  # Negative price change is profit for SELL
                    elif pred_actions[i] == 2:  # BUY
                        expected_pnl = price_change   # Positive price change is profit for BUY
                    else:                       # HOLD
                        expected_pnl = 0              # No profit/loss for HOLD

                    # Enhanced profit/loss penalties with a larger penalty for bad trades
                    if expected_pnl < 0:
                        # Exponential penalty for larger losses
                        severity = abs(expected_pnl) ** 1.5  # Higher exponent for short-term trading
                        profit_loss = profit_loss + torch.tensor(severity, device=self.device) * 2.5
                        unprofitable_trades += 1
                    elif expected_pnl > 0:
                        # Reward profitable trades (negative loss contribution),
                        # with a higher reward for larger profits
                        reward = expected_pnl * 0.9
                        profit_loss = profit_loss - torch.tensor(reward, device=self.device)
                        profitable_trades += 1

            # Adjust the profit loss according to the batch win rate
            if profitable_trades + unprofitable_trades > 0:
                win_rate = profitable_trades / (profitable_trades + unprofitable_trades)
                if win_rate < 0.5:
                    # Extra penalty when the win rate is below 50%
                    profit_loss = profit_loss * (1.0 + (0.5 - win_rate) * 2.5)
                elif win_rate > 0.6:
                    # Small reward when the win rate is high
                    profit_loss = profit_loss * (1.0 - (win_rate - 0.6) * 0.5)

        # Combine all loss components with dynamic weighting based on training
        # progress: early training focuses on classification accuracy, late
        # training emphasizes profitability and diversity.
        if self.training_progress < 500:
            action_weight = 1.0
            price_weight = 0.2
            profit_weight = 0.5
            diversity_weight = 0.3
        elif self.training_progress < 1500:
            action_weight = 0.8
            price_weight = 0.3
            profit_weight = 0.8
            diversity_weight = 0.5
        else:
            action_weight = 0.6
            price_weight = 0.3
            profit_weight = 1.0
            diversity_weight = 0.7

        total_loss = (action_weight * action_loss +
                      price_weight * price_loss +
                      profit_weight * profit_loss +
                      diversity_weight * diversity_loss)

        return total_loss, action_loss, price_loss
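
    # Because the profit term above keys off argmax, it cannot shape gradients.
    # A differentiable alternative (an illustrative sketch, not used by this
    # module) is to weight the PnL of every action by its predicted probability:
    #
    #     # pnl_per_action: [batch, 3] = [-price_change, 0, +price_change]
    #     pnl_per_action = torch.stack(
    #         [-future_prices, torch.zeros_like(future_prices), future_prices], dim=1)
    #     expected_pnl = (action_probs * pnl_per_action).sum(dim=1)
    #     profit_loss = -expected_pnl.mean()  # maximize expected PnL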

    def train_epoch(self, X_train, y_train, future_prices, batch_size):
        """Train the model for one epoch with focus on short-term pattern recognition"""
        self.model.train()
        total_action_loss = 0
        total_price_loss = 0
        total_correct = 0
        total_samples = 0

        # Per-epoch signal statistics (accumulated across batches)
        buy_count = sell_count = hold_count = 0
        buy_correct = sell_correct = 0

        # Convert inputs to tensors
        X_train_tensor = torch.FloatTensor(X_train).to(self.device)
        y_train_tensor = torch.LongTensor(y_train).to(self.device)
        future_prices_tensor = torch.FloatTensor(future_prices).to(self.device) if future_prices is not None else None

        # Create dataset and dataloader
        if future_prices_tensor is not None:
            dataset = TensorDataset(X_train_tensor, y_train_tensor, future_prices_tensor)
        else:
            dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

        # Training loop
        for batch_data in train_loader:
            self.optimizer.zero_grad()

            # Extract batch data
            if len(batch_data) == 3:
                batch_X, batch_y, batch_future_prices = batch_data
            else:
                batch_X, batch_y = batch_data
                batch_future_prices = None

            # Forward pass
            action_probs, price_pred = self.model(batch_X)

            # Calculate loss using the custom trading loss function
            total_loss, action_loss, price_loss = self.compute_trading_loss(
                action_probs, price_pred, batch_y, batch_future_prices
            )

            # Backward pass and optimization
            total_loss.backward()

            # Apply gradient clipping to prevent exploding gradients
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)

            self.optimizer.step()

            # Update metrics
            total_action_loss += action_loss.item()
            total_price_loss += price_loss.item() if hasattr(price_loss, 'item') else 0

            predictions = torch.argmax(action_probs, dim=1)
            total_correct += (predictions == batch_y).sum().item()
            total_samples += batch_y.size(0)

            # Track trading signals for logging (accumulated over the whole
            # epoch, not just the final batch)
            buy_count += (predictions == 2).sum().item()
            sell_count += (predictions == 0).sum().item()
            hold_count += (predictions == 1).sum().item()
            buy_correct += ((predictions == 2) & (batch_y == 2)).sum().item()
            sell_correct += ((predictions == 0) & (batch_y == 0)).sum().item()

        # Calculate average losses and accuracy
        avg_action_loss = total_action_loss / len(train_loader)
        avg_price_loss = total_price_loss / len(train_loader)
        accuracy = total_correct / total_samples

        # Log trading signals
        logger.info(f"Trading signals: BUY={buy_count}, SELL={sell_count}, HOLD={hold_count}")
        logger.info(f"Signal precision: BUY={buy_correct/max(1, buy_count):.4f}, SELL={sell_correct/max(1, sell_count):.4f}")

        # Update learning rate
        self.scheduler.step(accuracy)

        return avg_action_loss, avg_price_loss, accuracy

    def evaluate(self, X_val, y_val, future_prices=None):
        """Evaluate the model with focus on short-term trading performance metrics"""
        self.model.eval()

        # Additional metrics for trading performance
        trade_signals = {'BUY': 0, 'SELL': 0, 'HOLD': 0}
        correct_signals = {'BUY': 0, 'SELL': 0, 'HOLD': 0}

        # Convert inputs to tensors
        X_val_tensor = torch.FloatTensor(X_val).to(self.device)
        y_val_tensor = torch.LongTensor(y_val).to(self.device)
        future_prices_tensor = torch.FloatTensor(future_prices).to(self.device) if future_prices is not None else None

        with torch.no_grad():
            # Forward pass
            action_probs, price_pred = self.model(X_val_tensor)

            # Calculate loss using the custom trading loss function
            total_loss, action_loss, price_loss = self.compute_trading_loss(
                action_probs, price_pred, y_val_tensor, future_prices_tensor
            )

            # Calculate predictions
            predictions = torch.argmax(action_probs, dim=1)

            # Count prediction types and correct predictions
            for i in range(predictions.shape[0]):
                pred = predictions[i].item()
                if pred == 0:
                    trade_signals['SELL'] += 1
                    if y_val_tensor[i].item() == pred:
                        correct_signals['SELL'] += 1
                elif pred == 1:
                    trade_signals['HOLD'] += 1
                    if y_val_tensor[i].item() == pred:
                        correct_signals['HOLD'] += 1
                elif pred == 2:
                    trade_signals['BUY'] += 1
                    if y_val_tensor[i].item() == pred:
                        correct_signals['BUY'] += 1

            # Update metrics
            total_action_loss = action_loss.item()
            total_price_loss = price_loss.item() if hasattr(price_loss, 'item') else 0
            total_correct = (predictions == y_val_tensor).sum().item()
            total_samples = y_val_tensor.size(0)

        # Calculate accuracy
        accuracy = total_correct / total_samples if total_samples > 0 else 0

        # Calculate signal precision (crucial for short-term trading)
        buy_precision = correct_signals['BUY'] / trade_signals['BUY'] if trade_signals['BUY'] > 0 else 0
        sell_precision = correct_signals['SELL'] / trade_signals['SELL'] if trade_signals['SELL'] > 0 else 0

        # Log trading-specific metrics
        logger.info(f"Trading signals: BUY={trade_signals['BUY']}, SELL={trade_signals['SELL']}, HOLD={trade_signals['HOLD']}")
        logger.info(f"Signal precision: BUY={buy_precision:.4f}, SELL={sell_precision:.4f}")

        # Return action loss, price loss, and accuracy
        return total_action_loss, total_price_loss, accuracy
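
    # Signal precision above is per-action: for example, 10 predicted BUYs of
    # which 6 matched a true BUY label give BUY precision 6 / 10 = 0.6. For a
    # high-leverage strategy, precision on BUY/SELL matters far more than raw
    # accuracy, since HOLD predictions never open a position.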

    def predict(self, X):
        """Make predictions optimized for short-term high-leverage trading signals"""
        self.model.eval()

        # Convert to tensor if not already
        if not isinstance(X, torch.Tensor):
            X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
        else:
            X_tensor = X.to(self.device)

        with torch.no_grad():
            action_probs, price_pred = self.model(X_tensor)

        # Post-processing optimized for short-term trading signals
        action_probs_np = action_probs.cpu().numpy()

        # Apply more aggressive HOLD reduction for short-term trading
        action_probs_np[:, 1] *= 0.5

        # Boost BUY/SELL probabilities
        action_probs_np[:, 0] *= 1.3  # Boost SELL probabilities
        action_probs_np[:, 2] *= 1.3  # Boost BUY probabilities

        # Filter signals based on previous actions to avoid oscillation
        if len(self.last_actions) >= self.max_consecutive_same_action:
            recent = self.last_actions[-self.max_consecutive_same_action:]
            if all(a == 0 for a in recent):
                # Too many consecutive SELLs - reduce the SELL probability
                action_probs_np[:, 0] *= 0.7
            elif all(a == 2 for a in recent):
                # Too many consecutive BUYs - reduce the BUY probability
                action_probs_np[:, 2] *= 0.7

        # Apply the confidence threshold to reduce noise
        max_probs = np.max(action_probs_np, axis=1)
        for i in range(len(action_probs_np)):
            if max_probs[i] < self.confidence_threshold:
                # If confidence is too low, force HOLD
                action_probs_np[i] = np.array([0.1, 0.8, 0.1])

        # Re-normalize
        action_probs_np = action_probs_np / action_probs_np.sum(axis=1, keepdims=True)

        # Store the predicted action for the most recent input
        if action_probs_np.shape[0] > 0:
            latest_action = np.argmax(action_probs_np[-1])
            self.last_actions.append(int(latest_action))
            # Keep only the most recent actions
            self.last_actions = self.last_actions[-10:]  # Store last 10 actions

        # Update action counts for stats
        actions = np.argmax(action_probs_np, axis=1)
        unique, counts = np.unique(actions, return_counts=True)
        action_dict = dict(zip(unique, counts))
        if 0 in action_dict:
            self.action_counts['SELL'] += action_dict[0]
        if 1 in action_dict:
            self.action_counts['HOLD'] += action_dict[1]
        if 2 in action_dict:
            self.action_counts['BUY'] += action_dict[2]

        # Current close prices from the input (assumes the close price is
        # feature index 3, i.e. OHLC column order)
        current_prices = X_tensor[:, -1, 3].cpu().numpy() if X_tensor.shape[2] > 3 else np.zeros(X_tensor.shape[0])

        # Price direction implied by the probabilities: BUY minus SELL.
        # NOTE: the network's price head output (price_pred) is not used here;
        # the returned price is a heuristic scaled by signal strength.
        price_directions = action_probs_np[:, 2] - action_probs_np[:, 0]
        price_preds = current_prices * (1 + price_directions * 0.002)

        return action_probs_np, price_preds.reshape(-1, 1)
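
    # Worked example of the noise filters in predict(): with
    # confidence_threshold = 0.65, a post-boost row of [0.50, 0.10, 0.40] has a
    # maximum of 0.50 < 0.65 and is replaced by the forced-HOLD row
    # [0.1, 0.8, 0.1]. Separately, if the last 3 stored actions are all BUY,
    # every BUY probability in the next batch is damped by 0.7 before
    # re-normalization, which breaks runs of identical signals.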

    def predict_next_candles(self, X, n_candles=3):
        """
        Predict the next n candles with focus on short-term signals.

        Args:
            X: Input data of shape [batch_size, window_size, features]
            n_candles: Number of future candles to predict

        Returns:
            Dictionary of predictions for each timeframe
        """
        self.model.eval()
        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)

        with torch.no_grad():
            # Get initial predictions
            action_probs, price_pred = self.model(X_tensor)
            action_probs_np = action_probs.cpu().numpy()

            # Apply more aggressive processing for short-term signals
            action_probs_np[:, 1] *= 0.5  # Reduce HOLD
            action_probs_np[:, 0] *= 1.3  # Boost SELL
            action_probs_np[:, 2] *= 1.3  # Boost BUY

            # Re-normalize
            action_probs_np = action_probs_np / action_probs_np.sum(axis=1, keepdims=True)

            # For short-term predictions, decay the signal over time: the first
            # candle carries the full signal, later candles fade toward HOLD
            # (short-term signals shouldn't persist too long). Note that the
            # same decayed sequence is currently produced for every timeframe.
            predictions = {}
            for i, tf in enumerate(self.timeframes):
                tf_preds = np.zeros((n_candles, action_probs_np.shape[0], 3))
                for j in range(n_candles):
                    decay_factor = max(0.1, 1.0 - j * 0.3)

                    # Move probabilities toward HOLD with decay
                    decayed_probs = action_probs_np.copy()
                    decayed_probs[:, 0] = action_probs_np[:, 0] * decay_factor  # Decay SELL
                    decayed_probs[:, 2] = action_probs_np[:, 2] * decay_factor  # Decay BUY

                    # Increase the HOLD probability to compensate
                    hold_increase = (1.0 - decay_factor) * (action_probs_np[:, 0] + action_probs_np[:, 2])
                    decayed_probs[:, 1] = action_probs_np[:, 1] + hold_increase

                    # Re-normalize
                    decayed_probs = decayed_probs / decayed_probs.sum(axis=1, keepdims=True)

                    # Store in the predictions array
                    tf_preds[j] = decayed_probs

                # Store in the output dictionary
                predictions[tf] = tf_preds

        return predictions
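
    # Worked example of the signal decay in predict_next_candles(): for a row
    # [SELL, HOLD, BUY] = [0.42, 0.16, 0.42], candle j=1 has
    # decay_factor = 1.0 - 0.3 = 0.7, so
    #
    #     SELL, BUY: 0.42 * 0.7 = 0.294
    #     HOLD:      0.16 + 0.3 * (0.42 + 0.42) = 0.412
    #
    # The row [0.294, 0.412, 0.294] already sums to 1, and by j=2 (decay 0.4)
    # HOLD dominates, encoding that the signal has gone stale.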

    def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
        """
        Train the CNN model.

        Args:
            X_train: Training input data
            y_train: Training target data
            X_val: Validation input data
            y_val: Validation target data
            batch_size: Batch size for training
            epochs: Number of training epochs

        Returns:
            Training history
        """
        logger.info(f"Training PyTorch CNN model with {len(X_train)} samples, "
                    f"batch_size={batch_size}, epochs={epochs}")

        # Convert numpy arrays to PyTorch tensors
        X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)

        # Handle different output sizes for y_train
        if self.output_size == 1:
            y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(self.device)
        else:
            y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)

        # Create DataLoader for training data
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Create DataLoader for validation data if provided
        if X_val is not None and y_val is not None:
            X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
            if self.output_size == 1:
                y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
            else:
                y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)
            val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
            val_loader = DataLoader(val_dataset, batch_size=batch_size)
        else:
            val_loader = None

        # Training loop
        for epoch in range(epochs):
            # Training phase
            self.model.train()
            running_loss = 0.0
            correct = 0
            total = 0

            for inputs, targets in train_loader:
                # Zero the parameter gradients
                self.optimizer.zero_grad()

                # Forward pass
                action_probs, price_pred = self.model(inputs)

                # Calculate loss. With a single output the price head is treated
                # as a regressor against float targets; otherwise use the
                # weighted NLL over the log of the action probabilities.
                if self.output_size == 1:
                    loss = F.mse_loss(price_pred, targets.unsqueeze(1))
                else:
                    loss = self.criterion(torch.log(action_probs + 1e-8), targets)

                # Backward pass and optimize
                loss.backward()
                self.optimizer.step()

                # Statistics
                running_loss += loss.item()
                _, predicted = torch.max(action_probs, 1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

            epoch_loss = running_loss / len(train_loader)
            epoch_acc = correct / total if total > 0 else 0

            # Validation phase
            if val_loader is not None:
                # evaluate() returns (action_loss, price_loss, accuracy)
                val_loss, _, val_acc = self.evaluate(X_val, y_val)
                logger.info(f"Epoch {epoch+1}/{epochs} - "
                            f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
                            f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")

                # Update history
                self.history['loss'].append(epoch_loss)
                self.history['accuracy'].append(epoch_acc)
                self.history['val_loss'].append(val_loss)
                self.history['val_accuracy'].append(val_acc)
            else:
                logger.info(f"Epoch {epoch+1}/{epochs} - "
                            f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f}")

                # Update history without validation
                self.history['loss'].append(epoch_loss)
                self.history['accuracy'].append(epoch_acc)

        logger.info("Training completed")
        return self.history

    def evaluate_metrics(self, X_test, y_test):
        """
        Calculate and return comprehensive evaluation metrics as a dict.
        """
        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(self.device)

        self.model.eval()
        with torch.no_grad():
            # The model returns (action_probs, price_pred); only the action
            # probabilities are needed for classification metrics
            action_probs, _ = self.model(X_test_tensor)

        if self.output_size > 1:
            _, y_pred_class = torch.max(action_probs, 1)
            y_pred_class = y_pred_class.cpu().numpy()
        else:
            y_pred_class = (action_probs.cpu().numpy() > 0.5).astype(int).flatten()

        metrics = {
            'accuracy': accuracy_score(y_test, y_pred_class),
            'precision': precision_score(y_test, y_pred_class, average='weighted', zero_division=0),
            'recall': recall_score(y_test, y_pred_class, average='weighted', zero_division=0),
            'f1_score': f1_score(y_test, y_pred_class, average='weighted', zero_division=0)
        }
        return metrics

    def save(self, filepath):
        """
        Save the model to a file along with the trading configuration.

        Args:
            filepath: Path to save the model (".pt" is appended)
        """
        # Create the directory if it doesn't exist
        if os.path.dirname(filepath):
            os.makedirs(os.path.dirname(filepath), exist_ok=True)

        # Save the model state with additional trading parameters
        model_state = {
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'history': self.history,
            'window_size': self.window_size,
            'num_features': self.num_features,
            'output_size': self.output_size,
            'timeframes': self.timeframes,
            # Trading configuration
            'confidence_threshold': self.confidence_threshold,
            'max_consecutive_same_action': self.max_consecutive_same_action,
            'action_counts': self.action_counts,
            'last_actions': self.last_actions,
            # Model version information
            'model_version': 'short_term_optimized_v1.0',
            'timestamp': datetime.now().strftime('%Y%m%d_%H%M%S')
        }

        torch.save(model_state, f"{filepath}.pt")
        logger.info(f"Model saved to {filepath}.pt with short-term trading optimizations")

        # Also save a timestamped backup copy on every call
        os.makedirs(f"{filepath}_backup", exist_ok=True)
        backup_path = os.path.join(f"{filepath}_backup", f"model_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pt")
        torch.save(model_state, backup_path)
        logger.info(f"Backup saved to {backup_path}")

    def load(self, filepath):
        """Load model weights and configuration from file"""
        if not os.path.exists(f"{filepath}.pt"):
            logger.error(f"Model file {filepath}.pt not found")
            return False

        try:
            # Load the model state
            model_state = torch.load(f"{filepath}.pt", map_location=self.device)

            # Update model parameters
            self.window_size = model_state['window_size']
            self.num_features = model_state['num_features']
            self.output_size = model_state['output_size']
            self.timeframes = model_state.get('timeframes', ["1m"])

            # Load the network weights (into the wrapped model, not the wrapper)
            self.model.load_state_dict(model_state['model_state_dict'])

            # Load optimizer state if available
            if 'optimizer_state_dict' in model_state:
                self.optimizer.load_state_dict(model_state['optimizer_state_dict'])

            # Load trading configuration if available
            if 'confidence_threshold' in model_state:
                self.confidence_threshold = model_state['confidence_threshold']
            if 'max_consecutive_same_action' in model_state:
                self.max_consecutive_same_action = model_state['max_consecutive_same_action']

            # Log model version information if available
            if 'model_version' in model_state:
                logger.info(f"Model version: {model_state['model_version']}")
            if 'timestamp' in model_state:
                logger.info(f"Model timestamp: {model_state['timestamp']}")

            return True
        except Exception as e:
            logger.error(f"Error loading model: {str(e)}")
            return False
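
    # Save/load round trip (illustrative paths): save("NN/models/saved/cnn")
    # writes "NN/models/saved/cnn.pt" plus a timestamped copy under
    # "NN/models/saved/cnn_backup/", and load("NN/models/saved/cnn") restores
    # the weights, optimizer state, and trading thresholds from the same file.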

    def plot_training_history(self, metrics_file="NN/models/saved/training_metrics.json"):
        """
        Plot training history from saved metrics.

        Args:
            metrics_file: Path to the saved metrics JSON file
        """
        try:
            import json
            import matplotlib.dates as mdates

            # Load metrics
            with open(metrics_file, 'r') as f:
                metrics = json.load(f)

            # Create the plots directory
            plots_dir = os.path.join(os.path.dirname(metrics_file), 'plots')
            os.makedirs(plots_dir, exist_ok=True)

            # Convert timestamps to datetime objects
            timestamps = [datetime.fromisoformat(ts) for ts in metrics['timestamps']]

            # 1. Plot loss and accuracy
            fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), sharex=True)

            # Loss plot
            ax1.plot(timestamps, metrics['train_loss'], 'b-', label='Training Loss')
            ax1.plot(timestamps, metrics['val_loss'], 'r-', label='Validation Loss')
            ax1.set_title('Model Loss Over Time')
            ax1.set_ylabel('Loss')
            ax1.legend()
            ax1.grid(True)

            # Accuracy plot
            ax2.plot(timestamps, metrics['train_acc'], 'g-', label='Training Accuracy')
            ax2.plot(timestamps, metrics['val_acc'], 'm-', label='Validation Accuracy')
            ax2.set_title('Model Accuracy Over Time')
            ax2.set_ylabel('Accuracy')
            ax2.legend()
            ax2.grid(True)

            # Format x-axis
            ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))
            plt.xticks(rotation=45)

            # Save the plot
            plt.tight_layout()
            plt.savefig(os.path.join(plots_dir, 'loss_accuracy.png'))
            plt.close()

            # 2. Plot PnL and win rate
            fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), sharex=True)

            # PnL plot
            ax1.plot(timestamps, metrics['train_pnl'], 'g-', label='Training PnL')
            ax1.plot(timestamps, metrics['val_pnl'], 'r-', label='Validation PnL')
            ax1.set_title('PnL Over Time')
            ax1.set_ylabel('PnL')
            ax1.legend()
            ax1.grid(True)

            # Win rate plot
            ax2.plot(timestamps, metrics['train_win_rate'], 'b-', label='Training Win Rate')
            ax2.plot(timestamps, metrics['val_win_rate'], 'm-', label='Validation Win Rate')
            ax2.set_title('Win Rate Over Time')
            ax2.set_ylabel('Win Rate')
            ax2.legend()
            ax2.grid(True)

            # Format x-axis
            ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))
            plt.xticks(rotation=45)

            # Save the plot
            plt.tight_layout()
            plt.savefig(os.path.join(plots_dir, 'pnl_winrate.png'))
            plt.close()

            logger.info(f"Performance visualizations saved to {plots_dir}")
            return True
        except Exception as e:
            logger.error(f"Error generating plots: {str(e)}")
            import traceback
            logger.error(traceback.format_exc())
            return False

    def extract_hidden_features(self, X):
        """
        Extract hidden features from the model - the output of the shared fully
        connected layer, before the action/price heads.

        Args:
            X: Input data

        Returns:
            Hidden features (output of the shared dense layer)
        """
        # Convert to a PyTorch tensor
        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)

        # Replicate the forward pass up to (and including) the shared layers
        self.model.eval()
        with torch.no_grad():
            x_t = X_tensor.transpose(1, 2)
            conv1_out = self.model.conv1(x_t)
            conv2_out = self.model.adaptive_pool(self.model.conv2(conv1_out))
            micro_out = self.model.adaptive_pool(self.model.micro_conv(x_t))

            conv2_flat = conv2_out.reshape(conv2_out.size(0), -1)
            micro_flat = micro_out.reshape(micro_out.size(0), -1)
            features = torch.cat([conv2_flat, micro_flat], dim=1)

            hidden = self.model.shared_fc(features)

        return hidden.cpu().numpy()
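

# A minimal smoke test on synthetic data (illustrative only: real usage feeds
# windowed OHLCV features, with the close price at column index 3 as assumed
# by predict()).
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Toy dimensions: 4 samples, a 20-step window, 5 features per step
    demo_model = CNNModelPyTorch(window_size=20, num_features=5, timeframes=["1m"])
    demo_X = np.random.randn(4, 20, 5).astype(np.float32)

    demo_probs, demo_prices = demo_model.predict(demo_X)
    print("action probabilities:", demo_probs.shape)  # (4, 3)
    print("price predictions:  ", demo_prices.shape)  # (4, 1)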