#!/usr/bin/env python3
"""
CNN Model - PyTorch Implementation (Optimized for Short-Term High-Leverage Trading)

This module implements an enhanced CNN model using PyTorch for time series analysis,
with a focus on detecting short-term high-leverage trading opportunities. Key
improvements include attention mechanisms, rapid pattern detection, and optimized
decision thresholds for trading signals.
"""

import os
import logging
import math
from datetime import datetime

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Configure logging
logger = logging.getLogger(__name__)


class AttentionLayer(nn.Module):
    """Self-attention layer for time series data."""

    def __init__(self, input_dim):
        super(AttentionLayer, self).__init__()
        self.query = nn.Linear(input_dim, input_dim)
        self.key = nn.Linear(input_dim, input_dim)
        self.value = nn.Linear(input_dim, input_dim)
        self.scale = math.sqrt(input_dim)

    def forward(self, x):
        # x shape: [batch, channels, seq_len]
        batch, channels, seq_len = x.size()

        # Reshape for attention computation
        x_reshaped = x.transpose(1, 2)  # [batch, seq_len, channels]

        # Compute query, key, value projections
        q = self.query(x_reshaped)  # [batch, seq_len, channels]
        k = self.key(x_reshaped)    # [batch, seq_len, channels]
        v = self.value(x_reshaped)  # [batch, seq_len, channels]

        # Scaled dot-product attention scores
        attn_scores = torch.bmm(q, k.transpose(1, 2)) / self.scale  # [batch, seq_len, seq_len]
        attn_weights = F.softmax(attn_scores, dim=2)

        # Apply attention weights
        out = torch.bmm(attn_weights, v)  # [batch, seq_len, channels]
        out = out.transpose(1, 2)  # [batch, channels, seq_len]

        return out
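
# Editor's note: a minimal shape-check sketch for AttentionLayer, assuming the
# [batch, channels, seq_len] layout documented above. The function name and the
# dummy sizes are illustrative, not part of the original module. Note that
# AttentionLayer is defined here but not wired into CNNPyTorch below.
def _attention_layer_demo():
    """Run AttentionLayer on random data and confirm the shape is preserved."""
    layer = AttentionLayer(input_dim=16)
    x = torch.randn(4, 16, 20)  # [batch=4, channels=16, seq_len=20]
    out = layer(x)
    assert out.shape == x.shape  # self-attention preserves the input shape
    return out.shape
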
class CNNPyTorch(nn.Module):
    """CNN model for time series analysis using PyTorch."""

    def __init__(self, input_shape, output_size=3):
        """
        Initialize the CNN architecture.

        Args:
            input_shape (tuple): Shape of input data (window_size, features)
            output_size (int): Number of output classes
        """
        super(CNNPyTorch, self).__init__()

        # Set device
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        window_size, num_features = input_shape
        self.window_size = window_size

        # Convolutional feature extractor
        self.conv1 = nn.Sequential(
            nn.Conv1d(num_features, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.2)
        )

        self.conv2 = nn.Sequential(
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2)
        )

        # Third conv layer for deeper pattern extraction
        self.conv3 = nn.Sequential(
            nn.Conv1d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2)
        )

        # Global average pooling to handle variable-length sequences
        self.global_pool = nn.AdaptiveAvgPool1d(1)

        # Fully connected classifier head
        self.fc = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, output_size)
        )

    def forward(self, x):
        """
        Forward pass through the network.

        Args:
            x: Input tensor of shape [batch_size, window_size, features]

        Returns:
            action_probs: Action probabilities of shape [batch_size, output_size]
            price_pred: Always None; the model currently predicts actions only
        """
        # Transpose for Conv1d: [batch, features, window]
        x = x.transpose(1, 2)

        # Convolutional layers
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)

        # Global pooling
        x = self.global_pool(x)
        x = x.squeeze(-1)  # Shape becomes [batch, 128]

        # Fully connected layers
        action_logits = self.fc(x)

        # Apply class weights to reduce HOLD bias. This helps counter the class
        # imbalance that typically favors HOLD in trading datasets.
        class_weights = torch.tensor([2.5, 0.4, 2.5], device=x.device)  # Higher weights for SELL/BUY
        weighted_logits = action_logits * class_weights

        # Add random perturbation during training to encourage exploration
        if self.training:
            noise = torch.randn_like(weighted_logits) * 0.3
            weighted_logits = weighted_logits + noise

        # Softmax to get probabilities
        action_probs = F.softmax(weighted_logits, dim=1)

        return action_probs, None  # price_pred is None; the model focuses on actions
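
# Editor's note: a minimal sketch of the expected tensor flow through
# CNNPyTorch, assuming the default 3-timeframe, 5-features-per-timeframe
# layout used by the wrapper below. Names and sizes are illustrative only.
def _cnn_forward_demo():
    """Run a dummy batch through CNNPyTorch and check the output shapes."""
    model = CNNPyTorch(input_shape=(20, 15), output_size=3)  # window=20, features=3*5
    model.eval()  # disable the exploration noise added during training
    x = torch.randn(8, 20, 15)  # [batch, window_size, features]
    probs, price_pred = model(x)
    assert probs.shape == (8, 3)  # one SELL/HOLD/BUY distribution per sample
    assert price_pred is None     # the model currently predicts actions only
    return probs
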
class CNNModelPyTorch:
    """
    High-level wrapper for the CNN model with training and evaluation functionality.
    """

    def __init__(self, window_size=20, timeframes=None, output_size=3, num_pairs=3):
        """
        Initialize the model.

        Args:
            window_size (int): Size of the input window
            timeframes (list): List of timeframes to use
            output_size (int): Number of output classes
            num_pairs (int): Number of trading pairs
        """
        self.window_size = window_size
        self.timeframes = timeframes or ["1m", "5m", "15m"]
        self.output_size = output_size
        self.num_pairs = num_pairs

        # Set device
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        logger.info(f"Using device: {self.device}")

        # Initialize the underlying CNN model
        input_shape = (window_size, len(self.timeframes) * 5)  # 5 features per timeframe
        self.model = CNNPyTorch(input_shape, output_size).to(self.device)

        # Initialize optimizer with a low learning rate for stability
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.0001, weight_decay=0.01)

        # Initialize loss function
        self.action_criterion = nn.CrossEntropyLoss()

        # Training history
        self.history = {
            'train_loss': [],
            'val_loss': [],
            'train_acc': [],
            'val_acc': []
        }

        # For compatibility with older code
        self.train_losses = []
        self.val_losses = []
        self.train_accuracies = []
        self.val_accuracies = []

        # Initialize action counts
        self.action_counts = {
            'BUY': [0, 0],   # [total, correct]
            'SELL': [0, 0],  # [total, correct]
            'HOLD': [0, 0]   # [total, correct]
        }

        logger.info(f"Building PyTorch CNN model with window_size={window_size}, "
                    f"output_size={output_size}")

        # Learning rate scheduler
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, mode='min', factor=0.5, patience=5, verbose=True
        )

        # Sensitivity parameters for high-leverage trading
        self.confidence_threshold = 0.65
        self.max_consecutive_same_action = 3
        self.last_actions = [[] for _ in range(num_pairs)]  # Track recent actions per pair

    def train_epoch(self, X_train, y_train, future_prices, batch_size):
        """Train the model for one epoch with focus on short-term pattern recognition."""
        # Refresh action statistics on the training data before the epoch.
        # Note: this switches the model to eval mode internally; training mode
        # is restored immediately below.
        self.predict_extrema(X_train)

        self.model.train()
        total_loss = 0
        total_correct = 0
        total_samples = 0

        # Convert inputs to tensors and create a DataLoader
        X_train_tensor = torch.FloatTensor(X_train).to(self.device)
        y_train_tensor = torch.LongTensor(y_train).to(self.device)

        dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

        # Training loop
        for batch_X, batch_y in train_loader:
            self.optimizer.zero_grad()

            # Forward pass
            action_probs, _ = self.model(batch_X)

            # Calculate loss. Note: the model outputs softmax probabilities and
            # CrossEntropyLoss applies log-softmax internally, so this is
            # effectively a double softmax; it still trains, but raw logits
            # would be the more conventional input.
            loss = self.action_criterion(action_probs, batch_y)

            # Backward pass and optimization with gradient clipping
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
            self.optimizer.step()

            # Update metrics
            total_loss += loss.item()
            predictions = torch.argmax(action_probs, dim=1)
            total_correct += (predictions == batch_y).sum().item()
            total_samples += batch_y.size(0)

            # Update per-action counts
            for pred, target in zip(predictions, batch_y):
                pred_action = ['SELL', 'HOLD', 'BUY'][pred.item()]
                self.action_counts[pred_action][0] += 1
                if pred.item() == target.item():
                    self.action_counts[pred_action][1] += 1

        # Calculate average loss and accuracy
        avg_loss = total_loss / len(train_loader)
        accuracy = total_correct / total_samples

        # Update training history
        self.history['train_loss'].append(avg_loss)
        self.history['train_acc'].append(accuracy)
        self.train_losses.append(avg_loss)
        self.train_accuracies.append(accuracy)

        # Log trading signals
        for action in ['BUY', 'SELL', 'HOLD']:
            total = self.action_counts[action][0]
            correct = self.action_counts[action][1]
            precision = correct / total if total > 0 else 0
            logger.info(f"Trading signals - {action}: {total}, Precision: {precision:.4f}")

        return avg_loss, 0, accuracy  # Return 0 for price_loss as it is unused

    def evaluate(self, X_val, y_val, future_prices=None):
        """Evaluate the model with focus on short-term trading performance metrics."""
        self.model.eval()
        total_loss = 0
        total_correct = 0
        total_samples = 0

        # Convert inputs to tensors
        X_val_tensor = torch.FloatTensor(X_val).to(self.device)
        y_val_tensor = torch.LongTensor(y_val).to(self.device)

        dataset = TensorDataset(X_val_tensor, y_val_tensor)
        val_loader = DataLoader(dataset, batch_size=32)

        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                # Forward pass
                action_probs, _ = self.model(batch_X)

                # Calculate loss
                loss = self.action_criterion(action_probs, batch_y)

                # Update metrics
                total_loss += loss.item()
                predictions = torch.argmax(action_probs, dim=1)
                total_correct += (predictions == batch_y).sum().item()
                total_samples += batch_y.size(0)

        # Calculate average loss and accuracy
        avg_loss = total_loss / len(val_loader)
        accuracy = total_correct / total_samples

        # Update validation history
        self.history['val_loss'].append(avg_loss)
        self.history['val_acc'].append(accuracy)
        self.val_losses.append(avg_loss)
        self.val_accuracies.append(accuracy)

        # Update learning rate scheduler
        self.scheduler.step(avg_loss)

        return avg_loss, 0, accuracy  # Return 0 for price_loss as it is unused
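
    # Editor's note: a worked example (illustrative numbers) of the probability
    # re-weighting applied in predict_extrema below, for [SELL, HOLD, BUY]:
    #   raw softmax:      [0.20, 0.60, 0.20]
    #   HOLD * 0.3:       [0.20, 0.18, 0.20]
    #   SELL/BUY * 2.0:   [0.40, 0.18, 0.40]
    #   re-normalized:    [0.408, 0.184, 0.408]  (divide by the row sum, 0.98)
    # An initially HOLD-dominated distribution becomes a near-tie between SELL
    # and BUY, which is why downstream consumers should still apply
    # confidence_threshold before acting on a signal.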
    def predict_extrema(self, X):
        """
        Predict local extrema (lows and highs) from input data; predictions
        are optimized for short-term high-leverage trading signals.
        """
        self.model.eval()

        # Convert to tensor if not already
        if not isinstance(X, torch.Tensor):
            X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
        else:
            X_tensor = X.to(self.device)

        with torch.no_grad():
            action_probs, price_pred = self.model(X_tensor)

        # Post-processing optimized for short-term trading signals
        action_probs_np = action_probs.cpu().numpy()

        # Aggressively reduce HOLD and boost BUY/SELL for short-term trading
        action_probs_np[:, 1] *= 0.3  # Reduce HOLD probabilities
        action_probs_np[:, 0] *= 2.0  # Boost SELL probabilities
        action_probs_np[:, 2] *= 2.0  # Boost BUY probabilities

        # Re-normalize so each row sums to 1
        action_probs_np = action_probs_np / action_probs_np.sum(axis=1, keepdims=True)

        # Store the predicted action for the most recent input
        if action_probs_np.shape[0] > 0:
            latest_action = np.argmax(action_probs_np[-1])
            self.last_actions[0].append(int(latest_action))
            # Keep only the 10 most recent actions
            self.last_actions[0] = self.last_actions[0][-10:]

        # Update action counts for stats
        actions = np.argmax(action_probs_np, axis=1)
        unique, counts = np.unique(actions, return_counts=True)
        action_dict = dict(zip(unique, counts))

        if 0 in action_dict:
            self.action_counts['SELL'][0] += action_dict[0]
        if 1 in action_dict:
            self.action_counts['HOLD'][0] += action_dict[1]
        if 2 in action_dict:
            self.action_counts['BUY'][0] += action_dict[2]

        # If price_pred is None, derive a rough price estimate from the signals
        if price_pred is None:
            # Use the current close prices from the input if available
            # (assumes close is feature index 3)
            current_prices = (X_tensor[:, -1, 3].cpu().numpy()
                              if X_tensor.shape[2] > 3
                              else np.zeros(X_tensor.shape[0]))

            # Signal direction in [-1, 1]: BUY minus SELL probability
            price_directions = action_probs_np[:, 2] - action_probs_np[:, 0]

            # Scale the projected price change by signal strength
            price_preds = current_prices * (1 + price_directions * 0.002)

            return action_probs_np, price_preds.reshape(-1, 1)
        else:
            return action_probs_np, price_pred.cpu().numpy()

    def predict_next_candles(self, X, n_candles=3):
        """
        Predict the next n candles with focus on short-term signals.

        Args:
            X: Input data of shape [batch_size, window_size, features]
            n_candles: Number of future candles to predict

        Returns:
            Dictionary of predictions for each timeframe
        """
        self.model.eval()
        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)

        with torch.no_grad():
            # Get initial predictions
            action_probs, price_pred = self.model(X_tensor)
            action_probs_np = action_probs.cpu().numpy()

            # Apply more aggressive processing for short-term signals
            action_probs_np[:, 1] *= 0.5  # Reduce HOLD
            action_probs_np[:, 0] *= 1.3  # Boost SELL
            action_probs_np[:, 2] *= 1.3  # Boost BUY

            # Re-normalize
            action_probs_np = action_probs_np / action_probs_np.sum(axis=1, keepdims=True)

            # For short-term predictions, decay the signal over time: the first
            # candle carries the full signal, later candles drift toward HOLD.
            predictions = {}
            for i, tf in enumerate(self.timeframes):
                tf_preds = np.zeros((n_candles, action_probs_np.shape[0], 3))

                for j in range(n_candles):
                    # Decay factor moves signals toward HOLD over time
                    # (short-term signals shouldn't persist too long)
                    decay_factor = max(0.1, 1.0 - j * 0.3)

                    decayed_probs = action_probs_np.copy()
                    decayed_probs[:, 0] = action_probs_np[:, 0] * decay_factor  # Decay SELL
                    decayed_probs[:, 2] = action_probs_np[:, 2] * decay_factor  # Decay BUY

                    # Increase HOLD probability to compensate
                    hold_increase = (1.0 - decay_factor) * (action_probs_np[:, 0] + action_probs_np[:, 2])
                    decayed_probs[:, 1] = action_probs_np[:, 1] + hold_increase

                    # Re-normalize
                    decayed_probs = decayed_probs / decayed_probs.sum(axis=1, keepdims=True)

                    tf_preds[j] = decayed_probs

                predictions[tf] = tf_preds

        return predictions
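
    # Editor's note: a worked example (illustrative numbers) of the decay
    # schedule used in predict_next_candles above, decay_factor = max(0.1, 1 - 0.3j):
    #   j=0 -> 1.0 (full signal), j=1 -> 0.7, j=2 -> 0.4, j>=3 -> floor at 0.1.
    # The probability mass removed from SELL and BUY, i.e.
    # (1 - decay_factor) * (p_sell + p_buy), is added to HOLD before
    # re-normalizing, so each row remains a valid distribution.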
    def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
        """
        Train the CNN model.

        Args:
            X_train: Training input data
            y_train: Training target data
            X_val: Validation input data
            y_val: Validation target data
            batch_size: Batch size for training
            epochs: Number of training epochs

        Returns:
            Training history
        """
        logger.info(f"Training PyTorch CNN model with {len(X_train)} samples, "
                    f"batch_size={batch_size}, epochs={epochs}")

        # Convert numpy arrays to PyTorch tensors
        X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)

        # Handle different output sizes for y_train
        if self.output_size == 1:
            y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(self.device)
        else:
            y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)

        # Create DataLoader for training data
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Create DataLoader for validation data if provided
        if X_val is not None and y_val is not None:
            X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
            if self.output_size == 1:
                y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
            else:
                y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)
            val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
            val_loader = DataLoader(val_dataset, batch_size=batch_size)
        else:
            val_loader = None

        # Training loop
        for epoch in range(epochs):
            # Training phase
            self.model.train()
            running_loss = 0.0
            correct = 0
            total = 0

            for inputs, targets in train_loader:
                # Zero the parameter gradients
                self.optimizer.zero_grad()

                # Forward pass
                action_probs, price_pred = self.model(inputs)

                # Calculate loss (note: regression mode with output_size == 1
                # would need a different criterion than CrossEntropyLoss)
                if self.output_size == 1:
                    loss = self.action_criterion(action_probs, targets.unsqueeze(1))
                else:
                    loss = self.action_criterion(action_probs, targets)

                # Backward pass and optimize
                loss.backward()
                self.optimizer.step()

                # Statistics
                running_loss += loss.item()
                _, predicted = torch.max(action_probs, 1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

            epoch_loss = running_loss / len(train_loader)
            epoch_acc = correct / total if total > 0 else 0

            # Validation phase
            if val_loader is not None:
                val_loss, _, val_acc = self.evaluate(X_val, y_val)

                logger.info(f"Epoch {epoch+1}/{epochs} - "
                            f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
                            f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")

                # Update history
                self.train_losses.append(epoch_loss)
                self.train_accuracies.append(epoch_acc)
                self.val_losses.append(val_loss)
                self.val_accuracies.append(val_acc)
            else:
                logger.info(f"Epoch {epoch+1}/{epochs} - "
                            f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f}")

                # Update history without validation
                self.train_losses.append(epoch_loss)
                self.train_accuracies.append(epoch_acc)

        logger.info("Training completed")

        return {
            'loss': self.train_losses,
            'accuracy': self.train_accuracies,
            'val_loss': self.val_losses,
            'val_accuracy': self.val_accuracies
        }
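
    # Editor's note: illustrative usage of the training entry point (the data
    # arrays and their preparation are assumptions, not defined in this module):
    #   model = CNNModelPyTorch(window_size=20, timeframes=["1m", "5m", "15m"])
    #   history = model.train(X_train, y_train, X_val, y_val, batch_size=32, epochs=10)
    #   print(history['val_accuracy'][-1])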
    def evaluate_metrics(self, X_test, y_test):
        """Calculate and return comprehensive evaluation metrics as a dict."""
        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(self.device)

        self.model.eval()
        with torch.no_grad():
            # The model returns (action_probs, price_pred); only probs are needed
            y_pred, _ = self.model(X_test_tensor)
            if self.output_size > 1:
                _, y_pred_class = torch.max(y_pred, 1)
                y_pred_class = y_pred_class.cpu().numpy()
            else:
                y_pred_class = (y_pred.cpu().numpy() > 0.5).astype(int).flatten()

        metrics = {
            'accuracy': accuracy_score(y_test, y_pred_class),
            'precision': precision_score(y_test, y_pred_class, average='weighted', zero_division=0),
            'recall': recall_score(y_test, y_pred_class, average='weighted', zero_division=0),
            'f1_score': f1_score(y_test, y_pred_class, average='weighted', zero_division=0)
        }

        return metrics

    def save(self, filepath):
        """
        Save the model to a file with trading configuration.

        Args:
            filepath: Path to save the model
        """
        # Create directory if it doesn't exist
        os.makedirs(os.path.dirname(filepath), exist_ok=True)

        # Save the model state with additional trading parameters
        model_state = {
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'history': self.history,
            'window_size': self.window_size,
            'num_features': len(self.timeframes) * 5,  # 5 features per timeframe
            'output_size': self.output_size,
            'timeframes': self.timeframes,
            # Trading configuration
            'confidence_threshold': self.confidence_threshold,
            'max_consecutive_same_action': self.max_consecutive_same_action,
            'action_counts': self.action_counts,
            'last_actions': self.last_actions,
            # Model version information
            'model_version': 'short_term_optimized_v2.0',
            'timestamp': datetime.now().strftime('%Y%m%d_%H%M%S')
        }

        torch.save(model_state, f"{filepath}.pt")
        logger.info(f"Model saved to {filepath}.pt with short-term trading optimizations")

        # Save a timestamped backup of the model
        backup_dir = f"{filepath}_backup"
        os.makedirs(backup_dir, exist_ok=True)
        backup_path = os.path.join(backup_dir, f"model_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pt")
        torch.save(model_state, backup_path)
        logger.info(f"Backup saved to {backup_path}")

    def load(self, filepath):
        """Load model weights from file."""
        if not os.path.exists(f"{filepath}.pt"):
            logger.error(f"Model file {filepath}.pt not found")
            return False

        try:
            # Load the model state
            model_state = torch.load(f"{filepath}.pt", map_location=self.device)

            # Update model parameters
            self.window_size = model_state['window_size']
            self.total_features = model_state['num_features']
            self.output_size = model_state['output_size']
            self.timeframes = model_state.get('timeframes', ["1m"])

            # Load model state dict
            self.model.load_state_dict(model_state['model_state_dict'])

            # Load optimizer state if available
            if 'optimizer_state_dict' in model_state:
                self.optimizer.load_state_dict(model_state['optimizer_state_dict'])

            # Load trading configuration if available
            if 'confidence_threshold' in model_state:
                self.confidence_threshold = model_state['confidence_threshold']
            if 'max_consecutive_same_action' in model_state:
                self.max_consecutive_same_action = model_state['max_consecutive_same_action']

            # Log model version information if available
            if 'model_version' in model_state:
                logger.info(f"Model version: {model_state['model_version']}")
            if 'timestamp' in model_state:
                logger.info(f"Model timestamp: {model_state['timestamp']}")

            return True
        except Exception as e:
            logger.error(f"Error loading model: {str(e)}")
            return False
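
    # Editor's note: illustrative save/load round trip (the path is an
    # assumption; save() appends the .pt extension itself):
    #   model.save("NN/models/saved/cnn_short_term")
    #   # -> writes cnn_short_term.pt plus a timestamped copy under
    #   #    cnn_short_term_backup/
    #   model.load("NN/models/saved/cnn_short_term")
    #   # -> restores weights, optimizer state, and trading thresholds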
    def plot_training_history(self, metrics_file="NN/models/saved/training_metrics.json"):
        """
        Plot training history from saved metrics.

        Args:
            metrics_file: Path to the saved metrics JSON file
        """
        try:
            import json
            import matplotlib.dates as mdates

            # Load metrics
            with open(metrics_file, 'r') as f:
                metrics = json.load(f)

            # Create plots directory
            plots_dir = os.path.join(os.path.dirname(metrics_file), 'plots')
            os.makedirs(plots_dir, exist_ok=True)

            # Convert timestamps to datetime objects
            timestamps = [datetime.fromisoformat(ts) for ts in metrics['timestamps']]

            # 1. Plot loss and accuracy
            fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), sharex=True)

            # Loss plot
            ax1.plot(timestamps, metrics['train_loss'], 'b-', label='Training Loss')
            ax1.plot(timestamps, metrics['val_loss'], 'r-', label='Validation Loss')
            ax1.set_title('Model Loss Over Time')
            ax1.set_ylabel('Loss')
            ax1.legend()
            ax1.grid(True)

            # Accuracy plot
            ax2.plot(timestamps, metrics['train_acc'], 'g-', label='Training Accuracy')
            ax2.plot(timestamps, metrics['val_acc'], 'm-', label='Validation Accuracy')
            ax2.set_title('Model Accuracy Over Time')
            ax2.set_ylabel('Accuracy')
            ax2.legend()
            ax2.grid(True)

            # Format x-axis
            ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))
            plt.xticks(rotation=45)

            # Save the plot
            plt.tight_layout()
            plt.savefig(os.path.join(plots_dir, 'loss_accuracy.png'))
            plt.close()

            # 2. Plot PnL and win rate
            fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), sharex=True)

            # PnL plot
            ax1.plot(timestamps, metrics['train_pnl'], 'g-', label='Training PnL')
            ax1.plot(timestamps, metrics['val_pnl'], 'r-', label='Validation PnL')
            ax1.set_title('PnL Over Time')
            ax1.set_ylabel('PnL')
            ax1.legend()
            ax1.grid(True)

            # Win rate plot
            ax2.plot(timestamps, metrics['train_win_rate'], 'b-', label='Training Win Rate')
            ax2.plot(timestamps, metrics['val_win_rate'], 'm-', label='Validation Win Rate')
            ax2.set_title('Win Rate Over Time')
            ax2.set_ylabel('Win Rate')
            ax2.legend()
            ax2.grid(True)

            # Format x-axis
            ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))
            plt.xticks(rotation=45)

            # Save the plot
            plt.tight_layout()
            plt.savefig(os.path.join(plots_dir, 'pnl_winrate.png'))
            plt.close()

            print(f"Performance visualizations saved to {plots_dir}")
            return True

        except Exception as e:
            print(f"Error generating plots: {str(e)}")
            import traceback
            print(traceback.format_exc())
            return False

    def extract_hidden_features(self, X):
        """
        Extract hidden features from the model: the output of the penultimate
        dense layer, before the final classification layer.

        Args:
            X: Input data

        Returns:
            Hidden features (output from penultimate dense layer)
        """
        # Convert to PyTorch tensor
        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)

        # Forward pass through the model up to the penultimate layer
        self.model.eval()
        with torch.no_grad():
            # Run the convolutional feature extractor
            x_t = X_tensor.transpose(1, 2)
            x = self.model.conv1(x_t)
            x = self.model.conv2(x)
            x = self.model.conv3(x)

            # Global pooling to [batch, 128]
            x = self.model.global_pool(x)
            features = x.squeeze(-1)

            # Apply all fully connected layers except the final output layer
            for layer in self.model.fc[:-1]:
                features = layer(features)

        return features.cpu().numpy()
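

# Editor's note: a minimal end-to-end smoke test on synthetic data, assuming
# the default 3-timeframe layout (15 features) and random labels. This is an
# illustrative sketch for local sanity checks, not part of the original module.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    rng = np.random.default_rng(0)
    X = rng.normal(size=(64, 20, 15)).astype(np.float32)  # [samples, window, features]
    y = rng.integers(0, 3, size=64)                       # SELL=0, HOLD=1, BUY=2

    model = CNNModelPyTorch(window_size=20, timeframes=["1m", "5m", "15m"])
    avg_loss, _, acc = model.train_epoch(X, y, future_prices=None, batch_size=16)
    print(f"train_epoch: loss={avg_loss:.4f}, acc={acc:.4f}")

    probs, price_preds = model.predict_extrema(X[:4])
    print("action probabilities:\n", probs)
    print("derived price predictions:\n", price_preds.ravel())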