import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
import logging
import torch.nn.functional as F
from typing import Tuple

# Configure logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class PricePatternAttention(nn.Module):
    """
    Attention mechanism specifically designed to focus on price patterns
    that might indicate local extrema or trend reversals
    """
    def __init__(self, input_dim, hidden_dim=64):
        super(PricePatternAttention, self).__init__()
        self.query = nn.Linear(input_dim, hidden_dim)
        self.key = nn.Linear(input_dim, hidden_dim)
        self.value = nn.Linear(input_dim, hidden_dim)
        self.scale = torch.sqrt(torch.tensor(hidden_dim, dtype=torch.float32))

    def forward(self, x):
        """Apply attention to input sequence"""
        # x shape: [batch_size, seq_len, features]
        batch_size, seq_len, _ = x.size()

        # Project input to query, key, value
        q = self.query(x)  # [batch_size, seq_len, hidden_dim]
        k = self.key(x)    # [batch_size, seq_len, hidden_dim]
        v = self.value(x)  # [batch_size, seq_len, hidden_dim]

        # Calculate attention scores
        scores = torch.matmul(q, k.transpose(-2, -1)) / self.scale  # [batch_size, seq_len, seq_len]

        # Apply softmax to get attention weights
        attn_weights = F.softmax(scores, dim=-1)  # [batch_size, seq_len, seq_len]

        # Apply attention to values
        output = torch.matmul(attn_weights, v)  # [batch_size, seq_len, hidden_dim]

        return output, attn_weights
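
# Minimal shape-check sketch for PricePatternAttention (illustrative only: the helper
# name and the batch/sequence/feature sizes below are assumptions for this example,
# not values used elsewhere in the module).
def _attention_shape_demo():
    attn = PricePatternAttention(input_dim=256, hidden_dim=64)
    dummy = torch.randn(8, 20, 256)  # [batch_size, seq_len, features]
    out, weights = attn(dummy)
    # The output keeps the sequence length but projects features to hidden_dim,
    # while the weights form one [seq_len, seq_len] attention map per batch element
    assert out.shape == (8, 20, 64)
    assert weights.shape == (8, 20, 20)
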
class CNNModelPyTorch(nn.Module):
    """
    CNN model for trading with multiple timeframes
    """
    def __init__(self, window_size, num_features, output_size, timeframes):
        super(CNNModelPyTorch, self).__init__()

        self.window_size = window_size
        self.num_features = num_features
        self.output_size = output_size
        self.timeframes = timeframes

        # Calculate total input features across all timeframes
        self.total_features = num_features * len(timeframes)

        # Device configuration
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        logger.info(f"Using device: {self.device}")

        # Create model architecture
        self._create_layers()

        # Move model to device
        self.to(self.device)

    def _create_layers(self):
        """Create all model layers with current feature dimensions"""
        # Convolutional layers - use total_features as input channels
        self.conv1 = nn.Conv1d(self.total_features, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(64)
        self.conv2 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(128)
        self.conv3 = nn.Conv1d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(256)

        # Add price pattern attention layer
        self.attention = PricePatternAttention(256)

        # Extrema detection specialized convolutional layer
        self.extrema_conv = nn.Conv1d(256, 128, kernel_size=5, padding=2)
        self.extrema_bn = nn.BatchNorm1d(128)

        # Flattened size after the conv stack (padding preserves the sequence length)
        conv_output_size = self.window_size * 256

        # Fully connected layers
        self.fc1 = nn.Linear(conv_output_size, 512)
        self.fc2 = nn.Linear(512, 256)

        # Advantage and Value streams (Dueling DQN architecture)
        self.fc3 = nn.Linear(256, self.output_size)  # Advantage stream
        self.value_fc = nn.Linear(256, 1)  # Value stream

        # Additional prediction head for extrema detection (tops/bottoms)
        self.extrema_fc = nn.Linear(256, 3)  # 0=bottom, 1=top, 2=neither

        # Initialize optimizer and scheduler
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, mode='max', factor=0.5, patience=5, verbose=True
        )

    def rebuild_conv_layers(self, input_channels):
        """
        Rebuild convolutional layers for different input dimensions

        Args:
            input_channels: Number of input channels (features) in the data
        """
        logger.info(f"Rebuilding convolutional layers for {input_channels} input channels")

        # Update total features
        self.total_features = input_channels

        # Recreate all layers with new dimensions (weights and optimizer state are re-initialized)
        self._create_layers()

        # Move layers to device
        self.to(self.device)

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Forward pass returning (q_values, extrema_pred)"""
        # Ensure input is on the correct device
        x = x.to(self.device)

        # Check and handle if input dimensions don't match model expectations
        batch_size, window_len, feature_dim = x.size()
        if feature_dim != self.total_features:
            logger.warning(
                f"Input features ({feature_dim}) don't match model features "
                f"({self.total_features}), rebuilding layers"
            )
            self.rebuild_conv_layers(feature_dim)

        # Reshape input: [batch, window_size, features] -> [batch, channels, window_size]
        x = x.permute(0, 2, 1)

        # Convolutional layers
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))

        # Store conv features for extrema detection
        conv_features = x

        # Reshape for attention: [batch, channels, window_size] -> [batch, window_size, channels]
        x_attention = x.permute(0, 2, 1)

        # Apply attention. The attended features are kept for inspection but are not
        # fused back into the main path: a residual connection would mismatch the
        # 64 attention channels against the 256 convolutional channels
        attention_output, attention_weights = self.attention(x_attention)
        attention_reshaped = attention_output.permute(0, 2, 1)  # [batch, hidden_dim, window_size]

        # Apply extrema detection specialized layer
        extrema_features = F.relu(self.extrema_bn(self.extrema_conv(conv_features)))

        # Downstream heads consume the convolutional features directly
        x = conv_features

        # Flatten
        x = x.view(batch_size, -1)

        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))

        # Split into advantage and value streams
        advantage = self.fc3(x)
        value = self.value_fc(x)

        # Combine value and advantage
        q_values = value + (advantage - advantage.mean(dim=1, keepdim=True))

        # Extrema prediction head; it currently shares the fully connected features
        # with the Q-value streams (the flattened extrema_features are kept but unused)
        extrema_flat = extrema_features.view(batch_size, -1)
        extrema_pred = self.extrema_fc(x)

        return q_values, extrema_pred

    def predict(self, X):
        """Make predictions"""
        self.eval()

        # Convert to tensor if not already
        if not isinstance(X, torch.Tensor):
            X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
        else:
            X_tensor = X.to(self.device)

        with torch.no_grad():
            q_values, extrema_pred = self(X_tensor)
            q_values_np = q_values.cpu().numpy()
            actions = np.argmax(q_values_np, axis=1)

            # Also return extrema predictions
            extrema_np = extrema_pred.cpu().numpy()
            extrema_classes = np.argmax(extrema_np, axis=1)

        return actions, q_values_np, extrema_classes

    def save(self, path: str):
        """Save model weights"""
        # Only create a parent directory when the path actually contains one
        dir_name = os.path.dirname(path)
        if dir_name:
            os.makedirs(dir_name, exist_ok=True)
        torch.save(self.state_dict(), f"{path}.pt")
        logger.info(f"Model saved to {path}.pt")

    def load(self, path: str):
        """Load model weights"""
        self.load_state_dict(torch.load(f"{path}.pt", map_location=self.device))
        self.eval()
        logger.info(f"Model loaded from {path}.pt")
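
# Hedged usage sketch (not part of the training pipeline): the window size, feature
# count, timeframe list and batch size below are illustrative assumptions chosen only
# to exercise the model end to end when this file is run directly.
if __name__ == "__main__":
    demo_window, demo_features, demo_timeframes = 20, 5, ["1m", "5m", "15m"]
    model = CNNModelPyTorch(
        window_size=demo_window,
        num_features=demo_features,
        output_size=3,  # e.g. buy / hold / sell
        timeframes=demo_timeframes,
    )

    # Random batch shaped [batch, window_size, num_features * len(timeframes)]
    dummy_batch = np.random.randn(
        4, demo_window, demo_features * len(demo_timeframes)
    ).astype(np.float32)

    actions, q_values, extrema_classes = model.predict(dummy_batch)
    logger.info(f"Predicted actions: {actions}, extrema classes: {extrema_classes}")
    logger.info(f"Q-values shape: {q_values.shape}")  # (4, 3)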