import logging
import os
from typing import Dict, List, Tuple

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Configure logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class PricePatternAttention(nn.Module):
    """
    Single-head scaled dot-product self-attention over a sequence.

    Intended to focus on price patterns that might indicate local extrema
    or trend reversals.

    Args:
        input_dim: Size of the last dimension of the input.
        hidden_dim: Size of the query/key/value projections.
    """

    def __init__(self, input_dim, hidden_dim=64):
        super().__init__()
        self.query = nn.Linear(input_dim, hidden_dim)
        self.key = nn.Linear(input_dim, hidden_dim)
        self.value = nn.Linear(input_dim, hidden_dim)
        # Registered as a non-persistent buffer so that it follows the module
        # across .to()/.double() calls without adding a key to the state dict
        # (existing checkpoints keep loading unchanged).
        self.register_buffer(
            "scale",
            torch.sqrt(torch.tensor(hidden_dim, dtype=torch.float32)),
            persistent=False,
        )

    def forward(self, x):
        """
        Apply attention to an input sequence.

        Args:
            x: Tensor of shape [batch_size, seq_len, input_dim].

        Returns:
            Tuple ``(output, attn_weights)`` with shapes
            [batch_size, seq_len, hidden_dim] and
            [batch_size, seq_len, seq_len].

        Raises:
            ValueError: If ``x`` is not a 3D tensor.
        """
        if x.dim() != 3:
            raise ValueError(
                f"Expected a 3D tensor [batch, seq_len, features], got shape {tuple(x.shape)}"
            )

        # Project input to query, key, value: each [batch, seq_len, hidden_dim]
        q = self.query(x)
        k = self.key(x)
        v = self.value(x)

        # Scaled dot-product scores: [batch, seq_len, seq_len]
        scores = torch.matmul(q, k.transpose(-2, -1)) / self.scale

        # Each query position gets a distribution over key positions
        attn_weights = F.softmax(scores, dim=-1)

        # Weighted sum of values: [batch, seq_len, hidden_dim]
        output = torch.matmul(attn_weights, v)
        return output, attn_weights


class AdaptiveNorm(nn.Module):
    """
    Normalization layer that picks a normalization method from the shape of
    its input ``[batch_size, channels, seq_len]``:

    - batch_size > 1 and seq_len > 1: BatchNorm1d
    - batch_size == 1 and seq_len > 1: GroupNorm
    - seq_len == 1: LayerNorm over ``[channels, 1]``

    NOTE(review): the choice depends on the batch shape, not on train/eval
    mode, so a model trained on batches (BatchNorm) is normalized by
    GroupNorm when it is evaluated on a single sample. Confirm this is
    intended.

    Args:
        num_features: Number of channels of the input.
    """

    def __init__(self, num_features):
        super().__init__()
        self.batch_norm = nn.BatchNorm1d(num_features, affine=True)
        self.group_norm = nn.GroupNorm(self._num_groups(num_features), num_features)
        self.layer_norm = nn.LayerNorm([num_features, 1])

    @staticmethod
    def _num_groups(num_features, max_groups=32):
        """Return the largest group count <= max_groups that divides num_features."""
        # GroupNorm requires num_channels to be divisible by num_groups;
        # min(32, num_features) alone fails for e.g. 48 channels.
        for groups in range(min(max_groups, num_features), 0, -1):
            if num_features % groups == 0:
                return groups
        return 1

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        """Map legacy ``layer_norm_1d`` checkpoint entries onto ``layer_norm``."""
        # Earlier revisions registered a `layer_norm_1d` submodule lazily in
        # forward(); checkpoints saved after that carry its keys. It played the
        # role `layer_norm` plays now, so reuse its values when shapes match.
        expected_shape = tuple(self.layer_norm.normalized_shape)
        for suffix in ("weight", "bias"):
            legacy_key = f"{prefix}layer_norm_1d.{suffix}"
            if legacy_key in state_dict:
                legacy_value = state_dict.pop(legacy_key)
                if tuple(legacy_value.shape) == expected_shape:
                    state_dict[f"{prefix}layer_norm.{suffix}"] = legacy_value
        super()._load_from_state_dict(
            state_dict, prefix, local_metadata, strict,
            missing_keys, unexpected_keys, error_msgs,
        )

    def forward(self, x):
        """
        Normalize ``x`` of shape [batch_size, channels, seq_len].

        Returns:
            Tensor with the same shape as ``x``.
        """
        batch_size, _, seq_len = x.size()

        if batch_size > 1 and seq_len > 1:
            return self.batch_norm(x)
        if seq_len > 1:
            return self.group_norm(x)
        if seq_len == 0:
            # Nothing to normalize in an empty sequence.
            return x
        # seq_len == 1: use the layer norm registered at construction time so
        # that its parameters are visible to optimizers and the state dict
        # does not change shape after the first forward pass.
        return self.layer_norm(x)


class SimpleCNN(nn.Module):
    """
    Simple model for reinforcement learning with image-like state inputs.

    Despite the name, the network built by ``_build_network`` is fully
    connected: inputs are flattened and passed through three linear layers
    that feed several prediction heads.

    Args:
        input_shape: Shape of one state without the batch dimension, given as
            ``[channels, height, width]``, ``[timeframes, features]`` or
            ``[features]``.
        n_actions: Number of outputs of the action head.

    Raises:
        ValueError: If ``input_shape`` has more than 3 or fewer than 1 entries.
    """

    def __init__(self, input_shape, n_actions):
        super().__init__()

        # Store dimensions
        self.input_shape = input_shape
        self.n_actions = n_actions

        # NOTE(review): forward() flattens every non-batch dimension, so for
        # 2D/3D input shapes the flattened width can differ from the
        # feature_dim computed here; the first forward pass then rebuilds the
        # network via _check_rebuild_network().
        if len(input_shape) == 3:  # [channels, height, width]
            self.channels, self.height, self.width = input_shape
            self.feature_dim = self.height * self.width
        elif len(input_shape) == 2:  # [timeframes, features]
            self.channels = input_shape[0]
            self.features = input_shape[1]
            self.feature_dim = self.features
        elif len(input_shape) == 1:  # [features]
            self.channels = 1
            self.features = input_shape[0]
            self.feature_dim = self.features
        else:
            raise ValueError(f"Unsupported input shape: {input_shape}")

        # Build network
        self._build_network()

        # Initialize device
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

        logger.info(f"SimpleCNN initialized with input shape: {input_shape}, actions: {n_actions}")

    def _build_network(self):
        """Build the neural network with current feature dimensions"""
        # Shared trunk: three 512-wide linear layers with ReLU and dropout
        self.fc_layers = nn.Sequential(
            nn.Linear(self.feature_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
        )

        # Output heads. value_head is registered (and therefore part of the
        # state dict) but forward() only uses advantage_head for the action
        # values, i.e. the outputs are not combined dueling-style.
        self.advantage_head = nn.Linear(512, self.n_actions)
        self.value_head = nn.Linear(512, 1)

        # Extrema detection head
        self.extrema_head = nn.Linear(512, 3)  # 0=bottom, 1=top, 2=neither

        # Price prediction heads for different timeframes
        self.price_pred_immediate = nn.Linear(512, 3)
        self.price_pred_midterm = nn.Linear(512, 3)
        self.price_pred_longterm = nn.Linear(512, 3)

        # Regression head for exact price prediction
        self.price_pred_value = nn.Linear(512, 4)

    def _check_rebuild_network(self, features):
        """
        Rebuild the network if ``features`` differs from the current input width.

        Rebuilding replaces every layer with a freshly initialized one, so
        previously learned weights are discarded and any optimizer created
        from the old parameters no longer updates the live ones.

        Returns:
            True if the network was rebuilt, False otherwise.
        """
        if features == self.feature_dim:
            return False

        logger.info(f"Rebuilding network for new feature dimension: {features} (was {self.feature_dim})")
        self.feature_dim = features
        self._build_network()
        # Move to device after rebuilding
        self.to(self.device)
        return True

    def _flatten_input(self, x):
        """Flatten ``x`` to [batch, features] and rebuild the network if the width changed."""
        if x.dim() > 2:
            # e.g. [batch, seq_len, features] -> [batch, seq_len * features]
            x = x.reshape(x.size(0), -1)

        if x.size(1) != self.feature_dim:
            self._check_rebuild_network(x.size(1))
        return x

    def forward(self, x):
        """
        Forward pass through the network.

        Args:
            x: Tensor with a leading batch dimension; any further dimensions
                are flattened.

        Returns:
            Tuple ``(action_values, extrema_pred, price_predictions, features)``
            where ``price_predictions`` is a dict with keys ``'immediate'``,
            ``'midterm'``, ``'longterm'`` (3 outputs each) and ``'values'``
            (4 outputs), and ``features`` is the 512-wide trunk output.
        """
        hidden = self.fc_layers(self._flatten_input(x))

        # Branch 1: Action values (Q-values)
        action_values = self.advantage_head(hidden)

        # Branch 2: Extrema detection (market top/bottom classification)
        extrema_pred = self.extrema_head(hidden)

        # Branch 3: Price movement prediction over different timeframes,
        # Branch 4: Value prediction (regression for expected price changes)
        price_predictions: Dict[str, torch.Tensor] = {
            'immediate': self.price_pred_immediate(hidden),  # Classification (up/down/sideways)
            'midterm': self.price_pred_midterm(hidden),      # Classification (up/down/sideways)
            'longterm': self.price_pred_longterm(hidden),    # Classification (up/down/sideways)
            'values': self.price_pred_value(hidden),         # Regression (expected % change)
        }

        # Return all outputs and the hidden feature representation
        return action_values, extrema_pred, price_predictions, hidden

    def extract_features(self, x):
        """
        Run only the trunk and the action head.

        Returns:
            Tuple ``(action_values, features)``.
        """
        x_features = self.fc_layers(self._flatten_input(x))
        action_values = self.advantage_head(x_features)
        return action_values, x_features

    def save(self, path):
        """
        Save model weights and architecture to ``f"{path}.pt"``.

        The parent directory is created if ``path`` contains one.
        """
        directory = os.path.dirname(path)
        if directory:
            # os.makedirs('') raises, so skip it for bare file names
            os.makedirs(directory, exist_ok=True)
        torch.save({
            'state_dict': self.state_dict(),
            'input_shape': self.input_shape,
            'n_actions': self.n_actions,
            'feature_dim': self.feature_dim
        }, f"{path}.pt")
        logger.info(f"Model saved to {path}.pt")

    def load(self, path):
        """
        Load model weights and architecture from ``f"{path}.pt"``.

        Returns:
            True on success, False if anything went wrong (the error is logged).
        """
        try:
            checkpoint = torch.load(f"{path}.pt", map_location=self.device)
            self.input_shape = checkpoint['input_shape']
            self.n_actions = checkpoint['n_actions']
            self.feature_dim = checkpoint['feature_dim']
            # Rebuild with the stored dimensions before copying the weights in
            self._build_network()
            self.load_state_dict(checkpoint['state_dict'])
            self.to(self.device)
            logger.info(f"Model loaded from {path}.pt")
            return True
        except Exception as e:  # API contract: report failure as False
            logger.error(f"Error loading model: {str(e)}")
            return False


class CNNModelPyTorch(nn.Module):
    """
    1D-convolutional model for trading with multiple timeframes.

    Three Conv1d blocks feed two fully connected layers and a dueling
    Q-value head plus an extrema (top/bottom/neither) classification head.
    The model owns its optimizer (``self.optimizer``) and LR scheduler
    (``self.scheduler``).

    Args:
        window_size: Expected sequence length; used to size ``fc1`` up front.
        num_features: Total number of input features (conv input channels),
            already summed across all timeframes.
        output_size: Number of Q-value outputs.
        timeframes: Stored on the instance; defaults to ``[1]``.
    """

    def __init__(self, window_size=20, num_features=5, output_size=3, timeframes=None):
        super().__init__()

        if timeframes is None:
            timeframes = [1]

        self.window_size = window_size
        self.num_features = num_features
        self.output_size = output_size
        self.timeframes = timeframes

        # num_features should already be the total features across all timeframes
        self.total_features = num_features

        logger.info(f"CNNModelPyTorch initialized with window_size={window_size}, num_features={num_features}, "
                    f"total_features={self.total_features}, output_size={output_size}, timeframes={timeframes}")

        # Device configuration
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        logger.info(f"Using device: {self.device}")

        # Create model architecture
        self._create_layers()

        # Move model to device
        self.to(self.device)

    def _create_layers(self):
        """Create all model layers with current feature dimensions"""
        conv_out_channels = 256

        # Convolutional layers - use total_features as input channels
        self.conv1 = nn.Conv1d(self.total_features, 64, kernel_size=3, padding=1)
        self.norm1 = AdaptiveNorm(64)
        self.dropout1 = nn.Dropout(0.2)

        self.conv2 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.norm2 = AdaptiveNorm(128)
        self.dropout2 = nn.Dropout(0.3)

        self.conv3 = nn.Conv1d(128, conv_out_channels, kernel_size=3, padding=1)
        self.norm3 = AdaptiveNorm(256)
        self.dropout3 = nn.Dropout(0.4)

        # The attention and extrema-conv layers are kept registered so that
        # existing checkpoints still load, but forward() does not run them:
        # both output heads read the shared fully connected features.
        self.attention = PricePatternAttention(256)
        self.extrema_conv = nn.Conv1d(256, 128, kernel_size=3, padding=1)
        self.extrema_norm = AdaptiveNorm(128)

        # The convolutions preserve sequence length (kernel 3, padding 1), so
        # the flattened size for the configured window is known up front.
        # Creating fc1 here (rather than lazily in forward) makes sure the
        # optimizer below owns its parameters; forward() still resizes it if
        # the actual sequence length differs.
        self.fc1 = nn.Linear(conv_out_channels * max(int(self.window_size), 1), 512)
        self.fc2 = nn.Linear(512, 256)
        self.dropout_fc = nn.Dropout(0.5)

        # Advantage and Value streams (Dueling DQN architecture)
        self.fc3 = nn.Linear(256, self.output_size)  # Advantage stream
        self.value_fc = nn.Linear(256, 1)  # Value stream

        # Additional prediction head for extrema detection (tops/bottoms)
        self.extrema_fc = nn.Linear(256, 3)  # 0=bottom, 1=top, 2=neither

        # Initialize optimizer and scheduler
        self._init_optimizer()

    def _init_optimizer(self):
        """(Re)create the optimizer and its scheduler over the current parameters."""
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        # The scheduler is rebuilt together with the optimizer so it never
        # points at a stale one. `verbose` is deprecated in recent PyTorch
        # releases and therefore not passed.
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, mode='max', factor=0.5, patience=5
        )

    def _ensure_fc1(self, flattened_size):
        """Make sure fc1 accepts ``flattened_size`` inputs, recreating it if needed."""
        if self.fc1 is not None and self.fc1.in_features == flattened_size:
            return

        if self.fc1 is None:
            logger.info(f"Initializing fc1 with input size {flattened_size}")
        else:
            logger.info(f"Recreating fc1 layer to match input size {flattened_size}")
        self.fc1 = nn.Linear(flattened_size, 512).to(self.device)
        # The new layer's parameters are unknown to the old optimizer
        self._init_optimizer()

    def rebuild_conv_layers(self, input_channels):
        """
        Rebuild convolutional layers for different input dimensions

        All layers, the optimizer and the scheduler are recreated, so learned
        weights and optimizer state are discarded.

        Args:
            input_channels: Number of input channels (features) in the data
        """
        logger.info(f"Rebuilding convolutional layers for {input_channels} input channels")

        # Update total features
        self.total_features = input_channels

        # Recreate all layers with new dimensions
        self._create_layers()

        # Move layers to device
        self.to(self.device)

    def _adapt_features(self, x):
        """Pad with zeros or truncate the last dimension of ``x`` to ``total_features``."""
        feature_dim = x.size(-1)
        if feature_dim == self.total_features:
            return x

        logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features})")
        if not getattr(self, 'rebuild_warning_shown', False):
            logger.error(f"Dimension mismatch: Expected {self.total_features} features but got {feature_dim}")
            self.rebuild_warning_shown = True

        # Don't rebuild - instead adapt the input
        if feature_dim < self.total_features:
            padding = x.new_zeros(*x.shape[:-1], self.total_features - feature_dim)
            return torch.cat([x, padding], dim=-1)
        return x[..., :self.total_features]

    def _conv_features(self, x, seq_len):
        """
        Run the three conv blocks on ``x`` of shape [batch, features, seq_len].

        If the normalized path fails on a very short sequence (seq_len < 3),
        fall back to the plain convolutions without normalization or dropout.
        Failures on longer sequences are re-raised.
        """
        conv_input = x
        try:
            x = self.dropout1(F.relu(self.norm1(self.conv1(x))))
            x = self.dropout2(F.relu(self.norm2(self.conv2(x))))
            return self.dropout3(F.relu(self.norm3(self.conv3(x))))
        except (RuntimeError, ValueError) as e:
            logger.warning(f"Error in convolutional layers: {str(e)}")
            if seq_len >= 3:
                # No fallback is defined here; continuing would feed
                # half-processed activations to the heads.
                raise

        # Restart from the untouched input: `x` may already hold the output
        # of an earlier block whose channel count conv1 cannot accept.
        x = F.relu(self.conv1(conv_input))
        x = F.relu(self.conv2(x))
        return F.relu(self.conv3(x))

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Forward pass through the network.

        Args:
            x: Either [batch_size, features] (treated as a sequence of
                length 1) or [batch_size, window_size, features]. A feature
                dimension that differs from ``total_features`` is zero-padded
                or truncated.

        Returns:
            Tuple ``(q_values, extrema_pred)`` of shapes
            [batch_size, output_size] and [batch_size, 3].

        Raises:
            ValueError: If ``x`` is neither 2D nor 3D.
        """
        # Ensure input is on the correct device
        x = x.to(self.device)

        # Log input tensor shape for debugging
        input_shape = x.size()
        logger.debug(f"Input tensor shape: {input_shape}")

        if x.dim() == 2:
            # [batch, features] -> [batch, features, 1]
            x = self._adapt_features(x)
            seq_len = 1
            x = x.unsqueeze(2)
        elif x.dim() == 3:
            # [batch, window_size, features] -> [batch, features, window_size]
            seq_len = x.size(1)
            x = self._adapt_features(x)
            x = x.permute(0, 2, 1)
        else:
            raise ValueError(f"Unexpected input shape: {x.size()}, expected 2D or 3D tensor")

        batch_size = x.size(0)

        # Log reshaped tensor for debugging
        logger.debug(f"Reshaped tensor for convolution: {x.size()}")

        x = self._conv_features(x, seq_len)

        # Flatten to [batch, channels * seq_len]; reshape (not view) because
        # the fallback paths give no contiguity guarantee.
        x = x.reshape(batch_size, -1)
        self._ensure_fc1(x.size(1))

        # Fully connected layers with dropout
        x = F.relu(self.fc1(x))
        x = self.dropout_fc(F.relu(self.fc2(x)))

        # Split into advantage and value streams
        advantage = self.fc3(x)
        value = self.value_fc(x)

        # Combine value and advantage
        q_values = value + (advantage - advantage.mean(dim=1, keepdim=True))

        # Extrema prediction uses the same shared features
        extrema_pred = self.extrema_fc(x)

        return q_values, extrema_pred

    def predict(self, X):
        """
        Make predictions without tracking gradients.

        The model is evaluated in eval mode; the training/eval mode it was in
        before the call is restored afterwards.

        Args:
            X: Tensor or array-like accepted by ``torch.tensor``.

        Returns:
            Tuple ``(actions, q_values, extrema_classes)`` of NumPy arrays:
            argmax action per sample, the raw Q-values, and the argmax
            extrema class per sample.
        """
        # Convert to tensor if not already
        if isinstance(X, torch.Tensor):
            X_tensor = X.to(self.device)
        else:
            X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)

        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                q_values, extrema_pred = self(X_tensor)
        finally:
            # Do not silently leave a model that is being trained in eval mode
            self.train(was_training)

        q_values_np = q_values.cpu().numpy()
        actions = np.argmax(q_values_np, axis=1)

        # Also return extrema predictions
        extrema_np = extrema_pred.cpu().numpy()
        extrema_classes = np.argmax(extrema_np, axis=1)

        return actions, q_values_np, extrema_classes

    def save(self, path: str):
        """
        Save model weights to ``f"{path}.pt"``.

        The parent directory is created if ``path`` contains one.
        """
        directory = os.path.dirname(path)
        if directory:
            # os.makedirs('') raises, so skip it for bare file names
            os.makedirs(directory, exist_ok=True)
        torch.save(self.state_dict(), f"{path}.pt")
        logger.info(f"Model saved to {path}.pt")

    def load(self, path: str):
        """
        Load model weights from ``f"{path}.pt"`` and switch to eval mode.

        ``fc1`` is resized to the checkpoint's input width first, because its
        size depends on the sequence length the saved model was run with.
        """
        state_dict = torch.load(f"{path}.pt", map_location=self.device)
        fc1_weight = state_dict.get('fc1.weight')
        if fc1_weight is not None:
            self._ensure_fc1(fc1_weight.shape[1])
        self.load_state_dict(state_dict)
        self.eval()
        logger.info(f"Model loaded from {path}.pt")