import logging
import os
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Configure logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class ResidualBlock(nn.Module):
    """Residual block with pre-activation (BatchNorm -> ReLU -> Conv).

    Two 3-wide ``Conv1d`` layers, each preceded by BatchNorm and ReLU. When the
    stride or the channel count changes, the shortcut is a 1x1 convolution;
    otherwise it is an empty ``nn.Sequential`` (identity).
    """

    def __init__(self, in_channels: int, out_channels: int, stride: int = 1) -> None:
        super().__init__()
        self.bn1 = nn.BatchNorm1d(in_channels)
        self.conv1 = nn.Conv1d(
            in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(
            out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False
        )

        # Shortcut connection to match dimensions
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the block to ``x`` and return the residual sum."""
        out = F.relu(self.bn1(x))
        # The shortcut branches off the *activated* input, not the raw ``x``.
        shortcut = self.shortcut(out)
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out += shortcut
        return out


class SelfAttention(nn.Module):
    """Single-head scaled dot-product self-attention for sequential data."""

    def __init__(self, dim: int) -> None:
        super().__init__()
        self.query = nn.Linear(dim, dim)
        self.key = nn.Linear(dim, dim)
        self.value = nn.Linear(dim, dim)
        self.scale = torch.sqrt(torch.tensor(dim, dtype=torch.float32))

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Attend over the second-to-last axis of ``x``.

        Args:
            x: Tensor of shape ``[batch_size, seq_len, dim]``.

        Returns:
            ``(out, attention)`` where ``out`` is ``[batch_size, seq_len, dim]``
            and ``attention`` is ``[batch_size, seq_len, seq_len]``.
        """
        q = self.query(x)  # [batch_size, seq_len, dim]
        k = self.key(x)  # [batch_size, seq_len, dim]
        v = self.value(x)  # [batch_size, seq_len, dim]

        # Attention scores, scaled by sqrt(dim): [batch_size, seq_len, seq_len]
        scores = torch.matmul(q, k.transpose(-2, -1)) / self.scale

        # Softmax over the key axis turns scores into attention weights
        attention = F.softmax(scores, dim=-1)

        # Weighted sum of the values: [batch_size, seq_len, dim]
        out = torch.matmul(attention, v)
        return out, attention


class EnhancedCNN(nn.Module):
    """Enhanced CNN model with residual connections and attention mechanisms
    for improved trading decision making.

    The network produces dueling Q-values plus auxiliary heads (extrema
    detection and price-movement predictions) from a shared 256-wide feature
    vector.
    """

    def __init__(
        self,
        input_shape: Union[int, List[int], Tuple[int, ...]],
        n_actions: int,
        confidence_threshold: float = 0.5,
    ) -> None:
        """Build the network and move it to CUDA when available, else CPU.

        Args:
            input_shape: ``[channels, height, width]``, ``[timeframes, features]``,
                ``[features]`` or a single integer feature count.
            n_actions: Number of outputs of the advantage stream.
            confidence_threshold: Minimum softmax probability ``act`` requires
                before it returns the greedy action instead of action 2.

        Raises:
            ValueError: If ``input_shape`` is a sequence whose length is not 1-3.
        """
        super().__init__()

        self.n_actions = n_actions
        self.confidence_threshold = confidence_threshold

        # Store and interpret dimensions
        self._configure_input_dims(input_shape)

        # Build network
        self._build_network()

        # Initialize device
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

        logger.info(
            "EnhancedCNN initialized with input shape: %s, actions: %s",
            input_shape,
            n_actions,
        )

    def _configure_input_dims(self, input_shape: Any) -> None:
        """Derive ``channels`` / ``features`` / ``feature_dim`` from ``input_shape``."""
        if isinstance(input_shape, (list, tuple)):
            if len(input_shape) == 3:
                # [channels, height, width]
                self.channels, self.height, self.width = input_shape
                self.feature_dim = self.height * self.width
            elif len(input_shape) == 2:
                # [timeframes, features]
                self.channels = input_shape[0]
                self.features = input_shape[1]
                self.feature_dim = self.features * self.channels
            elif len(input_shape) == 1:
                # [features]
                self.channels = 1
                self.features = input_shape[0]
                self.feature_dim = self.features
            else:
                raise ValueError(f"Unsupported input shape: {input_shape}")
        else:
            # single integer
            self.channels = 1
            self.features = input_shape
            self.feature_dim = input_shape
        self.input_shape = input_shape

    def _build_network(self) -> None:
        """Build the enhanced neural network with current feature dimensions."""
        # 1D CNN for sequential data
        if self.channels > 1:
            # Expected input: [batch, timeframes, features]
            self.conv_layers = nn.Sequential(
                nn.Conv1d(self.channels, 64, kernel_size=3, padding=1),
                nn.BatchNorm1d(64),
                nn.ReLU(),
                nn.Dropout(0.2),

                ResidualBlock(64, 128),
                nn.MaxPool1d(kernel_size=2, stride=2),
                nn.Dropout(0.3),

                ResidualBlock(128, 256),
                nn.MaxPool1d(kernel_size=2, stride=2),
                nn.Dropout(0.4),

                ResidualBlock(256, 512),
                nn.AdaptiveAvgPool1d(1),  # Global average pooling
            )
            # Feature dimension after conv layers
            self.conv_features = 512
        else:
            # For 1D vectors, skip the convolutional part
            self.conv_layers = None
            self.conv_features = 0

        # The first FC layer consumes either the raw vector or the pooled conv output.
        fc_in = self.feature_dim if self.conv_layers is None else self.conv_features
        self.fc1 = nn.Linear(fc_in, 512)
        self.features_dim = 512

        # Common feature extraction layers
        self.fc_layers = nn.Sequential(
            self.fc1,
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(512, 256),
            nn.ReLU(),
        )

        # Dueling architecture
        self.advantage_stream = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, self.n_actions),
        )
        self.value_stream = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
        )

        # Extrema detection head
        self.extrema_head = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 3),  # 0=bottom, 1=top, 2=neither
        )

        # Price prediction heads
        self.price_pred_immediate = nn.Sequential(
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 3),  # Up, Down, Sideways
        )
        self.price_pred_midterm = nn.Sequential(
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 3),  # Up, Down, Sideways
        )
        self.price_pred_longterm = nn.Sequential(
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 3),  # Up, Down, Sideways
        )

        # Value prediction
        self.price_pred_value = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 4),  # % change for different timeframes
        )

        # Additional attention layer for feature refinement
        self.attention = SelfAttention(256)

    def _check_rebuild_network(self, features: int) -> bool:
        """Rebuild the network if ``features`` differs from ``feature_dim``.

        NOTE: a rebuild creates brand-new layers, so all learned weights are
        discarded and any optimizer created earlier no longer references the
        live parameters.

        Returns:
            True if the network was rebuilt, False otherwise.
        """
        if features == self.feature_dim:
            return False

        logger.info(
            "Rebuilding network for new feature dimension: %s (was %s)",
            features,
            self.feature_dim,
        )
        self.feature_dim = features
        self._build_network()
        # Move to device after rebuilding
        self.to(self.device)
        # New submodules are created in train mode; align them with the
        # model's current mode so an eval-mode model stays fully in eval mode.
        self.train(self.training)
        return True

    def forward(
        self, x: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, torch.Tensor], torch.Tensor]:
        """Forward pass through the network.

        Args:
            x: ``[batch, features]`` or ``[batch, timeframes, features]``.
                Inputs with more trailing axes are flattened to 3D before the
                convolutional stack.

        Returns:
            ``(q_values, extrema_pred, price_predictions, features_refined)``
            where ``price_predictions`` is a dict with keys ``'immediate'``,
            ``'midterm'``, ``'longterm'`` and ``'values'``.
        """
        batch_size = x.size(0)

        # Process different input shapes
        if x.dim() > 2:
            if self.conv_layers is not None:
                # Conv1d needs [batch, channels, length]; collapse extra axes.
                x_seq = x.reshape(batch_size, x.size(1), -1)

                # Rebuild if the total feature count changed.
                # NOTE(review): the conv stack depends on ``self.channels`` only,
                # which a rebuild does not update - confirm this is intended.
                self._check_rebuild_network(x_seq.size(1) * x_seq.size(2))

                # Apply convolutions, then flatten [batch, channels, 1] -> [batch, channels]
                x_flat = self.conv_layers(x_seq).view(batch_size, -1)
            else:
                # If no conv layers, just flatten
                x_flat = x.view(batch_size, -1)
        else:
            # For 2D input [batch, features]
            x_flat = x
            # Rebuild if the feature dimension changed
            self._check_rebuild_network(x_flat.size(1))

        # Apply FC layers
        features = self.fc_layers(x_flat)

        # Attention over a length-1 sequence: [batch, 1, features] -> [batch, features]
        features_attended, _ = self.attention(features.unsqueeze(1))
        features_refined = features_attended.squeeze(1)

        # Combine advantage and value into Q-values (dueling architecture)
        advantage = self.advantage_stream(features_refined)
        value = self.value_stream(features_refined)
        q_values = value + advantage - advantage.mean(dim=1, keepdim=True)

        # Get extrema predictions
        extrema_pred = self.extrema_head(features_refined)

        # Package price predictions
        price_predictions = {
            'immediate': self.price_pred_immediate(features_refined),
            'midterm': self.price_pred_midterm(features_refined),
            'longterm': self.price_pred_longterm(features_refined),
            'values': self.price_pred_value(features_refined),
        }

        return q_values, extrema_pred, price_predictions, features_refined

    def act(self, state: Any, explore: bool = True) -> Tuple[int, float]:
        """Choose an action for a single state with confidence thresholding.

        The forward pass runs in eval mode (Dropout off, BatchNorm using its
        running statistics) so the result is deterministic; the module's
        previous train/eval mode is restored afterwards.

        Args:
            state: One unbatched observation (array-like or tensor).
            explore: Currently unused; kept for interface compatibility.

        Returns:
            ``(action, confidence)`` where ``confidence`` is the softmax
            probability of the greedy action. If it is below
            ``confidence_threshold`` the returned action is 2 (assumed HOLD).
        """
        state_tensor = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0).to(self.device)

        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                q_values, _, _, _ = self(state_tensor)
        finally:
            self.train(was_training)

        # Apply softmax to get action probabilities
        action_probs = F.softmax(q_values, dim=1)

        # Get action with highest probability
        action = action_probs.argmax(dim=1).item()
        action_confidence = action_probs[0, action].item()

        # Check if confidence exceeds threshold
        if action_confidence < self.confidence_threshold:
            # Log the action that was rejected, then force HOLD.
            logger.info(
                "Action %s confidence %.4f below threshold %s, forcing HOLD",
                action,
                action_confidence,
                self.confidence_threshold,
            )
            action = 2  # Assume 2 is HOLD

        return action, action_confidence

    def save(self, path: str) -> None:
        """Save model weights and architecture to ``f"{path}.pt"``."""
        directory = os.path.dirname(path)
        # A bare filename has no directory part; makedirs('') would raise.
        if directory:
            os.makedirs(directory, exist_ok=True)
        torch.save({
            'state_dict': self.state_dict(),
            'input_shape': self.input_shape,
            'n_actions': self.n_actions,
            'feature_dim': self.feature_dim,
            'confidence_threshold': self.confidence_threshold
        }, f"{path}.pt")
        logger.info("Enhanced CNN model saved to %s.pt", path)

    def load(self, path: str) -> bool:
        """Load model weights and architecture from ``f"{path}.pt"``.

        The network is rebuilt from the checkpoint's ``input_shape``,
        ``n_actions`` and ``feature_dim`` before the weights are loaded.

        Returns:
            True on success; False (after logging the error) on any failure.
        """
        try:
            checkpoint = torch.load(f"{path}.pt", map_location=self.device)
            self.n_actions = checkpoint['n_actions']
            # Re-derive channels from the stored shape so the conv stack matches
            # the checkpoint rather than this instance's constructor arguments.
            self._configure_input_dims(checkpoint['input_shape'])
            # The stored feature_dim wins: it may differ after a runtime rebuild.
            self.feature_dim = checkpoint['feature_dim']
            if 'confidence_threshold' in checkpoint:
                self.confidence_threshold = checkpoint['confidence_threshold']
            self._build_network()
            self.load_state_dict(checkpoint['state_dict'])
            self.to(self.device)
            # Freshly built layers start in train mode; keep the current mode.
            self.train(self.training)
            logger.info("Enhanced CNN model loaded from %s.pt", path)
            return True
        except Exception as e:
            logger.error("Error loading model: %s", str(e))
            return False


# Additional utility for example sifting
class ExampleSiftingDataset:
    """Dataset that selectively keeps high-quality examples for training
    to improve model performance.

    Examples are admitted only if their reward exceeds a threshold; once the
    capacity is exceeded, only the highest-reward examples are retained and
    the threshold is raised to the lowest retained reward.
    """

    def __init__(self, max_examples: int = 50000) -> None:
        self.examples: List[Tuple[Any, Any, Any, Any, Any]] = []
        self.labels: List[Any] = []
        self.rewards: List[Any] = []
        self.max_examples = max_examples
        self.min_reward_threshold = -0.05  # Minimum reward to keep an example

    def add_example(self, state, action, reward, next_state, done) -> None:
        """Add a new training example with reward-based filtering."""
        # Only keep examples with rewards above the threshold
        if not reward > self.min_reward_threshold:
            return

        self.examples.append((state, action, reward, next_state, done))
        self.rewards.append(reward)

        if len(self.examples) <= self.max_examples:
            return

        # Over capacity: keep the top examples by reward (highest first)
        keep = np.argsort(self.rewards)[::-1][:self.max_examples]
        self.examples = [self.examples[i] for i in keep]
        self.rewards = [self.rewards[i] for i in keep]

        # Raise the admission threshold to the lowest reward we still hold
        if self.rewards:
            self.min_reward_threshold = min(self.rewards)

    def get_batch(self, batch_size: int) -> Optional[Dict[str, np.ndarray]]:
        """Get a batch of examples, prioritizing better examples.

        Samples without replacement, with probability proportional to each
        example's reward shifted to be strictly positive.

        Returns:
            A dict with keys ``'states'``, ``'actions'``, ``'rewards'``,
            ``'next_states'`` and ``'dones'``, or None if the dataset is empty
            or ``batch_size`` is not positive.
        """
        if not self.examples or batch_size <= 0:
            return None

        # Shift rewards to be positive for the probability calculation
        reward_arr = np.array(self.rewards)
        shifted = reward_arr - reward_arr.min() + 0.1  # Add small constant
        probs = shifted / shifted.sum()

        # Sample batch indices with reward-based probabilities
        indices = np.random.choice(
            len(self.examples),
            size=min(batch_size, len(self.examples)),
            p=probs,
            replace=False,
        )

        # Create batch
        states, actions, batch_rewards, next_states, dones = zip(
            *(self.examples[i] for i in indices)
        )

        return {
            'states': np.array(states),
            'actions': np.array(actions),
            'rewards': np.array(batch_rewards),
            'next_states': np.array(next_states),
            'dones': np.array(dones)
        }

    def __len__(self) -> int:
        return len(self.examples)