import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
import logging
import torch.nn.functional as F
from typing import List, Tuple, Dict, Any, Optional, Union

# Configure logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class ResidualBlock(nn.Module):
    """Residual block with pre-activation (BatchNorm -> ReLU -> Conv)"""

    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.bn1 = nn.BatchNorm1d(in_channels)
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)

        # Shortcut connection to match dimensions
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out)
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out += shortcut
        return out


class SelfAttention(nn.Module):
    """Self-attention mechanism for sequential data"""

    def __init__(self, dim):
        super(SelfAttention, self).__init__()
        self.query = nn.Linear(dim, dim)
        self.key = nn.Linear(dim, dim)
        self.value = nn.Linear(dim, dim)
        self.scale = torch.sqrt(torch.tensor(dim, dtype=torch.float32))

    def forward(self, x):
        # x shape: [batch_size, seq_len, dim]
        batch_size, seq_len, dim = x.size()

        q = self.query(x)  # [batch_size, seq_len, dim]
        k = self.key(x)    # [batch_size, seq_len, dim]
        v = self.value(x)  # [batch_size, seq_len, dim]

        # Calculate attention scores
        scores = torch.matmul(q, k.transpose(-2, -1)) / self.scale  # [batch_size, seq_len, seq_len]

        # Apply softmax to get attention weights
        attention = F.softmax(scores, dim=-1)  # [batch_size, seq_len, seq_len]

        # Apply attention to values
        out = torch.matmul(attention, v)  # [batch_size, seq_len, dim]

        return out, attention


class EnhancedCNN(nn.Module):
    """
    Enhanced CNN model with residual connections and attention mechanisms
    for improved trading decision making
    """

    def __init__(self, input_shape, n_actions, confidence_threshold=0.5):
        super(EnhancedCNN, self).__init__()

        # Store dimensions
        self.input_shape = input_shape
        self.n_actions = n_actions
        self.confidence_threshold = confidence_threshold

        # Calculate input dimensions
        if isinstance(input_shape, (list, tuple)):
            if len(input_shape) == 3:
                # [channels, height, width]
                self.channels, self.height, self.width = input_shape
                self.feature_dim = self.height * self.width
            elif len(input_shape) == 2:
                # [timeframes, features]
                self.channels = input_shape[0]
                self.features = input_shape[1]
                self.feature_dim = self.features * self.channels
            elif len(input_shape) == 1:
                # [features]
                self.channels = 1
                self.features = input_shape[0]
                self.feature_dim = self.features
            else:
                raise ValueError(f"Unsupported input shape: {input_shape}")
        else:
            # Single integer
            self.channels = 1
            self.features = input_shape
            self.feature_dim = input_shape

        # Build network
        self._build_network()

        # Initialize device
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

        logger.info(f"EnhancedCNN initialized with input shape: {input_shape}, actions: {n_actions}")

    def _build_network(self):
        """Build the MASSIVELY enhanced neural network for 4GB VRAM budget"""
        # MASSIVELY SCALED ARCHITECTURE for 4GB VRAM (up to ~50M parameters)
        if self.channels > 1:
            # Massive convolutional backbone with deeper residual blocks
            self.conv_layers = nn.Sequential(
                # Initial large conv block
                nn.Conv1d(self.channels, 256, kernel_size=7, padding=3),  # Much wider initial layer
                nn.BatchNorm1d(256),
                nn.ReLU(),
                nn.Dropout(0.1),

                # First residual stage - 256 channels
                ResidualBlock(256, 512),
                ResidualBlock(512, 512),
                ResidualBlock(512, 512),
                nn.MaxPool1d(kernel_size=2, stride=2),
                nn.Dropout(0.2),

                # Second residual stage - 512 channels
                ResidualBlock(512, 1024),
                ResidualBlock(1024, 1024),
                ResidualBlock(1024, 1024),
                nn.MaxPool1d(kernel_size=2, stride=2),
                nn.Dropout(0.25),

                # Third residual stage - 1024 channels
                ResidualBlock(1024, 1536),
                ResidualBlock(1536, 1536),
                ResidualBlock(1536, 1536),
                nn.MaxPool1d(kernel_size=2, stride=2),
                nn.Dropout(0.3),

                # Fourth residual stage - 1536 channels (MASSIVE)
                ResidualBlock(1536, 2048),
                ResidualBlock(2048, 2048),
                ResidualBlock(2048, 2048),
                nn.AdaptiveAvgPool1d(1)  # Global average pooling
            )

            # Massive feature dimension after conv layers
            self.conv_features = 2048
        else:
            # For 1D vectors, use massive dense preprocessing
            self.conv_layers = None
            self.conv_features = 0

        # MASSIVE fully connected feature extraction layers
        if self.conv_layers is None:
            # For 1D inputs - massive feature extraction
            self.fc1 = nn.Linear(self.feature_dim, 2048)
            self.features_dim = 2048
        else:
            # For data processed by massive conv layers
            self.fc1 = nn.Linear(self.conv_features, 2048)
            self.features_dim = 2048

        # MASSIVE common feature extraction with multiple attention layers
        self.fc_layers = nn.Sequential(
            self.fc1,
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(2048, 2048),  # Keep massive width
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(2048, 1536),  # Still very wide
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1536, 1024),  # Large hidden layer
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 768),   # Final feature representation
            nn.ReLU()
        )

        # Multiple attention mechanisms for different aspects
        self.price_attention = SelfAttention(768)
        self.volume_attention = SelfAttention(768)
        self.trend_attention = SelfAttention(768)
        self.volatility_attention = SelfAttention(768)

        # Attention fusion layer
        self.attention_fusion = nn.Sequential(
            nn.Linear(768 * 4, 1024),  # Combine all attention outputs
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 768)
        )

        # MASSIVE dueling architecture with deeper networks
        self.advantage_stream = nn.Sequential(
            nn.Linear(768, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, self.n_actions)
        )

        self.value_stream = nn.Sequential(
            nn.Linear(768, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )

        # MASSIVE extrema detection head with ensemble predictions
        self.extrema_head = nn.Sequential(
            nn.Linear(768, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 3)  # 0=bottom, 1=top, 2=neither
        )

        # MASSIVE multi-timeframe price prediction heads
        self.price_pred_immediate = nn.Sequential(
            nn.Linear(768, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 3)  # Up, Down, Sideways
        )

        self.price_pred_midterm = nn.Sequential(
            nn.Linear(768, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 3)  # Up, Down, Sideways
        )

        self.price_pred_longterm = nn.Sequential(
            nn.Linear(768, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 3)  # Up, Down, Sideways
        )
        # MASSIVE value prediction with ensemble approaches
        self.price_pred_value = nn.Sequential(
            nn.Linear(768, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 8)  # More granular % change predictions for different timeframes
        )

        # Additional specialized prediction heads for better accuracy
        # Volatility prediction head
        self.volatility_head = nn.Sequential(
            nn.Linear(768, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 5)  # Very low, low, medium, high, very high volatility
        )

        # Support/Resistance level detection head
        self.support_resistance_head = nn.Sequential(
            nn.Linear(768, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 6)  # Strong support, weak support, neutral, weak resistance, strong resistance, breakout
        )

        # Market regime classification head
        self.market_regime_head = nn.Sequential(
            nn.Linear(768, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 7)  # Bull trend, bear trend, sideways, volatile up, volatile down, accumulation, distribution
        )

        # Risk assessment head
        self.risk_head = nn.Sequential(
            nn.Linear(768, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 4)  # Low risk, medium risk, high risk, extreme risk
        )

    def _check_rebuild_network(self, features):
        """Check if network needs to be rebuilt for different feature dimensions"""
        if features != self.feature_dim:
            logger.info(f"Rebuilding network for new feature dimension: {features} (was {self.feature_dim})")
            self.feature_dim = features
            self._build_network()
            # Move to device after rebuilding
            self.to(self.device)
            return True
        return False

    def forward(self, x):
        """Forward pass through the MASSIVE network"""
        batch_size = x.size(0)

        # Process different input shapes
        if len(x.shape) > 2:
            # Handle 3D input [batch, timeframes, features]
            if self.conv_layers is not None:
                # Conv1d expects [batch, channels, seq_len]; timeframes act as channels here,
                # so the tensor already has the layout the convolutional backbone needs
                x_reshaped = x

                # Check if the feature dimension has changed and rebuild if necessary
                if x_reshaped.size(1) * x_reshaped.size(2) != self.feature_dim:
                    total_features = x_reshaped.size(1) * x_reshaped.size(2)
                    self._check_rebuild_network(total_features)

                # Apply massive convolutions
                x_conv = self.conv_layers(x_reshaped)
                # Flatten: [batch, channels, 1] -> [batch, channels]
                x_flat = x_conv.view(batch_size, -1)
            else:
                # If no conv layers, just flatten
                x_flat = x.view(batch_size, -1)
        else:
            # For 2D input [batch, features]
            x_flat = x

            # Check if dimensions have changed
            if x_flat.size(1) != self.feature_dim:
                self._check_rebuild_network(x_flat.size(1))

        # Apply MASSIVE FC layers to get base features
        features = self.fc_layers(x_flat)  # [batch, 768]

        # Apply multiple specialized attention mechanisms
        features_3d = features.unsqueeze(1)  # [batch, 1, 768]

        # Get attention-refined features for different aspects
        price_features, _ = self.price_attention(features_3d)
        price_features = price_features.squeeze(1)  # [batch, 768]

        volume_features, _ = self.volume_attention(features_3d)
        volume_features = volume_features.squeeze(1)  # [batch, 768]

        trend_features, _ = self.trend_attention(features_3d)
        trend_features = trend_features.squeeze(1)  # [batch, 768]

        volatility_features, _ = self.volatility_attention(features_3d)
        volatility_features = volatility_features.squeeze(1)  # [batch, 768]

        # Fuse all attention outputs
        combined_attention = torch.cat([
            price_features,
            volume_features,
            trend_features,
            volatility_features
        ], dim=1)  # [batch, 768*4]

        # Apply attention fusion to get final refined features
        features_refined = self.attention_fusion(combined_attention)  # [batch, 768]

        # Calculate advantage and value (Dueling DQN architecture)
        advantage = self.advantage_stream(features_refined)
        value = self.value_stream(features_refined)

        # Combine for Q-values (Dueling architecture)
        q_values = value + advantage - advantage.mean(dim=1, keepdim=True)

        # Get massive ensemble of predictions
        # Extrema predictions (bottom/top/neither detection)
        extrema_pred = self.extrema_head(features_refined)

        # Multi-timeframe price movement predictions
        price_immediate = self.price_pred_immediate(features_refined)
        price_midterm = self.price_pred_midterm(features_refined)
        price_longterm = self.price_pred_longterm(features_refined)
        price_values = self.price_pred_value(features_refined)

        # Additional specialized predictions for enhanced accuracy
        volatility_pred = self.volatility_head(features_refined)
        support_resistance_pred = self.support_resistance_head(features_refined)
        market_regime_pred = self.market_regime_head(features_refined)
        risk_pred = self.risk_head(features_refined)

        # Package all price predictions
        price_predictions = {
            'immediate': price_immediate,
            'midterm': price_midterm,
            'longterm': price_longterm,
            'values': price_values
        }

        # Package additional predictions for enhanced decision making
        advanced_predictions = {
            'volatility': volatility_pred,
            'support_resistance': support_resistance_pred,
            'market_regime': market_regime_pred,
            'risk_assessment': risk_pred
        }

        return q_values, extrema_pred, price_predictions, features_refined, advanced_predictions

    def act(self, state, explore=True):
        """Enhanced action selection with massive model predictions"""
        if explore and np.random.random() < 0.1:  # 10% random exploration
            return np.random.choice(self.n_actions)

        self.eval()
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)

        with torch.no_grad():
            q_values, extrema_pred, price_predictions, features, advanced_predictions = self(state_tensor)

            # Apply softmax to get action probabilities
            action_probs = torch.softmax(q_values, dim=1)
            action = torch.argmax(action_probs, dim=1).item()

            # Log advanced predictions for better decision making
            if hasattr(self, '_log_predictions') and self._log_predictions:
                # Log volatility prediction
                volatility = torch.softmax(advanced_predictions['volatility'], dim=1)
                volatility_class = torch.argmax(volatility, dim=1).item()
                volatility_labels = ['Very Low', 'Low', 'Medium', 'High', 'Very High']

                # Log support/resistance prediction
                sr = torch.softmax(advanced_predictions['support_resistance'], dim=1)
                sr_class = torch.argmax(sr, dim=1).item()
                sr_labels = ['Strong Support', 'Weak Support', 'Neutral',
                             'Weak Resistance', 'Strong Resistance', 'Breakout']

                # Log market regime prediction
                regime = torch.softmax(advanced_predictions['market_regime'], dim=1)
                regime_class = torch.argmax(regime, dim=1).item()
                regime_labels = ['Bull Trend', 'Bear Trend', 'Sideways', 'Volatile Up',
                                 'Volatile Down', 'Accumulation', 'Distribution']

                # Log risk assessment
                risk = torch.softmax(advanced_predictions['risk_assessment'], dim=1)
                risk_class = torch.argmax(risk, dim=1).item()
                risk_labels = ['Low Risk', 'Medium Risk', 'High Risk', 'Extreme Risk']

                logger.info("MASSIVE Model Predictions:")
                logger.info(f"  Volatility: {volatility_labels[volatility_class]} ({volatility[0, volatility_class]:.3f})")
                logger.info(f"  Support/Resistance: {sr_labels[sr_class]} ({sr[0, sr_class]:.3f})")
                logger.info(f"  Market Regime: {regime_labels[regime_class]} ({regime[0, regime_class]:.3f})")
                logger.info(f"  Risk Level: {risk_labels[risk_class]} ({risk[0, risk_class]:.3f})")

        return action

    def save(self, path):
        """Save model weights and architecture"""
        # Guard against a bare filename with no directory component
        dirname = os.path.dirname(path)
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        torch.save({
            'state_dict': self.state_dict(),
            'input_shape': self.input_shape,
            'n_actions': self.n_actions,
            'feature_dim': self.feature_dim,
            'confidence_threshold': self.confidence_threshold
        }, f"{path}.pt")
        logger.info(f"Enhanced CNN model saved to {path}.pt")

    def load(self, path):
        """Load model weights and architecture"""
        try:
            checkpoint = torch.load(f"{path}.pt", map_location=self.device)
            self.input_shape = checkpoint['input_shape']
            self.n_actions = checkpoint['n_actions']
            self.feature_dim = checkpoint['feature_dim']
            if 'confidence_threshold' in checkpoint:
                self.confidence_threshold = checkpoint['confidence_threshold']
            self._build_network()
            self.load_state_dict(checkpoint['state_dict'])
            self.to(self.device)
            logger.info(f"Enhanced CNN model loaded from {path}.pt")
            return True
        except Exception as e:
            logger.error(f"Error loading model: {str(e)}")
            return False


# Additional utility for example sifting
class ExampleSiftingDataset:
    """
    Dataset that selectively keeps high-quality examples for training
    to improve model performance
    """

    def __init__(self, max_examples=50000):
        self.examples = []
        self.labels = []
        self.rewards = []
        self.max_examples = max_examples
        self.min_reward_threshold = -0.05  # Minimum reward to keep an example

    def add_example(self, state, action, reward, next_state, done):
        """Add a new training example with reward-based filtering"""
        # Only keep examples with rewards above the threshold
        if reward > self.min_reward_threshold:
            self.examples.append((state, action, reward, next_state, done))
            self.rewards.append(reward)

            # Sort by reward and keep only the top examples
            if len(self.examples) > self.max_examples:
                # Sort by reward (highest first)
                sorted_indices = np.argsort(self.rewards)[::-1]
                # Keep top examples
                self.examples = [self.examples[i] for i in sorted_indices[:self.max_examples]]
                self.rewards = [self.rewards[i] for i in sorted_indices[:self.max_examples]]

                # Update the minimum reward threshold to be the minimum in our kept examples
                self.min_reward_threshold = min(self.rewards)

    def get_batch(self, batch_size):
        """Get a batch of examples, prioritizing better examples"""
        if not self.examples:
            return None

        # Calculate selection probabilities based on rewards
        rewards = np.array(self.rewards)
        # Shift rewards to be positive for probability calculation
        min_reward = min(rewards)
        shifted_rewards = rewards - min_reward + 0.1  # Add small constant
        probs = shifted_rewards / shifted_rewards.sum()

        # Sample batch indices with reward-based probabilities
        indices = np.random.choice(
            len(self.examples),
            size=min(batch_size, len(self.examples)),
            p=probs,
            replace=False
        )

        # Create batch
        batch = [self.examples[i] for i in indices]
        states, actions, rewards, next_states, dones = zip(*batch)

        return {
            'states': np.array(states),
            'actions': np.array(actions),
            'rewards': np.array(rewards),
            'next_states': np.array(next_states),
            'dones': np.array(dones)
        }

    def __len__(self):
        return len(self.examples)
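

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the model definition). The shapes
# below (a flat 64-feature state, 3 actions, a 1000-example buffer) are
# arbitrary example values chosen for a quick smoke test, not values implied
# by the module itself; substitute your own input_shape and action count.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Build a 1D-input variant of the model (the conv backbone is skipped
    # because channels == 1 for a flat feature vector)
    model = EnhancedCNN(input_shape=64, n_actions=3)
    model.eval()

    # Forward pass with a dummy batch of two states
    dummy_states = torch.randn(2, 64, device=model.device)
    with torch.no_grad():
        q_values, extrema, price_preds, refined, advanced = model(dummy_states)
    logger.info(f"Q-values shape: {tuple(q_values.shape)}")           # (2, 3)
    logger.info(f"Extrema head shape: {tuple(extrema.shape)}")        # (2, 3)
    logger.info(f"Refined features shape: {tuple(refined.shape)}")    # (2, 768)

    # Greedy action selection for a single state
    action = model.act(np.random.randn(64).astype(np.float32), explore=False)
    logger.info(f"Selected action: {action}")

    # Reward-filtered replay buffer usage
    dataset = ExampleSiftingDataset(max_examples=1000)
    for _ in range(32):
        s = np.random.randn(64).astype(np.float32)
        s_next = np.random.randn(64).astype(np.float32)
        dataset.add_example(s, action=np.random.randint(3),
                            reward=float(np.random.randn()),
                            next_state=s_next, done=False)
    batch = dataset.get_batch(batch_size=8)
    if batch is not None:
        logger.info(f"Sampled batch states shape: {batch['states'].shape}")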