import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
import time
import logging
import torch.nn.functional as F
from typing import List, Tuple, Dict, Any, Optional, Union
from datetime import datetime

# Configure logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class ResidualBlock(nn.Module):
    """
    Residual block with pre-activation (BatchNorm -> ReLU -> Conv)
    """
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.bn1 = nn.BatchNorm1d(in_channels)
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)

        # Shortcut connection to match dimensions
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out)
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out += shortcut
        return out

class SelfAttention(nn.Module):
    """
    Self-attention mechanism for sequential data
    """
    def __init__(self, dim):
        super(SelfAttention, self).__init__()
        self.query = nn.Linear(dim, dim)
        self.key = nn.Linear(dim, dim)
        self.value = nn.Linear(dim, dim)
        self.scale = torch.sqrt(torch.tensor(dim, dtype=torch.float32))

    def forward(self, x):
        # x shape: [batch_size, seq_len, dim]
        batch_size, seq_len, dim = x.size()

        q = self.query(x)  # [batch_size, seq_len, dim]
        k = self.key(x)    # [batch_size, seq_len, dim]
        v = self.value(x)  # [batch_size, seq_len, dim]

        # Calculate attention scores
        scores = torch.matmul(q, k.transpose(-2, -1)) / self.scale  # [batch_size, seq_len, seq_len]

        # Apply softmax to get attention weights
        attention = F.softmax(scores, dim=-1)  # [batch_size, seq_len, seq_len]

        # Apply attention to values
        out = torch.matmul(attention, v)  # [batch_size, seq_len, dim]

        return out, attention
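# Shape sketch (illustrative only, not part of the original module): a batch of 2
# sequences of length 16 with dim=1024 keeps its shape through SelfAttention,
# while the attention map is [batch, seq_len, seq_len].
#
#   attn = SelfAttention(dim=1024)
#   x = torch.randn(2, 16, 1024)
#   out, weights = attn(x)
#   assert out.shape == (2, 16, 1024)
#   assert weights.shape == (2, 16, 16)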
class EnhancedCNN(nn.Module):
    """
    Enhanced CNN model with residual connections and attention mechanisms
    for improved trading decision making
    """
    def __init__(self, input_shape, n_actions, confidence_threshold=0.5):
        super(EnhancedCNN, self).__init__()

        # Store dimensions
        self.input_shape = input_shape
        self.n_actions = n_actions
        self.confidence_threshold = confidence_threshold

        # Training data storage
        self.training_data = []

        # Calculate input dimensions
        if isinstance(input_shape, (list, tuple)):
            if len(input_shape) == 3:
                # [channels, height, width]
                self.channels, self.height, self.width = input_shape
                self.feature_dim = self.height * self.width
            elif len(input_shape) == 2:
                # [timeframes, features]
                self.channels = input_shape[0]
                self.features = input_shape[1]
                self.feature_dim = self.features * self.channels
            elif len(input_shape) == 1:
                # [features]
                self.channels = 1
                self.features = input_shape[0]
                self.feature_dim = self.features
            else:
                raise ValueError(f"Unsupported input shape: {input_shape}")
        else:
            # Single integer
            self.channels = 1
            self.features = input_shape
            self.feature_dim = input_shape

        # Build network
        self._build_network()

        # Initialize device
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

        logger.info(f"EnhancedCNN initialized with input shape: {input_shape}, actions: {n_actions}")

    def _build_network(self):
        """Build the ULTRA MASSIVE enhanced neural network for maximum learning capacity"""
        # ULTRA MASSIVE SCALED ARCHITECTURE for maximum learning (up to ~100M parameters)
        if self.channels > 1:
            # Ultra massive convolutional backbone with much deeper residual blocks
            self.conv_layers = nn.Sequential(
                # Initial ultra large conv block
                nn.Conv1d(self.channels, 1024, kernel_size=7, padding=3),  # Ultra wide initial layer (increased from 512)
                nn.BatchNorm1d(1024),
                nn.ReLU(),
                nn.Dropout(0.1),

                # First residual stage - 1024 channels (increased from 512)
                ResidualBlock(1024, 1536),  # Increased from 768
                ResidualBlock(1536, 1536),
                ResidualBlock(1536, 1536),
                ResidualBlock(1536, 1536),  # Additional layer
                nn.MaxPool1d(kernel_size=2, stride=2),
                nn.Dropout(0.2),

                # Second residual stage - 1536 to 2048 channels (increased from 768 to 1024)
                ResidualBlock(1536, 2048),
                ResidualBlock(2048, 2048),
                ResidualBlock(2048, 2048),
                ResidualBlock(2048, 2048),  # Additional layer
                nn.MaxPool1d(kernel_size=2, stride=2),
                nn.Dropout(0.25),

                # Third residual stage - 2048 to 3072 channels (increased from 1024 to 1536)
                ResidualBlock(2048, 3072),
                ResidualBlock(3072, 3072),
                ResidualBlock(3072, 3072),
                ResidualBlock(3072, 3072),  # Additional layer
                nn.MaxPool1d(kernel_size=2, stride=2),
                nn.Dropout(0.3),

                # Fourth residual stage - 3072 to 4096 channels (increased from 1536 to 2048)
                ResidualBlock(3072, 4096),
                ResidualBlock(4096, 4096),
                ResidualBlock(4096, 4096),
                ResidualBlock(4096, 4096),  # Additional layer
                nn.MaxPool1d(kernel_size=2, stride=2),
                nn.Dropout(0.3),

                # Fifth residual stage - ULTRA MASSIVE 4096 to 6144 channels (increased from 2048 to 3072)
                ResidualBlock(4096, 6144),
                ResidualBlock(6144, 6144),
                ResidualBlock(6144, 6144),
                ResidualBlock(6144, 6144),
                nn.AdaptiveAvgPool1d(1)  # Global average pooling
            )

            # Ultra massive feature dimension after conv layers
            self.conv_features = 6144  # Increased from 3072
        else:
            # For 1D vectors, use ultra massive dense preprocessing
            self.conv_layers = None
            self.conv_features = 0

        # ULTRA MASSIVE fully connected feature extraction layers
        if self.conv_layers is None:
            # For 1D inputs - ultra massive feature extraction
            self.fc1 = nn.Linear(self.feature_dim, 6144)  # Increased from 3072
            self.features_dim = 6144  # Increased from 3072
        else:
            # For data processed by ultra massive conv layers
            self.fc1 = nn.Linear(self.conv_features, 6144)  # Increased from 3072
            self.features_dim = 6144  # Increased from 3072

        # ULTRA MASSIVE common feature extraction with multiple deep layers
        self.fc_layers = nn.Sequential(
            self.fc1,
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(6144, 6144),  # Keep ultra massive width (increased from 3072)
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(6144, 4096),  # Ultra wide hidden layer (increased from 2560)
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(4096, 3072),  # Still very wide (increased from 2048)
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(3072, 2048),  # Large hidden layer (increased from 1536)
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(2048, 1024),  # Final feature representation (kept at 1024 to align with the attention layers)
            nn.ReLU()
        )

        # Multiple specialized attention mechanisms (larger capacity)
        self.price_attention = SelfAttention(1024)  # Keeping 1024
        self.volume_attention = SelfAttention(1024)
        self.trend_attention = SelfAttention(1024)
        self.volatility_attention = SelfAttention(1024)
        self.momentum_attention = SelfAttention(1024)  # Additional attention
        self.microstructure_attention = SelfAttention(1024)  # Additional attention

        # Ultra massive attention fusion layer
        self.attention_fusion = nn.Sequential(
            nn.Linear(1024 * 6, 4096),  # Combine all 6 attention outputs (increased from 2048)
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(4096, 3072),  # Increased from 1536
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(3072, 1024)  # Keeping 1024
        )
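        # Sizing note (illustrative estimate): the fully connected stack above alone is on
        # the order of 10^8 parameters. A quick way to check the real count after
        # construction, using standard PyTorch and nothing specific to this module:
        #
        #   total_params = sum(p.numel() for p in self.parameters())
        #   logger.info(f"EnhancedCNN parameter count: {total_params:,}")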
        # ULTRA MASSIVE dueling architecture with much deeper networks
        self.advantage_stream = nn.Sequential(
            nn.Linear(1024, 1536),  # Increased from 768
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1536, 1024),  # Increased from 512
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),  # Increased from 256
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),  # Increased from 128
            nn.ReLU(),
            nn.Linear(256, self.n_actions)
        )

        self.value_stream = nn.Sequential(
            nn.Linear(1024, 1536),  # Increased from 768
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1536, 1024),  # Increased from 512
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),  # Increased from 256
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),  # Increased from 128
            nn.ReLU(),
            nn.Linear(256, 1)
        )

        # ULTRA MASSIVE extrema detection head with deeper ensemble predictions
        self.extrema_head = nn.Sequential(
            nn.Linear(1024, 1536),  # Increased from 768
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1536, 1024),  # Increased from 512
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),  # Increased from 256
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),  # Increased from 128
            nn.ReLU(),
            nn.Linear(256, 3)  # 0=bottom, 1=top, 2=neither
        )

        # ULTRA MASSIVE price direction prediction head
        # Outputs single direction and confidence values
        self.price_direction_head = nn.Sequential(
            nn.Linear(1024, 1024),  # Increased from 512
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),  # Increased from 256
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),  # Increased from 128
            nn.ReLU(),
            nn.Linear(256, 2)  # [direction, confidence]
        )

        # MULTI-TIMEFRAME PRICE VECTOR PREDICTION HEADS
        # Short-term: 1-5 minutes prediction
        self.short_term_vector_head = nn.Sequential(
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 4)  # [direction, confidence, magnitude, volatility_risk]
        )

        # Mid-term: 5-30 minutes prediction
        self.mid_term_vector_head = nn.Sequential(
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 4)  # [direction, confidence, magnitude, volatility_risk]
        )

        # Long-term: 30-120 minutes prediction
        self.long_term_vector_head = nn.Sequential(
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 4)  # [direction, confidence, magnitude, volatility_risk]
        )

        # Direction activation (tanh for -1 to 1)
        self.direction_activation = nn.Tanh()
        # Confidence activation (sigmoid for 0 to 1)
        self.confidence_activation = nn.Sigmoid()
        # Magnitude activation (sigmoid for 0 to 1, will be scaled)
        self.magnitude_activation = nn.Sigmoid()
        # Volatility risk activation (sigmoid for 0 to 1)
        self.volatility_activation = nn.Sigmoid()

        # INFERENCE RECORD STORAGE for long-term training
        self.inference_records = []
        self.max_inference_records = 50
        self.training_loss_history = []

        # ULTRA MASSIVE value prediction with ensemble approaches
        self.price_pred_value = nn.Sequential(
            nn.Linear(1024, 1536),  # Increased from 768
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1536, 1024),  # Increased from 512
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 8)  # More granular % change predictions for different timeframes
        )
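        # Dueling readout (see forward()): the advantage and value streams are combined as
        # Q(s, a) = V(s) + A(s, a) - mean_a A(s, a), so only relative advantages change the
        # action ranking. Tiny illustrative example, not part of the module:
        #
        #   value = torch.tensor([[2.0]])
        #   advantage = torch.tensor([[1.0, 0.0, -1.0]])
        #   q = value + advantage - advantage.mean(dim=1, keepdim=True)   # -> [[3., 2., 1.]]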
        # Additional specialized prediction heads for better accuracy
        # Volatility prediction head
        self.volatility_head = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 5)  # Very low, low, medium, high, very high volatility
        )

        # Support/Resistance level detection head
        self.support_resistance_head = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 6)  # Strong support, weak support, neutral, weak resistance, strong resistance, breakout
        )

        # Market regime classification head
        self.market_regime_head = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 7)  # Bull trend, bear trend, sideways, volatile up, volatile down, accumulation, distribution
        )

        # Risk assessment head
        self.risk_head = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 4)  # Low risk, medium risk, high risk, extreme risk
        )

    def _memory_barrier(self, tensor: torch.Tensor) -> torch.Tensor:
        """Create a memory barrier to prevent in-place operation issues"""
        return tensor.detach().clone().requires_grad_(tensor.requires_grad)

    def _check_rebuild_network(self, features):
        """DEPRECATED: Network should have fixed architecture - no runtime rebuilding"""
        if features != self.feature_dim:
            logger.error(f"CRITICAL: Input feature dimension mismatch! Expected {self.feature_dim}, got {features}")
            logger.error("This indicates a bug in data preprocessing - input should be fixed size!")
            logger.error("Network architecture should NOT change at runtime!")
            raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {features}")
        return False
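    # Input contract for forward(), summarized from the shape handling below (the exact
    # preprocessing lives in the caller): 2D [batch, features] for the dense configuration,
    # 3D [batch, timeframes, features] or 4D [batch, timeframes, window, features] for the
    # convolutional configuration; the flattened per-sample size must equal self.feature_dim.
    # Illustrative shapes (not executed here):
    #
    #   dense = EnhancedCNN(input_shape=(500,), n_actions=3)     # channels == 1 -> fc path
    #   dense(torch.randn(8, 500))                               # OK
    #   conv = EnhancedCNN(input_shape=(5, 100), n_actions=3)    # channels > 1 -> conv path
    #   conv(torch.randn(8, 5, 100))                             # OK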
    def forward(self, x):
        """Forward pass through the ULTRA MASSIVE network"""
        batch_size = x.size(0)

        # Validate input dimensions to prevent zero-element tensor issues
        if x.numel() == 0:
            logger.error(f"Forward pass received empty tensor with shape {x.shape}")
            # Return default outputs for all 6 expected values to prevent crash
            default_q_values = torch.zeros(batch_size, self.n_actions, device=x.device)
            default_extrema = torch.zeros(batch_size, 3, device=x.device)  # bottom/top/neither
            default_price_pred = torch.zeros(batch_size, 2, device=x.device)  # [direction, confidence]
            default_features = torch.zeros(batch_size, 1024, device=x.device)
            default_advanced = torch.zeros(batch_size, 5, device=x.device)  # volatility classes
            default_multi_timeframe = torch.zeros(batch_size, 12, device=x.device)
            return (default_q_values, default_extrema, default_price_pred,
                    default_features, default_advanced, default_multi_timeframe)

        # Check for zero feature dimensions
        if len(x.shape) > 1 and any(dim == 0 for dim in x.shape[1:]):
            logger.error(f"Forward pass received tensor with zero feature dimensions: {x.shape}")
            # Return default outputs for all 6 expected values to prevent crash
            default_q_values = torch.zeros(batch_size, self.n_actions, device=x.device)
            default_extrema = torch.zeros(batch_size, 3, device=x.device)  # bottom/top/neither
            default_price_pred = torch.zeros(batch_size, 2, device=x.device)  # [direction, confidence]
            default_features = torch.zeros(batch_size, 1024, device=x.device)
            default_advanced = torch.zeros(batch_size, 5, device=x.device)  # volatility classes
            default_multi_timeframe = torch.zeros(batch_size, 12, device=x.device)
            return (default_q_values, default_extrema, default_price_pred,
                    default_features, default_advanced, default_multi_timeframe)

        # Process different input shapes
        if len(x.shape) > 2:
            # Handle 4D input [batch, timeframes, window, features] or 3D input [batch, timeframes, features]
            if len(x.shape) == 4:
                # Flatten window and features: [batch, timeframes, window*features]
                x = x.reshape(batch_size, x.size(1), -1)

            if self.conv_layers is not None:
                # Now x is 3D: [batch, timeframes, features]
                x_reshaped = x

                # Validate input dimensions (should be fixed)
                total_features = x_reshaped.size(1) * x_reshaped.size(2)
                if total_features != self.feature_dim:
                    logger.error(f"Input dimension mismatch: expected {self.feature_dim}, got {total_features}")
                    raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {total_features}")

                # Apply ultra massive convolutions
                x_conv = self.conv_layers(x_reshaped)
                # Flatten: [batch, channels, 1] -> [batch, channels]
                x_flat = x_conv.reshape(batch_size, -1)
            else:
                # If no conv layers, just flatten
                x_flat = x.reshape(batch_size, -1)
        else:
            # For 2D input [batch, features]
            x_flat = x

            # Validate input dimensions (should be fixed)
            if x_flat.size(1) != self.feature_dim:
                logger.error(f"Input dimension mismatch: expected {self.feature_dim}, got {x_flat.size(1)}")
                raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {x_flat.size(1)}")

        # Apply ULTRA MASSIVE FC layers to get base features
        features = self.fc_layers(x_flat)  # [batch, 1024]

        # Apply multiple specialized attention mechanisms
        features_3d = features.unsqueeze(1)  # [batch, 1, 1024]

        # Get attention-refined features for different aspects
        price_features, _ = self.price_attention(features_3d)
        price_features = price_features.squeeze(1)  # [batch, 1024]

        volume_features, _ = self.volume_attention(features_3d)
        volume_features = volume_features.squeeze(1)  # [batch, 1024]

        trend_features, _ = self.trend_attention(features_3d)
        trend_features = trend_features.squeeze(1)  # [batch, 1024]

        volatility_features, _ = self.volatility_attention(features_3d)
        volatility_features = volatility_features.squeeze(1)  # [batch, 1024]

        momentum_features, _ = self.momentum_attention(features_3d)
        momentum_features = momentum_features.squeeze(1)  # [batch, 1024]

        microstructure_features, _ = self.microstructure_attention(features_3d)
        microstructure_features = microstructure_features.squeeze(1)  # [batch, 1024]

        # Fuse all attention outputs
        combined_attention = torch.cat([
            price_features, volume_features, trend_features,
            volatility_features, momentum_features, microstructure_features
        ], dim=1)  # [batch, 1024*6]

        # Apply attention fusion to get final refined features
        features_refined = self.attention_fusion(combined_attention)  # [batch, 1024]

        # Calculate advantage and value (Dueling DQN architecture)
        advantage = self.advantage_stream(features_refined)
        value = self.value_stream(features_refined)

        # Combine for Q-values (Dueling architecture)
        q_values = value + advantage - advantage.mean(dim=1, keepdim=True)

        # Get ultra massive ensemble of predictions
        # Extrema predictions (bottom/top/neither detection)
        extrema_pred = self.extrema_head(features_refined)

        # Price direction predictions
        price_direction_raw = self.price_direction_head(features_refined)

        # Apply separate activations to direction and confidence
        direction = self.direction_activation(price_direction_raw[:, 0:1])    # -1 to 1
        confidence = self.confidence_activation(price_direction_raw[:, 1:2])  # 0 to 1
        price_direction_pred = torch.cat([direction, confidence], dim=1)  # [batch, 2]

        # MULTI-TIMEFRAME PRICE VECTOR PREDICTIONS
        short_term_vector_pred = self.short_term_vector_head(features_refined)
        mid_term_vector_pred = self.mid_term_vector_head(features_refined)
        long_term_vector_pred = self.long_term_vector_head(features_refined)
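        # Note on the raw head outputs above: each *_vector_pred is [batch, 4] in the order
        # [direction, confidence, magnitude, volatility_risk]; the activations below map
        # direction into [-1, 1] (tanh) and the other three components into [0, 1] (sigmoid).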
        # Apply separate activations to direction, confidence, magnitude, volatility_risk
        short_term_direction = self.direction_activation(short_term_vector_pred[:, 0:1])
        short_term_confidence = self.confidence_activation(short_term_vector_pred[:, 1:2])
        short_term_magnitude = self.magnitude_activation(short_term_vector_pred[:, 2:3])
        short_term_volatility_risk = self.volatility_activation(short_term_vector_pred[:, 3:4])

        mid_term_direction = self.direction_activation(mid_term_vector_pred[:, 0:1])
        mid_term_confidence = self.confidence_activation(mid_term_vector_pred[:, 1:2])
        mid_term_magnitude = self.magnitude_activation(mid_term_vector_pred[:, 2:3])
        mid_term_volatility_risk = self.volatility_activation(mid_term_vector_pred[:, 3:4])

        long_term_direction = self.direction_activation(long_term_vector_pred[:, 0:1])
        long_term_confidence = self.confidence_activation(long_term_vector_pred[:, 1:2])
        long_term_magnitude = self.magnitude_activation(long_term_vector_pred[:, 2:3])
        long_term_volatility_risk = self.volatility_activation(long_term_vector_pred[:, 3:4])

        # Package multi-timeframe predictions into a single tensor
        multi_timeframe_predictions = torch.cat([
            short_term_direction, short_term_confidence, short_term_magnitude, short_term_volatility_risk,
            mid_term_direction, mid_term_confidence, mid_term_magnitude, mid_term_volatility_risk,
            long_term_direction, long_term_confidence, long_term_magnitude, long_term_volatility_risk
        ], dim=1)  # [batch, 4*3]

        price_values = self.price_pred_value(features_refined)

        # Additional specialized predictions for enhanced accuracy
        volatility_pred = self.volatility_head(features_refined)
        support_resistance_pred = self.support_resistance_head(features_refined)
        market_regime_pred = self.market_regime_head(features_refined)
        risk_pred = self.risk_head(features_refined)

        # Stash the full set of specialized predictions so act() can log them;
        # only the volatility tensor is returned below for DQN compatibility
        self.last_advanced_predictions = {
            'volatility': volatility_pred,
            'support_resistance': support_resistance_pred,
            'market_regime': market_regime_pred,
            'risk_assessment': risk_pred
        }

        # Use the price direction prediction directly (already [batch, 2])
        price_direction_tensor = price_direction_pred

        # Package additional predictions into a single tensor (use volatility as primary)
        # For compatibility with DQN agent, we return volatility_pred as the advanced prediction tensor
        advanced_pred_tensor = volatility_pred

        return q_values, extrema_pred, price_direction_tensor, features_refined, advanced_pred_tensor, multi_timeframe_predictions
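    # forward() returns a 6-tuple:
    #   (q_values, extrema_pred, price_direction, features_refined, advanced_pred, multi_timeframe)
    # where multi_timeframe is [batch, 12] laid out as
    #   [0:4]  short-term  [direction, confidence, magnitude, volatility_risk]
    #   [4:8]  mid-term    [direction, confidence, magnitude, volatility_risk]
    #   [8:12] long-term   [direction, confidence, magnitude, volatility_risk]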
    def act(self, state, explore=True) -> Tuple[int, float, List[float]]:
        """Enhanced action selection with ultra massive model predictions"""
        self.eval()

        # Accept both NumPy arrays and already-built torch tensors
        if isinstance(state, torch.Tensor):
            state_tensor = state.detach().to(self.device)
            if state_tensor.dim() == 1:
                state_tensor = state_tensor.unsqueeze(0)
        else:
            # Convert to tensor directly on the target device to avoid intermediate CPU copies
            state_tensor = torch.as_tensor(state, dtype=torch.float32, device=self.device)
            if state_tensor.dim() == 1:
                state_tensor = state_tensor.unsqueeze(0)

        with torch.no_grad():
            q_values, extrema_pred, price_direction_predictions, features, advanced_predictions, multi_timeframe_predictions = self(state_tensor)

            # Process price direction predictions
            if price_direction_predictions is not None:
                self.process_price_direction_predictions(price_direction_predictions)

            # Apply softmax to get action probabilities
            action_probs_tensor = torch.softmax(q_values, dim=1)
            action_idx = int(torch.argmax(action_probs_tensor, dim=1).item())
            confidence = float(action_probs_tensor[0, action_idx].item())  # Confidence of the chosen action
            action_probs = action_probs_tensor.squeeze(0).tolist()  # Convert to list of floats for return

        # Log advanced predictions for better decision making
        # (forward() stashes the specialized head outputs in self.last_advanced_predictions)
        advanced = getattr(self, 'last_advanced_predictions', None)
        if hasattr(self, '_log_predictions') and self._log_predictions and advanced is not None:
            # Log volatility prediction
            volatility = torch.softmax(advanced['volatility'], dim=1).squeeze(0)
            volatility_class = int(torch.argmax(volatility).item())
            volatility_labels = ['Very Low', 'Low', 'Medium', 'High', 'Very High']

            # Log support/resistance prediction
            sr = torch.softmax(advanced['support_resistance'], dim=1).squeeze(0)
            sr_class = int(torch.argmax(sr).item())
            sr_labels = ['Strong Support', 'Weak Support', 'Neutral', 'Weak Resistance', 'Strong Resistance', 'Breakout']

            # Log market regime prediction
            regime = torch.softmax(advanced['market_regime'], dim=1).squeeze(0)
            regime_class = int(torch.argmax(regime).item())
            regime_labels = ['Bull Trend', 'Bear Trend', 'Sideways', 'Volatile Up', 'Volatile Down', 'Accumulation', 'Distribution']

            # Log risk assessment
            risk = torch.softmax(advanced['risk_assessment'], dim=1).squeeze(0)
            risk_class = int(torch.argmax(risk).item())
            risk_labels = ['Low Risk', 'Medium Risk', 'High Risk', 'Extreme Risk']

            logger.info("ULTRA MASSIVE Model Predictions:")
            logger.info(f"  Volatility: {volatility_labels[volatility_class]} ({volatility[volatility_class]:.3f})")
            logger.info(f"  Support/Resistance: {sr_labels[sr_class]} ({sr[sr_class]:.3f})")
            logger.info(f"  Market Regime: {regime_labels[regime_class]} ({regime[regime_class]:.3f})")
            logger.info(f"  Risk Level: {risk_labels[risk_class]} ({risk[risk_class]:.3f})")

        return action_idx, confidence, action_probs

    def process_price_direction_predictions(self, price_direction_pred: torch.Tensor) -> Optional[Dict[str, float]]:
        """
        Process price direction predictions into a standardized format
        (compatible with the orchestrator's price vector system)

        Args:
            price_direction_pred: Tensor of shape (batch_size, 2) or (2,) containing [direction, confidence],
                                  or the first two slots of a multi-timeframe prediction tensor

        Returns:
            Dict with direction (-1 to 1) and confidence (0 to 1), or None if unavailable
        """
        try:
            if price_direction_pred is None or price_direction_pred.numel() == 0:
                return None

            # Reduce to a single sample's [direction, confidence] pair
            if price_direction_pred.dim() > 1:
                price_direction_pred = price_direction_pred[0]

            # Extract direction and confidence values
            direction_value = float(price_direction_pred[0].item())   # -1 to 1
            confidence_value = float(price_direction_pred[1].item())  # 0 to 1

            processed_directions = {
                'direction': direction_value,
                'confidence': confidence_value
            }

            # Store for later access
            self.last_price_direction = processed_directions

            return processed_directions

        except Exception as e:
            logger.error(f"Error processing price direction predictions: {e}")
            return None

    def get_price_direction_vector(self) -> Dict[str, float]:
        """
        Get the current price direction and confidence

        Returns:
            Dict with direction (-1 to 1) and confidence (0 to 1)
        """
        return getattr(self, 'last_price_direction', {})
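    # Usage sketch for act() (assumes a caller that already builds a correctly sized state
    # vector; names and sizes here are illustrative, not from this module):
    #
    #   model = EnhancedCNN(input_shape=(500,), n_actions=3)
    #   state = np.random.randn(500).astype(np.float32)
    #   action_idx, confidence, action_probs = model.act(state)
    #   if confidence >= model.confidence_threshold:
    #       ...  # act on the signal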
    def get_price_direction_summary(self) -> Dict[str, Any]:
        """
        Get a summary of price direction prediction

        Returns:
            Dict containing direction and confidence information
        """
        try:
            last_direction = getattr(self, 'last_price_direction', {})
            if not last_direction:
                return {
                    'direction_value': 0.0,
                    'confidence_value': 0.0,
                    'direction_label': "SIDEWAYS",
                    'discrete_direction': 0,
                    'strength': 0.0,
                    'weighted_strength': 0.0
                }

            direction_value = last_direction['direction']
            confidence_value = last_direction['confidence']

            # Convert to discrete direction
            if direction_value > 0.1:
                direction_label = "UP"
                discrete_direction = 1
            elif direction_value < -0.1:
                direction_label = "DOWN"
                discrete_direction = -1
            else:
                direction_label = "SIDEWAYS"
                discrete_direction = 0

            return {
                'direction_value': float(direction_value),
                'confidence_value': float(confidence_value),
                'direction_label': direction_label,
                'discrete_direction': discrete_direction,
                'strength': abs(float(direction_value)),
                'weighted_strength': abs(float(direction_value)) * float(confidence_value)
            }

        except Exception as e:
            logger.error(f"Error calculating price direction summary: {e}")
            return {
                'direction_value': 0.0,
                'confidence_value': 0.0,
                'direction_label': "SIDEWAYS",
                'discrete_direction': 0,
                'strength': 0.0,
                'weighted_strength': 0.0
            }

    def add_training_data(self, state, action, reward, position_pnl=0.0, has_position=False):
        """
        Add training data to the model's training buffer with position-based reward enhancement

        Args:
            state: Input state
            action: Action taken
            reward: Base reward received
            position_pnl: Current position P&L (0.0 if no position)
            has_position: Whether we currently have an open position
        """
        try:
            # Enhance reward based on position status
            enhanced_reward = self._calculate_position_enhanced_reward(
                reward, action, position_pnl, has_position
            )

            self.training_data.append({
                'state': state,
                'action': action,
                'reward': enhanced_reward,
                'base_reward': reward,  # Keep original reward for analysis
                'position_pnl': position_pnl,
                'has_position': has_position,
                'timestamp': time.time()
            })

            # Keep only the last 1000 training samples
            if len(self.training_data) > 1000:
                self.training_data = self.training_data[-1000:]

        except Exception as e:
            logger.error(f"Error adding training data: {e}")

    def _calculate_position_enhanced_reward(self, base_reward, action, position_pnl, has_position):
        """
        Calculate position-enhanced reward to incentivize profitable trades and closing losing ones

        Args:
            base_reward: Original reward from price prediction accuracy
            action: Action taken ('BUY', 'SELL', 'HOLD')
            position_pnl: Current position P&L
            has_position: Whether we have an open position

        Returns:
            Enhanced reward that incentivizes profitable behavior
        """
        try:
            enhanced_reward = base_reward

            if has_position and position_pnl != 0.0:
                # Position-based reward adjustments
                pnl_factor = position_pnl / 100.0  # Normalize P&L to reasonable scale

                if position_pnl > 0:
                    # Profitable position
                    if action == "HOLD":
                        # Reward holding profitable positions (let winners run)
                        enhanced_reward += abs(pnl_factor) * 0.5
                    elif action in ["BUY", "SELL"]:
                        # Moderate reward for taking action on profitable positions
                        enhanced_reward += abs(pnl_factor) * 0.3
                elif position_pnl < 0:
                    # Losing position
                    if action == "HOLD":
                        # Penalty for holding losing positions (cut losses)
                        enhanced_reward -= abs(pnl_factor) * 0.8
                    elif action in ["BUY", "SELL"]:
                        # Reward for taking action to close losing positions
                        enhanced_reward += abs(pnl_factor) * 0.6

            # Ensure reward doesn't become extreme
            enhanced_reward = max(-5.0, min(5.0, enhanced_reward))

            return enhanced_reward

        except Exception as e:
            logger.error(f"Error calculating position-enhanced reward: {e}")
            return base_reward

    def save(self, path):
        """Save model weights and architecture"""
        os.makedirs(os.path.dirname(path), exist_ok=True)
        torch.save({
            'state_dict': self.state_dict(),
            'input_shape': self.input_shape,
            'n_actions': self.n_actions,
            'feature_dim': self.feature_dim,
            'confidence_threshold': self.confidence_threshold
        }, f"{path}.pt")
        logger.info(f"Enhanced CNN model saved to {path}.pt")
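    # Round-trip sketch (the path is illustrative, not from this module): save() appends
    # ".pt", and load() expects the same base path:
    #
    #   model.save("models/enhanced_cnn/checkpoint")      # writes checkpoint.pt
    #   restored = EnhancedCNN(model.input_shape, model.n_actions)
    #   restored.load("models/enhanced_cnn/checkpoint")   # rebuilds the network and loads weights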
    def load(self, path):
        """Load model weights and architecture"""
        try:
            checkpoint = torch.load(f"{path}.pt", map_location=self.device)
            self.input_shape = checkpoint['input_shape']
            self.n_actions = checkpoint['n_actions']
            self.feature_dim = checkpoint['feature_dim']
            if 'confidence_threshold' in checkpoint:
                self.confidence_threshold = checkpoint['confidence_threshold']
            self._build_network()
            self.load_state_dict(checkpoint['state_dict'])
            self.to(self.device)
            logger.info(f"Enhanced CNN model loaded from {path}.pt")
            return True
        except Exception as e:
            logger.error(f"Error loading model: {str(e)}")
            return False

    def store_inference_record(self, input_data, prediction_output, metadata=None):
        """Store inference record for long-term training"""
        try:
            record = {
                'timestamp': datetime.now(),
                'input_data': input_data.clone().detach() if isinstance(input_data, torch.Tensor) else input_data,
                'prediction_output': {
                    'q_values': prediction_output[0].clone().detach() if prediction_output[0] is not None else None,
                    'extrema_pred': prediction_output[1].clone().detach() if prediction_output[1] is not None else None,
                    'price_direction': prediction_output[2].clone().detach() if prediction_output[2] is not None else None,
                    'multi_timeframe': prediction_output[5].clone().detach() if len(prediction_output) > 5 and prediction_output[5] is not None else None
                },
                'metadata': metadata or {}
            }

            self.inference_records.append(record)

            # Keep only the last max_inference_records
            if len(self.inference_records) > self.max_inference_records:
                self.inference_records = self.inference_records[-self.max_inference_records:]

            logger.debug(f"CNN: Stored inference record. Total records: {len(self.inference_records)}")

        except Exception as e:
            logger.error(f"Error storing CNN inference record: {e}")
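    # Metadata sketch for long-term training (these are the keys train_on_stored_records()
    # looks for; the numbers are illustrative, and time_diffs are consumed as minutes by the
    # 60-minute decay in calculate_price_vector_loss()):
    #
    #   model.store_inference_record(x, outputs, metadata={
    #       'actual_price_changes': {'short_term': 0.12, 'mid_term': -0.40, 'long_term': 1.10},  # % change
    #       'time_diffs': {'short_term': 3.0, 'mid_term': 20.0, 'long_term': 90.0}
    #   })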
    def calculate_price_vector_loss(self, predicted_vectors, actual_price_changes, time_diffs):
        """
        Calculate price vector loss for multi-timeframe predictions

        Args:
            predicted_vectors: Dict with 'short_term', 'mid_term', 'long_term' predictions
            actual_price_changes: Dict with corresponding actual price changes
            time_diffs: Dict with time differences for each timeframe

        Returns:
            Total loss tensor for backpropagation
        """
        try:
            total_loss = None
            loss_count = 0

            timeframes = ['short_term', 'mid_term', 'long_term']
            weights = [1.0, 0.8, 0.6]  # Weight short-term predictions higher

            for timeframe, weight in zip(timeframes, weights):
                if timeframe in predicted_vectors and timeframe in actual_price_changes:
                    pred_vector = predicted_vectors[timeframe]
                    actual_change = actual_price_changes[timeframe]
                    time_diff = time_diffs.get(timeframe, 1.0)

                    # Keep predictions as tensors so gradients can flow back into the network
                    if not isinstance(pred_vector, torch.Tensor):
                        pred_vector = torch.as_tensor(pred_vector, dtype=torch.float32, device=self.device)

                    # Extract prediction components [direction, confidence, magnitude, volatility_risk]
                    pred_direction = pred_vector[0]
                    pred_confidence = pred_vector[1]
                    pred_magnitude = pred_vector[2]
                    pred_volatility = pred_vector[3]

                    # Calculate actual metrics
                    actual_direction = 1.0 if actual_change > 0.05 else -1.0 if actual_change < -0.05 else 0.0
                    actual_magnitude = min(abs(actual_change) / 5.0, 1.0)  # Normalize to 0-1, cap at 5%

                    # Direction loss (most important)
                    if actual_direction != 0.0:
                        direction_error = torch.abs(pred_direction - actual_direction)
                    else:
                        direction_error = torch.abs(pred_direction) * 0.5  # Penalty for predicting movement when there's none

                    # Magnitude loss
                    magnitude_error = torch.abs(pred_magnitude - actual_magnitude)

                    # Confidence calibration loss (confidence should match accuracy)
                    direction_accuracy = 1.0 - (direction_error / 2.0)  # 0 to 1
                    confidence_error = torch.abs(pred_confidence - direction_accuracy)

                    # Time decay factor
                    time_decay = max(0.1, 1.0 - (time_diff / 60.0))  # Decay over 1 hour

                    # Combined loss for this timeframe
                    timeframe_loss = (
                        direction_error * 2.0 +   # Direction is most important
                        magnitude_error * 1.5 +   # Magnitude is important
                        confidence_error * 1.0    # Confidence calibration
                    ) * time_decay * weight

                    total_loss = timeframe_loss if total_loss is None else total_loss + timeframe_loss
                    loss_count += 1

                    logger.debug(f"CNN {timeframe.upper()} VECTOR LOSS: "
                                 f"dir_err={float(direction_error):.3f}, mag_err={float(magnitude_error):.3f}, "
                                 f"conf_err={float(confidence_error):.3f}, total={float(timeframe_loss):.3f}")

            if loss_count > 0 and total_loss is not None:
                # Average over timeframes; returning the graph-connected tensor (rather than
                # rebuilding it from Python floats) lets backward() reach the prediction heads
                return total_loss / loss_count
            else:
                return torch.tensor(0.0, dtype=torch.float32, device=self.device, requires_grad=True)

        except Exception as e:
            logger.error(f"Error calculating CNN price vector loss: {e}")
            return torch.tensor(0.0, dtype=torch.float32, device=self.device, requires_grad=True)
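    # Training sketch (optimizer choice and learning rate are illustrative, not prescribed
    # by this module): once enough inference records have been annotated with outcomes,
    #
    #   optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    #   avg_loss = model.train_on_stored_records(optimizer, min_records=10)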
    def train_on_stored_records(self, optimizer, min_records=10):
        """
        Train on stored inference records for long-term price vector prediction

        Args:
            optimizer: PyTorch optimizer
            min_records: Minimum number of records needed for training

        Returns:
            Average training loss
        """
        try:
            if len(self.inference_records) < min_records:
                logger.debug(f"CNN: Not enough records for long-term training ({len(self.inference_records)} < {min_records})")
                return 0.0

            self.train()
            total_loss = 0.0
            trained_count = 0

            # Process records in batches
            batch_size = min(8, len(self.inference_records))

            for i in range(0, len(self.inference_records), batch_size):
                batch_records = self.inference_records[i:i+batch_size]

                batch_inputs = []
                batch_targets = []

                for record in batch_records:
                    # Check if we have actual price movement data for this record
                    if 'actual_price_changes' in record['metadata'] and 'time_diffs' in record['metadata']:
                        batch_inputs.append(record['input_data'])
                        batch_targets.append({
                            'actual_price_changes': record['metadata']['actual_price_changes'],
                            'time_diffs': record['metadata']['time_diffs']
                        })

                if not batch_inputs:
                    continue

                # Stack inputs into batch tensor
                if isinstance(batch_inputs[0], torch.Tensor):
                    batch_input_tensor = torch.stack(batch_inputs).to(self.device)
                else:
                    batch_input_tensor = torch.tensor(batch_inputs, dtype=torch.float32, device=self.device)

                optimizer.zero_grad()

                # Forward pass
                q_values, extrema_pred, price_direction_pred, features, advanced_pred, multi_timeframe_pred = self(batch_input_tensor)

                # Calculate price vector losses for the batch
                batch_loss = 0.0
                for j, target in enumerate(batch_targets):
                    # Extract multi-timeframe predictions for this sample
                    sample_multi_pred = multi_timeframe_pred[j] if multi_timeframe_pred is not None else None

                    if sample_multi_pred is not None:
                        predicted_vectors = {
                            'short_term': sample_multi_pred[0:4],   # [direction, confidence, magnitude, volatility]
                            'mid_term': sample_multi_pred[4:8],     # [direction, confidence, magnitude, volatility]
                            'long_term': sample_multi_pred[8:12]    # [direction, confidence, magnitude, volatility]
                        }

                        sample_loss = self.calculate_price_vector_loss(
                            predicted_vectors,
                            target['actual_price_changes'],
                            target['time_diffs']
                        )
                        batch_loss += sample_loss

                if batch_loss > 0:
                    avg_batch_loss = batch_loss / len(batch_targets)
                    avg_batch_loss.backward()

                    # Gradient clipping
                    torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=1.0)
                    optimizer.step()

                    total_loss += avg_batch_loss.item()
                    trained_count += 1

            avg_loss = total_loss / max(trained_count, 1)
            self.training_loss_history.append(avg_loss)

            # Keep only last 100 loss values
            if len(self.training_loss_history) > 100:
                self.training_loss_history = self.training_loss_history[-100:]

            logger.info(f"CNN: Trained on {trained_count} batches from {len(self.inference_records)} stored records. Avg loss: {avg_loss:.4f}")

            return avg_loss

        except Exception as e:
            logger.error(f"Error training CNN on stored records: {e}")
            return 0.0

    def get_multi_timeframe_predictions(self, multi_timeframe_tensor):
        """
        Extract multi-timeframe price vector predictions

        Args:
            multi_timeframe_tensor: Tensor with all timeframe predictions

        Returns:
            Dict with short_term, mid_term, long_term predictions
        """
        try:
            if multi_timeframe_tensor is None:
                return {}

            if isinstance(multi_timeframe_tensor, torch.Tensor):
                if multi_timeframe_tensor.dim() > 1:
                    multi_timeframe_tensor = multi_timeframe_tensor.squeeze(0)

                predictions = {
                    'short_term': {
                        'direction': float(multi_timeframe_tensor[0].item()),
                        'confidence': float(multi_timeframe_tensor[1].item()),
                        'magnitude': float(multi_timeframe_tensor[2].item()),
                        'volatility_risk': float(multi_timeframe_tensor[3].item())
                    },
                    'mid_term': {
                        'direction': float(multi_timeframe_tensor[4].item()),
                        'confidence': float(multi_timeframe_tensor[5].item()),
                        'magnitude': float(multi_timeframe_tensor[6].item()),
                        'volatility_risk': float(multi_timeframe_tensor[7].item())
                    },
                    'long_term': {
                        'direction': float(multi_timeframe_tensor[8].item()),
                        'confidence': float(multi_timeframe_tensor[9].item()),
                        'magnitude': float(multi_timeframe_tensor[10].item()),
                        'volatility_risk': float(multi_timeframe_tensor[11].item())
                    }
                }

                return predictions

            return {}

        except Exception as e:
            logger.debug(f"Error extracting multi-timeframe predictions: {e}")
            return {}
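# End-to-end sketch of the inference-record training loop implemented above (illustrative
# wiring only; the real orchestration lives outside this file):
#
#   model = EnhancedCNN(input_shape=(500,), n_actions=3)
#   outputs = model(torch.randn(1, 500, device=model.device))
#   model.store_inference_record(torch.randn(500), outputs, metadata={})
#   # ... later, once outcomes are known, metadata gains 'actual_price_changes'/'time_diffs'
#   # and train_on_stored_records() turns those records into gradient updates.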
# Additional utility for example sifting
class ExampleSiftingDataset:
    """
    Dataset that selectively keeps high-quality examples for training
    to improve model performance
    """
    def __init__(self, max_examples=50000):
        self.examples = []
        self.labels = []
        self.rewards = []
        self.max_examples = max_examples
        self.min_reward_threshold = -0.05  # Minimum reward to keep an example

    def add_example(self, state, action, reward, next_state, done):
        """Add a new training example with reward-based filtering"""
        # Only keep examples with rewards above the threshold
        if reward > self.min_reward_threshold:
            self.examples.append((state, action, reward, next_state, done))
            self.rewards.append(reward)

            # Sort by reward and keep only the top examples
            if len(self.examples) > self.max_examples:
                # Sort by reward (highest first)
                sorted_indices = np.argsort(self.rewards)[::-1]
                # Keep top examples
                self.examples = [self.examples[i] for i in sorted_indices[:self.max_examples]]
                self.rewards = [self.rewards[i] for i in sorted_indices[:self.max_examples]]

                # Update the minimum reward threshold to be the minimum in our kept examples
                self.min_reward_threshold = min(self.rewards)

    def get_batch(self, batch_size):
        """Get a batch of examples, prioritizing better examples"""
        if not self.examples:
            return None

        # Calculate selection probabilities based on rewards
        rewards = np.array(self.rewards)
        # Shift rewards to be positive for probability calculation
        min_reward = min(rewards)
        shifted_rewards = rewards - min_reward + 0.1  # Add small constant
        probs = shifted_rewards / shifted_rewards.sum()

        # Sample batch indices with reward-based probabilities
        indices = np.random.choice(
            len(self.examples),
            size=min(batch_size, len(self.examples)),
            p=probs,
            replace=False
        )

        # Create batch
        batch = [self.examples[i] for i in indices]
        states, actions, rewards, next_states, dones = zip(*batch)

        return {
            'states': np.array(states),
            'actions': np.array(actions),
            'rewards': np.array(rewards),
            'next_states': np.array(next_states),
            'dones': np.array(dones)
        }

    def __len__(self):
        return len(self.examples)
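# Minimal, self-contained smoke test for ExampleSiftingDataset (the values are illustrative
# only; the production pipeline feeds real transitions from the trading environment).
if __name__ == "__main__":
    sifter = ExampleSiftingDataset(max_examples=100)
    rng = np.random.default_rng(0)
    for _ in range(50):
        state = rng.standard_normal(8).astype(np.float32)
        next_state = rng.standard_normal(8).astype(np.float32)
        # Rewards below the -0.05 threshold are filtered out on insertion
        sifter.add_example(state, action=int(rng.integers(0, 3)),
                           reward=float(rng.normal()), next_state=next_state, done=False)
    batch = sifter.get_batch(16)
    if batch is not None:
        print(f"kept {len(sifter)} examples, sampled batch of {len(batch['actions'])}")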