# gogo2/NN/models/enhanced_cnn.py

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
import time
import logging
import torch.nn.functional as F
from typing import List, Tuple, Dict, Any, Optional, Union
from datetime import datetime

# Module-level logging setup.
# NOTE: basicConfig() runs as an import side effect. It installs a default
# handler on the root logger at INFO level unless the root logger already has
# handlers, in which case it does nothing.
logging.basicConfig(level=logging.INFO)
# Logger named after this module; used by every class below.
logger = logging.getLogger(__name__)

class ResidualBlock(nn.Module):
    """
    Pre-activation residual unit for 1-D feature maps.

    Main branch: BatchNorm -> ReLU -> Conv(k=3) -> BatchNorm -> ReLU -> Conv(k=3).
    The shortcut branch is fed the *activated* input (after the first
    BatchNorm/ReLU). It is a bias-free 1x1 convolution when the stride or the
    channel count changes, and an empty ``nn.Sequential`` (identity) otherwise.

    Args:
        in_channels: Channel count of the incoming tensor.
        out_channels: Channel count produced by the block.
        stride: Stride of the first convolution and of the projection shortcut.
    """
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        # Main branch; kernel 3 with padding 1 keeps the length when stride == 1
        self.bn1 = nn.BatchNorm1d(in_channels)
        self.conv1 = nn.Conv1d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(
            out_channels, out_channels,
            kernel_size=3, stride=1, padding=1, bias=False,
        )

        # Shortcut branch: project only when the output shape differs from the input
        projection = []
        if not (stride == 1 and in_channels == out_channels):
            projection.append(
                nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)
            )
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        # Shared pre-activation feeds both the shortcut and the main branch
        activated = F.relu(self.bn1(x))
        residual = self.shortcut(activated)

        hidden = self.conv1(activated)
        hidden = F.relu(self.bn2(hidden))
        result = self.conv2(hidden)

        result += residual
        return result

class SelfAttention(nn.Module):
    """
    Single-head scaled dot-product self-attention for sequential data.

    Args:
        dim: Width of the last input axis; query, key and value projections
            all map ``dim -> dim``.
    """
    def __init__(self, dim):
        super().__init__()
        # Learned projections
        self.query = nn.Linear(dim, dim)
        self.key = nn.Linear(dim, dim)
        self.value = nn.Linear(dim, dim)
        # sqrt(dim) kept as a 0-dim float32 tensor (plain attribute, not a registered buffer)
        self.scale = torch.tensor(dim, dtype=torch.float32).sqrt()

    def forward(self, x):
        """
        Attend over the sequence axis of ``x``.

        Args:
            x: Tensor of shape [batch_size, seq_len, dim].

        Returns:
            Tuple ``(out, attention)`` where ``out`` is [batch_size, seq_len, dim]
            and ``attention`` is [batch_size, seq_len, seq_len] with rows summing to 1.
        """
        # Unpacking doubles as a rank check: anything but a 3-D input raises here
        _batch, _steps, _width = x.size()

        queries = self.query(x)
        keys = self.key(x)
        values = self.value(x)

        # Pairwise similarity between positions, scaled by sqrt(dim)
        similarity = torch.matmul(queries, keys.transpose(-2, -1)) / self.scale

        # Normalise each row into attention weights
        weights = F.softmax(similarity, dim=-1)

        # Weighted sum of the value vectors
        context = torch.matmul(weights, values)

        return context, weights

class EnhancedCNN(nn.Module):
    """
    Enhanced CNN model with residual connections and attention mechanisms
    for improved trading decision making
    """
    def __init__(self, input_shape, n_actions, confidence_threshold=0.5):
        """
        Record the input geometry, build the network and move it to the device.

        Args:
            input_shape: Either a single integer (number of features) or a
                list/tuple of length 1 ``[features]``, 2 ``[timeframes, features]``
                or 3 ``[channels, height, width]``.
            n_actions: Number of discrete actions (width of the Q-value output).
            confidence_threshold: Stored on the instance as-is; not used by the
                constructor itself.

        Raises:
            ValueError: If ``input_shape`` is a sequence whose length is not 1, 2 or 3.
        """
        super(EnhancedCNN, self).__init__()

        # Store dimensions
        self.input_shape = input_shape
        self.n_actions = n_actions
        self.confidence_threshold = confidence_threshold

        # Training data storage
        self.training_data = []

        # Derive channels / per-channel features / total flattened size.
        # feature_dim is always the number of values in one flattened sample,
        # because forward() validates inputs against channels * features.
        if isinstance(input_shape, (list, tuple)):
            if len(input_shape) == 3:  # [channels, height, width]
                self.channels, self.height, self.width = input_shape
                # forward() flattens the trailing two axes, so each channel
                # carries height * width values.
                self.features = self.height * self.width
                # Previously height * width only, which made every conv-path
                # input fail the dimension check when channels > 1.
                self.feature_dim = self.channels * self.features
            elif len(input_shape) == 2:  # [timeframes, features]
                self.channels = input_shape[0]
                self.features = input_shape[1]
                self.feature_dim = self.features * self.channels
            elif len(input_shape) == 1:  # [features]
                self.channels = 1
                self.features = input_shape[0]
                self.feature_dim = self.features
            else:
                raise ValueError(f"Unsupported input shape: {input_shape}")
        else:  # single integer
            self.channels = 1
            self.features = input_shape
            self.feature_dim = input_shape

        # Build network (reads self.channels and self.feature_dim)
        self._build_network()

        # Pick CUDA when available and move all parameters there
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

        logger.info(f"EnhancedCNN initialized with input shape: {input_shape}, actions: {n_actions}")

    def _build_network(self):
        """
        Create every sub-module of the model and the bookkeeping lists.

        Reads ``self.channels``, ``self.feature_dim`` and ``self.n_actions``.

        Layout:
            * ``conv_layers``: 1-D convolutional backbone, built only when
              ``self.channels > 1``; otherwise ``None``. It ends in global
              average pooling, so it yields 6144 features per sample.
            * ``fc_layers``: dense trunk reducing the input to 1024 features.
            * six ``SelfAttention(1024)`` modules plus ``attention_fusion``
              (6 * 1024 -> 1024).
            * dueling ``advantage_stream`` / ``value_stream`` and the
              prediction heads, each taking the 1024-wide fused features.

        Also initialises ``inference_records``, ``max_inference_records`` and
        ``training_loss_history``.
        """

        # Convolutional backbone, used only for multi-channel input
        if self.channels > 1:
            # Kernel-7/padding-3 stem and stride-1 residual blocks keep the length;
            # each of the four MaxPool1d layers halves it.
            # NOTE(review): the input length presumably has to be at least 16 so that
            # no pooling layer receives a length-1 sequence - confirm against callers.
            self.conv_layers = nn.Sequential(
                # Stem: channels -> 1024
                nn.Conv1d(self.channels, 1024, kernel_size=7, padding=3),
                nn.BatchNorm1d(1024),
                nn.ReLU(),
                nn.Dropout(0.1),

                # Stage 1: 1024 -> 1536 channels
                ResidualBlock(1024, 1536),
                ResidualBlock(1536, 1536),
                ResidualBlock(1536, 1536),
                ResidualBlock(1536, 1536),
                nn.MaxPool1d(kernel_size=2, stride=2),
                nn.Dropout(0.2),

                # Stage 2: 1536 -> 2048 channels
                ResidualBlock(1536, 2048),
                ResidualBlock(2048, 2048),
                ResidualBlock(2048, 2048),
                ResidualBlock(2048, 2048),
                nn.MaxPool1d(kernel_size=2, stride=2),
                nn.Dropout(0.25),

                # Stage 3: 2048 -> 3072 channels
                ResidualBlock(2048, 3072),
                ResidualBlock(3072, 3072),
                ResidualBlock(3072, 3072),
                ResidualBlock(3072, 3072),
                nn.MaxPool1d(kernel_size=2, stride=2),
                nn.Dropout(0.3),

                # Stage 4: 3072 -> 4096 channels
                ResidualBlock(3072, 4096),
                ResidualBlock(4096, 4096),
                ResidualBlock(4096, 4096),
                ResidualBlock(4096, 4096),
                nn.MaxPool1d(kernel_size=2, stride=2),
                nn.Dropout(0.3),

                # Stage 5: 4096 -> 6144 channels
                ResidualBlock(4096, 6144),
                ResidualBlock(6144, 6144),
                ResidualBlock(6144, 6144),
                ResidualBlock(6144, 6144),
                nn.AdaptiveAvgPool1d(1)  # Global average pooling -> length 1
            )
            # Channel count after the backbone (length is 1 after pooling)
            self.conv_features = 6144
        else:
            # Single-channel input skips the backbone and goes straight to the dense trunk
            self.conv_layers = None
            self.conv_features = 0

        # First dense layer: its input width depends on whether the backbone exists
        if self.conv_layers is None:
            # Raw flattened input
            self.fc1 = nn.Linear(self.feature_dim, 6144)
            self.features_dim = 6144
        else:
            # Pooled backbone output
            self.fc1 = nn.Linear(self.conv_features, 6144)
            self.features_dim = 6144

        # Dense trunk: 6144 -> 6144 -> 4096 -> 3072 -> 2048 -> 1024.
        # NOTE: self.fc1 is the same module object as fc_layers[0], so it is
        # registered under both names.
        self.fc_layers = nn.Sequential(
            self.fc1,
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(6144, 6144),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(6144, 4096),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(4096, 3072),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(3072, 2048),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(2048, 1024),  # 1024 matches the attention modules below
            nn.ReLU()
        )

        # Six independent attention modules over the 1024-wide trunk output
        self.price_attention = SelfAttention(1024)
        self.volume_attention = SelfAttention(1024)
        self.trend_attention = SelfAttention(1024)
        self.volatility_attention = SelfAttention(1024)
        self.momentum_attention = SelfAttention(1024)
        self.microstructure_attention = SelfAttention(1024)

        # Fuses the concatenated attention outputs back to 1024 features
        self.attention_fusion = nn.Sequential(
            nn.Linear(1024 * 6, 4096),  # Input: all 6 attention outputs side by side
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(4096, 3072),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(3072, 1024)
        )

        # Dueling architecture: advantage stream (one output per action) ...
        self.advantage_stream = nn.Sequential(
            nn.Linear(1024, 1536),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1536, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, self.n_actions)
        )

        # ... and value stream (single scalar per sample)
        self.value_stream = nn.Sequential(
            nn.Linear(1024, 1536),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1536, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 1)
        )

        # Extrema detection head (3 logits)
        self.extrema_head = nn.Sequential(
            nn.Linear(1024, 1536),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1536, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 3)  # 0=bottom, 1=top, 2=neither
        )

        # Price direction head: two raw outputs, activated separately in forward()
        self.price_direction_head = nn.Sequential(
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 2)  # [direction, confidence]
        )

        # Multi-timeframe price vector heads (identical layout, separate weights)
        # Short-term: 1-5 minutes prediction
        self.short_term_vector_head = nn.Sequential(
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 4)  # [direction, confidence, magnitude, volatility_risk]
        )

        # Mid-term: 5-30 minutes prediction
        self.mid_term_vector_head = nn.Sequential(
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 4)  # [direction, confidence, magnitude, volatility_risk]
        )

        # Long-term: 30-120 minutes prediction
        self.long_term_vector_head = nn.Sequential(
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 4)  # [direction, confidence, magnitude, volatility_risk]
        )

        # Output activations applied column-wise to the direction / vector heads
        # Direction activation (tanh for -1 to 1)
        self.direction_activation = nn.Tanh()
        # Confidence activation (sigmoid for 0 to 1)
        self.confidence_activation = nn.Sigmoid()
        # Magnitude activation (sigmoid for 0 to 1)
        self.magnitude_activation = nn.Sigmoid()
        # Volatility risk activation (sigmoid for 0 to 1)
        self.volatility_activation = nn.Sigmoid()

        # Plain Python bookkeeping (not modules): inference records and loss history
        self.inference_records = []
        self.max_inference_records = 50
        self.training_loss_history = []

        # Price value head (8 outputs)
        self.price_pred_value = nn.Sequential(
            nn.Linear(1024, 1536),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1536, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 8)  # % change predictions for different timeframes
        )

        # Additional classification heads (identical layout up to the output width)
        # Volatility prediction head
        self.volatility_head = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 5)  # Very low, low, medium, high, very high volatility
        )

        # Support/Resistance level detection head
        self.support_resistance_head = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 6)  # Strong support, weak support, neutral, weak resistance, strong resistance, breakout
        )

        # Market regime classification head
        self.market_regime_head = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 7)  # Bull trend, bear trend, sideways, volatile up, volatile down, accumulation, distribution
        )

        # Risk assessment head
        self.risk_head = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 4)  # Low risk, medium risk, high risk, extreme risk
        )

    def _memory_barrier(self, tensor: torch.Tensor) -> torch.Tensor:
        """Create a memory barrier to prevent in-place operation issues"""
        return tensor.detach().clone().requires_grad_(tensor.requires_grad)

    def _check_rebuild_network(self, features):
        """DEPRECATED: Network should have fixed architecture - no runtime rebuilding"""
        if features != self.feature_dim:
            logger.error(f"CRITICAL: Input feature dimension mismatch! Expected {self.feature_dim}, got {features}")
            logger.error("This indicates a bug in data preprocessing - input should be fixed size!")
            logger.error("Network architecture should NOT change at runtime!")
            raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {features}")
        return False

    def forward(self, x):
        """Forward pass through the ULTRA MASSIVE network"""
        batch_size = x.size(0)

        # Validate input dimensions to prevent zero-element tensor issues
        if x.numel() == 0:
            logger.error(f"Forward pass received empty tensor with shape {x.shape}")
            # Return default outputs for all 5 expected values to prevent crash
            default_q_values = torch.zeros(batch_size, self.n_actions, device=x.device)
            default_extrema = torch.zeros(batch_size, 3, device=x.device)  # bottom/top/neither
            default_price_pred = torch.zeros(batch_size, 1, device=x.device)
            default_features = torch.zeros(batch_size, 1024, device=x.device)
            default_advanced = torch.zeros(batch_size, 1, device=x.device)
            return default_q_values, default_extrema, default_price_pred, default_features, default_advanced

        # Check for zero feature dimensions
        if len(x.shape) > 1 and any(dim == 0 for dim in x.shape[1:]):
            logger.error(f"Forward pass received tensor with zero feature dimensions: {x.shape}")
            # Return default outputs for all 5 expected values to prevent crash
            default_q_values = torch.zeros(batch_size, self.n_actions, device=x.device)
            default_extrema = torch.zeros(batch_size, 3, device=x.device)  # bottom/top/neither
            default_price_pred = torch.zeros(batch_size, 1, device=x.device)
            default_features = torch.zeros(batch_size, 1024, device=x.device)
            default_advanced = torch.zeros(batch_size, 1, device=x.device)
            return default_q_values, default_extrema, default_price_pred, default_features, default_advanced

        # Process different input shapes
        if len(x.shape) > 2:
            # Handle 4D input [batch, timeframes, window, features] or 3D input [batch, timeframes, features]
            if len(x.shape) == 4:
                # Flatten window and features: [batch, timeframes, window*features]
                x = x.reshape(batch_size, x.size(1), -1)

            if self.conv_layers is not None:
                # Now x is 3D: [batch, timeframes, features]
                x_reshaped = x

                # Validate input dimensions (should be fixed)
                total_features = x_reshaped.size(1) * x_reshaped.size(2)
                if total_features != self.feature_dim:
                    logger.error(f"Input dimension mismatch: expected {self.feature_dim}, got {total_features}")
                    raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {total_features}")

                # Apply ultra massive convolutions
                x_conv = self.conv_layers(x_reshaped)
                # Flatten: [batch, channels, 1] -> [batch, channels]
                x_flat = x_conv.reshape(batch_size, -1)
            else:
                # If no conv layers, just flatten
                x_flat = x.reshape(batch_size, -1)
        else:
            # For 2D input [batch, features]
            x_flat = x

            # Validate input dimensions (should be fixed)
            if x_flat.size(1) != self.feature_dim:
                logger.error(f"Input dimension mismatch: expected {self.feature_dim}, got {x_flat.size(1)}")
                raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {x_flat.size(1)}")

        # Apply ULTRA MASSIVE FC layers to get base features
        features = self.fc_layers(x_flat)  # [batch, 1024]

        # Apply multiple specialized attention mechanisms
        features_3d = features.unsqueeze(1)  # [batch, 1, 1024]

        # Get attention-refined features for different aspects
        price_features, _ = self.price_attention(features_3d)
        price_features = price_features.squeeze(1)  # [batch, 1024]

        volume_features, _ = self.volume_attention(features_3d)
        volume_features = volume_features.squeeze(1)  # [batch, 1024]

        trend_features, _ = self.trend_attention(features_3d)
        trend_features = trend_features.squeeze(1)  # [batch, 1024]

        volatility_features, _ = self.volatility_attention(features_3d)
        volatility_features = volatility_features.squeeze(1)  # [batch, 1024]

        momentum_features, _ = self.momentum_attention(features_3d)
        momentum_features = momentum_features.squeeze(1)  # [batch, 1024]

        microstructure_features, _ = self.microstructure_attention(features_3d)
        microstructure_features = microstructure_features.squeeze(1)  # [batch, 1024]

        # Fuse all attention outputs
        combined_attention = torch.cat([
            price_features, volume_features,
            trend_features, volatility_features,
            momentum_features, microstructure_features
        ], dim=1)  # [batch, 1024*6]

        # Apply attention fusion to get final refined features
        features_refined = self.attention_fusion(combined_attention)  # [batch, 1024]

        # Calculate advantage and value (Dueling DQN architecture)
        advantage = self.advantage_stream(features_refined)
        value = self.value_stream(features_refined)

        # Combine for Q-values (Dueling architecture)
        q_values = value + advantage - advantage.mean(dim=1, keepdim=True)

        # Get ultra massive ensemble of predictions

        # Extrema predictions (bottom/top/neither detection)
        extrema_pred = self.extrema_head(features_refined)

        # Price direction predictions
        price_direction_raw = self.price_direction_head(features_refined)

        # Apply separate activations to direction and confidence
        direction = self.direction_activation(price_direction_raw[:, 0:1])  # -1 to 1
        confidence = self.confidence_activation(price_direction_raw[:, 1:2])  # 0 to 1
        price_direction_pred = torch.cat([direction, confidence], dim=1)  # [batch, 2]

        # MULTI-TIMEFRAME PRICE VECTOR PREDICTIONS
        short_term_vector_pred = self.short_term_vector_head(features_refined)
        mid_term_vector_pred = self.mid_term_vector_head(features_refined)
        long_term_vector_pred = self.long_term_vector_head(features_refined)

        # Apply separate activations to direction, confidence, magnitude, volatility_risk
        short_term_direction = self.direction_activation(short_term_vector_pred[:, 0:1])
        short_term_confidence = self.confidence_activation(short_term_vector_pred[:, 1:2])
        short_term_magnitude = self.magnitude_activation(short_term_vector_pred[:, 2:3])
        short_term_volatility_risk = self.volatility_activation(short_term_vector_pred[:, 3:4])

        mid_term_direction = self.direction_activation(mid_term_vector_pred[:, 0:1])
        mid_term_confidence = self.confidence_activation(mid_term_vector_pred[:, 1:2])
        mid_term_magnitude = self.magnitude_activation(mid_term_vector_pred[:, 2:3])
        mid_term_volatility_risk = self.volatility_activation(mid_term_vector_pred[:, 3:4])

        long_term_direction = self.direction_activation(long_term_vector_pred[:, 0:1])
        long_term_confidence = self.confidence_activation(long_term_vector_pred[:, 1:2])
        long_term_magnitude = self.magnitude_activation(long_term_vector_pred[:, 2:3])
        long_term_volatility_risk = self.volatility_activation(long_term_vector_pred[:, 3:4])

        # Package multi-timeframe predictions into a single tensor
        multi_timeframe_predictions = torch.cat([
            short_term_direction, short_term_confidence, short_term_magnitude, short_term_volatility_risk,
            mid_term_direction, mid_term_confidence, mid_term_magnitude, mid_term_volatility_risk,
            long_term_direction, long_term_confidence, long_term_magnitude, long_term_volatility_risk
        ], dim=1) # [batch, 4*3]

        price_values = self.price_pred_value(features_refined)

        # Additional specialized predictions for enhanced accuracy
        volatility_pred = self.volatility_head(features_refined)
        support_resistance_pred = self.support_resistance_head(features_refined)
        market_regime_pred = self.market_regime_head(features_refined)
        risk_pred = self.risk_head(features_refined)

        # Use the price direction prediction directly (already [batch, 2])
        price_direction_tensor = price_direction_pred

        # Package additional predictions into a single tensor (use volatility as primary)
        # For compatibility with DQN agent, we return volatility_pred as the advanced prediction tensor
        advanced_pred_tensor = volatility_pred

        return q_values, extrema_pred, price_direction_tensor, features_refined, advanced_pred_tensor, multi_timeframe_predictions

    def act(self, state, explore=True) -> Tuple[int, float, List[float]]:
        """Enhanced action selection with ultra massive model predictions"""
        self.eval()

        # Accept both NumPy arrays and already-built torch tensors
        if isinstance(state, torch.Tensor):
            state_tensor = state.detach().to(self.device)
            if state_tensor.dim() == 1:
                state_tensor = state_tensor.unsqueeze(0)
        else:
            # Convert to tensor **directly on the target device** to avoid intermediate CPU copies
            state_tensor = torch.as_tensor(state, dtype=torch.float32, device=self.device)
            if state_tensor.dim() == 1:
                state_tensor = state_tensor.unsqueeze(0)

        with torch.no_grad():
            q_values, extrema_pred, price_direction_predictions, features, advanced_predictions, multi_timeframe_predictions = self(state_tensor)

            # Process price direction predictions
            if price_direction_predictions is not None:
                self.process_price_direction_predictions(price_direction_predictions)

            # Apply softmax to get action probabilities
            action_probs_tensor = torch.softmax(q_values, dim=1)
            action_idx = int(torch.argmax(action_probs_tensor, dim=1).item())
            confidence = float(action_probs_tensor[0, action_idx].item())  # Confidence of the chosen action
            action_probs = action_probs_tensor.squeeze(0).tolist()  # Convert to list of floats for return

            # Log advanced predictions for better decision making
            if hasattr(self, '_log_predictions') and self._log_predictions:
                # Log volatility prediction
                volatility = torch.softmax(advanced_predictions['volatility'], dim=1).squeeze(0)
                volatility_class = int(torch.argmax(volatility).item())
                volatility_labels = ['Very Low', 'Low', 'Medium', 'High', 'Very High']

                # Log support/resistance prediction
                sr = torch.softmax(advanced_predictions['support_resistance'], dim=1).squeeze(0)
                sr_class = int(torch.argmax(sr).item())
                sr_labels = ['Strong Support', 'Weak Support', 'Neutral', 'Weak Resistance', 'Strong Resistance', 'Breakout']

                # Log market regime prediction
                regime = torch.softmax(advanced_predictions['market_regime'], dim=1).squeeze(0)
                regime_class = int(torch.argmax(regime).item())
                regime_labels = ['Bull Trend', 'Bear Trend', 'Sideways', 'Volatile Up', 'Volatile Down', 'Accumulation', 'Distribution']

                # Log risk assessment
                risk = torch.softmax(advanced_predictions['risk_assessment'], dim=1).squeeze(0)
                risk_class = int(torch.argmax(risk).item())
                risk_labels = ['Low Risk', 'Medium Risk', 'High Risk', 'Extreme Risk']

                logger.info(f"ULTRA MASSIVE Model Predictions:")
                logger.info(f"  Volatility: {volatility_labels[volatility_class]} ({volatility[volatility_class]:.3f})")
                logger.info(f"  Support/Resistance: {sr_labels[sr_class]} ({sr[sr_class]:.3f})")
                logger.info(f"  Market Regime: {regime_labels[regime_class]} ({regime[regime_class]:.3f})")
                logger.info(f"  Risk Level: {risk_labels[risk_class]} ({risk[risk_class]:.3f})")

            return action_idx, confidence, action_probs

    def process_price_direction_predictions(self, price_direction_pred: torch.Tensor) -> Dict[str, float]:
        """
        Process price direction predictions and convert to standardized format

        Args:
            price_direction_pred: Tensor of shape (batch_size, 2) containing [direction, confidence]

        Returns:
            Dict with direction (-1 to 1) and confidence (0 to 1)
        """
        try:
            if price_direction_pred is None or price_direction_pred.numel() == 0:
                return {}

            # Extract direction and confidence values
            direction_value = float(price_direction_pred[0, 0].item())  # -1 to 1
            confidence_value = float(price_direction_pred[0, 1].item())  # 0 to 1

            processed_directions = {
                'direction': direction_value,
                'confidence': confidence_value
            }

            # Store for later access
            self.last_price_direction = processed_directions

            return processed_directions

        except Exception as e:
            logger.error(f"Error processing price direction predictions: {e}")
            return {}

    def get_price_direction_vector(self) -> Dict[str, float]:
        """
        Get the current price direction and confidence

        Returns:
            Dict with direction (-1 to 1) and confidence (0 to 1)
        """
        return getattr(self, 'last_price_direction', {})

    def get_price_direction_summary(self) -> Dict[str, Any]:
        """
        Summarize the most recently stored price direction prediction

        Reads ``self.last_price_direction`` (when the attribute exists) and maps
        its ``direction`` value onto a discrete label using a +/-0.1 dead band.

        Returns:
            Dict with the keys direction_value, confidence_value, direction_label,
            discrete_direction, strength and weighted_strength. A zeroed
            "SIDEWAYS" summary is returned when nothing is stored or when the
            summary cannot be built.
        """
        def _neutral_summary() -> Dict[str, Any]:
            # Fallback used both for "no prediction yet" and for failures
            return {
                'direction_value': 0.0,
                'confidence_value': 0.0,
                'direction_label': "SIDEWAYS",
                'discrete_direction': 0,
                'strength': 0.0,
                'weighted_strength': 0.0
            }

        try:
            stored = getattr(self, 'last_price_direction', {})
            if not stored:
                return _neutral_summary()

            raw_direction = stored['direction']
            raw_confidence = stored['confidence']

            # Values inside the dead band count as no meaningful move
            if raw_direction > 0.1:
                label, discrete = "UP", 1
            elif raw_direction < -0.1:
                label, discrete = "DOWN", -1
            else:
                label, discrete = "SIDEWAYS", 0

            direction = float(raw_direction)
            confidence = float(raw_confidence)
            magnitude = abs(direction)

            return {
                'direction_value': direction,
                'confidence_value': confidence,
                'direction_label': label,
                'discrete_direction': discrete,
                'strength': magnitude,
                'weighted_strength': magnitude * confidence
            }

        except Exception as e:
            logger.error(f"Error calculating price direction summary: {e}")
            return _neutral_summary()

    def add_training_data(self, state, action, reward, position_pnl=0.0, has_position=False):
        """
        Append one sample to ``self.training_data`` using a position-adjusted reward

        The stored reward comes from ``_calculate_position_enhanced_reward``; the
        unmodified reward is kept next to it under ``base_reward``. Failures are
        logged and otherwise ignored.

        Args:
            state: Input state
            action: Action taken
            reward: Base reward received
            position_pnl: Current position P&L (0.0 if no position)
            has_position: Whether a position is currently open
        """
        try:
            sample = {
                'state': state,
                'action': action,
                'reward': self._calculate_position_enhanced_reward(
                    reward, action, position_pnl, has_position
                ),
                'base_reward': reward,  # Original reward, kept for analysis
                'position_pnl': position_pnl,
                'has_position': has_position,
                'timestamp': time.time()
            }
            self.training_data.append(sample)

            # Rolling window: drop the oldest samples beyond the newest 1000
            surplus = len(self.training_data) - 1000
            if surplus > 0:
                self.training_data = self.training_data[surplus:]

        except Exception as e:
            logger.error(f"Error adding training data: {e}")

    def _calculate_position_enhanced_reward(self, base_reward, action, position_pnl, has_position):
        """
        Adjust a base reward according to the open position's P&L

        With an open position and non-zero P&L, the P&L (divided by 100) scales a
        bonus or penalty that depends on the action:
            - winning position: HOLD +0.5x, BUY/SELL +0.3x
            - losing position:  HOLD -0.8x, BUY/SELL +0.6x
        The result is always clamped to [-5.0, 5.0].

        Args:
            base_reward: Original reward from price prediction accuracy
            action: Action taken ('BUY', 'SELL', 'HOLD')
            position_pnl: Current position P&L
            has_position: Whether a position is currently open

        Returns:
            The adjusted, clamped reward; ``base_reward`` unchanged if the
            calculation raises
        """
        try:
            reward = base_reward

            if has_position and position_pnl != 0.0:
                # Normalize P&L to a reasonable scale; only its magnitude is used
                magnitude = abs(position_pnl / 100.0)

                # (HOLD coefficient, BUY/SELL coefficient) chosen by P&L sign
                if position_pnl > 0:
                    coefficients = (0.5, 0.3)    # let winners run / modest bonus for acting
                elif position_pnl < 0:
                    coefficients = (-0.8, 0.6)   # punish holding losers / reward cutting them
                else:
                    coefficients = None          # neither positive nor negative: no adjustment

                if coefficients is not None:
                    hold_coeff, trade_coeff = coefficients
                    if action == "HOLD":
                        reward += magnitude * hold_coeff
                    elif action in ("BUY", "SELL"):
                        reward += magnitude * trade_coeff

            # Keep the reward inside a bounded range
            return max(-5.0, min(5.0, reward))

        except Exception as e:
            logger.error(f"Error calculating position-enhanced reward: {e}")
            return base_reward

    def save(self, path):
        """
        Save model weights and architecture metadata to ``{path}.pt``

        The checkpoint stores the state dict together with input_shape,
        n_actions, feature_dim and confidence_threshold.

        Args:
            path: Destination path WITHOUT the ``.pt`` extension. A bare file
                name (no directory component) is written relative to the
                current working directory.
        """
        # os.path.dirname() returns '' for a bare file name and os.makedirs('')
        # raises FileNotFoundError, so only create a directory when one is given.
        target_dir = os.path.dirname(path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        torch.save({
            'state_dict': self.state_dict(),
            'input_shape': self.input_shape,
            'n_actions': self.n_actions,
            'feature_dim': self.feature_dim,
            'confidence_threshold': self.confidence_threshold
        }, f"{path}.pt")
        logger.info(f"Enhanced CNN model saved to {path}.pt")

    def load(self, path):
        """
        Restore model metadata and weights from ``{path}.pt``

        The checkpoint is mapped onto ``self.device``. input_shape, n_actions and
        feature_dim are copied from it (plus confidence_threshold when present),
        the network is rebuilt via ``_build_network`` and the state dict loaded.

        Args:
            path: Checkpoint path WITHOUT the ``.pt`` extension

        Returns:
            True on success, False if any step raised (the error is logged)
        """
        try:
            checkpoint = torch.load(f"{path}.pt", map_location=self.device)

            # Architecture parameters must be in place before rebuilding
            for attr in ('input_shape', 'n_actions', 'feature_dim'):
                setattr(self, attr, checkpoint[attr])

            # Optional entry: keep the current threshold when it is absent
            if 'confidence_threshold' in checkpoint:
                self.confidence_threshold = checkpoint['confidence_threshold']

            self._build_network()
            self.load_state_dict(checkpoint['state_dict'])
            self.to(self.device)

            logger.info(f"Enhanced CNN model loaded from {path}.pt")
            return True
        except Exception as e:
            logger.error(f"Error loading model: {str(e)}")
            return False

    def store_inference_record(self, input_data, prediction_output, metadata=None):
        """
        Append a snapshot of one inference to ``self.inference_records``

        Tensors are stored as detached clones. Elements 0, 1 and 2 of
        ``prediction_output`` are saved as q_values, extrema_pred and
        price_direction; element 5 (if the sequence is that long) is saved as
        multi_timeframe. Only the newest ``self.max_inference_records`` entries
        are kept. Failures are logged and otherwise ignored.

        Args:
            input_data: Model input; cloned when it is a tensor, stored as-is otherwise
            prediction_output: Indexable sequence of model outputs (entries may be None)
            metadata: Optional dict stored with the record (defaults to an empty dict)
        """
        def _snapshot(value):
            # Detached copy so the stored record does not share storage or graph
            return None if value is None else value.clone().detach()

        try:
            created_at = datetime.now()

            if isinstance(input_data, torch.Tensor):
                stored_input = input_data.clone().detach()
            else:
                stored_input = input_data

            outputs = {
                'q_values': _snapshot(prediction_output[0]),
                'extrema_pred': _snapshot(prediction_output[1]),
                'price_direction': _snapshot(prediction_output[2]),
                # The multi-timeframe head is optional in the output sequence
                'multi_timeframe': _snapshot(prediction_output[5]) if len(prediction_output) > 5 else None
            }

            self.inference_records.append({
                'timestamp': created_at,
                'input_data': stored_input,
                'prediction_output': outputs,
                'metadata': metadata or {}
            })

            # Keep only the last max_inference_records
            if len(self.inference_records) > self.max_inference_records:
                self.inference_records = self.inference_records[-self.max_inference_records:]

            logger.debug(f"CNN: Stored inference record. Total records: {len(self.inference_records)}")

        except Exception as e:
            logger.error(f"Error storing CNN inference record: {e}")

    def calculate_price_vector_loss(self, predicted_vectors, actual_price_changes, time_diffs):
        """
        Calculate price vector loss for multi-timeframe predictions

        The loss is built from tensor operations on the predictions, so when the
        predictions are part of an autograd graph, ``backward()`` on the returned
        value propagates gradients to whatever produced them. (Reading the
        predictions with ``.item()`` and re-wrapping the result in a new tensor
        would yield a leaf that is disconnected from the model.)

        Per timeframe the loss combines:
            - direction error (weight 2.0) against a target of +1 / -1 / 0,
              where the target is non-zero only when |actual change| > 0.05
            - magnitude error (weight 1.5) against min(|actual change| / 5, 1)
            - confidence calibration error (weight 1.0) against
              1 - direction_error / 2
        scaled by a time decay max(0.1, 1 - time_diff / 60) and a timeframe
        weight (short 1.0, mid 0.8, long 0.6). The result is the mean over the
        timeframes present in both input dicts.

        Args:
            predicted_vectors: Dict with 'short_term', 'mid_term', 'long_term'
                predictions; each is a tensor (or sequence) laid out as
                [direction, confidence, magnitude, ...]
            actual_price_changes: Dict with corresponding actual price changes
            time_diffs: Dict with time differences for each timeframe
                (a missing entry defaults to 1.0)

        Returns:
            Scalar float32 loss tensor on ``self.device`` that requires grad.
            A zero tensor is returned when no timeframe matches or on error.
        """
        try:
            timeframe_weights = (
                ('short_term', 1.0),  # Weight short-term predictions higher
                ('mid_term', 0.8),
                ('long_term', 0.6),
            )
            # Formatting the debug line forces a host sync per value, so only
            # do it when debug logging is actually on
            debug_enabled = logger.isEnabledFor(logging.DEBUG)
            timeframe_losses = []

            for timeframe, weight in timeframe_weights:
                if timeframe not in predicted_vectors or timeframe not in actual_price_changes:
                    continue

                pred_vector = predicted_vectors[timeframe]
                if not isinstance(pred_vector, torch.Tensor):
                    pred_vector = torch.as_tensor(pred_vector, dtype=torch.float32, device=self.device)
                pred_vector = pred_vector.reshape(-1)

                actual_change = float(actual_price_changes[timeframe])
                time_diff = float(time_diffs.get(timeframe, 1.0))

                # Prediction components stay tensors to keep the autograd graph
                pred_direction = pred_vector[0]
                pred_confidence = pred_vector[1]
                pred_magnitude = pred_vector[2]

                # Calculate actual metrics
                if actual_change > 0.05:
                    actual_direction = 1.0
                elif actual_change < -0.05:
                    actual_direction = -1.0
                else:
                    actual_direction = 0.0
                actual_magnitude = min(abs(actual_change) / 5.0, 1.0)  # Normalize to 0-1, cap at 5%

                # Direction loss (most important)
                if actual_direction != 0.0:
                    direction_error = torch.abs(pred_direction - actual_direction)
                else:
                    # Penalty for predicting movement when there's none
                    direction_error = torch.abs(pred_direction) * 0.5

                # Magnitude loss
                magnitude_error = torch.abs(pred_magnitude - actual_magnitude)

                # Confidence calibration loss (confidence should match accuracy).
                # The accuracy is a target, so it is detached: the confidence
                # should move toward it, not the other way round.
                direction_accuracy = 1.0 - direction_error.detach() / 2.0  # 0 to 1
                confidence_error = torch.abs(pred_confidence - direction_accuracy)

                # Time decay factor
                time_decay = max(0.1, 1.0 - (time_diff / 60.0))  # Decay over 1 hour

                # Combined loss for this timeframe
                timeframe_loss = (
                    direction_error * 2.0 +      # Direction is most important
                    magnitude_error * 1.5 +      # Magnitude is important
                    confidence_error * 1.0       # Confidence calibration
                ) * (time_decay * weight)

                timeframe_losses.append(timeframe_loss)

                if debug_enabled:
                    logger.debug(f"CNN {timeframe.upper()} VECTOR LOSS: "
                               f"dir_err={direction_error.item():.3f}, mag_err={magnitude_error.item():.3f}, "
                               f"conf_err={confidence_error.item():.3f}, total={timeframe_loss.item():.3f}")

            if not timeframe_losses:
                return torch.tensor(0.0, dtype=torch.float32, device=self.device, requires_grad=True)

            avg_loss = torch.stack(timeframe_losses).mean().to(device=self.device, dtype=torch.float32)
            if not avg_loss.requires_grad:
                # Plain-number inputs carry no graph; keep the historical
                # contract of returning a tensor on which backward() can be called
                avg_loss = avg_loss.detach().requires_grad_(True)
            return avg_loss

        except Exception as e:
            logger.error(f"Error calculating CNN price vector loss: {e}")
            return torch.tensor(0.0, dtype=torch.float32, device=self.device, requires_grad=True)

    def train_on_stored_records(self, optimizer, min_records=10):
        """
        Train on stored inference records for long-term price vector prediction

        Records are walked in chunks of up to 8. Within a chunk only records whose
        metadata carries both 'actual_price_changes' and 'time_diffs' are used.
        The sixth model output is split into short/mid/long-term slices of four
        values each and scored with ``calculate_price_vector_loss``. The model is
        switched to training mode and is left in that mode.

        Args:
            optimizer: PyTorch optimizer
            min_records: Minimum number of records needed for training

        Returns:
            Average loss over the batches that were actually trained; 0.0 when
            there are too few records, nothing was trainable, or an error occurred
        """
        try:
            if len(self.inference_records) < min_records:
                logger.debug(f"CNN: Not enough records for long-term training ({len(self.inference_records)} < {min_records})")
                return 0.0

            self.train()
            loss_sum = 0.0
            step_count = 0

            # Process records in batches
            chunk_size = min(8, len(self.inference_records))
            for start in range(0, len(self.inference_records), chunk_size):
                # Only records annotated with realized outcomes can supervise training
                labelled = [
                    rec for rec in self.inference_records[start:start + chunk_size]
                    if 'actual_price_changes' in rec['metadata'] and 'time_diffs' in rec['metadata']
                ]
                if not labelled:
                    continue

                batch_inputs = [rec['input_data'] for rec in labelled]
                batch_targets = [
                    {
                        'actual_price_changes': rec['metadata']['actual_price_changes'],
                        'time_diffs': rec['metadata']['time_diffs']
                    }
                    for rec in labelled
                ]

                # Stack inputs into batch tensor
                if isinstance(batch_inputs[0], torch.Tensor):
                    batch_input_tensor = torch.stack(batch_inputs).to(self.device)
                else:
                    batch_input_tensor = torch.tensor(batch_inputs, dtype=torch.float32, device=self.device)

                optimizer.zero_grad()

                # Forward pass; only the sixth (multi-timeframe) output is used here
                _, _, _, _, _, multi_timeframe_pred = self(batch_input_tensor)

                # Accumulate per-sample price vector losses
                batch_loss = 0.0
                if multi_timeframe_pred is not None:
                    for idx, target in enumerate(batch_targets):
                        sample_pred = multi_timeframe_pred[idx]
                        if sample_pred is None:
                            continue

                        # Each timeframe owns four consecutive values:
                        # [direction, confidence, magnitude, volatility]
                        batch_loss += self.calculate_price_vector_loss(
                            {
                                'short_term': sample_pred[0:4],
                                'mid_term': sample_pred[4:8],
                                'long_term': sample_pred[8:12]
                            },
                            target['actual_price_changes'],
                            target['time_diffs']
                        )

                if batch_loss > 0:
                    mean_batch_loss = batch_loss / len(batch_targets)
                    mean_batch_loss.backward()

                    # Gradient clipping
                    torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=1.0)

                    optimizer.step()

                    loss_sum += mean_batch_loss.item()
                    step_count += 1

            avg_loss = loss_sum / max(step_count, 1)
            self.training_loss_history.append(avg_loss)

            # Keep only last 100 loss values
            if len(self.training_loss_history) > 100:
                self.training_loss_history = self.training_loss_history[-100:]

            logger.info(f"CNN: Trained on {step_count} batches from {len(self.inference_records)} stored records. Avg loss: {avg_loss:.4f}")
            return avg_loss

        except Exception as e:
            logger.error(f"Error training CNN on stored records: {e}")
            return 0.0

    def process_price_direction_predictions(self, price_direction_tensor):
        """
        Convert a price direction tensor into a plain direction/confidence dict
        Compatible with orchestrator's price vector system

        A leading dimension is squeezed away when the tensor has more than one
        dimension; elements 0 and 1 are then read as direction and confidence.

        Args:
            price_direction_tensor: Tensor with [direction, confidence] or
                multi-timeframe predictions

        Returns:
            Dict with 'direction' and 'confidence' floats, or None when the input
            is None, is not a tensor, or cannot be read
        """
        try:
            # Covers None as well as any other non-tensor input
            if not isinstance(price_direction_tensor, torch.Tensor):
                return None

            values = price_direction_tensor
            if values.dim() > 1:
                values = values.squeeze(0)

            # The first two entries are the short-term (most immediate) prediction
            direction = float(values[0].item())
            confidence = float(values[1].item())
            return {'direction': direction, 'confidence': confidence}

        except Exception as e:
            logger.debug(f"Error processing CNN price direction predictions: {e}")
            return None

    def get_multi_timeframe_predictions(self, multi_timeframe_tensor):
        """
        Unpack a multi-timeframe prediction tensor into nested dicts

        After squeezing a leading dimension (when the tensor has more than one
        dimension), twelve values are read: indices 0-3 for short_term, 4-7 for
        mid_term and 8-11 for long_term, each group ordered as direction,
        confidence, magnitude, volatility_risk.

        Args:
            multi_timeframe_tensor: Tensor with all timeframe predictions

        Returns:
            Dict with short_term, mid_term, long_term predictions; an empty dict
            when the input is None, is not a tensor, or cannot be read
        """
        horizons = ('short_term', 'mid_term', 'long_term')
        components = ('direction', 'confidence', 'magnitude', 'volatility_risk')

        try:
            # Covers None as well as any other non-tensor input
            if not isinstance(multi_timeframe_tensor, torch.Tensor):
                return {}

            values = multi_timeframe_tensor
            if values.dim() > 1:
                values = values.squeeze(0)

            # Horizon h occupies the slice starting at h * len(components)
            return {
                horizon: {
                    component: float(values[slot * len(components) + offset].item())
                    for offset, component in enumerate(components)
                }
                for slot, horizon in enumerate(horizons)
            }

        except Exception as e:
            logger.debug(f"Error extracting multi-timeframe predictions: {e}")
            return {}


# Additional utility for example sifting
class ExampleSiftingDataset:
    """
    Dataset that selectively keeps high-quality examples for training
    to improve model performance

    Examples are accepted only when their reward exceeds a moving threshold.
    Once more than ``max_examples`` are stored, the lowest-reward examples are
    dropped and the threshold is raised to the smallest reward still kept.
    """
    def __init__(self, max_examples=50000):
        self.examples = []  # (state, action, reward, next_state, done) tuples
        self.labels = []    # not used by this class; kept for compatibility
        self.rewards = []   # rewards, index-aligned with self.examples
        self.max_examples = max_examples
        self.min_reward_threshold = -0.05  # Minimum reward to keep an example

    def add_example(self, state, action, reward, next_state, done):
        """
        Add a new training example with reward-based filtering

        Examples whose reward is not strictly above the current threshold are
        silently discarded.
        """
        # Written as "not >" so that a NaN reward is rejected as well
        if not reward > self.min_reward_threshold:
            return

        self.examples.append((state, action, reward, next_state, done))
        self.rewards.append(reward)

        if len(self.examples) <= self.max_examples:
            return

        # Over capacity: keep the top examples, highest reward first
        keep = np.argsort(self.rewards)[::-1][:self.max_examples]
        self.examples = [self.examples[i] for i in keep]
        self.rewards = [self.rewards[i] for i in keep]

        # Raise the threshold to the minimum reward still kept. With a capacity
        # of zero nothing is kept, and min() of an empty list would raise.
        if self.rewards:
            self.min_reward_threshold = min(self.rewards)

    def get_batch(self, batch_size: int) -> Optional[Dict[str, np.ndarray]]:
        """
        Get a batch of examples, prioritizing better examples

        Sampling is without replacement, with probability proportional to each
        reward's distance above the lowest stored reward (plus 0.1 so the
        lowest-reward example can still be drawn).

        Args:
            batch_size: Number of examples requested; capped at the number stored

        Returns:
            Dict of arrays under 'states', 'actions', 'rewards', 'next_states'
            and 'dones', or None when the dataset is empty or ``batch_size`` is
            not positive
        """
        # An empty selection cannot be unzipped into the five fields below, and
        # a negative size is rejected by numpy, so both are treated as "no batch"
        if not self.examples or batch_size <= 0:
            return None

        # Calculate selection probabilities based on rewards
        rewards = np.array(self.rewards)
        # Shift rewards to be positive for probability calculation
        shifted_rewards = rewards - rewards.min() + 0.1  # Add small constant
        probs = shifted_rewards / shifted_rewards.sum()

        # Sample batch indices with reward-based probabilities
        indices = np.random.choice(
            len(self.examples),
            size=min(batch_size, len(self.examples)),
            p=probs,
            replace=False
        )

        # Create batch
        batch = [self.examples[i] for i in indices]
        states, actions, rewards, next_states, dones = zip(*batch)

        return {
            'states': np.array(states),
            'actions': np.array(actions),
            'rewards': np.array(rewards),
            'next_states': np.array(next_states),
            'dones': np.array(dones)
        }

    def __len__(self):
        return len(self.examples)