wip: enhanced multi-timeframe model

Dobromir Popov 2025-03-12 01:46:48 +02:00
parent ad559d8c61
commit 506458d55e
5 changed files with 1972 additions and 1498 deletions

View File

@@ -0,0 +1,42 @@
def step(self, action):
    """Take an action in the environment and return the next state, reward, and done flag."""
    # Store current price before taking action
    self.current_price = self.data[self.current_step]['close']

    # Process action (0: HOLD, 1: BUY/LONG, 2: SELL/SHORT, 3: CLOSE)
    if not self.demo and self.trading_client:
        # Execute real trades in live mode
        asyncio.create_task(self._execute_live_action(action))

    # Calculate reward (simulation still runs in parallel with live trading)
    reward, _ = self.calculate_reward(action)  # calculate_reward returns a (reward, info) tuple

    # Check for stop loss / take profit hits
    self.check_sl_tp()

    # Move to next step
    self.current_step += 1
    done = self.current_step >= len(self.data) - 1

    # Get new state
    next_state = self.get_state()

    return next_state, reward, done

def calculate_reward(self, action):
    """Calculate the reward for the current action and return (reward, info)."""
    # ... (existing code)

    # Combine all reward components
    reward = pnl_reward + timing_reward + risk_reward + prediction_reward

    # Log components for analysis
    info = {
        'pnl_reward': pnl_reward,
        'timing_reward': timing_reward,
        'risk_reward': risk_reward,
        'prediction_reward': prediction_reward,
        'total_reward': reward
    }

    return reward, info  # return the tuple that step() unpacks above
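# --- Illustrative only, not part of this commit: a minimal driver loop for the
# --- environment API above. `TradingEnv` and `agent` are hypothetical names.
# env = TradingEnv(data=candles, demo=True)
# state = env.reset()
# done = False
# while not done:
#     action = agent.select_action(state)      # hypothetical agent
#     state, reward, done = env.step(action)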

View File

@@ -38,3 +38,26 @@ it seems the model is not learning a lot. we keep hovering about the same starting point.
it seems we may need another NN model down the loop just to predict the extremums of the price.
we may have to include a mechanism to calculate the extremums of the price retrospectively and to use that to bootstrap (pre-train) the model.
Why Performance Might Be Stagnating
Several factors could explain why the model isn't improving significantly during training:
Insufficient Model Capacity for Price Prediction: While the price prediction model has 267,663 parameters, financial time series prediction is extremely challenging. The market may have patterns that are too complex or too random for the current model to capture effectively.
Overfitting to Training Data: The model might be memorizing patterns in the training data that don't generalize to new market conditions.
Transformer-LSTM Redundancy in DQN: Your DQN model uses both a transformer and an LSTM, which might be redundant. Both are designed to capture sequential dependencies, and having both could lead to overfitting or training instability.
Imbalanced Parameter Distribution: 64.5% of your DQN parameters are in the transformer component, which might be excessive for the task.
Reward Function Issues: The reward function might not be properly aligned with profitable trading strategies, or it might be too sparse to provide meaningful learning signals.
Suggested Improvements
1. Enhance Price Prediction Training
2. Simplify the DQN Architecture
Consider creating a more streamlined DQN model:
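A minimal sketch of what a slimmer model could look like (illustrative, not from this commit; the layer sizes are assumptions):

import torch
import torch.nn as nn

class StreamlinedDQN(nn.Module):
    """Dueling DQN without the transformer/LSTM pair."""
    def __init__(self, state_dim, action_dim, hidden_dim=256):
        super().__init__()
        self.features = nn.Sequential(
            nn.Linear(state_dim, hidden_dim), nn.LeakyReLU(),
            nn.Linear(hidden_dim, hidden_dim), nn.LeakyReLU(),
        )
        self.advantage = nn.Linear(hidden_dim, action_dim)
        self.value = nn.Linear(hidden_dim, 1)

    def forward(self, state):
        h = self.features(state)
        adv = self.advantage(h)
        val = self.value(h)
        # Standard dueling combination
        return val + adv - adv.mean(dim=1, keepdim=True)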
3. Improve the Reward Function
Make sure your reward function provides meaningful signals for learning:
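One possible shape for a denser, PnL-anchored reward (a sketch, not the project's existing calculate_reward; the fee and scaling constants, and the self.position / self.entry_price attributes, are assumptions):

def calculate_reward(self, action):
    """Reward realized PnL, penalize churn, add a small shaping term so the signal is not sparse."""
    reward = 0.0
    fee = 0.0005  # assumed round-trip fee fraction
    if action in (1, 2):  # BUY or SELL (see action mapping in step())
        reward -= fee  # discourage overtrading
    if action == 3 and self.position is not None:  # CLOSE
        pnl = (self.current_price - self.entry_price) / self.entry_price
        if self.position == 'short':
            pnl = -pnl
        reward += pnl * 100  # scale so realized PnL dominates
    if self.position is not None:
        # Small unrealized-PnL shaping term on every step
        unrealized = (self.current_price - self.entry_price) / self.entry_price
        if self.position == 'short':
            unrealized = -unrealized
        reward += unrealized
    return reward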
4. Implement Curriculum Learning
Start with simpler market conditions and gradually increase complexity:
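One way to sketch this (illustrative; bucket_by_volatility and agent.train_on_window are hypothetical helpers, not part of this codebase):

def train_with_curriculum(agent, data_windows):
    # Bucket training windows by realized volatility and train from calm to volatile.
    stages = bucket_by_volatility(data_windows, buckets=('low', 'medium', 'high'))  # hypothetical
    for stage in ('low', 'medium', 'high'):
        for window in stages[stage]:
            agent.train_on_window(window)  # hypothetical training entry point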
Conclusion
The issue appears to be a combination of model complexity, potential overfitting, and possibly insufficient learning signals from the reward function. By simplifying the DQN architecture (particularly reducing the transformer component), improving the price prediction training, and enhancing the reward function, you should see better learning progress.
Would you like me to implement any of these specific improvements to your codebase?

View File

@@ -3,6 +3,278 @@ import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer
class EnhancedPricePredictionModel(nn.Module):
    def __init__(self, input_dim=2, hidden_dim=256, num_layers=3, output_dim=5, num_timeframes=3):
        super(EnhancedPricePredictionModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.num_timeframes = num_timeframes

        # Separate LSTM for each timeframe
        self.timeframe_lstms = nn.ModuleList([
            nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.2)
            for _ in range(num_timeframes)
        ])

        # Cross-timeframe attention (note: defined here but not yet wired into forward())
        self.cross_attention = nn.MultiheadAttention(hidden_dim, num_heads=8, batch_first=True, dropout=0.1)

        # Self-attention for each timeframe
        self.self_attentions = nn.ModuleList([
            nn.MultiheadAttention(hidden_dim, num_heads=8, batch_first=True, dropout=0.1)
            for _ in range(num_timeframes)
        ])

        # Timeframe fusion layer
        self.fusion_layer = nn.Sequential(
            nn.Linear(hidden_dim * num_timeframes, hidden_dim * 2),
            nn.LeakyReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim * 2, hidden_dim)
        )

        # Fully connected head for price prediction
        self.price_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, output_dim)
        )

        # Fully connected head for extrema prediction (high and low points)
        self.extrema_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, 10)  # 5 time steps, 2 classes (high/low) each
        )

        # Volume prediction head
        self.volume_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x_list):
        # x_list is a list of tensors, one per timeframe
        # Each x shape: (batch_size, seq_len, input_dim)

        # Process each timeframe with its own LSTM
        lstm_outputs = []
        for i, x in enumerate(x_list):
            lstm_out, _ = self.timeframe_lstms[i](x)  # (batch_size, seq_len, hidden_dim)
            lstm_outputs.append(lstm_out)

        # Apply self-attention to each timeframe
        attn_outputs = []
        for i, lstm_out in enumerate(lstm_outputs):
            attn_output, _ = self.self_attentions[i](lstm_out, lstm_out, lstm_out)
            attn_outputs.append(attn_output[:, -1, :])  # use the last time step

        # Concatenate all timeframe representations
        combined = torch.cat(attn_outputs, dim=1)  # (batch_size, hidden_dim * num_timeframes)

        # Fuse timeframe information
        fused = self.fusion_layer(combined)  # (batch_size, hidden_dim)

        # Price, extrema, and volume heads
        price_pred = self.price_fc(fused)
        extrema_logits = self.extrema_fc(fused)
        volume_pred = self.volume_fc(fused)

        return price_pred, extrema_logits, volume_pred
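# --- Illustrative shape check, not part of this commit: three timeframes,
# --- batch of 8, 30 candles each, (close, volume) features per candle.
# model = EnhancedPricePredictionModel(num_timeframes=3)
# xs = [torch.randn(8, 30, 2) for _ in range(3)]
# price, extrema, volume = model(xs)   # shapes: (8, 5), (8, 10), (8, 5)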
class EnhancedDQN(nn.Module):
    def __init__(self, state_dim, action_dim, hidden_dim=512):
        super(EnhancedDQN, self).__init__()

        # Feature extraction layers with increased capacity
        self.feature_extraction = nn.Sequential(
            nn.Linear(state_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
        )

        # Advantage stream with increased capacity
        self.advantage_stream = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, action_dim)
        )

        # Value stream with increased capacity
        self.value_stream = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, 1)
        )

        # Enhanced transformer for temporal dependencies
        encoder_layers = TransformerEncoderLayer(
            d_model=hidden_dim,
            nhead=8,
            dim_feedforward=hidden_dim * 4,
            dropout=0.1,
            batch_first=True
        )
        self.transformer = TransformerEncoder(encoder_layers, num_layers=3)

        # LSTM for sequential decision making with increased capacity
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, num_layers=2, batch_first=True, dropout=0.1)

        # Final layers with increased capacity
        self.final_layers = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, action_dim)
        )

        # Market regime classification layer
        self.market_regime_classifier = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, 3)  # 3 regimes: trending, ranging, volatile
        )

    def forward(self, state, hidden=None):
        # `hidden` is accepted for API compatibility but not used in this implementation

        # Extract features
        features = self.feature_extraction(state)
        features = features.unsqueeze(1)  # add sequence dimension for transformer/LSTM

        # Transformer processing
        transformer_out = self.transformer(features)

        # LSTM processing
        lstm_out, lstm_hidden = self.lstm(transformer_out)

        # Dueling architecture
        advantage = self.advantage_stream(features.squeeze(1))
        value = self.value_stream(features.squeeze(1))

        # Combine transformer and LSTM outputs for the direct Q-value head
        combined = torch.cat([transformer_out.squeeze(1), lstm_out.squeeze(1)], dim=1)
        q_values = self.final_layers(combined)

        # Market regime classification
        market_regime = self.market_regime_classifier(transformer_out.squeeze(1))

        # Dueling Q-value computation
        dueling_q = value + advantage - advantage.mean(dim=1, keepdim=True)

        # Final Q-values are a weighted combination of the dueling Q-values and the
        # direct Q-values, letting the model lean on either approach as needed
        q_values = 0.5 * dueling_q + 0.5 * q_values

        return q_values, lstm_hidden, market_regime
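# --- Illustrative forward pass, not part of this commit: flat 100-dim state, 3 actions.
# dqn = EnhancedDQN(state_dim=100, action_dim=3)
# q, lstm_hidden, regime = dqn(torch.randn(8, 100))
# q.shape == (8, 3); regime.shape == (8, 3)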
def count_parameters(model):
    total_params = 0
    layer_params = {}
    for name, param in model.named_parameters():
        if param.requires_grad:
            param_count = param.numel()
            total_params += param_count
            layer_params[name] = (param_count, param.shape)
    return total_params, layer_params
def main():
    # Initialize the original models for comparison
    original_price_model = PricePredictionModel()
    original_price_total_params, _ = count_parameters(original_price_model)

    state_dim = 50
    action_dim = 3
    original_dqn_model = DQN(state_dim=state_dim, action_dim=action_dim)
    original_dqn_total_params, _ = count_parameters(original_dqn_model)

    # Initialize the enhanced models
    enhanced_price_model = EnhancedPricePredictionModel(num_timeframes=3)
    enhanced_price_total_params, enhanced_price_layer_params = count_parameters(enhanced_price_model)

    # Increase state dimension to accommodate multiple timeframes
    enhanced_state_dim = 100  # increased from 50 to accommodate more features
    enhanced_dqn_model = EnhancedDQN(state_dim=enhanced_state_dim, action_dim=action_dim)
    enhanced_dqn_total_params, enhanced_dqn_layer_params = count_parameters(enhanced_dqn_model)

    # Print comparison
    print("=== MODEL SIZE COMPARISON ===")
    print(f"Original Price Prediction Model: {original_price_total_params:,} parameters")
    print(f"Enhanced Price Prediction Model: {enhanced_price_total_params:,} parameters")
    print(f"Growth Factor: {enhanced_price_total_params / original_price_total_params:.2f}x\n")
    print(f"Original DQN Model: {original_dqn_total_params:,} parameters")
    print(f"Enhanced DQN Model: {enhanced_dqn_total_params:,} parameters")
    print(f"Growth Factor: {enhanced_dqn_total_params / original_dqn_total_params:.2f}x\n")
    print(f"Total Original Models: {original_price_total_params + original_dqn_total_params:,} parameters")
    print(f"Total Enhanced Models: {enhanced_price_total_params + enhanced_dqn_total_params:,} parameters")
    print(f"Overall Growth Factor: {(enhanced_price_total_params + enhanced_dqn_total_params) / (original_price_total_params + original_dqn_total_params):.2f}x\n")

    # Print VRAM usage estimate (rough approximation: parameters only,
    # excluding activations, gradients, and optimizer state)
    bytes_per_param = 4  # 4 bytes for float32
    original_vram_mb = (original_price_total_params + original_dqn_total_params) * bytes_per_param / (1024 * 1024)
    enhanced_vram_mb = (enhanced_price_total_params + enhanced_dqn_total_params) * bytes_per_param / (1024 * 1024)
    print("=== ESTIMATED VRAM USAGE ===")
    print(f"Original Models: {original_vram_mb:.2f} MB")
    print(f"Enhanced Models: {enhanced_vram_mb:.2f} MB")
    print("Available VRAM: 8,192 MB (8 GB)")
    print(f"VRAM Utilization: {enhanced_vram_mb / 8192 * 100:.2f}%\n")

    # Print detailed breakdown of the enhanced models
    print("=== ENHANCED PRICE PREDICTION MODEL BREAKDOWN ===")
    # Group parameters by component
    timeframe_lstm_params = sum(count for name, (count, _) in enhanced_price_layer_params.items() if "timeframe_lstms" in name)
    attention_params = sum(count for name, (count, _) in enhanced_price_layer_params.items() if "attention" in name)
    fusion_params = sum(count for name, (count, _) in enhanced_price_layer_params.items() if "fusion" in name)
    output_params = sum(count for name, (count, _) in enhanced_price_layer_params.items() if any(x in name for x in ["price_fc", "extrema_fc", "volume_fc"]))
    print(f"Timeframe LSTMs: {timeframe_lstm_params:,} parameters ({timeframe_lstm_params/enhanced_price_total_params*100:.1f}%)")
    print(f"Attention Mechanisms: {attention_params:,} parameters ({attention_params/enhanced_price_total_params*100:.1f}%)")
    print(f"Fusion Layer: {fusion_params:,} parameters ({fusion_params/enhanced_price_total_params*100:.1f}%)")
    print(f"Output Layers: {output_params:,} parameters ({output_params/enhanced_price_total_params*100:.1f}%)\n")

    print("=== ENHANCED DQN MODEL BREAKDOWN ===")
    # Group parameters by component
    feature_extraction_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "feature_extraction" in name)
    advantage_value_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "advantage_stream" in name or "value_stream" in name)
    transformer_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "transformer" in name)
    lstm_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "lstm" in name and "transformer" not in name)
    final_layers_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "final_layers" in name)
    market_regime_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "market_regime" in name)
    print(f"Feature Extraction: {feature_extraction_params:,} parameters ({feature_extraction_params/enhanced_dqn_total_params*100:.1f}%)")
    print(f"Advantage & Value Streams: {advantage_value_params:,} parameters ({advantage_value_params/enhanced_dqn_total_params*100:.1f}%)")
    print(f"Transformer: {transformer_params:,} parameters ({transformer_params/enhanced_dqn_total_params*100:.1f}%)")
    print(f"LSTM: {lstm_params:,} parameters ({lstm_params/enhanced_dqn_total_params*100:.1f}%)")
    print(f"Final Layers: {final_layers_params:,} parameters ({final_layers_params/enhanced_dqn_total_params*100:.1f}%)")
    print(f"Market Regime Classifier: {market_regime_params:,} parameters ({market_regime_params/enhanced_dqn_total_params*100:.1f}%)")
# Keep the original models for comparison
class PricePredictionModel(nn.Module):
    def __init__(self, input_dim=2, hidden_dim=128, num_layers=2, output_dim=5):
        super(PricePredictionModel, self).__init__()
@@ -102,106 +374,5 @@ class DQN(nn.Module):
        return q_values, lstm_hidden
def count_parameters(model):
    total_params = 0
    layer_params = {}
    for name, param in model.named_parameters():
        if param.requires_grad:
            param_count = param.numel()
            total_params += param_count
            layer_params[name] = (param_count, param.shape)
    return total_params, layer_params

def main():
    # Initialize the Price Prediction Model
    price_model = PricePredictionModel()
    price_total_params, price_layer_params = count_parameters(price_model)
    print(f"Price Prediction Model parameters: {price_total_params:,}")
    print("\nPrice Prediction Model Layers:")
    for name, (count, shape) in price_layer_params.items():
        print(f"{name}: {count:,} (shape: {shape})")

    # Initialize the DQN Model with typical dimensions
    state_dim = 50  # typical state dimension for the trading bot
    action_dim = 3  # typical action dimension (buy, sell, hold)
    dqn_model = DQN(state_dim=state_dim, action_dim=action_dim)
    dqn_total_params, dqn_layer_params = count_parameters(dqn_model)

    # Count parameters by category
    feature_extraction_params = sum(count for name, (count, _) in dqn_layer_params.items() if "feature_extraction" in name)
    advantage_value_params = sum(count for name, (count, _) in dqn_layer_params.items() if "advantage_stream" in name or "value_stream" in name)
    transformer_params = sum(count for name, (count, _) in dqn_layer_params.items() if "transformer" in name)
    lstm_params = sum(count for name, (count, _) in dqn_layer_params.items() if "lstm" in name and "transformer" not in name)
    final_layers_params = sum(count for name, (count, _) in dqn_layer_params.items() if "final_layers" in name)
    print(f"\nDQN Model parameters: {dqn_total_params:,}")

    # Create a set to track which parameters we've printed
    printed_params = set()

    # Print DQN layers in groups to avoid output truncation
    print(f"\nDQN Model Layers (Feature Extraction): {feature_extraction_params:,} parameters")
    for name, (count, shape) in dqn_layer_params.items():
        if "feature_extraction" in name:
            print(f"{name}: {count:,} (shape: {shape})")
            printed_params.add(name)

    print(f"\nDQN Model Layers (Advantage & Value Streams): {advantage_value_params:,} parameters")
    for name, (count, shape) in dqn_layer_params.items():
        if "advantage_stream" in name or "value_stream" in name:
            print(f"{name}: {count:,} (shape: {shape})")
            printed_params.add(name)

    print(f"\nDQN Model Layers (Transformer): {transformer_params:,} parameters")
    for name, (count, shape) in dqn_layer_params.items():
        if "transformer" in name:
            print(f"{name}: {count:,} (shape: {shape})")
            printed_params.add(name)

    print(f"\nDQN Model Layers (LSTM): {lstm_params:,} parameters")
    for name, (count, shape) in dqn_layer_params.items():
        if "lstm" in name and "transformer" not in name:
            print(f"{name}: {count:,} (shape: {shape})")
            printed_params.add(name)

    print(f"\nDQN Model Layers (Final Layers): {final_layers_params:,} parameters")
    for name, (count, shape) in dqn_layer_params.items():
        if "final_layers" in name:
            print(f"{name}: {count:,} (shape: {shape})")
            printed_params.add(name)

    # Print any remaining parameters that weren't caught by the categories above
    remaining_params = set(dqn_layer_params.keys()) - printed_params
    if remaining_params:
        remaining_params_count = sum(dqn_layer_params[name][0] for name in remaining_params)
        print(f"\nDQN Model Layers (Other): {remaining_params_count:,} parameters")
        for name in remaining_params:
            count, shape = dqn_layer_params[name]
            print(f"{name}: {count:,} (shape: {shape})")

    # Total parameters across both models
    print(f"\nTotal parameters (both models): {price_total_params + dqn_total_params:,}")

    # Print summary of parameter distribution
    print("\nParameter Distribution Summary:")
    print(f"Price Prediction Model: {price_total_params:,} parameters ({price_total_params/(price_total_params + dqn_total_params)*100:.1f}%)")
    print(f"DQN Model: {dqn_total_params:,} parameters ({dqn_total_params/(price_total_params + dqn_total_params)*100:.1f}%)")
    print("\nDQN Model Breakdown:")
    print(f"- Feature Extraction: {feature_extraction_params:,} parameters ({feature_extraction_params/dqn_total_params*100:.1f}%)")
    print(f"- Advantage & Value Streams: {advantage_value_params:,} parameters ({advantage_value_params/dqn_total_params*100:.1f}%)")
    print(f"- Transformer: {transformer_params:,} parameters ({transformer_params/dqn_total_params*100:.1f}%)")
    print(f"- LSTM: {lstm_params:,} parameters ({lstm_params/dqn_total_params*100:.1f}%)")
    print(f"- Final Layers: {final_layers_params:,} parameters ({final_layers_params/dqn_total_params*100:.1f}%)")

    # Verify that all parameters are accounted for
    total_by_category = feature_extraction_params + advantage_value_params + transformer_params + lstm_params + final_layers_params
    if remaining_params:
        total_by_category += remaining_params_count
    print(f"\nSum of all categories: {total_by_category:,} parameters")
    print(f"Difference from total: {dqn_total_params - total_by_category:,} parameters")

if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,434 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer

class EnhancedPricePredictionModel(nn.Module):
    def __init__(self, input_dim=2, hidden_dim=256, num_layers=3, output_dim=5, num_timeframes=3):
        super(EnhancedPricePredictionModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.num_timeframes = num_timeframes

        # Separate LSTM for each timeframe
        self.timeframe_lstms = nn.ModuleList([
            nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.2)
            for _ in range(num_timeframes)
        ])

        # Cross-timeframe attention
        self.cross_attention = nn.MultiheadAttention(hidden_dim, num_heads=8, batch_first=True, dropout=0.1)

        # Self-attention for each timeframe
        self.self_attentions = nn.ModuleList([
            nn.MultiheadAttention(hidden_dim, num_heads=8, batch_first=True, dropout=0.1)
            for _ in range(num_timeframes)
        ])

        # Timeframe fusion layer
        self.fusion_layer = nn.Sequential(
            nn.Linear(hidden_dim * num_timeframes, hidden_dim * 2),
            nn.LeakyReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim * 2, hidden_dim)
        )

        # Fully connected head for price prediction
        self.price_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, output_dim)
        )

        # Fully connected head for extrema prediction (high and low points)
        self.extrema_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, 10)  # 5 time steps, 2 classes (high/low) each
        )

        # Volume prediction head
        self.volume_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x_list):
        # x_list is a list of tensors, one per timeframe
        # Each x shape: (batch_size, seq_len, input_dim)

        # Process each timeframe with its own LSTM
        lstm_outputs = []
        for i, x in enumerate(x_list):
            lstm_out, _ = self.timeframe_lstms[i](x)  # (batch_size, seq_len, hidden_dim)
            lstm_outputs.append(lstm_out)

        # Apply self-attention to each timeframe
        attn_outputs = []
        for i, lstm_out in enumerate(lstm_outputs):
            attn_output, _ = self.self_attentions[i](lstm_out, lstm_out, lstm_out)
            attn_outputs.append(attn_output[:, -1, :])  # use the last time step

        # Concatenate all timeframe representations
        combined = torch.cat(attn_outputs, dim=1)  # (batch_size, hidden_dim * num_timeframes)

        # Fuse timeframe information
        fused = self.fusion_layer(combined)  # (batch_size, hidden_dim)

        # Price, extrema, and volume heads
        price_pred = self.price_fc(fused)
        extrema_logits = self.extrema_fc(fused)
        volume_pred = self.volume_fc(fused)

        return price_pred, extrema_logits, volume_pred
class EnhancedDQN(nn.Module):
    def __init__(self, state_dim, action_dim, hidden_dim=512):
        super(EnhancedDQN, self).__init__()

        # Feature extraction layers with increased capacity
        self.feature_extraction = nn.Sequential(
            nn.Linear(state_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
        )

        # Advantage stream with increased capacity
        self.advantage_stream = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, action_dim)
        )

        # Value stream with increased capacity
        self.value_stream = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, 1)
        )

        # Enhanced transformer for temporal dependencies
        encoder_layers = TransformerEncoderLayer(
            d_model=hidden_dim,
            nhead=8,
            dim_feedforward=hidden_dim * 4,
            dropout=0.1,
            batch_first=True
        )
        self.transformer = TransformerEncoder(encoder_layers, num_layers=3)

        # LSTM for sequential decision making with increased capacity
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, num_layers=2, batch_first=True, dropout=0.1)

        # Final layers with increased capacity
        self.final_layers = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, action_dim)
        )

        # Market regime classification layer
        self.market_regime_classifier = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, 3)  # 3 regimes: trending, ranging, volatile
        )

    def forward(self, state, hidden=None):
        # Extract features
        features = self.feature_extraction(state)
        features = features.unsqueeze(1)  # add sequence dimension for transformer/LSTM

        # Transformer processing
        transformer_out = self.transformer(features)

        # LSTM processing
        lstm_out, lstm_hidden = self.lstm(transformer_out)

        # Dueling architecture
        advantage = self.advantage_stream(features.squeeze(1))
        value = self.value_stream(features.squeeze(1))

        # Combine transformer and LSTM outputs for the direct Q-value head
        combined = torch.cat([transformer_out.squeeze(1), lstm_out.squeeze(1)], dim=1)
        q_values = self.final_layers(combined)

        # Market regime classification
        market_regime = self.market_regime_classifier(transformer_out.squeeze(1))

        # Dueling Q-value computation
        dueling_q = value + advantage - advantage.mean(dim=1, keepdim=True)

        # Final Q-values are a weighted combination of the dueling Q-values and the
        # direct Q-values, letting the model use either approach depending on the situation
        q_values = 0.5 * dueling_q + 0.5 * q_values

        return q_values, lstm_hidden, market_regime
class EnhancedReplayBuffer:
    """Enhanced replay buffer with prioritized experience replay and n-step returns."""

    def __init__(self, capacity, alpha=0.6, beta=0.4, beta_increment=0.001, n_step=3, gamma=0.99):
        self.capacity = capacity
        self.buffer = []
        self.position = 0
        self.priorities = torch.zeros(capacity)
        self.alpha = alpha  # priority exponent
        self.beta = beta  # importance sampling weight
        self.beta_increment = beta_increment  # beta annealing
        self.n_step = n_step  # n-step returns
        self.gamma = gamma  # discount factor
        self.n_step_buffer = []
        self.max_priority = 1.0

    def push(self, state, action, reward, next_state, done):
        # Store experience in the n-step buffer
        self.n_step_buffer.append((state, action, reward, next_state, done))

        # If we don't have enough experiences for an n-step return, wait
        if len(self.n_step_buffer) < self.n_step and not done:
            return

        # Calculate the n-step return
        reward_n = 0
        for i in range(self.n_step):
            if i >= len(self.n_step_buffer):
                break
            reward_n += self.gamma**i * self.n_step_buffer[i][2]

        # Get state, action from the first experience
        state = self.n_step_buffer[0][0]
        action = self.n_step_buffer[0][1]

        # Get next_state, done from the last experience
        next_state = self.n_step_buffer[-1][3]
        done = self.n_step_buffer[-1][4]

        # Store in the replay buffer with max priority so it gets sampled at least once
        if len(self.buffer) < self.capacity:
            self.buffer.append(None)
        self.buffer[self.position] = (state, action, reward_n, next_state, done)
        self.priorities[self.position] = self.max_priority

        # Move the position pointer
        self.position = (self.position + 1) % self.capacity

        # Remove the first experience from the n-step buffer
        self.n_step_buffer.pop(0)

        # If the episode is done, clear the n-step buffer
        if done:
            self.n_step_buffer = []

    def sample(self, batch_size):
        # Calculate sampling probabilities
        if len(self.buffer) < self.capacity:
            probs = self.priorities[:len(self.buffer)]
        else:
            probs = self.priorities

        # Normalize probabilities
        probs = probs ** self.alpha
        probs = probs / probs.sum()

        # Sample indices based on priorities
        indices = torch.multinomial(probs, batch_size, replacement=True)

        # Calculate importance sampling weights
        weights = (len(self.buffer) * probs[indices]) ** (-self.beta)
        weights = weights / weights.max()
        self.beta = min(1.0, self.beta + self.beta_increment)  # anneal beta

        # Get experiences
        states = []
        actions = []
        rewards = []
        next_states = []
        dones = []
        for idx in indices:
            state, action, reward, next_state, done = self.buffer[idx]
            states.append(state)
            actions.append(action)
            rewards.append(reward)
            next_states.append(next_state)
            dones.append(done)

        return (
            torch.stack(states),
            torch.tensor(actions),
            torch.tensor(rewards, dtype=torch.float32),
            torch.stack(next_states),
            torch.tensor(dones, dtype=torch.float32),
            indices,
            weights
        )

    def update_priorities(self, indices, td_errors):
        for idx, td_error in zip(indices, td_errors):
            # Update priority based on TD error; the small constant keeps priorities non-zero
            priority = abs(td_error) + 1e-5
            self.priorities[idx] = priority
            self.max_priority = max(self.max_priority, priority)

    def __len__(self):
        return len(self.buffer)
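# --- Illustrative usage, not part of this commit (states must be tensors so
# --- torch.stack works in sample(); td_errors come from the learner).
# buffer = EnhancedReplayBuffer(capacity=100_000, n_step=3)
# buffer.push(state, action, reward, next_state, done)
# states, actions, rewards, next_states, dones, idxs, ws = buffer.sample(32)
# buffer.update_priorities(idxs, td_errors)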
def train_price_predictor(model, data_loaders, optimizer, device, epochs=10):
    """
    Train the price prediction model using data from multiple timeframes.

    Args:
        model: The EnhancedPricePredictionModel
        data_loaders: List of DataLoader objects, one for each timeframe
        optimizer: Optimizer for training
        device: Device to train on (CPU or GPU)
        epochs: Number of training epochs
    """
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        num_batches = 0

        # Assume all dataloaders have the same length
        for batch_idx, batch_data in enumerate(zip(*data_loaders)):
            # batch_data holds one (inputs, price_targets, extrema_targets, volume_targets)
            # tuple per timeframe
            optimizer.zero_grad()

            # Prepare inputs for each timeframe
            inputs_list = [data[0].to(device) for data in batch_data]
            price_targets = batch_data[0][1].to(device)  # use targets from the first timeframe (e.g., 1m)
            extrema_targets = batch_data[0][2].to(device)
            volume_targets = batch_data[0][3].to(device)

            # Forward pass
            price_pred, extrema_logits, volume_pred = model(inputs_list)

            # Calculate losses
            price_loss = F.mse_loss(price_pred, price_targets)
            extrema_loss = F.binary_cross_entropy_with_logits(extrema_logits, extrema_targets)
            volume_loss = F.mse_loss(volume_pred, volume_targets)

            # Combined loss with weighting
            loss = price_loss + 0.5 * extrema_loss + 0.3 * volume_loss

            # Backward pass
            loss.backward()

            # Gradient clipping to prevent exploding gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()
            total_loss += loss.item()
            num_batches += 1

            if batch_idx % 100 == 0:
                print(f"Epoch {epoch+1}/{epochs}, Batch {batch_idx}, Loss: {loss.item():.6f}")

        avg_loss = total_loss / num_batches
        print(f"Epoch {epoch+1}/{epochs}, Avg Loss: {avg_loss:.6f}")

        # Learning rate scheduling: decay by 10% every 5 epochs
        if epoch > 0 and epoch % 5 == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

    return model
def prepare_multi_timeframe_data(exchange, timeframes=['1m', '15m', '1h'], lookback=30):
    """
    Prepare data from multiple timeframes for training.

    Args:
        exchange: Exchange object to fetch data from
        timeframes: List of timeframes to fetch
        lookback: Number of candles to look back

    Returns:
        List of DataLoader objects, one for each timeframe
    """
    data_loaders = []
    for timeframe in timeframes:
        # Fetch historical data for this timeframe
        candles = exchange.fetch_ohlcv(timeframe=timeframe, limit=1000)

        # Prepare inputs and targets
        inputs = []
        price_targets = []
        extrema_targets = []
        volume_targets = []

        for i in range(lookback, len(candles) - 5):
            # Input: lookback candles (close price and volume)
            input_data = torch.tensor([
                [candle[4], candle[5]] for candle in candles[i-lookback:i]
            ], dtype=torch.float32)

            # Target: close price of the next 5 candles
            price_target = torch.tensor([
                candle[4] for candle in candles[i:i+5]
            ], dtype=torch.float32)

            # Target: extrema points in the next 5 candles
            extrema_target = torch.zeros(10, dtype=torch.float32)  # 5 time steps, 2 classes each
            for j in range(5):
                # Simple extrema detection for demonstration; needs both neighbors,
                # so only the interior steps are labeled
                if j > 0 and j < 4:
                    # Local high
                    if candles[i+j][2] > candles[i+j-1][2] and candles[i+j][2] > candles[i+j+1][2]:
                        extrema_target[j*2] = 1.0
                    # Local low
                    if candles[i+j][3] < candles[i+j-1][3] and candles[i+j][3] < candles[i+j+1][3]:
                        extrema_target[j*2+1] = 1.0

            # Target: volume of the next 5 candles
            volume_target = torch.tensor([
                candle[5] for candle in candles[i:i+5]
            ], dtype=torch.float32)

            inputs.append(input_data)
            price_targets.append(price_target)
            extrema_targets.append(extrema_target)
            volume_targets.append(volume_target)

        # Create dataset and dataloader
        dataset = torch.utils.data.TensorDataset(
            torch.stack(inputs),
            torch.stack(price_targets),
            torch.stack(extrema_targets),
            torch.stack(volume_targets)
        )
        data_loader = torch.utils.data.DataLoader(
            dataset, batch_size=32, shuffle=True
        )
        data_loaders.append(data_loader)

    return data_loaders
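# --- Illustrative wiring of the two helpers above, not part of this commit
# --- (`exchange` is assumed to expose fetch_ohlcv(timeframe=..., limit=...)).
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = EnhancedPricePredictionModel(num_timeframes=3).to(device)
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# loaders = prepare_multi_timeframe_data(exchange, timeframes=['1m', '15m', '1h'])
# model = train_price_predictor(model, loaders, optimizer, device, epochs=10)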

File diff suppressed because it is too large.