wip: enhanced multi-timeframe model
commit 506458d55e
parent ad559d8c61
crypto/gogo2/TradingEnvironment: 42 lines (new file)
@ -0,0 +1,42 @@
def step(self, action):
    """Take an action in the environment and return the next state, reward, and done flag."""
    # Store current price before taking action
    self.current_price = self.data[self.current_step]['close']

    # Process action (0: HOLD, 1: BUY/LONG, 2: SELL/SHORT, 3: CLOSE)
    if not self.demo and self.trading_client:
        # Execute real trades in live mode
        asyncio.create_task(self._execute_live_action(action))

    # Calculate reward (simulation still runs in parallel with live trading)
    reward, _ = self.calculate_reward(action)  # Unpack the (reward, info) tuple

    # Check for stop loss / take profit hits
    self.check_sl_tp()

    # Move to next step
    self.current_step += 1
    done = self.current_step >= len(self.data) - 1

    # Get new state
    next_state = self.get_state()

    return next_state, reward, done

def calculate_reward(self, action):
    """Calculate the reward for the current action."""
    # ... (existing code)

    # Combine all reward components
    reward = pnl_reward + timing_reward + risk_reward + prediction_reward

    # Log components for analysis
    info = {
        'pnl_reward': pnl_reward,
        'timing_reward': timing_reward,
        'risk_reward': risk_reward,
        'prediction_reward': prediction_reward,
        'total_reward': reward
    }

    return reward, info  # Return the tuple so step() can unpack reward and info
@ -38,3 +38,26 @@ it seems the model is not learning a lot. we keep hovering about the same starti
It seems we may need another NN model down the line just to predict the extremums of the price.
We may have to include a mechanism to calculate the extremums of the price retrospectively and use those labels to bootstrap (pre-train) the model; a rough sketch follows.
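One possible way to produce those retrospective extrema labels (a minimal sketch: the numpy/scipy approach, the array inputs, and the window size are assumptions, not code from the repo):

import numpy as np
from scipy.signal import argrelextrema

def label_extrema(highs: np.ndarray, lows: np.ndarray, order: int = 5) -> np.ndarray:
    """Retrospectively label local highs/lows for bootstrap pre-training.

    Returns an array of shape (len(highs), 2): column 0 marks local highs,
    column 1 marks local lows within a +/- `order` candle window.
    """
    labels = np.zeros((len(highs), 2), dtype=np.float32)
    high_idx = argrelextrema(highs, np.greater_equal, order=order)[0]
    low_idx = argrelextrema(lows, np.less_equal, order=order)[0]
    labels[high_idx, 0] = 1.0
    labels[low_idx, 1] = 1.0
    return labels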
Why Performance Might Be Stagnating

Several factors could explain why the model isn't improving significantly during training:

- Insufficient Model Capacity for Price Prediction: while the price prediction model has 267,663 parameters, financial time-series prediction is extremely challenging. The market may have patterns that are too complex or too random for the current model to capture effectively.
- Overfitting to Training Data: the model might be memorizing patterns in the training data that don't generalize to new market conditions (see the walk-forward split sketch after this list).
- Transformer-LSTM Redundancy in DQN: your DQN model uses both a transformer and an LSTM, which might be redundant. Both are designed to capture sequential dependencies, and having both could lead to overfitting or training instability.
- Imbalanced Parameter Distribution: 64.5% of your DQN parameters are in the transformer component, which might be excessive for the task.
- Reward Function Issues: the reward function might not be properly aligned with profitable trading strategies, or it might be too sparse to provide meaningful learning signals.
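To check the overfitting point, a quick walk-forward evaluation can be run before changing the architecture. A minimal sketch (fold counts and sizes are illustrative assumptions):

import numpy as np

def walk_forward_splits(n_samples: int, n_folds: int = 5, min_train: int = 1000):
    """Yield (train_idx, val_idx) index arrays for walk-forward evaluation.

    Train on an expanding past window, validate on the next chunk; a large gap
    between train and validation loss suggests memorization rather than learning.
    """
    fold_size = max(1, (n_samples - min_train) // n_folds)
    for k in range(n_folds):
        train_end = min_train + k * fold_size
        val_end = min(train_end + fold_size, n_samples)
        yield np.arange(0, train_end), np.arange(train_end, val_end)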
Suggested Improvements

1. Enhance Price Prediction Training

The full multi-timeframe training loop (train_price_predictor) is added in enhanced_models.py in this commit; one small tweak to its multi-task loss is sketched below.
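For example, the combined loss currently weights price, extrema, and volume as 1.0 / 0.5 / 0.3; a reweighting toward the extrema head could be tried (the weights below are illustrative assumptions to tune, not values from the repo):

import torch.nn.functional as F

def combined_price_loss(price_pred, price_targets,
                        extrema_logits, extrema_targets,
                        volume_pred, volume_targets,
                        w_price=0.7, w_extrema=1.0, w_volume=0.2):
    # Weighted multi-task loss; emphasizes the extrema head the DQN trades on.
    price_loss = F.mse_loss(price_pred, price_targets)
    extrema_loss = F.binary_cross_entropy_with_logits(extrema_logits, extrema_targets)
    volume_loss = F.mse_loss(volume_pred, volume_targets)
    return w_price * price_loss + w_extrema * extrema_loss + w_volume * volume_loss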
2. Simplify the DQN Architecture

Consider creating a more streamlined DQN model:
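A minimal sketch of what a slimmer model could look like: it keeps the dueling streams and a single LSTM but drops the parallel transformer branch. Layer sizes and the class name are illustrative, not the repo's implementation.

import torch
import torch.nn as nn

class StreamlinedDQN(nn.Module):
    """Dueling DQN without the parallel transformer branch."""
    def __init__(self, state_dim: int, action_dim: int, hidden_dim: int = 256):
        super().__init__()
        self.features = nn.Sequential(
            nn.Linear(state_dim, hidden_dim), nn.LeakyReLU(),
            nn.Linear(hidden_dim, hidden_dim), nn.LeakyReLU(),
        )
        # A single LSTM keeps the sequential modelling.
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
        self.advantage = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2), nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, action_dim)
        )
        self.value = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2), nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, 1)
        )

    def forward(self, state, hidden=None):
        x = self.features(state).unsqueeze(1)   # (batch, 1, hidden_dim)
        lstm_out, hidden = self.lstm(x, hidden)
        h = lstm_out.squeeze(1)
        adv = self.advantage(h)
        val = self.value(h)
        q = val + adv - adv.mean(dim=1, keepdim=True)  # standard dueling combination
        return q, hidden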
3. Improve the Reward Function

Make sure your reward function provides meaningful signals for learning:
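One possible shape for a denser reward signal (the penalty terms and weights below are assumptions to tune, not values taken from the current calculate_reward):

def shaped_reward(pnl: float, position_held_steps: int, drawdown: float,
                  fee: float = 0.0005) -> float:
    """Illustrative reward shaping: realized PnL net of fees, with small
    penalties for sitting in a stale position and for deep drawdowns."""
    reward = pnl - fee                                   # profitability after costs
    reward -= 0.01 * max(0, position_held_steps - 20)    # discourage stale positions
    reward -= 0.1 * max(0.0, drawdown)                   # penalize risk taken for the PnL
    return reward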
4. Implement Curriculum Learning

Start with simpler market conditions and gradually increase complexity:
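A sketch of how training data could be staged from calm to volatile markets (the window size, stage count, and volatility measure are assumptions; candles are assumed to be dicts with a 'close' key, as in the environment's step()):

import numpy as np

def curriculum_schedule(candles, window: int = 500, num_stages: int = 4):
    """Split history into fixed-size windows, rank them by realized volatility
    of log returns, and yield progressively harder training sets."""
    closes = np.array([c['close'] for c in candles], dtype=np.float64)
    starts = range(0, len(closes) - window, window)
    vols = [(np.std(np.diff(np.log(closes[s:s + window]))), s) for s in starts]
    vols.sort()                                    # calm windows first
    per_stage = max(1, len(vols) // num_stages)
    for stage in range(1, num_stages + 1):
        chosen = vols[: stage * per_stage]         # each stage adds harder windows
        yield [candles[s:s + window] for _, s in chosen]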
Conclusion

The issue appears to be a combination of model complexity, potential overfitting, and possibly insufficient learning signals from the reward function. By simplifying the DQN architecture (particularly reducing the transformer component), improving the price prediction training, and enhancing the reward function, you should see better learning progress.

Would you like me to implement any of these specific improvements to your codebase?
@ -3,6 +3,278 @@ import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer

class EnhancedPricePredictionModel(nn.Module):
    def __init__(self, input_dim=2, hidden_dim=256, num_layers=3, output_dim=5, num_timeframes=3):
        super(EnhancedPricePredictionModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.num_timeframes = num_timeframes

        # Separate LSTM for each timeframe
        self.timeframe_lstms = nn.ModuleList([
            nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.2)
            for _ in range(num_timeframes)
        ])

        # Cross-timeframe attention
        self.cross_attention = nn.MultiheadAttention(hidden_dim, num_heads=8, batch_first=True, dropout=0.1)

        # Self-attention for each timeframe
        self.self_attentions = nn.ModuleList([
            nn.MultiheadAttention(hidden_dim, num_heads=8, batch_first=True, dropout=0.1)
            for _ in range(num_timeframes)
        ])

        # Timeframe fusion layer
        self.fusion_layer = nn.Sequential(
            nn.Linear(hidden_dim * num_timeframes, hidden_dim * 2),
            nn.LeakyReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim * 2, hidden_dim)
        )

        # Fully connected layer for price prediction
        self.price_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, output_dim)
        )

        # Fully connected layer for extrema prediction (high and low points)
        self.extrema_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, 10)  # 5 time steps, 2 classes (high/low) each
        )

        # Volume prediction layer
        self.volume_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x_list):
        # x_list is a list of tensors, one for each timeframe
        # Each x shape: (batch_size, seq_len, input_dim)

        # Process each timeframe with its own LSTM
        lstm_outputs = []
        for i, x in enumerate(x_list):
            lstm_out, _ = self.timeframe_lstms[i](x)  # lstm_out: (batch_size, seq_len, hidden_dim)
            lstm_outputs.append(lstm_out)

        # Apply self-attention to each timeframe
        attn_outputs = []
        for i, lstm_out in enumerate(lstm_outputs):
            attn_output, _ = self.self_attentions[i](lstm_out, lstm_out, lstm_out)
            attn_outputs.append(attn_output[:, -1, :])  # Use the last time step

        # Concatenate all timeframe representations
        combined = torch.cat(attn_outputs, dim=1)  # (batch_size, hidden_dim * num_timeframes)

        # Fuse timeframe information
        fused = self.fusion_layer(combined)  # (batch_size, hidden_dim)

        # Price prediction
        price_pred = self.price_fc(fused)

        # Extrema prediction
        extrema_logits = self.extrema_fc(fused)

        # Volume prediction
        volume_pred = self.volume_fc(fused)

        return price_pred, extrema_logits, volume_pred

class EnhancedDQN(nn.Module):
    def __init__(self, state_dim, action_dim, hidden_dim=512):
        super(EnhancedDQN, self).__init__()

        # Feature extraction layers with increased capacity
        self.feature_extraction = nn.Sequential(
            nn.Linear(state_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
        )

        # Advantage stream with increased capacity
        self.advantage_stream = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, action_dim)
        )

        # Value stream with increased capacity
        self.value_stream = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, 1)
        )

        # Enhanced transformer for temporal dependencies
        encoder_layers = TransformerEncoderLayer(
            d_model=hidden_dim,
            nhead=8,
            dim_feedforward=hidden_dim * 4,
            dropout=0.1,
            batch_first=True
        )
        self.transformer = TransformerEncoder(encoder_layers, num_layers=3)

        # LSTM for sequential decision making with increased capacity
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, num_layers=2, batch_first=True, dropout=0.1)

        # Final layers with increased capacity
        self.final_layers = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, action_dim)
        )

        # Market regime classification layer
        self.market_regime_classifier = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, 3)  # 3 regimes: trending, ranging, volatile
        )

    def forward(self, state, hidden=None):
        # Extract features
        features = self.feature_extraction(state)
        features = features.unsqueeze(1)  # Add sequence dimension for transformer/LSTM

        # Transformer processing
        transformer_out = self.transformer(features)

        # LSTM processing
        lstm_out, lstm_hidden = self.lstm(transformer_out)

        # Dueling architecture
        advantage = self.advantage_stream(features.squeeze(1))
        value = self.value_stream(features.squeeze(1))

        # Combine transformer, LSTM and dueling outputs
        combined = torch.cat([transformer_out.squeeze(1), lstm_out.squeeze(1)], dim=1)
        q_values = self.final_layers(combined)

        # Market regime classification
        market_regime = self.market_regime_classifier(transformer_out.squeeze(1))

        # Dueling Q-value computation
        dueling_q = value + advantage - advantage.mean(dim=1, keepdim=True)

        # Final Q-values are a weighted combination of the dueling Q-values and the direct Q-values
        # This allows the model to use either approach depending on the situation
        q_values = 0.5 * dueling_q + 0.5 * q_values

        return q_values, lstm_hidden, market_regime

def count_parameters(model):
    total_params = 0
    layer_params = {}

    for name, param in model.named_parameters():
        if param.requires_grad:
            param_count = param.numel()
            total_params += param_count
            layer_params[name] = (param_count, param.shape)

    return total_params, layer_params

def main():
    # Initialize the original models for comparison
    original_price_model = PricePredictionModel()
    original_price_total_params, _ = count_parameters(original_price_model)

    state_dim = 50
    action_dim = 3
    original_dqn_model = DQN(state_dim=state_dim, action_dim=action_dim)
    original_dqn_total_params, _ = count_parameters(original_dqn_model)

    # Initialize the enhanced models
    enhanced_price_model = EnhancedPricePredictionModel(num_timeframes=3)
    enhanced_price_total_params, enhanced_price_layer_params = count_parameters(enhanced_price_model)

    # Increase state dimension to accommodate multiple timeframes
    enhanced_state_dim = 100  # Increased from 50 to accommodate more features
    enhanced_dqn_model = EnhancedDQN(state_dim=enhanced_state_dim, action_dim=action_dim)
    enhanced_dqn_total_params, enhanced_dqn_layer_params = count_parameters(enhanced_dqn_model)

    # Print comparison
    print("=== MODEL SIZE COMPARISON ===")
    print(f"Original Price Prediction Model: {original_price_total_params:,} parameters")
    print(f"Enhanced Price Prediction Model: {enhanced_price_total_params:,} parameters")
    print(f"Growth Factor: {enhanced_price_total_params / original_price_total_params:.2f}x\n")

    print(f"Original DQN Model: {original_dqn_total_params:,} parameters")
    print(f"Enhanced DQN Model: {enhanced_dqn_total_params:,} parameters")
    print(f"Growth Factor: {enhanced_dqn_total_params / original_dqn_total_params:.2f}x\n")

    print(f"Total Original Models: {original_price_total_params + original_dqn_total_params:,} parameters")
    print(f"Total Enhanced Models: {enhanced_price_total_params + enhanced_dqn_total_params:,} parameters")
    print(f"Overall Growth Factor: {(enhanced_price_total_params + enhanced_dqn_total_params) / (original_price_total_params + original_dqn_total_params):.2f}x\n")

    # Print VRAM usage estimate (rough approximation)
    bytes_per_param = 4  # 4 bytes for float32
    original_vram_mb = (original_price_total_params + original_dqn_total_params) * bytes_per_param / (1024 * 1024)
    enhanced_vram_mb = (enhanced_price_total_params + enhanced_dqn_total_params) * bytes_per_param / (1024 * 1024)

    print("=== ESTIMATED VRAM USAGE ===")
    print(f"Original Models: {original_vram_mb:.2f} MB")
    print(f"Enhanced Models: {enhanced_vram_mb:.2f} MB")
    print(f"Available VRAM: 8,192 MB (8 GB)")
    print(f"VRAM Utilization: {enhanced_vram_mb / 8192 * 100:.2f}%\n")

    # Print detailed breakdown of enhanced models
    print("=== ENHANCED PRICE PREDICTION MODEL BREAKDOWN ===")

    # Group parameters by component
    timeframe_lstm_params = sum(count for name, (count, _) in enhanced_price_layer_params.items() if "timeframe_lstms" in name)
    attention_params = sum(count for name, (count, _) in enhanced_price_layer_params.items() if "attention" in name)
    fusion_params = sum(count for name, (count, _) in enhanced_price_layer_params.items() if "fusion" in name)
    output_params = sum(count for name, (count, _) in enhanced_price_layer_params.items() if any(x in name for x in ["price_fc", "extrema_fc", "volume_fc"]))

    print(f"Timeframe LSTMs: {timeframe_lstm_params:,} parameters ({timeframe_lstm_params/enhanced_price_total_params*100:.1f}%)")
    print(f"Attention Mechanisms: {attention_params:,} parameters ({attention_params/enhanced_price_total_params*100:.1f}%)")
    print(f"Fusion Layer: {fusion_params:,} parameters ({fusion_params/enhanced_price_total_params*100:.1f}%)")
    print(f"Output Layers: {output_params:,} parameters ({output_params/enhanced_price_total_params*100:.1f}%)\n")

    print("=== ENHANCED DQN MODEL BREAKDOWN ===")

    # Group parameters by component
    feature_extraction_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "feature_extraction" in name)
    advantage_value_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "advantage_stream" in name or "value_stream" in name)
    transformer_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "transformer" in name)
    lstm_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "lstm" in name and "transformer" not in name)
    final_layers_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "final_layers" in name)
    market_regime_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "market_regime" in name)

    print(f"Feature Extraction: {feature_extraction_params:,} parameters ({feature_extraction_params/enhanced_dqn_total_params*100:.1f}%)")
    print(f"Advantage & Value Streams: {advantage_value_params:,} parameters ({advantage_value_params/enhanced_dqn_total_params*100:.1f}%)")
    print(f"Transformer: {transformer_params:,} parameters ({transformer_params/enhanced_dqn_total_params*100:.1f}%)")
    print(f"LSTM: {lstm_params:,} parameters ({lstm_params/enhanced_dqn_total_params*100:.1f}%)")
    print(f"Final Layers: {final_layers_params:,} parameters ({final_layers_params/enhanced_dqn_total_params*100:.1f}%)")
    print(f"Market Regime Classifier: {market_regime_params:,} parameters ({market_regime_params/enhanced_dqn_total_params*100:.1f}%)")

# Keep the original models for comparison
class PricePredictionModel(nn.Module):
    def __init__(self, input_dim=2, hidden_dim=128, num_layers=2, output_dim=5):
        super(PricePredictionModel, self).__init__()
@ -102,106 +374,5 @@ class DQN(nn.Module):
        return q_values, lstm_hidden

def count_parameters(model):
    total_params = 0
    layer_params = {}

    for name, param in model.named_parameters():
        if param.requires_grad:
            param_count = param.numel()
            total_params += param_count
            layer_params[name] = (param_count, param.shape)

    return total_params, layer_params

def main():
    # Initialize the Price Prediction Model
    price_model = PricePredictionModel()
    price_total_params, price_layer_params = count_parameters(price_model)

    print(f"Price Prediction Model parameters: {price_total_params:,}")
    print("\nPrice Prediction Model Layers:")
    for name, (count, shape) in price_layer_params.items():
        print(f"{name}: {count:,} (shape: {shape})")

    # Initialize the DQN Model with typical dimensions
    state_dim = 50  # Typical state dimension for the trading bot
    action_dim = 3  # Typical action dimension (buy, sell, hold)
    dqn_model = DQN(state_dim=state_dim, action_dim=action_dim)
    dqn_total_params, dqn_layer_params = count_parameters(dqn_model)

    # Count parameters by category
    feature_extraction_params = sum(count for name, (count, _) in dqn_layer_params.items() if "feature_extraction" in name)
    advantage_value_params = sum(count for name, (count, _) in dqn_layer_params.items() if "advantage_stream" in name or "value_stream" in name)
    transformer_params = sum(count for name, (count, _) in dqn_layer_params.items() if "transformer" in name)
    lstm_params = sum(count for name, (count, _) in dqn_layer_params.items() if "lstm" in name and "transformer" not in name)
    final_layers_params = sum(count for name, (count, _) in dqn_layer_params.items() if "final_layers" in name)

    print(f"\nDQN Model parameters: {dqn_total_params:,}")

    # Create a set to track which parameters we've printed
    printed_params = set()

    # Print DQN layers in groups to avoid output truncation
    print(f"\nDQN Model Layers (Feature Extraction): {feature_extraction_params:,} parameters")
    for name, (count, shape) in dqn_layer_params.items():
        if "feature_extraction" in name:
            print(f"{name}: {count:,} (shape: {shape})")
            printed_params.add(name)

    print(f"\nDQN Model Layers (Advantage & Value Streams): {advantage_value_params:,} parameters")
    for name, (count, shape) in dqn_layer_params.items():
        if "advantage_stream" in name or "value_stream" in name:
            print(f"{name}: {count:,} (shape: {shape})")
            printed_params.add(name)

    print(f"\nDQN Model Layers (Transformer): {transformer_params:,} parameters")
    for name, (count, shape) in dqn_layer_params.items():
        if "transformer" in name:
            print(f"{name}: {count:,} (shape: {shape})")
            printed_params.add(name)

    print(f"\nDQN Model Layers (LSTM): {lstm_params:,} parameters")
    for name, (count, shape) in dqn_layer_params.items():
        if "lstm" in name and "transformer" not in name:
            print(f"{name}: {count:,} (shape: {shape})")
            printed_params.add(name)

    print(f"\nDQN Model Layers (Final Layers): {final_layers_params:,} parameters")
    for name, (count, shape) in dqn_layer_params.items():
        if "final_layers" in name:
            print(f"{name}: {count:,} (shape: {shape})")
            printed_params.add(name)

    # Print any remaining parameters that weren't caught by the categories above
    remaining_params = set(dqn_layer_params.keys()) - printed_params
    if remaining_params:
        remaining_params_count = sum(dqn_layer_params[name][0] for name in remaining_params)
        print(f"\nDQN Model Layers (Other): {remaining_params_count:,} parameters")
        for name in remaining_params:
            count, shape = dqn_layer_params[name]
            print(f"{name}: {count:,} (shape: {shape})")

    # Total parameters across both models
    print(f"\nTotal parameters (both models): {price_total_params + dqn_total_params:,}")

    # Print summary of parameter distribution
    print("\nParameter Distribution Summary:")
    print(f"Price Prediction Model: {price_total_params:,} parameters ({price_total_params/(price_total_params + dqn_total_params)*100:.1f}%)")
    print(f"DQN Model: {dqn_total_params:,} parameters ({dqn_total_params/(price_total_params + dqn_total_params)*100:.1f}%)")
    print("\nDQN Model Breakdown:")
    print(f"- Feature Extraction: {feature_extraction_params:,} parameters ({feature_extraction_params/dqn_total_params*100:.1f}%)")
    print(f"- Advantage & Value Streams: {advantage_value_params:,} parameters ({advantage_value_params/dqn_total_params*100:.1f}%)")
    print(f"- Transformer: {transformer_params:,} parameters ({transformer_params/dqn_total_params*100:.1f}%)")
    print(f"- LSTM: {lstm_params:,} parameters ({lstm_params/dqn_total_params*100:.1f}%)")
    print(f"- Final Layers: {final_layers_params:,} parameters ({final_layers_params/dqn_total_params*100:.1f}%)")

    # Verify that all parameters are accounted for
    total_by_category = feature_extraction_params + advantage_value_params + transformer_params + lstm_params + final_layers_params
    if remaining_params:
        total_by_category += remaining_params_count
    print(f"\nSum of all categories: {total_by_category:,} parameters")
    print(f"Difference from total: {dqn_total_params - total_by_category:,} parameters")

if __name__ == "__main__":
    main()
crypto/gogo2/enhanced_models.py: 434 lines (new file)
@ -0,0 +1,434 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer

class EnhancedPricePredictionModel(nn.Module):
    def __init__(self, input_dim=2, hidden_dim=256, num_layers=3, output_dim=5, num_timeframes=3):
        super(EnhancedPricePredictionModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.num_timeframes = num_timeframes

        # Separate LSTM for each timeframe
        self.timeframe_lstms = nn.ModuleList([
            nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.2)
            for _ in range(num_timeframes)
        ])

        # Cross-timeframe attention
        self.cross_attention = nn.MultiheadAttention(hidden_dim, num_heads=8, batch_first=True, dropout=0.1)

        # Self-attention for each timeframe
        self.self_attentions = nn.ModuleList([
            nn.MultiheadAttention(hidden_dim, num_heads=8, batch_first=True, dropout=0.1)
            for _ in range(num_timeframes)
        ])

        # Timeframe fusion layer
        self.fusion_layer = nn.Sequential(
            nn.Linear(hidden_dim * num_timeframes, hidden_dim * 2),
            nn.LeakyReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim * 2, hidden_dim)
        )

        # Fully connected layer for price prediction
        self.price_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, output_dim)
        )

        # Fully connected layer for extrema prediction (high and low points)
        self.extrema_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, 10)  # 5 time steps, 2 classes (high/low) each
        )

        # Volume prediction layer
        self.volume_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x_list):
        # x_list is a list of tensors, one for each timeframe
        # Each x shape: (batch_size, seq_len, input_dim)

        # Process each timeframe with its own LSTM
        lstm_outputs = []
        for i, x in enumerate(x_list):
            lstm_out, _ = self.timeframe_lstms[i](x)  # lstm_out: (batch_size, seq_len, hidden_dim)
            lstm_outputs.append(lstm_out)

        # Apply self-attention to each timeframe
        attn_outputs = []
        for i, lstm_out in enumerate(lstm_outputs):
            attn_output, _ = self.self_attentions[i](lstm_out, lstm_out, lstm_out)
            attn_outputs.append(attn_output[:, -1, :])  # Use the last time step

        # Concatenate all timeframe representations
        combined = torch.cat(attn_outputs, dim=1)  # (batch_size, hidden_dim * num_timeframes)

        # Fuse timeframe information
        fused = self.fusion_layer(combined)  # (batch_size, hidden_dim)

        # Price prediction
        price_pred = self.price_fc(fused)

        # Extrema prediction
        extrema_logits = self.extrema_fc(fused)

        # Volume prediction
        volume_pred = self.volume_fc(fused)

        return price_pred, extrema_logits, volume_pred

class EnhancedDQN(nn.Module):
    def __init__(self, state_dim, action_dim, hidden_dim=512):
        super(EnhancedDQN, self).__init__()

        # Feature extraction layers with increased capacity
        self.feature_extraction = nn.Sequential(
            nn.Linear(state_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
        )

        # Advantage stream with increased capacity
        self.advantage_stream = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, action_dim)
        )

        # Value stream with increased capacity
        self.value_stream = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, 1)
        )

        # Enhanced transformer for temporal dependencies
        encoder_layers = TransformerEncoderLayer(
            d_model=hidden_dim,
            nhead=8,
            dim_feedforward=hidden_dim * 4,
            dropout=0.1,
            batch_first=True
        )
        self.transformer = TransformerEncoder(encoder_layers, num_layers=3)

        # LSTM for sequential decision making with increased capacity
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, num_layers=2, batch_first=True, dropout=0.1)

        # Final layers with increased capacity
        self.final_layers = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, action_dim)
        )

        # Market regime classification layer
        self.market_regime_classifier = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, 3)  # 3 regimes: trending, ranging, volatile
        )

    def forward(self, state, hidden=None):
        # Extract features
        features = self.feature_extraction(state)
        features = features.unsqueeze(1)  # Add sequence dimension for transformer/LSTM

        # Transformer processing
        transformer_out = self.transformer(features)

        # LSTM processing
        lstm_out, lstm_hidden = self.lstm(transformer_out)

        # Dueling architecture
        advantage = self.advantage_stream(features.squeeze(1))
        value = self.value_stream(features.squeeze(1))

        # Combine transformer, LSTM and dueling outputs
        combined = torch.cat([transformer_out.squeeze(1), lstm_out.squeeze(1)], dim=1)
        q_values = self.final_layers(combined)

        # Market regime classification
        market_regime = self.market_regime_classifier(transformer_out.squeeze(1))

        # Dueling Q-value computation
        dueling_q = value + advantage - advantage.mean(dim=1, keepdim=True)

        # Final Q-values are a weighted combination of the dueling Q-values and the direct Q-values
        # This allows the model to use either approach depending on the situation
        q_values = 0.5 * dueling_q + 0.5 * q_values

        return q_values, lstm_hidden, market_regime

class EnhancedReplayBuffer:
    """Enhanced replay buffer with prioritized experience replay and n-step returns"""
    def __init__(self, capacity, alpha=0.6, beta=0.4, beta_increment=0.001, n_step=3, gamma=0.99):
        self.capacity = capacity
        self.buffer = []
        self.position = 0
        self.priorities = torch.zeros(capacity)
        self.alpha = alpha  # Priority exponent
        self.beta = beta  # Importance sampling weight
        self.beta_increment = beta_increment  # Beta annealing
        self.n_step = n_step  # n-step returns
        self.gamma = gamma  # Discount factor
        self.n_step_buffer = []
        self.max_priority = 1.0

    def push(self, state, action, reward, next_state, done):
        # Store experience in n-step buffer
        self.n_step_buffer.append((state, action, reward, next_state, done))

        # If we don't have enough experiences for n-step return, wait
        if len(self.n_step_buffer) < self.n_step and not done:
            return

        # Calculate n-step return
        reward_n = 0
        for i in range(self.n_step):
            if i >= len(self.n_step_buffer):
                break
            reward_n += self.gamma**i * self.n_step_buffer[i][2]

        # Get state, action from the first experience
        state = self.n_step_buffer[0][0]
        action = self.n_step_buffer[0][1]

        # Get next_state, done from the last experience
        next_state = self.n_step_buffer[-1][3]
        done = self.n_step_buffer[-1][4]

        # Store in replay buffer with max priority
        if len(self.buffer) < self.capacity:
            self.buffer.append(None)
        self.buffer[self.position] = (state, action, reward_n, next_state, done)

        # Set priority to max priority to ensure it gets sampled
        self.priorities[self.position] = self.max_priority

        # Move position pointer
        self.position = (self.position + 1) % self.capacity

        # Remove the first experience from n-step buffer
        self.n_step_buffer.pop(0)

        # If episode is done, clear n-step buffer
        if done:
            self.n_step_buffer = []

    def sample(self, batch_size):
        # Calculate sampling probabilities
        if len(self.buffer) < self.capacity:
            probs = self.priorities[:len(self.buffer)]
        else:
            probs = self.priorities

        # Normalize probabilities
        probs = probs ** self.alpha
        probs = probs / probs.sum()

        # Sample indices based on priorities
        indices = torch.multinomial(probs, batch_size, replacement=True)

        # Get samples
        states = []
        actions = []
        rewards = []
        next_states = []
        dones = []

        # Calculate importance sampling weights
        weights = (len(self.buffer) * probs[indices]) ** (-self.beta)
        weights = weights / weights.max()
        self.beta = min(1.0, self.beta + self.beta_increment)  # Anneal beta

        # Get experiences
        for idx in indices:
            state, action, reward, next_state, done = self.buffer[idx]
            states.append(state)
            actions.append(action)
            rewards.append(reward)
            next_states.append(next_state)
            dones.append(done)

        return (
            torch.stack(states),
            torch.tensor(actions),
            torch.tensor(rewards, dtype=torch.float32),
            torch.stack(next_states),
            torch.tensor(dones, dtype=torch.float32),
            indices,
            weights
        )

    def update_priorities(self, indices, td_errors):
        for idx, td_error in zip(indices, td_errors):
            # Update priority based on TD error
            priority = abs(td_error) + 1e-5  # Small constant to ensure non-zero priority
            self.priorities[idx] = priority
            self.max_priority = max(self.max_priority, priority)

    def __len__(self):
        return len(self.buffer)

def train_price_predictor(model, data_loaders, optimizer, device, epochs=10):
    """
    Train the price prediction model using data from multiple timeframes

    Args:
        model: The EnhancedPricePredictionModel
        data_loaders: List of DataLoader objects, one for each timeframe
        optimizer: Optimizer for training
        device: Device to train on (CPU or GPU)
        epochs: Number of training epochs
    """
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        num_batches = 0

        # Assume all dataloaders have the same length
        for batch_idx, batch_data in enumerate(zip(*data_loaders)):
            # Each batch_data is a tuple of (inputs, price_targets, extrema_targets, volume_targets) for each timeframe
            optimizer.zero_grad()

            # Prepare inputs for each timeframe
            inputs_list = [data[0].to(device) for data in batch_data]
            price_targets = batch_data[0][1].to(device)  # Use targets from the first timeframe (e.g., 1m)
            extrema_targets = batch_data[0][2].to(device)
            volume_targets = batch_data[0][3].to(device)

            # Forward pass
            price_pred, extrema_logits, volume_pred = model(inputs_list)

            # Calculate losses
            price_loss = F.mse_loss(price_pred, price_targets)
            extrema_loss = F.binary_cross_entropy_with_logits(extrema_logits, extrema_targets)
            volume_loss = F.mse_loss(volume_pred, volume_targets)

            # Combined loss with weighting
            loss = price_loss + 0.5 * extrema_loss + 0.3 * volume_loss

            # Backward pass
            loss.backward()

            # Gradient clipping to prevent exploding gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

            if batch_idx % 100 == 0:
                print(f"Epoch {epoch+1}/{epochs}, Batch {batch_idx}, Loss: {loss.item():.6f}")

        avg_loss = total_loss / num_batches
        print(f"Epoch {epoch+1}/{epochs}, Avg Loss: {avg_loss:.6f}")

        # Learning rate scheduling
        if epoch > 0 and epoch % 5 == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

    return model

def prepare_multi_timeframe_data(exchange, timeframes=['1m', '15m', '1h'], lookback=30):
    """
    Prepare data from multiple timeframes for training

    Args:
        exchange: Exchange object to fetch data from
        timeframes: List of timeframes to fetch
        lookback: Number of candles to look back

    Returns:
        List of DataLoader objects, one for each timeframe
    """
    data_loaders = []

    for timeframe in timeframes:
        # Fetch historical data for this timeframe
        candles = exchange.fetch_ohlcv(timeframe=timeframe, limit=1000)

        # Prepare inputs and targets
        inputs = []
        price_targets = []
        extrema_targets = []
        volume_targets = []

        for i in range(lookback, len(candles) - 5):
            # Input: lookback candles (price and volume)
            input_data = torch.tensor([
                [candle[4], candle[5]] for candle in candles[i-lookback:i]
            ], dtype=torch.float32)

            # Target: next 5 candles (price)
            price_target = torch.tensor([
                candle[4] for candle in candles[i:i+5]
            ], dtype=torch.float32)

            # Target: extrema points in next 5 candles
            extrema_target = torch.zeros(10, dtype=torch.float32)  # 5 time steps, 2 classes each
            for j in range(5):
                # Simple extrema detection for demonstration
                if j > 0 and j < 4:
                    # Local high
                    if candles[i+j][2] > candles[i+j-1][2] and candles[i+j][2] > candles[i+j+1][2]:
                        extrema_target[j*2] = 1.0
                    # Local low
                    if candles[i+j][3] < candles[i+j-1][3] and candles[i+j][3] < candles[i+j+1][3]:
                        extrema_target[j*2+1] = 1.0

            # Target: volume for next 5 candles
            volume_target = torch.tensor([
                candle[5] for candle in candles[i:i+5]
            ], dtype=torch.float32)

            inputs.append(input_data)
            price_targets.append(price_target)
            extrema_targets.append(extrema_target)
            volume_targets.append(volume_target)

        # Create dataset and dataloader
        dataset = torch.utils.data.TensorDataset(
            torch.stack(inputs),
            torch.stack(price_targets),
            torch.stack(extrema_targets),
            torch.stack(volume_targets)
        )

        data_loader = torch.utils.data.DataLoader(
            dataset, batch_size=32, shuffle=True
        )

        data_loaders.append(data_loader)

    return data_loaders
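For reference, a quick shape check of the new models' forward passes using random inputs (a sketch; the expected shapes follow from the definitions in enhanced_models.py above):

import torch
from enhanced_models import EnhancedPricePredictionModel, EnhancedDQN

model = EnhancedPricePredictionModel(input_dim=2, hidden_dim=256, num_timeframes=3)
batch, seq_len = 4, 30
x_list = [torch.randn(batch, seq_len, 2) for _ in range(3)]   # one tensor per timeframe
price_pred, extrema_logits, volume_pred = model(x_list)
print(price_pred.shape, extrema_logits.shape, volume_pred.shape)  # (4, 5) (4, 10) (4, 5)

dqn = EnhancedDQN(state_dim=100, action_dim=3)
q_values, lstm_hidden, market_regime = dqn(torch.randn(batch, 100))
print(q_values.shape, market_regime.shape)  # (4, 3) (4, 3)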
crypto/gogo2/main.py: 2598 lines changed (file diff suppressed because it is too large)