wip: enhanced multi-timeframe model

Dobromir Popov 2025-03-12 01:46:48 +02:00
parent ad559d8c61
commit 506458d55e
5 changed files with 1972 additions and 1498 deletions

View File

@@ -0,0 +1,42 @@
def step(self, action):
    """Take an action in the environment and return the next state, reward, and done flag."""
    # Store current price before taking action
    self.current_price = self.data[self.current_step]['close']

    # Process action (0: HOLD, 1: BUY/LONG, 2: SELL/SHORT, 3: CLOSE)
    if not self.demo and self.trading_client:
        # Execute real trades in live mode
        asyncio.create_task(self._execute_live_action(action))

    # Calculate reward (simulation still runs in parallel with live trading)
    reward, _ = self.calculate_reward(action)  # calculate_reward returns a (reward, info) tuple

    # Check for stop loss / take profit hits
    self.check_sl_tp()

    # Move to next step
    self.current_step += 1
    done = self.current_step >= len(self.data) - 1

    # Get new state
    next_state = self.get_state()

    return next_state, reward, done

def calculate_reward(self, action):
    """Calculate the reward for the current action and return (reward, info)."""
    # ... (existing code)

    # Combine all reward components
    reward = pnl_reward + timing_reward + risk_reward + prediction_reward

    # Log components for analysis
    info = {
        'pnl_reward': pnl_reward,
        'timing_reward': timing_reward,
        'risk_reward': risk_reward,
        'prediction_reward': prediction_reward,
        'total_reward': reward
    }

    return reward, info  # return the tuple that step() unpacks above
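# --- Illustrative only, not part of this commit: a minimal driver loop for the
# --- environment API above. `TradingEnv` and `agent` are hypothetical names.
# env = TradingEnv(data=candles, demo=True)
# state = env.reset()
# done = False
# while not done:
#     action = agent.select_action(state)      # hypothetical agent
#     state, reward, done = env.step(action)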

View File

@@ -38,3 +38,26 @@ it seems the model is not learning a lot. we keep hovering about the same starting point.
it seems we may need another NN model down the loop just to predict the extremums of the price.
we may have to include a mechanism to calculate the extremums of the price retrospectively and to use that to bootstrap (pre-train) the model.
Why Performance Might Be Stagnating
Several factors could explain why the model isn't improving significantly during training:
Insufficient Model Capacity for Price Prediction: While the price prediction model has 267,663 parameters, financial time series prediction is extremely challenging. The market may have patterns that are too complex or too random for the current model to capture effectively.
Overfitting to Training Data: The model might be memorizing patterns in the training data that don't generalize to new market conditions.
Transformer-LSTM Redundancy in DQN: Your DQN model uses both a transformer and an LSTM, which might be redundant. Both are designed to capture sequential dependencies, and having both could lead to overfitting or training instability.
Imbalanced Parameter Distribution: 64.5% of your DQN parameters are in the transformer component, which might be excessive for the task.
Reward Function Issues: The reward function might not be properly aligned with profitable trading strategies, or it might be too sparse to provide meaningful learning signals.
Suggested Improvements
1. Enhance Price Prediction Training
2. Simplify the DQN Architecture
Consider creating a more streamlined DQN model:
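A minimal sketch of what a slimmer model could look like (illustrative, not from this commit; the layer sizes are assumptions):

import torch
import torch.nn as nn

class StreamlinedDQN(nn.Module):
    """Dueling DQN without the transformer/LSTM pair."""
    def __init__(self, state_dim, action_dim, hidden_dim=256):
        super().__init__()
        self.features = nn.Sequential(
            nn.Linear(state_dim, hidden_dim), nn.LeakyReLU(),
            nn.Linear(hidden_dim, hidden_dim), nn.LeakyReLU(),
        )
        self.advantage = nn.Linear(hidden_dim, action_dim)
        self.value = nn.Linear(hidden_dim, 1)

    def forward(self, state):
        h = self.features(state)
        adv = self.advantage(h)
        val = self.value(h)
        # Standard dueling combination
        return val + adv - adv.mean(dim=1, keepdim=True)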
3. Improve the Reward Function
Make sure your reward function provides meaningful signals for learning:
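One possible shape for a denser, PnL-anchored reward (a sketch, not the project's existing calculate_reward; the fee and scaling constants, and the self.position / self.entry_price attributes, are assumptions):

def calculate_reward(self, action):
    """Reward realized PnL, penalize churn, add a small shaping term so the signal is not sparse."""
    reward = 0.0
    fee = 0.0005  # assumed round-trip fee fraction
    if action in (1, 2):  # BUY or SELL (see action mapping in step())
        reward -= fee  # discourage overtrading
    if action == 3 and self.position is not None:  # CLOSE
        pnl = (self.current_price - self.entry_price) / self.entry_price
        if self.position == 'short':
            pnl = -pnl
        reward += pnl * 100  # scale so realized PnL dominates
    if self.position is not None:
        # Small unrealized-PnL shaping term on every step
        unrealized = (self.current_price - self.entry_price) / self.entry_price
        if self.position == 'short':
            unrealized = -unrealized
        reward += unrealized
    return reward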
4. Implement Curriculum Learning
Start with simpler market conditions and gradually increase complexity:
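One way to sketch this (illustrative; bucket_by_volatility and agent.train_on_window are hypothetical helpers, not part of this codebase):

def train_with_curriculum(agent, data_windows):
    # Bucket training windows by realized volatility and train from calm to volatile.
    stages = bucket_by_volatility(data_windows, buckets=('low', 'medium', 'high'))  # hypothetical
    for stage in ('low', 'medium', 'high'):
        for window in stages[stage]:
            agent.train_on_window(window)  # hypothetical training entry point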
Conclusion
The issue appears to be a combination of model complexity, potential overfitting, and possibly insufficient learning signals from the reward function. By simplifying the DQN architecture (particularly reducing the transformer component), improving the price prediction training, and enhancing the reward function, you should see better learning progress.
Would you like me to implement any of these specific improvements to your codebase?

View File

@@ -3,6 +3,278 @@ import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer
class EnhancedPricePredictionModel(nn.Module):
    def __init__(self, input_dim=2, hidden_dim=256, num_layers=3, output_dim=5, num_timeframes=3):
        super(EnhancedPricePredictionModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.num_timeframes = num_timeframes

        # Separate LSTM for each timeframe
        self.timeframe_lstms = nn.ModuleList([
            nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.2)
            for _ in range(num_timeframes)
        ])

        # Cross-timeframe attention (note: defined here but not yet wired into forward())
        self.cross_attention = nn.MultiheadAttention(hidden_dim, num_heads=8, batch_first=True, dropout=0.1)

        # Self-attention for each timeframe
        self.self_attentions = nn.ModuleList([
            nn.MultiheadAttention(hidden_dim, num_heads=8, batch_first=True, dropout=0.1)
            for _ in range(num_timeframes)
        ])

        # Timeframe fusion layer
        self.fusion_layer = nn.Sequential(
            nn.Linear(hidden_dim * num_timeframes, hidden_dim * 2),
            nn.LeakyReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim * 2, hidden_dim)
        )

        # Fully connected head for price prediction
        self.price_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, output_dim)
        )

        # Fully connected head for extrema prediction (high and low points)
        self.extrema_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, 10)  # 5 time steps, 2 classes (high/low) each
        )

        # Volume prediction head
        self.volume_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x_list):
        # x_list is a list of tensors, one per timeframe
        # Each x shape: (batch_size, seq_len, input_dim)

        # Process each timeframe with its own LSTM
        lstm_outputs = []
        for i, x in enumerate(x_list):
            lstm_out, _ = self.timeframe_lstms[i](x)  # (batch_size, seq_len, hidden_dim)
            lstm_outputs.append(lstm_out)

        # Apply self-attention to each timeframe
        attn_outputs = []
        for i, lstm_out in enumerate(lstm_outputs):
            attn_output, _ = self.self_attentions[i](lstm_out, lstm_out, lstm_out)
            attn_outputs.append(attn_output[:, -1, :])  # use the last time step

        # Concatenate all timeframe representations
        combined = torch.cat(attn_outputs, dim=1)  # (batch_size, hidden_dim * num_timeframes)

        # Fuse timeframe information
        fused = self.fusion_layer(combined)  # (batch_size, hidden_dim)

        # Price, extrema, and volume heads
        price_pred = self.price_fc(fused)
        extrema_logits = self.extrema_fc(fused)
        volume_pred = self.volume_fc(fused)

        return price_pred, extrema_logits, volume_pred
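# --- Illustrative shape check, not part of this commit: three timeframes,
# --- batch of 8, 30 candles each, (close, volume) features per candle.
# model = EnhancedPricePredictionModel(num_timeframes=3)
# xs = [torch.randn(8, 30, 2) for _ in range(3)]
# price, extrema, volume = model(xs)   # shapes: (8, 5), (8, 10), (8, 5)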
class EnhancedDQN(nn.Module):
    def __init__(self, state_dim, action_dim, hidden_dim=512):
        super(EnhancedDQN, self).__init__()

        # Feature extraction layers with increased capacity
        self.feature_extraction = nn.Sequential(
            nn.Linear(state_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
        )

        # Advantage stream with increased capacity
        self.advantage_stream = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, action_dim)
        )

        # Value stream with increased capacity
        self.value_stream = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, 1)
        )

        # Enhanced transformer for temporal dependencies
        encoder_layers = TransformerEncoderLayer(
            d_model=hidden_dim,
            nhead=8,
            dim_feedforward=hidden_dim * 4,
            dropout=0.1,
            batch_first=True
        )
        self.transformer = TransformerEncoder(encoder_layers, num_layers=3)

        # LSTM for sequential decision making with increased capacity
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, num_layers=2, batch_first=True, dropout=0.1)

        # Final layers with increased capacity
        self.final_layers = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, action_dim)
        )

        # Market regime classification layer
        self.market_regime_classifier = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, 3)  # 3 regimes: trending, ranging, volatile
        )

    def forward(self, state, hidden=None):
        # `hidden` is accepted for API compatibility but not used in this implementation

        # Extract features
        features = self.feature_extraction(state)
        features = features.unsqueeze(1)  # add sequence dimension for transformer/LSTM

        # Transformer processing
        transformer_out = self.transformer(features)

        # LSTM processing
        lstm_out, lstm_hidden = self.lstm(transformer_out)

        # Dueling architecture
        advantage = self.advantage_stream(features.squeeze(1))
        value = self.value_stream(features.squeeze(1))

        # Combine transformer and LSTM outputs for the direct Q-value head
        combined = torch.cat([transformer_out.squeeze(1), lstm_out.squeeze(1)], dim=1)
        q_values = self.final_layers(combined)

        # Market regime classification
        market_regime = self.market_regime_classifier(transformer_out.squeeze(1))

        # Dueling Q-value computation
        dueling_q = value + advantage - advantage.mean(dim=1, keepdim=True)

        # Final Q-values are a weighted combination of the dueling Q-values and the
        # direct Q-values, letting the model lean on either approach as needed
        q_values = 0.5 * dueling_q + 0.5 * q_values

        return q_values, lstm_hidden, market_regime
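# --- Illustrative forward pass, not part of this commit: flat 100-dim state, 3 actions.
# dqn = EnhancedDQN(state_dim=100, action_dim=3)
# q, lstm_hidden, regime = dqn(torch.randn(8, 100))
# q.shape == (8, 3); regime.shape == (8, 3)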
def count_parameters(model):
    total_params = 0
    layer_params = {}
    for name, param in model.named_parameters():
        if param.requires_grad:
            param_count = param.numel()
            total_params += param_count
            layer_params[name] = (param_count, param.shape)
    return total_params, layer_params
def main():
    # Initialize the original models for comparison
    original_price_model = PricePredictionModel()
    original_price_total_params, _ = count_parameters(original_price_model)

    state_dim = 50
    action_dim = 3
    original_dqn_model = DQN(state_dim=state_dim, action_dim=action_dim)
    original_dqn_total_params, _ = count_parameters(original_dqn_model)

    # Initialize the enhanced models
    enhanced_price_model = EnhancedPricePredictionModel(num_timeframes=3)
    enhanced_price_total_params, enhanced_price_layer_params = count_parameters(enhanced_price_model)

    # Increase state dimension to accommodate multiple timeframes
    enhanced_state_dim = 100  # increased from 50 to accommodate more features
    enhanced_dqn_model = EnhancedDQN(state_dim=enhanced_state_dim, action_dim=action_dim)
    enhanced_dqn_total_params, enhanced_dqn_layer_params = count_parameters(enhanced_dqn_model)

    # Print comparison
    print("=== MODEL SIZE COMPARISON ===")
    print(f"Original Price Prediction Model: {original_price_total_params:,} parameters")
    print(f"Enhanced Price Prediction Model: {enhanced_price_total_params:,} parameters")
    print(f"Growth Factor: {enhanced_price_total_params / original_price_total_params:.2f}x\n")
    print(f"Original DQN Model: {original_dqn_total_params:,} parameters")
    print(f"Enhanced DQN Model: {enhanced_dqn_total_params:,} parameters")
    print(f"Growth Factor: {enhanced_dqn_total_params / original_dqn_total_params:.2f}x\n")
    print(f"Total Original Models: {original_price_total_params + original_dqn_total_params:,} parameters")
    print(f"Total Enhanced Models: {enhanced_price_total_params + enhanced_dqn_total_params:,} parameters")
    print(f"Overall Growth Factor: {(enhanced_price_total_params + enhanced_dqn_total_params) / (original_price_total_params + original_dqn_total_params):.2f}x\n")

    # Print VRAM usage estimate (rough approximation: parameters only,
    # excluding activations, gradients, and optimizer state)
    bytes_per_param = 4  # 4 bytes for float32
    original_vram_mb = (original_price_total_params + original_dqn_total_params) * bytes_per_param / (1024 * 1024)
    enhanced_vram_mb = (enhanced_price_total_params + enhanced_dqn_total_params) * bytes_per_param / (1024 * 1024)
    print("=== ESTIMATED VRAM USAGE ===")
    print(f"Original Models: {original_vram_mb:.2f} MB")
    print(f"Enhanced Models: {enhanced_vram_mb:.2f} MB")
    print("Available VRAM: 8,192 MB (8 GB)")
    print(f"VRAM Utilization: {enhanced_vram_mb / 8192 * 100:.2f}%\n")

    # Print detailed breakdown of the enhanced models
    print("=== ENHANCED PRICE PREDICTION MODEL BREAKDOWN ===")
    # Group parameters by component
    timeframe_lstm_params = sum(count for name, (count, _) in enhanced_price_layer_params.items() if "timeframe_lstms" in name)
    attention_params = sum(count for name, (count, _) in enhanced_price_layer_params.items() if "attention" in name)
    fusion_params = sum(count for name, (count, _) in enhanced_price_layer_params.items() if "fusion" in name)
    output_params = sum(count for name, (count, _) in enhanced_price_layer_params.items() if any(x in name for x in ["price_fc", "extrema_fc", "volume_fc"]))
    print(f"Timeframe LSTMs: {timeframe_lstm_params:,} parameters ({timeframe_lstm_params/enhanced_price_total_params*100:.1f}%)")
    print(f"Attention Mechanisms: {attention_params:,} parameters ({attention_params/enhanced_price_total_params*100:.1f}%)")
    print(f"Fusion Layer: {fusion_params:,} parameters ({fusion_params/enhanced_price_total_params*100:.1f}%)")
    print(f"Output Layers: {output_params:,} parameters ({output_params/enhanced_price_total_params*100:.1f}%)\n")

    print("=== ENHANCED DQN MODEL BREAKDOWN ===")
    # Group parameters by component
    feature_extraction_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "feature_extraction" in name)
    advantage_value_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "advantage_stream" in name or "value_stream" in name)
    transformer_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "transformer" in name)
    lstm_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "lstm" in name and "transformer" not in name)
    final_layers_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "final_layers" in name)
    market_regime_params = sum(count for name, (count, _) in enhanced_dqn_layer_params.items() if "market_regime" in name)
    print(f"Feature Extraction: {feature_extraction_params:,} parameters ({feature_extraction_params/enhanced_dqn_total_params*100:.1f}%)")
    print(f"Advantage & Value Streams: {advantage_value_params:,} parameters ({advantage_value_params/enhanced_dqn_total_params*100:.1f}%)")
    print(f"Transformer: {transformer_params:,} parameters ({transformer_params/enhanced_dqn_total_params*100:.1f}%)")
    print(f"LSTM: {lstm_params:,} parameters ({lstm_params/enhanced_dqn_total_params*100:.1f}%)")
    print(f"Final Layers: {final_layers_params:,} parameters ({final_layers_params/enhanced_dqn_total_params*100:.1f}%)")
    print(f"Market Regime Classifier: {market_regime_params:,} parameters ({market_regime_params/enhanced_dqn_total_params*100:.1f}%)")
# Keep the original models for comparison
class PricePredictionModel(nn.Module):
    def __init__(self, input_dim=2, hidden_dim=128, num_layers=2, output_dim=5):
        super(PricePredictionModel, self).__init__()
@@ -102,106 +374,5 @@ class DQN(nn.Module):
        return q_values, lstm_hidden
def count_parameters(model):
    total_params = 0
    layer_params = {}
    for name, param in model.named_parameters():
        if param.requires_grad:
            param_count = param.numel()
            total_params += param_count
            layer_params[name] = (param_count, param.shape)
    return total_params, layer_params

def main():
    # Initialize the Price Prediction Model
    price_model = PricePredictionModel()
    price_total_params, price_layer_params = count_parameters(price_model)
    print(f"Price Prediction Model parameters: {price_total_params:,}")
    print("\nPrice Prediction Model Layers:")
    for name, (count, shape) in price_layer_params.items():
        print(f"{name}: {count:,} (shape: {shape})")

    # Initialize the DQN Model with typical dimensions
    state_dim = 50  # typical state dimension for the trading bot
    action_dim = 3  # typical action dimension (buy, sell, hold)
    dqn_model = DQN(state_dim=state_dim, action_dim=action_dim)
    dqn_total_params, dqn_layer_params = count_parameters(dqn_model)

    # Count parameters by category
    feature_extraction_params = sum(count for name, (count, _) in dqn_layer_params.items() if "feature_extraction" in name)
    advantage_value_params = sum(count for name, (count, _) in dqn_layer_params.items() if "advantage_stream" in name or "value_stream" in name)
    transformer_params = sum(count for name, (count, _) in dqn_layer_params.items() if "transformer" in name)
    lstm_params = sum(count for name, (count, _) in dqn_layer_params.items() if "lstm" in name and "transformer" not in name)
    final_layers_params = sum(count for name, (count, _) in dqn_layer_params.items() if "final_layers" in name)
    print(f"\nDQN Model parameters: {dqn_total_params:,}")

    # Create a set to track which parameters we've printed
    printed_params = set()

    # Print DQN layers in groups to avoid output truncation
    print(f"\nDQN Model Layers (Feature Extraction): {feature_extraction_params:,} parameters")
    for name, (count, shape) in dqn_layer_params.items():
        if "feature_extraction" in name:
            print(f"{name}: {count:,} (shape: {shape})")
            printed_params.add(name)

    print(f"\nDQN Model Layers (Advantage & Value Streams): {advantage_value_params:,} parameters")
    for name, (count, shape) in dqn_layer_params.items():
        if "advantage_stream" in name or "value_stream" in name:
            print(f"{name}: {count:,} (shape: {shape})")
            printed_params.add(name)

    print(f"\nDQN Model Layers (Transformer): {transformer_params:,} parameters")
    for name, (count, shape) in dqn_layer_params.items():
        if "transformer" in name:
            print(f"{name}: {count:,} (shape: {shape})")
            printed_params.add(name)

    print(f"\nDQN Model Layers (LSTM): {lstm_params:,} parameters")
    for name, (count, shape) in dqn_layer_params.items():
        if "lstm" in name and "transformer" not in name:
            print(f"{name}: {count:,} (shape: {shape})")
            printed_params.add(name)

    print(f"\nDQN Model Layers (Final Layers): {final_layers_params:,} parameters")
    for name, (count, shape) in dqn_layer_params.items():
        if "final_layers" in name:
            print(f"{name}: {count:,} (shape: {shape})")
            printed_params.add(name)

    # Print any remaining parameters that weren't caught by the categories above
    remaining_params = set(dqn_layer_params.keys()) - printed_params
    if remaining_params:
        remaining_params_count = sum(dqn_layer_params[name][0] for name in remaining_params)
        print(f"\nDQN Model Layers (Other): {remaining_params_count:,} parameters")
        for name in remaining_params:
            count, shape = dqn_layer_params[name]
            print(f"{name}: {count:,} (shape: {shape})")

    # Total parameters across both models
    print(f"\nTotal parameters (both models): {price_total_params + dqn_total_params:,}")

    # Print summary of parameter distribution
    print("\nParameter Distribution Summary:")
    print(f"Price Prediction Model: {price_total_params:,} parameters ({price_total_params/(price_total_params + dqn_total_params)*100:.1f}%)")
    print(f"DQN Model: {dqn_total_params:,} parameters ({dqn_total_params/(price_total_params + dqn_total_params)*100:.1f}%)")
    print("\nDQN Model Breakdown:")
    print(f"- Feature Extraction: {feature_extraction_params:,} parameters ({feature_extraction_params/dqn_total_params*100:.1f}%)")
    print(f"- Advantage & Value Streams: {advantage_value_params:,} parameters ({advantage_value_params/dqn_total_params*100:.1f}%)")
    print(f"- Transformer: {transformer_params:,} parameters ({transformer_params/dqn_total_params*100:.1f}%)")
    print(f"- LSTM: {lstm_params:,} parameters ({lstm_params/dqn_total_params*100:.1f}%)")
    print(f"- Final Layers: {final_layers_params:,} parameters ({final_layers_params/dqn_total_params*100:.1f}%)")

    # Verify that all parameters are accounted for
    total_by_category = feature_extraction_params + advantage_value_params + transformer_params + lstm_params + final_layers_params
    if remaining_params:
        total_by_category += remaining_params_count
    print(f"\nSum of all categories: {total_by_category:,} parameters")
    print(f"Difference from total: {dqn_total_params - total_by_category:,} parameters")

if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,434 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer

class EnhancedPricePredictionModel(nn.Module):
    def __init__(self, input_dim=2, hidden_dim=256, num_layers=3, output_dim=5, num_timeframes=3):
        super(EnhancedPricePredictionModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.num_timeframes = num_timeframes

        # Separate LSTM for each timeframe
        self.timeframe_lstms = nn.ModuleList([
            nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.2)
            for _ in range(num_timeframes)
        ])

        # Cross-timeframe attention
        self.cross_attention = nn.MultiheadAttention(hidden_dim, num_heads=8, batch_first=True, dropout=0.1)

        # Self-attention for each timeframe
        self.self_attentions = nn.ModuleList([
            nn.MultiheadAttention(hidden_dim, num_heads=8, batch_first=True, dropout=0.1)
            for _ in range(num_timeframes)
        ])

        # Timeframe fusion layer
        self.fusion_layer = nn.Sequential(
            nn.Linear(hidden_dim * num_timeframes, hidden_dim * 2),
            nn.LeakyReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim * 2, hidden_dim)
        )

        # Fully connected head for price prediction
        self.price_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, output_dim)
        )

        # Fully connected head for extrema prediction (high and low points)
        self.extrema_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, 10)  # 5 time steps, 2 classes (high/low) each
        )

        # Volume prediction head
        self.volume_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x_list):
        # x_list is a list of tensors, one per timeframe
        # Each x shape: (batch_size, seq_len, input_dim)

        # Process each timeframe with its own LSTM
        lstm_outputs = []
        for i, x in enumerate(x_list):
            lstm_out, _ = self.timeframe_lstms[i](x)  # (batch_size, seq_len, hidden_dim)
            lstm_outputs.append(lstm_out)

        # Apply self-attention to each timeframe
        attn_outputs = []
        for i, lstm_out in enumerate(lstm_outputs):
            attn_output, _ = self.self_attentions[i](lstm_out, lstm_out, lstm_out)
            attn_outputs.append(attn_output[:, -1, :])  # use the last time step

        # Concatenate all timeframe representations
        combined = torch.cat(attn_outputs, dim=1)  # (batch_size, hidden_dim * num_timeframes)

        # Fuse timeframe information
        fused = self.fusion_layer(combined)  # (batch_size, hidden_dim)

        # Price, extrema, and volume heads
        price_pred = self.price_fc(fused)
        extrema_logits = self.extrema_fc(fused)
        volume_pred = self.volume_fc(fused)

        return price_pred, extrema_logits, volume_pred
class EnhancedDQN(nn.Module):
    def __init__(self, state_dim, action_dim, hidden_dim=512):
        super(EnhancedDQN, self).__init__()

        # Feature extraction layers with increased capacity
        self.feature_extraction = nn.Sequential(
            nn.Linear(state_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
        )

        # Advantage stream with increased capacity
        self.advantage_stream = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, action_dim)
        )

        # Value stream with increased capacity
        self.value_stream = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, 1)
        )

        # Enhanced transformer for temporal dependencies
        encoder_layers = TransformerEncoderLayer(
            d_model=hidden_dim,
            nhead=8,
            dim_feedforward=hidden_dim * 4,
            dropout=0.1,
            batch_first=True
        )
        self.transformer = TransformerEncoder(encoder_layers, num_layers=3)

        # LSTM for sequential decision making with increased capacity
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, num_layers=2, batch_first=True, dropout=0.1)

        # Final layers with increased capacity
        self.final_layers = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, action_dim)
        )

        # Market regime classification layer
        self.market_regime_classifier = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim // 2, 3)  # 3 regimes: trending, ranging, volatile
        )

    def forward(self, state, hidden=None):
        # Extract features
        features = self.feature_extraction(state)
        features = features.unsqueeze(1)  # add sequence dimension for transformer/LSTM

        # Transformer processing
        transformer_out = self.transformer(features)

        # LSTM processing
        lstm_out, lstm_hidden = self.lstm(transformer_out)

        # Dueling architecture
        advantage = self.advantage_stream(features.squeeze(1))
        value = self.value_stream(features.squeeze(1))

        # Combine transformer and LSTM outputs for the direct Q-value head
        combined = torch.cat([transformer_out.squeeze(1), lstm_out.squeeze(1)], dim=1)
        q_values = self.final_layers(combined)

        # Market regime classification
        market_regime = self.market_regime_classifier(transformer_out.squeeze(1))

        # Dueling Q-value computation
        dueling_q = value + advantage - advantage.mean(dim=1, keepdim=True)

        # Final Q-values are a weighted combination of the dueling Q-values and the
        # direct Q-values, letting the model use either approach depending on the situation
        q_values = 0.5 * dueling_q + 0.5 * q_values

        return q_values, lstm_hidden, market_regime
class EnhancedReplayBuffer:
    """Enhanced replay buffer with prioritized experience replay and n-step returns."""

    def __init__(self, capacity, alpha=0.6, beta=0.4, beta_increment=0.001, n_step=3, gamma=0.99):
        self.capacity = capacity
        self.buffer = []
        self.position = 0
        self.priorities = torch.zeros(capacity)
        self.alpha = alpha  # priority exponent
        self.beta = beta  # importance sampling weight
        self.beta_increment = beta_increment  # beta annealing
        self.n_step = n_step  # n-step returns
        self.gamma = gamma  # discount factor
        self.n_step_buffer = []
        self.max_priority = 1.0

    def push(self, state, action, reward, next_state, done):
        # Store experience in the n-step buffer
        self.n_step_buffer.append((state, action, reward, next_state, done))

        # If we don't have enough experiences for an n-step return, wait
        if len(self.n_step_buffer) < self.n_step and not done:
            return

        # Calculate the n-step return
        reward_n = 0
        for i in range(self.n_step):
            if i >= len(self.n_step_buffer):
                break
            reward_n += self.gamma**i * self.n_step_buffer[i][2]

        # Get state, action from the first experience
        state = self.n_step_buffer[0][0]
        action = self.n_step_buffer[0][1]

        # Get next_state, done from the last experience
        next_state = self.n_step_buffer[-1][3]
        done = self.n_step_buffer[-1][4]

        # Store in the replay buffer with max priority so it gets sampled at least once
        if len(self.buffer) < self.capacity:
            self.buffer.append(None)
        self.buffer[self.position] = (state, action, reward_n, next_state, done)
        self.priorities[self.position] = self.max_priority

        # Move the position pointer
        self.position = (self.position + 1) % self.capacity

        # Remove the first experience from the n-step buffer
        self.n_step_buffer.pop(0)

        # If the episode is done, clear the n-step buffer
        if done:
            self.n_step_buffer = []

    def sample(self, batch_size):
        # Calculate sampling probabilities
        if len(self.buffer) < self.capacity:
            probs = self.priorities[:len(self.buffer)]
        else:
            probs = self.priorities

        # Normalize probabilities
        probs = probs ** self.alpha
        probs = probs / probs.sum()

        # Sample indices based on priorities
        indices = torch.multinomial(probs, batch_size, replacement=True)

        # Calculate importance sampling weights
        weights = (len(self.buffer) * probs[indices]) ** (-self.beta)
        weights = weights / weights.max()
        self.beta = min(1.0, self.beta + self.beta_increment)  # anneal beta

        # Get experiences
        states = []
        actions = []
        rewards = []
        next_states = []
        dones = []
        for idx in indices:
            state, action, reward, next_state, done = self.buffer[idx]
            states.append(state)
            actions.append(action)
            rewards.append(reward)
            next_states.append(next_state)
            dones.append(done)

        return (
            torch.stack(states),
            torch.tensor(actions),
            torch.tensor(rewards, dtype=torch.float32),
            torch.stack(next_states),
            torch.tensor(dones, dtype=torch.float32),
            indices,
            weights
        )

    def update_priorities(self, indices, td_errors):
        for idx, td_error in zip(indices, td_errors):
            # Update priority based on TD error; the small constant keeps priorities non-zero
            priority = abs(td_error) + 1e-5
            self.priorities[idx] = priority
            self.max_priority = max(self.max_priority, priority)

    def __len__(self):
        return len(self.buffer)
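# --- Illustrative usage, not part of this commit (states must be tensors so
# --- torch.stack works in sample(); td_errors come from the learner).
# buffer = EnhancedReplayBuffer(capacity=100_000, n_step=3)
# buffer.push(state, action, reward, next_state, done)
# states, actions, rewards, next_states, dones, idxs, ws = buffer.sample(32)
# buffer.update_priorities(idxs, td_errors)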
def train_price_predictor(model, data_loaders, optimizer, device, epochs=10):
    """
    Train the price prediction model using data from multiple timeframes.

    Args:
        model: The EnhancedPricePredictionModel
        data_loaders: List of DataLoader objects, one for each timeframe
        optimizer: Optimizer for training
        device: Device to train on (CPU or GPU)
        epochs: Number of training epochs
    """
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        num_batches = 0

        # Assume all dataloaders have the same length
        for batch_idx, batch_data in enumerate(zip(*data_loaders)):
            # batch_data holds one (inputs, price_targets, extrema_targets, volume_targets)
            # tuple per timeframe
            optimizer.zero_grad()

            # Prepare inputs for each timeframe
            inputs_list = [data[0].to(device) for data in batch_data]
            price_targets = batch_data[0][1].to(device)  # use targets from the first timeframe (e.g., 1m)
            extrema_targets = batch_data[0][2].to(device)
            volume_targets = batch_data[0][3].to(device)

            # Forward pass
            price_pred, extrema_logits, volume_pred = model(inputs_list)

            # Calculate losses
            price_loss = F.mse_loss(price_pred, price_targets)
            extrema_loss = F.binary_cross_entropy_with_logits(extrema_logits, extrema_targets)
            volume_loss = F.mse_loss(volume_pred, volume_targets)

            # Combined loss with weighting
            loss = price_loss + 0.5 * extrema_loss + 0.3 * volume_loss

            # Backward pass
            loss.backward()

            # Gradient clipping to prevent exploding gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()
            total_loss += loss.item()
            num_batches += 1

            if batch_idx % 100 == 0:
                print(f"Epoch {epoch+1}/{epochs}, Batch {batch_idx}, Loss: {loss.item():.6f}")

        avg_loss = total_loss / num_batches
        print(f"Epoch {epoch+1}/{epochs}, Avg Loss: {avg_loss:.6f}")

        # Learning rate scheduling: decay by 10% every 5 epochs
        if epoch > 0 and epoch % 5 == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

    return model
def prepare_multi_timeframe_data(exchange, timeframes=['1m', '15m', '1h'], lookback=30):
    """
    Prepare data from multiple timeframes for training.

    Args:
        exchange: Exchange object to fetch data from
        timeframes: List of timeframes to fetch
        lookback: Number of candles to look back

    Returns:
        List of DataLoader objects, one for each timeframe
    """
    data_loaders = []
    for timeframe in timeframes:
        # Fetch historical data for this timeframe
        candles = exchange.fetch_ohlcv(timeframe=timeframe, limit=1000)

        # Prepare inputs and targets
        inputs = []
        price_targets = []
        extrema_targets = []
        volume_targets = []

        for i in range(lookback, len(candles) - 5):
            # Input: lookback candles (close price and volume)
            input_data = torch.tensor([
                [candle[4], candle[5]] for candle in candles[i-lookback:i]
            ], dtype=torch.float32)

            # Target: close price of the next 5 candles
            price_target = torch.tensor([
                candle[4] for candle in candles[i:i+5]
            ], dtype=torch.float32)

            # Target: extrema points in the next 5 candles
            extrema_target = torch.zeros(10, dtype=torch.float32)  # 5 time steps, 2 classes each
            for j in range(5):
                # Simple extrema detection for demonstration; needs both neighbors,
                # so only the interior steps are labeled
                if j > 0 and j < 4:
                    # Local high
                    if candles[i+j][2] > candles[i+j-1][2] and candles[i+j][2] > candles[i+j+1][2]:
                        extrema_target[j*2] = 1.0
                    # Local low
                    if candles[i+j][3] < candles[i+j-1][3] and candles[i+j][3] < candles[i+j+1][3]:
                        extrema_target[j*2+1] = 1.0

            # Target: volume of the next 5 candles
            volume_target = torch.tensor([
                candle[5] for candle in candles[i:i+5]
            ], dtype=torch.float32)

            inputs.append(input_data)
            price_targets.append(price_target)
            extrema_targets.append(extrema_target)
            volume_targets.append(volume_target)

        # Create dataset and dataloader
        dataset = torch.utils.data.TensorDataset(
            torch.stack(inputs),
            torch.stack(price_targets),
            torch.stack(extrema_targets),
            torch.stack(volume_targets)
        )
        data_loader = torch.utils.data.DataLoader(
            dataset, batch_size=32, shuffle=True
        )
        data_loaders.append(data_loader)

    return data_loaders
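# --- Illustrative wiring of the two helpers above, not part of this commit
# --- (`exchange` is assumed to expose fetch_ohlcv(timeframe=..., limit=...)).
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = EnhancedPricePredictionModel(num_timeframes=3).to(device)
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# loaders = prepare_multi_timeframe_data(exchange, timeframes=['1m', '15m', '1h'])
# model = train_price_predictor(model, loaders, optimizer, device, epochs=10)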

File diff suppressed because it is too large.