import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
import logging
import torch.nn.functional as F
from typing import Tuple

# Configure logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class PricePatternAttention(nn.Module):
    """
    Attention mechanism specifically designed to focus on price patterns
    that might indicate local extrema or trend reversals
    """
    def __init__(self, input_dim, hidden_dim=64):
        super(PricePatternAttention, self).__init__()
        self.query = nn.Linear(input_dim, hidden_dim)
        self.key = nn.Linear(input_dim, hidden_dim)
        self.value = nn.Linear(input_dim, hidden_dim)
        self.scale = torch.sqrt(torch.tensor(hidden_dim, dtype=torch.float32))

    def forward(self, x):
        """Apply scaled dot-product attention to the input sequence"""
        # x shape: [batch_size, seq_len, features]
        batch_size, seq_len, _ = x.size()

        # Project input to query, key, value
        q = self.query(x)  # [batch_size, seq_len, hidden_dim]
        k = self.key(x)    # [batch_size, seq_len, hidden_dim]
        v = self.value(x)  # [batch_size, seq_len, hidden_dim]

        # Calculate attention scores
        scores = torch.matmul(q, k.transpose(-2, -1)) / self.scale  # [batch_size, seq_len, seq_len]

        # Apply softmax to get attention weights
        attn_weights = F.softmax(scores, dim=-1)  # [batch_size, seq_len, seq_len]

        # Apply attention to values
        output = torch.matmul(attn_weights, v)  # [batch_size, seq_len, hidden_dim]

        return output, attn_weights
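
# Usage sketch for PricePatternAttention (illustrative only; the sizes below
# are arbitrary assumptions, not values taken from the training pipeline):
#
#     attn = PricePatternAttention(input_dim=5)   # hidden_dim defaults to 64
#     x = torch.randn(8, 20, 5)                   # [batch, seq_len, features]
#     out, weights = attn(x)
#     # out:     [8, 20, 64] attended features
#     # weights: [8, 20, 20] attention map (each row sums to 1 after softmax)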


class AdaptiveNorm(nn.Module):
    """
    Adaptive normalization layer that chooses between different normalization
    methods based on input dimensions
    """
    def __init__(self, num_features):
        super(AdaptiveNorm, self).__init__()
        self.batch_norm = nn.BatchNorm1d(num_features, affine=True)
        self.group_norm = nn.GroupNorm(min(32, num_features), num_features)
        # Note: forward() builds a correctly sized LayerNorm lazily; this one
        # is currently unused
        self.layer_norm = nn.LayerNorm([num_features, 1])

    def forward(self, x):
        # Check input dimensions: [batch_size, channels, seq_len]
        batch_size, channels, seq_len = x.size()

        # Choose normalization method:
        # - batch_size > 1 and seq_len > 1: BatchNorm
        # - batch_size == 1 and seq_len > 1: GroupNorm (BatchNorm is unstable for a single sample)
        # - seq_len == 1: LayerNorm sized to the actual input
        if batch_size > 1 and seq_len > 1:
            return self.batch_norm(x)
        elif seq_len > 1:
            return self.group_norm(x)
        else:
            # For 1D inputs (seq_len == 1), build a LayerNorm matching the
            # actual input size. Creating it lazily in forward() means its
            # parameters are fresh (untrained) the first time it is used.
            if not hasattr(self, 'layer_norm_1d') or self.layer_norm_1d.normalized_shape[0] != channels:
                self.layer_norm_1d = nn.LayerNorm([channels, seq_len]).to(x.device)
            return self.layer_norm_1d(x)
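
# Selection behavior sketch for AdaptiveNorm (illustrative shapes, not taken
# from the pipeline):
#
#     norm = AdaptiveNorm(64)
#     norm(torch.randn(8, 64, 20))   # batch > 1, seq > 1  -> BatchNorm1d
#     norm(torch.randn(1, 64, 20))   # batch == 1, seq > 1 -> GroupNorm
#     norm(torch.randn(8, 64, 1))    # seq == 1            -> lazily built LayerNorm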


class SimpleCNN(nn.Module):
    """
    Simple CNN model for reinforcement learning with image-like state inputs
    """
    def __init__(self, input_shape, n_actions):
        super(SimpleCNN, self).__init__()

        # Store dimensions
        self.input_shape = input_shape
        self.n_actions = n_actions

        # Calculate input dimensions
        if len(input_shape) == 3:  # [channels, height, width]
            self.channels, self.height, self.width = input_shape
            self.feature_dim = self.height * self.width
        elif len(input_shape) == 2:  # [timeframes, features]
            self.channels = input_shape[0]
            self.features = input_shape[1]
            self.feature_dim = self.features
        elif len(input_shape) == 1:  # [features]
            self.channels = 1
            self.features = input_shape[0]
            self.feature_dim = self.features
        else:
            raise ValueError(f"Unsupported input shape: {input_shape}")

        # Build network
        self._build_network()

        # Initialize device
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

        logger.info(f"SimpleCNN initialized with input shape: {input_shape}, actions: {n_actions}")

    def _build_network(self):
        """Build the neural network with current feature dimensions"""
        # Create a flexible architecture that adapts to input dimensions
        self.fc_layers = nn.Sequential(
            nn.Linear(self.feature_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU()
        )

        # Output heads (Dueling DQN architecture)
        self.advantage_head = nn.Linear(256, self.n_actions)
        self.value_head = nn.Linear(256, 1)

        # Extrema detection head
        self.extrema_head = nn.Linear(256, 3)  # 0=bottom, 1=top, 2=neither

    def _check_rebuild_network(self, features):
        """Check if network needs to be rebuilt for different feature dimensions"""
        if features != self.feature_dim:
            logger.info(f"Rebuilding network for new feature dimension: {features} (was {self.feature_dim})")
            self.feature_dim = features
            # Rebuilding replaces all layers, discarding previously learned weights
            self._build_network()
            # Move to device after rebuilding
            self.to(self.device)
            return True
        return False

    def forward(self, x):
        """
        Forward pass through the network

        Returns both action values and extrema predictions
        """
        # Handle different input shapes
        if len(x.shape) == 2:  # [batch_size, features]
            # Simple feature vector
            batch_size, features = x.shape
            # Check if we need to rebuild the network for new dimensions
            self._check_rebuild_network(features)

        elif len(x.shape) == 3:  # [batch_size, timeframes/channels, features]
            # Flatten timeframes/channels together with features
            batch_size, timeframes, features = x.shape
            total_features = timeframes * features

            # Check if we need to rebuild the network for new dimensions
            self._check_rebuild_network(total_features)

            # Reshape tensor to [batch_size, total_features]
            x = x.reshape(batch_size, total_features)
        else:
            raise ValueError(f"Unexpected input shape: {x.shape}, expected 2D or 3D tensor")

        # Apply fully connected layers
        fc_out = self.fc_layers(x)

        # Dueling architecture
        advantage = self.advantage_head(fc_out)
        value = self.value_head(fc_out)

        # Q(s, a) = V(s) + (A(s, a) - mean_a' A(s, a'))
        action_values = value + advantage - advantage.mean(dim=1, keepdim=True)

        # Extrema predictions
        extrema_pred = self.extrema_head(fc_out)

        return action_values, extrema_pred
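
# Usage sketch for SimpleCNN (illustrative only; shapes are arbitrary
# assumptions). Feeding a [batch, timeframes, features] tensor triggers the
# adaptive rebuild, after which the flattened input is processed:
#
#     net = SimpleCNN(input_shape=(20, 5), n_actions=3)
#     q, extrema = net(torch.randn(8, 20, 5))  # rebuilds fc layers for 100 features
#     # q: [8, 3] Q-values, extrema: [8, 3] logits for bottom/top/neither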


class CNNModelPyTorch(nn.Module):
    """
    CNN model for trading with multiple timeframes
    """
    def __init__(self, window_size=20, num_features=5, output_size=3, timeframes=None):
        super(CNNModelPyTorch, self).__init__()

        if timeframes is None:
            timeframes = [1]

        self.window_size = window_size
        self.num_features = num_features
        self.output_size = output_size
        self.timeframes = timeframes

        # Calculate total input features across all timeframes
        self.total_features = num_features * len(timeframes)

        # Device configuration
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        logger.info(f"Using device: {self.device}")

        # Create model architecture
        self._create_layers()

        # Move model to device
        self.to(self.device)

    def _create_layers(self):
        """Create all model layers with current feature dimensions"""
        # Convolutional layers - use total_features as input channels
        self.conv1 = nn.Conv1d(self.total_features, 64, kernel_size=3, padding=1)
        self.norm1 = AdaptiveNorm(64)
        self.dropout1 = nn.Dropout(0.2)

        self.conv2 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.norm2 = AdaptiveNorm(128)
        self.dropout2 = nn.Dropout(0.3)

        self.conv3 = nn.Conv1d(128, 256, kernel_size=3, padding=1)
        self.norm3 = AdaptiveNorm(256)
        self.dropout3 = nn.Dropout(0.4)

        # Add price pattern attention layer
        self.attention = PricePatternAttention(256)

        # Extrema detection specialized convolutional layer
        # (padding=1 preserves the sequence length, which matters for small inputs)
        self.extrema_conv = nn.Conv1d(256, 128, kernel_size=3, padding=1)
        self.extrema_norm = AdaptiveNorm(128)

        # Fully connected layers - fc1's input size depends on the sequence
        # length after the convolutions, so it is initialized lazily in forward()
        self.fc1 = None
        self.fc2 = nn.Linear(512, 256)
        self.dropout_fc = nn.Dropout(0.5)

        # Advantage and Value streams (Dueling DQN architecture)
        self.fc3 = nn.Linear(256, self.output_size)  # Advantage stream
        self.value_fc = nn.Linear(256, 1)            # Value stream

        # Additional prediction head for extrema detection (tops/bottoms)
        self.extrema_fc = nn.Linear(256, 3)  # 0=bottom, 1=top, 2=neither

        # Initialize optimizer and scheduler
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, mode='max', factor=0.5, patience=5, verbose=True
        )

    def rebuild_conv_layers(self, input_channels):
        """
        Rebuild convolutional layers for different input dimensions

        Args:
            input_channels: Number of input channels (features) in the data
        """
        logger.info(f"Rebuilding convolutional layers for {input_channels} input channels")

        # Update total features
        self.total_features = input_channels

        # Recreate all layers (and the optimizer/scheduler) with new dimensions;
        # note that this discards any previously learned weights
        self._create_layers()

        # Move layers to device
        self.to(self.device)

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Forward pass through the network"""
        # Ensure input is on the correct device
        x = x.to(self.device)

        # Check input dimensions and reshape as needed
        if len(x.size()) == 2:
            # If input is [batch_size, features], reshape to [batch_size, features, 1]
            batch_size, feature_dim = x.size()

            # Rebuild layers if the input features don't match model expectations
            if feature_dim != self.total_features:
                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features}), rebuilding layers")
                self.rebuild_conv_layers(feature_dim)

            # For 1D input, use a sequence length of 1
            seq_len = 1
            x = x.unsqueeze(2)  # Reshape to [batch, features, 1]
        elif len(x.size()) == 3:
            # Standard case: [batch_size, window_size, features]
            batch_size, seq_len, feature_dim = x.size()

            # Rebuild layers if the input features don't match model expectations
            if feature_dim != self.total_features:
                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features}), rebuilding layers")
                self.rebuild_conv_layers(feature_dim)

            # Reshape input: [batch, window_size, features] -> [batch, features, window_size]
            x = x.permute(0, 2, 1)
        else:
            raise ValueError(f"Unexpected input shape: {x.size()}, expected 2D or 3D tensor")

        # Convolutional layers with dropout - safely handle small spatial dimensions
        try:
            x = self.dropout1(F.relu(self.norm1(self.conv1(x))))
            x = self.dropout2(F.relu(self.norm2(self.conv2(x))))
            x = self.dropout3(F.relu(self.norm3(self.conv3(x))))
        except Exception as e:
            logger.warning(f"Error in convolutional layers: {str(e)}")
            # Fallback for very small inputs: apply the convolutions without
            # normalization and dropout
            if seq_len < 3:
                x = F.relu(self.conv1(x))
                x = F.relu(self.conv2(x))
                # Skip the last conv if we still get dimension errors
                try:
                    x = F.relu(self.conv3(x))
                except Exception:
                    pass
            # For seq_len >= 3 there is no conv fallback: x passes through
            # unchanged and relies on the guarded layers below

        # Store conv features for extrema detection
        conv_features = x

        # Get the current shape after convolutions
        _, channels, conv_seq_len = x.size()

        # Initialize fc1 lazily now that the flattened size is known
        if self.fc1 is None:
            flattened_size = channels * conv_seq_len
            logger.info(f"Initializing fc1 with input size {flattened_size}")
            self.fc1 = nn.Linear(flattened_size, 512).to(self.device)

        # Apply extrema detection safely
        try:
            extrema_features = F.relu(self.extrema_norm(self.extrema_conv(conv_features)))
        except Exception as e:
            logger.warning(f"Error in extrema detection: {str(e)}")
            extrema_features = conv_features  # Fallback

        # Handle attention for small sequence lengths
        if conv_seq_len > 1:
            # Reshape for attention: [batch, channels, seq_len] -> [batch, seq_len, channels]
            x_attention = x.permute(0, 2, 1)

            # Apply attention; note that the result is computed but not
            # currently fused back into the main path
            try:
                attention_output, attention_weights = self.attention(x_attention)
            except Exception as e:
                logger.warning(f"Error in attention layer: {str(e)}")
                # Fallback: don't use attention

        # Flatten - get the actual shape for this batch
        flattened_size = channels * conv_seq_len
        x = x.view(batch_size, flattened_size)

        # Recreate fc1 if the flattened size has changed since initialization
        if self.fc1.in_features != flattened_size:
            logger.info(f"Recreating fc1 layer to match input size {flattened_size}")
            self.fc1 = nn.Linear(flattened_size, 512).to(self.device)
            # Reinitialize optimizer after changing the model
            self.optimizer = optim.Adam(self.parameters(), lr=0.001)

        # Fully connected layers with dropout
        x = F.relu(self.fc1(x))
        x = self.dropout_fc(F.relu(self.fc2(x)))

        # Split into advantage and value streams
        advantage = self.fc3(x)
        value = self.value_fc(x)

        # Combine: Q(s, a) = V(s) + (A(s, a) - mean_a' A(s, a'))
        q_values = value + (advantage - advantage.mean(dim=1, keepdim=True))

        # Extrema prediction from the shared fully connected features
        # (the specialized extrema_features above are not used by this head)
        extrema_pred = self.extrema_fc(x)

        return q_values, extrema_pred

    def predict(self, X):
        """Make predictions, returning actions, raw Q-values and extrema classes"""
        self.eval()

        # Convert to tensor if not already
        if not isinstance(X, torch.Tensor):
            X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
        else:
            X_tensor = X.to(self.device)

        with torch.no_grad():
            q_values, extrema_pred = self(X_tensor)
            q_values_np = q_values.cpu().numpy()
            actions = np.argmax(q_values_np, axis=1)

            # Also return extrema predictions
            extrema_np = extrema_pred.cpu().numpy()
            extrema_classes = np.argmax(extrema_np, axis=1)

        return actions, q_values_np, extrema_classes

    def save(self, path: str):
        """Save model weights"""
        # Guard against paths without a directory component
        directory = os.path.dirname(path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        torch.save(self.state_dict(), f"{path}.pt")
        logger.info(f"Model saved to {path}.pt")

    def load(self, path: str):
        """Load model weights"""
        self.load_state_dict(torch.load(f"{path}.pt", map_location=self.device))
        self.eval()
        logger.info(f"Model loaded from {path}.pt")