enhancements
@@ -11,6 +11,39 @@ from typing import List, Tuple
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class PricePatternAttention(nn.Module):
    """
    Attention mechanism specifically designed to focus on price patterns
    that might indicate local extrema or trend reversals
    """
    def __init__(self, input_dim, hidden_dim=64):
        super(PricePatternAttention, self).__init__()
        self.query = nn.Linear(input_dim, hidden_dim)
        self.key = nn.Linear(input_dim, hidden_dim)
        self.value = nn.Linear(input_dim, hidden_dim)
        self.scale = torch.sqrt(torch.tensor(hidden_dim, dtype=torch.float32))
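        # Scaling by sqrt(hidden_dim) follows scaled dot-product attention: it keeps
        # the variance of the query/key scores roughly constant as hidden_dim grows,
        # so the softmax over attention weights does not saturate.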

    def forward(self, x):
        """Apply attention to input sequence"""
        # x shape: [batch_size, seq_len, features]
        batch_size, seq_len, _ = x.size()

        # Project input to query, key, value
        q = self.query(x)  # [batch_size, seq_len, hidden_dim]
        k = self.key(x)    # [batch_size, seq_len, hidden_dim]
        v = self.value(x)  # [batch_size, seq_len, hidden_dim]

        # Calculate attention scores
        scores = torch.matmul(q, k.transpose(-2, -1)) / self.scale  # [batch_size, seq_len, seq_len]

        # Apply softmax to get attention weights
        attn_weights = F.softmax(scores, dim=-1)  # [batch_size, seq_len, seq_len]

        # Apply attention to values
        output = torch.matmul(attn_weights, v)  # [batch_size, seq_len, hidden_dim]

        return output, attn_weights
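
# Usage sketch (illustrative; shapes assumed from the comments in forward()):
#
#     attn = PricePatternAttention(input_dim=256, hidden_dim=64)
#     x = torch.randn(8, 20, 256)       # [batch_size, seq_len, features]
#     context, weights = attn(x)        # context: [8, 20, 64], weights: [8, 20, 20]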

class CNNModelPyTorch(nn.Module):
    """
    CNN model for trading with multiple timeframes
@@ -30,7 +63,15 @@ class CNNModelPyTorch(nn.Module):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        logger.info(f"Using device: {self.device}")

-       # Convolutional layers
        # Create model architecture
        self._create_layers()

        # Move model to device
        self.to(self.device)

    def _create_layers(self):
        """Create all model layers with current feature dimensions"""
        # Convolutional layers - use total_features as input channels
        self.conv1 = nn.Conv1d(self.total_features, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(64)

@@ -40,24 +81,49 @@ class CNNModelPyTorch(nn.Module):
        self.conv3 = nn.Conv1d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(256)

-       # Calculate size after convolutions
-       conv_output_size = window_size * 256
        # Add price pattern attention layer
        self.attention = PricePatternAttention(256)

        # Extrema detection specialized convolutional layer
        self.extrema_conv = nn.Conv1d(256, 128, kernel_size=5, padding=2)
        self.extrema_bn = nn.BatchNorm1d(128)

        # Calculate size after convolutions - adjusted for attention output
        conv_output_size = self.window_size * 256

        # Fully connected layers
        self.fc1 = nn.Linear(conv_output_size, 512)
        self.fc2 = nn.Linear(512, 256)

        # Advantage and Value streams (Dueling DQN architecture)
-       self.fc3 = nn.Linear(256, output_size)  # Advantage stream
        self.fc3 = nn.Linear(256, self.output_size)  # Advantage stream
        self.value_fc = nn.Linear(256, 1)  # Value stream

        # Additional prediction head for extrema detection (tops/bottoms)
        self.extrema_fc = nn.Linear(256, 3)  # 0=bottom, 1=top, 2=neither

        # Initialize optimizer and scheduler
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, mode='max', factor=0.5, patience=5, verbose=True
        )
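        # Note: mode='max' assumes the monitored validation metric is one where higher
        # is better (e.g. accuracy or reward); the learning rate is halved after 5
        # scheduler steps without improvement.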

    def rebuild_conv_layers(self, input_channels):
        """
        Rebuild convolutional layers for different input dimensions

-       # Move model to device
        Args:
            input_channels: Number of input channels (features) in the data
        """
        logger.info(f"Rebuilding convolutional layers for {input_channels} input channels")

        # Update total features
        self.total_features = input_channels

        # Recreate all layers with new dimensions
        self._create_layers()

        # Move layers to device
        self.to(self.device)
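
    # Behaviour note (example shapes assumed): if incoming data suddenly carries 8
    # features per bar instead of the 5 the model was built with, forward() detects
    # the mismatch and calls rebuild_conv_layers(8). This re-runs _create_layers(),
    # which re-initializes the affected weights and also creates a fresh optimizer
    # and scheduler, before moving everything back to the model's device.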

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
@@ -65,8 +131,13 @@ class CNNModelPyTorch(nn.Module):
        # Ensure input is on the correct device
        x = x.to(self.device)

        # Check and handle if input dimensions don't match model expectations
        batch_size, window_len, feature_dim = x.size()
        if feature_dim != self.total_features:
            logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features}), rebuilding layers")
            self.rebuild_conv_layers(feature_dim)

        # Reshape input: [batch, window_size, features] -> [batch, channels, window_size]
        batch_size = x.size(0)
        x = x.permute(0, 2, 1)

        # Convolutional layers
@@ -74,6 +145,26 @@ class CNNModelPyTorch(nn.Module):
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))

        # Store conv features for extrema detection
        conv_features = x

        # Reshape for attention: [batch, channels, window_size] -> [batch, window_size, channels]
        x_attention = x.permute(0, 2, 1)

        # Apply attention
        attention_output, attention_weights = self.attention(x_attention)

        # Reshape the attention output back to [batch, channels, window_size]; it is
        # kept as an auxiliary representation and is not fused into the main pathway,
        # to avoid dimension mismatch issues
        attention_reshaped = attention_output.permute(0, 2, 1)  # [batch, channels, window_size]

        # Apply extrema detection specialized layer
        extrema_features = F.relu(self.extrema_bn(self.extrema_conv(conv_features)))

        # Continue the main pathway from the convolutional features (no residual
        # connection with the attention output, to avoid dimension mismatches)
        x = conv_features

        # Flatten
        x = x.view(batch_size, -1)

@@ -88,7 +179,11 @@ class CNNModelPyTorch(nn.Module):
        # Combine value and advantage
        q_values = value + (advantage - advantage.mean(dim=1, keepdim=True))
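        # Dueling DQN head: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)); subtracting the
        # mean advantage keeps the value/advantage decomposition identifiable, since a
        # constant could otherwise be shifted freely between the two streams.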

-       return q_values, value
        # Also compute extrema prediction from the same features
        extrema_flat = extrema_features.view(batch_size, -1)  # note: currently unused
        extrema_pred = self.extrema_fc(x)  # Use the same features for extrema prediction

        return q_values, extrema_pred

    def predict(self, X):
        """Make predictions"""
@@ -101,11 +196,15 @@ class CNNModelPyTorch(nn.Module):
        X_tensor = X.to(self.device)

        with torch.no_grad():
-           q_values, value = self(X_tensor)
            q_values, extrema_pred = self(X_tensor)
            q_values_np = q_values.cpu().numpy()
            actions = np.argmax(q_values_np, axis=1)

-       return actions, q_values_np
        # Also return extrema predictions
        extrema_np = extrema_pred.cpu().numpy()
        extrema_classes = np.argmax(extrema_np, axis=1)

        return actions, q_values_np, extrema_classes
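
    # Illustrative call (names and shapes assumed): for a batch of windows shaped
    # [batch, window_size, n_features] passed as a torch tensor X,
    #     actions, q_vals, extrema = model.predict(X)
    # returns the argmax action per sample, the raw Q-values, and the extrema class
    # per sample (0=bottom, 1=top, 2=neither).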

    def save(self, path: str):
        """Save model weights"""