new training process and changes to the models (wip)
@@ -44,13 +44,46 @@ class PricePatternAttention(nn.Module):
         return output, attn_weights
 
+class AdaptiveNorm(nn.Module):
+    """
+    Adaptive normalization layer that chooses between different normalization
+    methods based on the input dimensions.
+    """
+    def __init__(self, num_features):
+        super(AdaptiveNorm, self).__init__()
+        self.batch_norm = nn.BatchNorm1d(num_features, affine=True)
+        self.group_norm = nn.GroupNorm(min(32, num_features), num_features)
+        self.layer_norm = nn.LayerNorm([num_features, 1])
+
+    def forward(self, x):
+        # Check input dimensions
+        batch_size, channels, seq_len = x.size()
+
+        # Choose the normalization method:
+        # - batch_size > 1 and seq_len > 1: BatchNorm
+        # - batch_size == 1 and seq_len > 1: GroupNorm
+        # - seq_len == 1: LayerNorm sized to the actual input
+        if batch_size > 1 and seq_len > 1:
+            return self.batch_norm(x)
+        elif seq_len > 1:
+            return self.group_norm(x)
+        else:
+            # For 1D inputs (seq_len == 1), adjust the layer norm
+            # to the actual input size
+            if not hasattr(self, 'layer_norm_1d') or self.layer_norm_1d.normalized_shape[0] != channels:
+                self.layer_norm_1d = nn.LayerNorm([channels, seq_len]).to(x.device)
+            return self.layer_norm_1d(x)
+
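The dispatch in AdaptiveNorm can be checked directly with dummy tensors. A minimal sketch, assuming AdaptiveNorm is importable from this module and using illustrative shapes only:

import torch

norm = AdaptiveNorm(64)  # assumes the AdaptiveNorm class added above is in scope

full_batch  = torch.randn(8, 64, 20)   # batch > 1 and seq_len > 1 -> BatchNorm1d
single_item = torch.randn(1, 64, 20)   # batch == 1, seq_len > 1   -> GroupNorm
single_step = torch.randn(4, 64, 1)    # seq_len == 1              -> LayerNorm built for [channels, 1]

for t in (full_batch, single_item, single_step):
    print(tuple(t.shape), '->', tuple(norm(t).shape))  # every branch preserves the input shape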
 class CNNModelPyTorch(nn.Module):
     """
     CNN model for trading with multiple timeframes
     """
-    def __init__(self, window_size, num_features, output_size, timeframes):
+    def __init__(self, window_size=20, num_features=5, output_size=3, timeframes=None):
         super(CNNModelPyTorch, self).__init__()
 
+        if timeframes is None:
+            timeframes = [1]
+
         self.window_size = window_size
         self.num_features = num_features
         self.output_size = output_size
@@ -73,27 +106,28 @@ class CNNModelPyTorch(nn.Module):
         """Create all model layers with current feature dimensions"""
         # Convolutional layers - use total_features as input channels
         self.conv1 = nn.Conv1d(self.total_features, 64, kernel_size=3, padding=1)
-        self.bn1 = nn.BatchNorm1d(64)
+        self.norm1 = AdaptiveNorm(64)
+        self.dropout1 = nn.Dropout(0.2)
 
         self.conv2 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
-        self.bn2 = nn.BatchNorm1d(128)
+        self.norm2 = AdaptiveNorm(128)
+        self.dropout2 = nn.Dropout(0.3)
 
         self.conv3 = nn.Conv1d(128, 256, kernel_size=3, padding=1)
-        self.bn3 = nn.BatchNorm1d(256)
+        self.norm3 = AdaptiveNorm(256)
+        self.dropout3 = nn.Dropout(0.4)
 
         # Add price pattern attention layer
         self.attention = PricePatternAttention(256)
 
         # Extrema detection specialized convolutional layer
-        self.extrema_conv = nn.Conv1d(256, 128, kernel_size=5, padding=2)
-        self.extrema_bn = nn.BatchNorm1d(128)
+        self.extrema_conv = nn.Conv1d(256, 128, kernel_size=3, padding=1)  # Smaller kernel for small inputs
+        self.extrema_norm = AdaptiveNorm(128)
 
-        # Calculate size after convolutions - adjusted for attention output
-        conv_output_size = self.window_size * 256
-
-        # Fully connected layers
-        self.fc1 = nn.Linear(conv_output_size, 512)
+        # Fully connected layers - input size will be determined dynamically
+        self.fc1 = None  # Will be initialized in forward pass
         self.fc2 = nn.Linear(512, 256)
         self.dropout_fc = nn.Dropout(0.5)
 
         # Advantage and Value streams (Dueling DQN architecture)
         self.fc3 = nn.Linear(256, self.output_size)  # Advantage stream
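Only the advantage stream (fc3) appears in this hunk; the value stream and the dueling aggregation live outside the shown context. As a hedged sketch of how such a head is typically combined, with value_fc as a hypothetical value-stream layer rather than anything taken from this diff:

import torch
import torch.nn as nn

fc3 = nn.Linear(256, 3)        # advantage stream A(s, a), mirroring the diff
value_fc = nn.Linear(256, 1)   # hypothetical value stream V(s), not part of this diff

features = torch.randn(8, 256)                 # stand-in for the shared fc2 output
advantage = fc3(features)                      # [batch, num_actions]
value = value_fc(features)                     # [batch, 1]
q_values = value + advantage - advantage.mean(dim=1, keepdim=True)  # Q = V + (A - mean A)
print(q_values.shape)                          # torch.Size([8, 3])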
@@ -131,46 +165,96 @@ class CNNModelPyTorch(nn.Module):
         # Ensure input is on the correct device
         x = x.to(self.device)
 
-        # Check and handle if input dimensions don't match model expectations
-        batch_size, window_len, feature_dim = x.size()
-        if feature_dim != self.total_features:
-            logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features}), rebuilding layers")
-            self.rebuild_conv_layers(feature_dim)
+        # Check input dimensions and reshape as needed
+        if len(x.size()) == 2:
+            # If input is [batch_size, features], reshape to [batch_size, features, 1]
+            batch_size, feature_dim = x.size()
+
+            # Check and handle if input features don't match model expectations
+            if feature_dim != self.total_features:
+                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features}), rebuilding layers")
+                self.rebuild_conv_layers(feature_dim)
+
+            # For 1D input, use a sequence length of 1
+            seq_len = 1
+            x = x.unsqueeze(2)  # Reshape to [batch, features, 1]
+        elif len(x.size()) == 3:
+            # Standard case: [batch_size, window_size, features]
+            batch_size, seq_len, feature_dim = x.size()
+
+            # Check and handle if input dimensions don't match model expectations
+            if feature_dim != self.total_features:
+                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features}), rebuilding layers")
+                self.rebuild_conv_layers(feature_dim)
+
+            # Reshape input: [batch, window_size, features] -> [batch, features, window_size]
+            x = x.permute(0, 2, 1)
+        else:
+            raise ValueError(f"Unexpected input shape: {x.size()}, expected 2D or 3D tensor")
 
-        # Reshape input: [batch, window_size, features] -> [batch, channels, window_size]
-        x = x.permute(0, 2, 1)
-
-        # Convolutional layers
-        x = F.relu(self.bn1(self.conv1(x)))
-        x = F.relu(self.bn2(self.conv2(x)))
-        x = F.relu(self.bn3(self.conv3(x)))
+        # Convolutional layers with dropout - safely handle small spatial dimensions
+        try:
+            x = self.dropout1(F.relu(self.norm1(self.conv1(x))))
+            x = self.dropout2(F.relu(self.norm2(self.conv2(x))))
+            x = self.dropout3(F.relu(self.norm3(self.conv3(x))))
+        except Exception as e:
+            logger.warning(f"Error in convolutional layers: {str(e)}")
+            # Fallback for very small inputs: skip some convolutions
+            if seq_len < 3:
+                # Apply a simpler convolution for very small inputs
+                x = F.relu(self.conv1(x))
+                x = F.relu(self.conv2(x))
+                # Skip last conv if we get dimension errors
+                try:
+                    x = F.relu(self.conv3(x))
+                except:
+                    pass
 
         # Store conv features for extrema detection
         conv_features = x
 
-        # Reshape for attention: [batch, channels, window_size] -> [batch, window_size, channels]
-        x_attention = x.permute(0, 2, 1)
+        # Get the current shape after convolutions
+        _, channels, conv_seq_len = x.size()
 
-        # Apply attention
-        attention_output, attention_weights = self.attention(x_attention)
+        # Initialize fc1 if not created yet or if the shape has changed
+        if self.fc1 is None:
+            flattened_size = channels * conv_seq_len
+            logger.info(f"Initializing fc1 with input size {flattened_size}")
+            self.fc1 = nn.Linear(flattened_size, 512).to(self.device)
 
-        # We'll use attention directly without the residual connection
-        # to avoid dimension mismatch issues
-        attention_reshaped = attention_output.permute(0, 2, 1)  # [batch, channels, window_size]
+        # Apply extrema detection safely
+        try:
+            extrema_features = F.relu(self.extrema_norm(self.extrema_conv(conv_features)))
+        except Exception as e:
+            logger.warning(f"Error in extrema detection: {str(e)}")
+            extrema_features = conv_features  # Fallback
 
-        # Apply extrema detection specialized layer
-        extrema_features = F.relu(self.extrema_bn(self.extrema_conv(conv_features)))
+        # Handle attention for small sequence lengths
+        if conv_seq_len > 1:
+            # Reshape for attention: [batch, channels, seq_len] -> [batch, seq_len, channels]
+            x_attention = x.permute(0, 2, 1)
+
+            # Apply attention
+            try:
+                attention_output, attention_weights = self.attention(x_attention)
+            except Exception as e:
+                logger.warning(f"Error in attention layer: {str(e)}")
+                # Fallback: don't use attention
 
+        # Use attention features directly instead of the residual connection
+        # to avoid dimension mismatches
+        x = conv_features  # Just use the convolutional features
+        # Flatten - get the actual shape for this batch
+        flattened_size = channels * conv_seq_len
+        x = x.view(batch_size, flattened_size)
 
-        # Flatten
-        x = x.view(batch_size, -1)
+        # Check if we need to recreate fc1 with the correct size
+        if self.fc1.in_features != flattened_size:
+            logger.info(f"Recreating fc1 layer to match input size {flattened_size}")
+            self.fc1 = nn.Linear(flattened_size, 512).to(self.device)
+            # Reinitialize optimizer after changing the model
+            self.optimizer = optim.Adam(self.parameters(), lr=0.001)
 
-        # Fully connected layers
+        # Fully connected layers with dropout
         x = F.relu(self.fc1(x))
-        x = F.relu(self.fc2(x))
+        x = self.dropout_fc(F.relu(self.fc2(x)))
 
         # Split into advantage and value streams
         advantage = self.fc3(x)
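Taken together, the reworked forward pass accepts either a flat [batch, features] vector or a full [batch, window_size, features] window and sizes fc1 lazily from the actual post-convolution shape. A rough usage sketch under those assumptions (only the names and defaults visible in the diff are taken as given; everything else, including the return value, is assumed):

import torch

# Rough sketch only: assumes CNNModelPyTorch is importable and that the parts of
# __init__/forward not shown in this diff (device setup, total_features, the
# return value) behave as the visible code implies.
model = CNNModelPyTorch(window_size=20, num_features=5, output_size=3, timeframes=[1])

# Standard 3D case: [batch, window_size, features]; fc1 is created lazily on the
# first call, sized to channels * conv_seq_len after the conv stack.
out_window = model(torch.randn(8, 20, 5))

# 2D case: [batch, features] is reshaped internally to [batch, features, 1]; the
# flattened size shrinks, so forward recreates fc1 (and the optimizer) to match.
out_flat = model(torch.randn(8, 5))

As a design note, torch.nn.LazyLinear offers similar defer-until-first-forward sizing, although the explicit recreation on a changed flattened size (and the optimizer reset) would still need the manual handling shown in the diff.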