misc
@@ -112,27 +112,33 @@ class SimpleCNN(nn.Module):
     def _build_network(self):
         """Build the neural network with current feature dimensions"""
         # Create a flexible architecture that adapts to input dimensions
+        # Increased complexity
         self.fc_layers = nn.Sequential(
-            nn.Linear(self.feature_dim, 256),
+            nn.Linear(self.feature_dim, 512),  # Increased size
             nn.ReLU(),
-            nn.Linear(256, 256),
-            nn.ReLU()
+            nn.Dropout(0.2),  # Added dropout
+            nn.Linear(512, 512),  # Increased size
+            nn.ReLU(),
+            nn.Dropout(0.2),  # Added dropout
+            nn.Linear(512, 512),  # Added layer
+            nn.ReLU(),
+            nn.Dropout(0.2)  # Added dropout
         )
 
         # Output heads (Dueling DQN architecture)
-        self.advantage_head = nn.Linear(256, self.n_actions)
-        self.value_head = nn.Linear(256, 1)
+        self.advantage_head = nn.Linear(512, self.n_actions)  # Updated input size
+        self.value_head = nn.Linear(512, 1)  # Updated input size
 
         # Extrema detection head
-        self.extrema_head = nn.Linear(256, 3)  # 0=bottom, 1=top, 2=neither
+        self.extrema_head = nn.Linear(512, 3)  # 0=bottom, 1=top, 2=neither; updated input size
 
         # Price prediction heads for different timeframes
-        self.price_pred_immediate = nn.Linear(256, 3)  # Up, Down, Sideways for immediate term (1s, 1m)
-        self.price_pred_midterm = nn.Linear(256, 3)  # Up, Down, Sideways for mid-term (1h)
-        self.price_pred_longterm = nn.Linear(256, 3)  # Up, Down, Sideways for long-term (1d)
+        self.price_pred_immediate = nn.Linear(512, 3)  # Updated input size
+        self.price_pred_midterm = nn.Linear(512, 3)  # Updated input size
+        self.price_pred_longterm = nn.Linear(512, 3)  # Updated input size
 
         # Regression heads for exact price prediction
-        self.price_pred_value = nn.Linear(256, 4)  # Predicts % change for each timeframe (1s, 1m, 1h, 1d)
+        self.price_pred_value = nn.Linear(512, 4)  # Updated input size
 
     def _check_rebuild_network(self, features):
         """Check if network needs to be rebuilt for different feature dimensions"""
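
Note (not part of the diff): the widened stack keeps a single 512-unit hidden size all the way through, so every output head now takes a 512-dim input. A minimal standalone sketch of the resulting shapes, with a made-up feature_dim and batch size:

    import torch
    import torch.nn as nn

    feature_dim, n_actions = 100, 3  # hypothetical sizes for illustration
    fc_layers = nn.Sequential(
        nn.Linear(feature_dim, 512), nn.ReLU(), nn.Dropout(0.2),
        nn.Linear(512, 512), nn.ReLU(), nn.Dropout(0.2),
        nn.Linear(512, 512), nn.ReLU(), nn.Dropout(0.2),
    )
    advantage_head = nn.Linear(512, n_actions)
    value_head = nn.Linear(512, 1)

    hidden = fc_layers(torch.randn(8, feature_dim))                # [8, 512]
    print(advantage_head(hidden).shape, value_head(hidden).shape)  # [8, 3] [8, 1]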
@@ -146,58 +152,70 @@ class SimpleCNN(nn.Module):
         return False
 
     def forward(self, x):
-        """
-        Forward pass through the network
-        Returns action values, extrema predictions, and price movement predictions for multiple timeframes
-        """
-        # Handle different input shapes
-        if len(x.shape) == 2:  # [batch_size, features]
-            # Simple feature vector
-            batch_size, features = x.shape
-            # Check if we need to rebuild the network for new dimensions
-            self._check_rebuild_network(features)
-
-        elif len(x.shape) == 3:  # [batch_size, timeframes/channels, features]
-            # Reshape to flatten timeframes/channels with features
-            batch_size, timeframes, features = x.shape
-            total_features = timeframes * features
-
-            # Check if we need to rebuild the network for new dimensions
-            self._check_rebuild_network(total_features)
-
-            # Reshape tensor to [batch_size, total_features]
-            x = x.reshape(batch_size, total_features)
-
-        # Apply fully connected layers
-        fc_out = self.fc_layers(x)
+        """Forward pass through the network"""
+        # Flatten input if needed to ensure it matches the expected feature dimension
+        batch_size = x.size(0)
 
-        # Dueling architecture
-        advantage = self.advantage_head(fc_out)
-        value = self.value_head(fc_out)
+        # Reshape input if needed
+        if len(x.shape) > 2:  # Handle multi-dimensional input
+            # For 3D input: [batch, seq_len, features] or [batch, channels, features]
+            x = x.reshape(batch_size, -1)  # Flatten to [batch, seq_len*features]
 
-        # Q-values = value + (advantage - mean(advantage))
-        action_values = value + advantage - advantage.mean(dim=1, keepdim=True)
+        # Check if the feature dimension matches and rebuild if necessary
+        if x.size(1) != self.feature_dim:
+            self._check_rebuild_network(x.size(1))
 
-        # Extrema predictions
-        extrema_pred = self.extrema_head(fc_out)
+        # Apply fully connected layers with ReLU activation
+        x = self.fc_layers(x)
 
-        # Price movement predictions for different timeframes
-        price_immediate = self.price_pred_immediate(fc_out)  # 1s, 1m
-        price_midterm = self.price_pred_midterm(fc_out)  # 1h
-        price_longterm = self.price_pred_longterm(fc_out)  # 1d
+        # Branch 1: Action values (Q-values)
+        action_values = self.advantage_head(x)
 
-        # Regression values for exact price predictions (percentage changes)
-        price_values = self.price_pred_value(fc_out)
+        # Branch 2: Extrema detection (market top/bottom classification)
+        extrema_pred = self.extrema_head(x)
 
-        # Return all predictions in a structured dictionary
+        # Branch 3: Price movement prediction over different timeframes
+        # Split into three timeframes: immediate, midterm, longterm
+        price_immediate = self.price_pred_immediate(x)
+        price_midterm = self.price_pred_midterm(x)
+        price_longterm = self.price_pred_longterm(x)
+
+        # Branch 4: Value prediction (regression for expected price changes)
+        price_values = self.price_pred_value(x)
+
+        # Package price predictions
         price_predictions = {
-            'immediate': price_immediate,
-            'midterm': price_midterm,
-            'longterm': price_longterm,
-            'values': price_values
+            'immediate': price_immediate,  # Classification (up/down/sideways)
+            'midterm': price_midterm,      # Classification (up/down/sideways)
+            'longterm': price_longterm,    # Classification (up/down/sideways)
+            'values': price_values         # Regression (expected % change)
         }
 
-        return action_values, extrema_pred, price_predictions
+        # Return all outputs and the hidden feature representation
+        return action_values, extrema_pred, price_predictions, x
 
+    def extract_features(self, x):
+        """Extract hidden features from the input and return both action values and features"""
+        # Flatten input if needed to ensure it matches the expected feature dimension
+        batch_size = x.size(0)
+
+        # Reshape input if needed
+        if len(x.shape) > 2:  # Handle multi-dimensional input
+            # For 3D input: [batch, seq_len, features] or [batch, channels, features]
+            x = x.reshape(batch_size, -1)  # Flatten to [batch, seq_len*features]
+
+        # Check if the feature dimension matches and rebuild if necessary
+        if x.size(1) != self.feature_dim:
+            self._check_rebuild_network(x.size(1))
+
+        # Apply fully connected layers with ReLU activation
+        x_features = self.fc_layers(x)
+
+        # Branch 1: Action values (Q-values)
+        action_values = self.advantage_head(x_features)
+
+        # Return action values and the hidden feature representation
+        return action_values, x_features
+
     def save(self, path):
         """Save model weights and architecture"""
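
Note (not part of the diff): forward() now returns four values (the hidden representation is appended), and the dueling combination Q = V + (A - mean(A)) from the removed code is no longer applied; action_values come straight from advantage_head. A hedged sketch of how a caller might adapt, where model and states are placeholder names:

    # `model` is a SimpleCNN instance, `states` a [batch, features] tensor (illustrative names)
    q_values, extrema_logits, price_preds, hidden = model(states)

    action = q_values.argmax(dim=1)               # greedy action per sample
    extrema_class = extrema_logits.argmax(dim=1)  # 0=bottom, 1=top, 2=neither
    expected_changes = price_preds['values']      # regression head: expected % change per timeframe

    # If the old dueling estimate is still wanted, it can be recombined outside the model:
    # q_dueling = model.value_head(hidden) + q_values - q_values.mean(dim=1, keepdim=True)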
@@ -241,8 +259,10 @@ class CNNModelPyTorch(nn.Module):
         self.output_size = output_size
         self.timeframes = timeframes
 
-        # Calculate total input features across all timeframes
-        self.total_features = num_features * len(timeframes)
+        # num_features should already be the total features across all timeframes
+        self.total_features = num_features
+        logger.info(f"CNNModelPyTorch initialized with window_size={window_size}, num_features={num_features}, "
+                    f"total_features={self.total_features}, output_size={output_size}, timeframes={timeframes}")
 
         # Device configuration
         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
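
Note (not part of the diff): num_features is now treated as the already-flattened total across timeframes, so the caller does the multiplication. A hypothetical example (the timeframe list, per-timeframe feature count, and keyword values are made up):

    timeframes = ['1s', '1m', '1h', '1d']        # hypothetical
    features_per_timeframe = 26                  # hypothetical
    num_features = features_per_timeframe * len(timeframes)  # 104, passed in directly

    model = CNNModelPyTorch(window_size=20, num_features=num_features,
                            output_size=3, timeframes=timeframes)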
@@ -317,6 +337,10 @@ class CNNModelPyTorch(nn.Module):
         # Ensure input is on the correct device
         x = x.to(self.device)
 
+        # Log input tensor shape for debugging
+        input_shape = x.size()
+        logger.debug(f"Input tensor shape: {input_shape}")
+
         # Check input dimensions and reshape as needed
         if len(x.size()) == 2:
             # If input is [batch_size, features], reshape to [batch_size, features, 1]
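
Note (not part of the diff): the new shape trace uses logger.debug, so it stays silent at the default INFO level. A minimal way to surface it while debugging (the logger name is an assumption; use whatever name the module actually registers):

    import logging

    logging.basicConfig(level=logging.DEBUG)
    logging.getLogger('cnn_model').setLevel(logging.DEBUG)  # hypothetical logger name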
@@ -324,8 +348,17 @@ class CNNModelPyTorch(nn.Module):
 
             # Check and handle if input features don't match model expectations
             if feature_dim != self.total_features:
-                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features}), rebuilding layers")
-                self.rebuild_conv_layers(feature_dim)
+                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features})")
+                if not hasattr(self, 'rebuild_warning_shown'):
+                    logger.error(f"Dimension mismatch: Expected {self.total_features} features but got {feature_dim}")
+                    self.rebuild_warning_shown = True
+                # Don't rebuild - instead adapt the input
+                # If features are fewer, pad with zeros. If more, truncate
+                if feature_dim < self.total_features:
+                    padding = torch.zeros(batch_size, self.total_features - feature_dim, device=self.device)
+                    x = torch.cat([x, padding], dim=1)
+                else:
+                    x = x[:, :self.total_features]
 
             # For 1D input, use a sequence length of 1
             seq_len = 1
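
Note (not part of the diff): instead of rebuilding the conv layers, the 2D branch now zero-pads or truncates the feature axis. A toy illustration of that adaptation with made-up sizes (model expects 10 features):

    import torch

    total_features = 10               # what the model expects (hypothetical)
    x_small = torch.randn(4, 7)       # fewer features -> zero-padded
    x_large = torch.randn(4, 13)      # more features  -> truncated

    pad = torch.zeros(x_small.size(0), total_features - x_small.size(1))
    print(torch.cat([x_small, pad], dim=1).shape)  # torch.Size([4, 10])
    print(x_large[:, :total_features].shape)       # torch.Size([4, 10])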
@@ -336,14 +369,26 @@ class CNNModelPyTorch(nn.Module):
 
             # Check and handle if input dimensions don't match model expectations
             if feature_dim != self.total_features:
-                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features}), rebuilding layers")
-                self.rebuild_conv_layers(feature_dim)
+                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features})")
+                if not hasattr(self, 'rebuild_warning_shown'):
+                    logger.error(f"Dimension mismatch: Expected {self.total_features} features but got {feature_dim}")
+                    self.rebuild_warning_shown = True
+                # Don't rebuild - instead adapt the input
+                # If features are fewer, pad with zeros. If more, truncate
+                if feature_dim < self.total_features:
+                    padding = torch.zeros(batch_size, seq_len, self.total_features - feature_dim, device=self.device)
+                    x = torch.cat([x, padding], dim=2)
+                else:
+                    x = x[:, :, :self.total_features]
 
             # Reshape input: [batch, window_size, features] -> [batch, features, window_size]
             x = x.permute(0, 2, 1)
         else:
             raise ValueError(f"Unexpected input shape: {x.size()}, expected 2D or 3D tensor")
 
+        # Log reshaped tensor for debugging
+        logger.debug(f"Reshaped tensor for convolution: {x.size()}")
+
         # Convolutional layers with dropout - safely handle small spatial dimensions
         try:
             x = self.dropout1(F.relu(self.norm1(self.conv1(x))))
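
Note (not part of the diff): the 3D branch applies the same pad-or-truncate adaptation along the last axis and then permutes to channels-first for the Conv1d stack. A toy illustration with made-up sizes:

    import torch

    batch, seq_len, feature_dim, total_features = 4, 20, 7, 10  # hypothetical sizes
    x = torch.randn(batch, seq_len, feature_dim)

    padding = torch.zeros(batch, seq_len, total_features - feature_dim)
    x = torch.cat([x, padding], dim=2)  # [4, 20, 10]
    x = x.permute(0, 2, 1)              # [4, 10, 20] = [batch, features, window_size]
    print(x.shape)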