misc
@@ -112,27 +112,33 @@ class SimpleCNN(nn.Module):
     def _build_network(self):
         """Build the neural network with current feature dimensions"""
         # Create a flexible architecture that adapts to input dimensions
+        # Increased complexity
         self.fc_layers = nn.Sequential(
-            nn.Linear(self.feature_dim, 256),
+            nn.Linear(self.feature_dim, 512),  # Increased size
             nn.ReLU(),
-            nn.Linear(256, 256),
-            nn.ReLU()
+            nn.Dropout(0.2),  # Added dropout
+            nn.Linear(512, 512),  # Increased size
+            nn.ReLU(),
+            nn.Dropout(0.2),  # Added dropout
+            nn.Linear(512, 512),  # Added layer
+            nn.ReLU(),
+            nn.Dropout(0.2)  # Added dropout
         )
 
         # Output heads (Dueling DQN architecture)
-        self.advantage_head = nn.Linear(256, self.n_actions)
-        self.value_head = nn.Linear(256, 1)
+        self.advantage_head = nn.Linear(512, self.n_actions)  # Updated input size
+        self.value_head = nn.Linear(512, 1)  # Updated input size
 
         # Extrema detection head
-        self.extrema_head = nn.Linear(256, 3)  # 0=bottom, 1=top, 2=neither
+        self.extrema_head = nn.Linear(512, 3)  # 0=bottom, 1=top, 2=neither; updated input size
 
         # Price prediction heads for different timeframes
-        self.price_pred_immediate = nn.Linear(256, 3)  # Up, Down, Sideways for immediate term (1s, 1m)
-        self.price_pred_midterm = nn.Linear(256, 3)  # Up, Down, Sideways for mid-term (1h)
-        self.price_pred_longterm = nn.Linear(256, 3)  # Up, Down, Sideways for long-term (1d)
+        self.price_pred_immediate = nn.Linear(512, 3)  # Updated input size
+        self.price_pred_midterm = nn.Linear(512, 3)  # Updated input size
+        self.price_pred_longterm = nn.Linear(512, 3)  # Updated input size
 
         # Regression heads for exact price prediction
-        self.price_pred_value = nn.Linear(256, 4)  # Predicts % change for each timeframe (1s, 1m, 1h, 1d)
+        self.price_pred_value = nn.Linear(512, 4)  # Updated input size
 
     def _check_rebuild_network(self, features):
         """Check if network needs to be rebuilt for different feature dimensions"""
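
Note (not part of the diff): the widened stack keeps a single 512-unit hidden size all the way through, so every output head now takes a 512-dim input. A minimal standalone sketch of the resulting shapes, with a made-up feature_dim and batch size:

    import torch
    import torch.nn as nn

    feature_dim, n_actions = 100, 3  # hypothetical sizes for illustration
    fc_layers = nn.Sequential(
        nn.Linear(feature_dim, 512), nn.ReLU(), nn.Dropout(0.2),
        nn.Linear(512, 512), nn.ReLU(), nn.Dropout(0.2),
        nn.Linear(512, 512), nn.ReLU(), nn.Dropout(0.2),
    )
    advantage_head = nn.Linear(512, n_actions)
    value_head = nn.Linear(512, 1)

    hidden = fc_layers(torch.randn(8, feature_dim))                # [8, 512]
    print(advantage_head(hidden).shape, value_head(hidden).shape)  # [8, 3] [8, 1]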
@@ -146,58 +152,70 @@ class SimpleCNN(nn.Module):
         return False
 
     def forward(self, x):
-        """
-        Forward pass through the network
-        Returns action values, extrema predictions, and price movement predictions for multiple timeframes
-        """
-        # Handle different input shapes
-        if len(x.shape) == 2:  # [batch_size, features]
-            # Simple feature vector
-            batch_size, features = x.shape
-            # Check if we need to rebuild the network for new dimensions
-            self._check_rebuild_network(features)
-
-        elif len(x.shape) == 3:  # [batch_size, timeframes/channels, features]
-            # Reshape to flatten timeframes/channels with features
-            batch_size, timeframes, features = x.shape
-            total_features = timeframes * features
-
-            # Check if we need to rebuild the network for new dimensions
-            self._check_rebuild_network(total_features)
-
-            # Reshape tensor to [batch_size, total_features]
-            x = x.reshape(batch_size, total_features)
-
-        # Apply fully connected layers
-        fc_out = self.fc_layers(x)
+        """Forward pass through the network"""
+        # Flatten input if needed to ensure it matches the expected feature dimension
+        batch_size = x.size(0)
 
-        # Dueling architecture
-        advantage = self.advantage_head(fc_out)
-        value = self.value_head(fc_out)
+        # Reshape input if needed
+        if len(x.shape) > 2:  # Handle multi-dimensional input
+            # For 3D input: [batch, seq_len, features] or [batch, channels, features]
+            x = x.reshape(batch_size, -1)  # Flatten to [batch, seq_len*features]
 
-        # Q-values = value + (advantage - mean(advantage))
-        action_values = value + advantage - advantage.mean(dim=1, keepdim=True)
+        # Check if the feature dimension matches and rebuild if necessary
+        if x.size(1) != self.feature_dim:
+            self._check_rebuild_network(x.size(1))
 
-        # Extrema predictions
-        extrema_pred = self.extrema_head(fc_out)
+        # Apply fully connected layers with ReLU activation
+        x = self.fc_layers(x)
 
-        # Price movement predictions for different timeframes
-        price_immediate = self.price_pred_immediate(fc_out)  # 1s, 1m
-        price_midterm = self.price_pred_midterm(fc_out)  # 1h
-        price_longterm = self.price_pred_longterm(fc_out)  # 1d
+        # Branch 1: Action values (Q-values)
+        action_values = self.advantage_head(x)
 
-        # Regression values for exact price predictions (percentage changes)
-        price_values = self.price_pred_value(fc_out)
+        # Branch 2: Extrema detection (market top/bottom classification)
+        extrema_pred = self.extrema_head(x)
 
-        # Return all predictions in a structured dictionary
+        # Branch 3: Price movement prediction over different timeframes
+        # Split into three timeframes: immediate, midterm, longterm
+        price_immediate = self.price_pred_immediate(x)
+        price_midterm = self.price_pred_midterm(x)
+        price_longterm = self.price_pred_longterm(x)
+
+        # Branch 4: Value prediction (regression for expected price changes)
+        price_values = self.price_pred_value(x)
+
+        # Package price predictions
         price_predictions = {
-            'immediate': price_immediate,
-            'midterm': price_midterm,
-            'longterm': price_longterm,
-            'values': price_values
+            'immediate': price_immediate,  # Classification (up/down/sideways)
+            'midterm': price_midterm,      # Classification (up/down/sideways)
+            'longterm': price_longterm,    # Classification (up/down/sideways)
+            'values': price_values         # Regression (expected % change)
         }
 
-        return action_values, extrema_pred, price_predictions
+        # Return all outputs and the hidden feature representation
+        return action_values, extrema_pred, price_predictions, x
 
+    def extract_features(self, x):
+        """Extract hidden features from the input and return both action values and features"""
+        # Flatten input if needed to ensure it matches the expected feature dimension
+        batch_size = x.size(0)
+
+        # Reshape input if needed
+        if len(x.shape) > 2:  # Handle multi-dimensional input
+            # For 3D input: [batch, seq_len, features] or [batch, channels, features]
+            x = x.reshape(batch_size, -1)  # Flatten to [batch, seq_len*features]
+
+        # Check if the feature dimension matches and rebuild if necessary
+        if x.size(1) != self.feature_dim:
+            self._check_rebuild_network(x.size(1))
+
+        # Apply fully connected layers with ReLU activation
+        x_features = self.fc_layers(x)
+
+        # Branch 1: Action values (Q-values)
+        action_values = self.advantage_head(x_features)
+
+        # Return action values and the hidden feature representation
+        return action_values, x_features
+
     def save(self, path):
         """Save model weights and architecture"""
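
Note (not part of the diff): forward() now returns four values (the hidden representation is appended), and the dueling combination Q = V + (A - mean(A)) from the removed code is no longer applied; action_values come straight from advantage_head. A hedged sketch of how a caller might adapt, where model and states are placeholder names:

    # `model` is a SimpleCNN instance, `states` a [batch, features] tensor (illustrative names)
    q_values, extrema_logits, price_preds, hidden = model(states)

    action = q_values.argmax(dim=1)               # greedy action per sample
    extrema_class = extrema_logits.argmax(dim=1)  # 0=bottom, 1=top, 2=neither
    expected_changes = price_preds['values']      # regression head: expected % change per timeframe

    # If the old dueling estimate is still wanted, it can be recombined outside the model:
    # q_dueling = model.value_head(hidden) + q_values - q_values.mean(dim=1, keepdim=True)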
@@ -241,8 +259,10 @@ class CNNModelPyTorch(nn.Module):
         self.output_size = output_size
         self.timeframes = timeframes
 
-        # Calculate total input features across all timeframes
-        self.total_features = num_features * len(timeframes)
+        # num_features should already be the total features across all timeframes
+        self.total_features = num_features
+        logger.info(f"CNNModelPyTorch initialized with window_size={window_size}, num_features={num_features}, "
+                    f"total_features={self.total_features}, output_size={output_size}, timeframes={timeframes}")
 
         # Device configuration
         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
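
Note (not part of the diff): num_features is now treated as the already-flattened total across timeframes, so the caller does the multiplication. A hypothetical example (the timeframe list, per-timeframe feature count, and keyword values are made up):

    timeframes = ['1s', '1m', '1h', '1d']        # hypothetical
    features_per_timeframe = 26                  # hypothetical
    num_features = features_per_timeframe * len(timeframes)  # 104, passed in directly

    model = CNNModelPyTorch(window_size=20, num_features=num_features,
                            output_size=3, timeframes=timeframes)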
@@ -317,6 +337,10 @@ class CNNModelPyTorch(nn.Module):
         # Ensure input is on the correct device
         x = x.to(self.device)
 
+        # Log input tensor shape for debugging
+        input_shape = x.size()
+        logger.debug(f"Input tensor shape: {input_shape}")
+
         # Check input dimensions and reshape as needed
         if len(x.size()) == 2:
             # If input is [batch_size, features], reshape to [batch_size, features, 1]
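
Note (not part of the diff): the new shape trace uses logger.debug, so it stays silent at the default INFO level. A minimal way to surface it while debugging (the logger name is an assumption; use whatever name the module actually registers):

    import logging

    logging.basicConfig(level=logging.DEBUG)
    logging.getLogger('cnn_model').setLevel(logging.DEBUG)  # hypothetical logger name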
@@ -324,8 +348,17 @@ class CNNModelPyTorch(nn.Module):
 
             # Check and handle if input features don't match model expectations
             if feature_dim != self.total_features:
-                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features}), rebuilding layers")
-                self.rebuild_conv_layers(feature_dim)
+                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features})")
+                if not hasattr(self, 'rebuild_warning_shown'):
+                    logger.error(f"Dimension mismatch: Expected {self.total_features} features but got {feature_dim}")
+                    self.rebuild_warning_shown = True
+                # Don't rebuild - instead adapt the input
+                # If features are fewer, pad with zeros. If more, truncate
+                if feature_dim < self.total_features:
+                    padding = torch.zeros(batch_size, self.total_features - feature_dim, device=self.device)
+                    x = torch.cat([x, padding], dim=1)
+                else:
+                    x = x[:, :self.total_features]
 
             # For 1D input, use a sequence length of 1
             seq_len = 1
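
Note (not part of the diff): instead of rebuilding the conv layers, the 2D branch now zero-pads or truncates the feature axis. A toy illustration of that adaptation with made-up sizes (model expects 10 features):

    import torch

    total_features = 10               # what the model expects (hypothetical)
    x_small = torch.randn(4, 7)       # fewer features -> zero-padded
    x_large = torch.randn(4, 13)      # more features  -> truncated

    pad = torch.zeros(x_small.size(0), total_features - x_small.size(1))
    print(torch.cat([x_small, pad], dim=1).shape)  # torch.Size([4, 10])
    print(x_large[:, :total_features].shape)       # torch.Size([4, 10])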
@@ -336,14 +369,26 @@ class CNNModelPyTorch(nn.Module):
 
             # Check and handle if input dimensions don't match model expectations
             if feature_dim != self.total_features:
-                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features}), rebuilding layers")
-                self.rebuild_conv_layers(feature_dim)
+                logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features})")
+                if not hasattr(self, 'rebuild_warning_shown'):
+                    logger.error(f"Dimension mismatch: Expected {self.total_features} features but got {feature_dim}")
+                    self.rebuild_warning_shown = True
+                # Don't rebuild - instead adapt the input
+                # If features are fewer, pad with zeros. If more, truncate
+                if feature_dim < self.total_features:
+                    padding = torch.zeros(batch_size, seq_len, self.total_features - feature_dim, device=self.device)
+                    x = torch.cat([x, padding], dim=2)
+                else:
+                    x = x[:, :, :self.total_features]
 
             # Reshape input: [batch, window_size, features] -> [batch, features, window_size]
             x = x.permute(0, 2, 1)
         else:
             raise ValueError(f"Unexpected input shape: {x.size()}, expected 2D or 3D tensor")
 
+        # Log reshaped tensor for debugging
+        logger.debug(f"Reshaped tensor for convolution: {x.size()}")
+
         # Convolutional layers with dropout - safely handle small spatial dimensions
         try:
             x = self.dropout1(F.relu(self.norm1(self.conv1(x))))
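
Note (not part of the diff): the 3D branch applies the same pad-or-truncate adaptation along the last axis and then permutes to channels-first for the Conv1d stack. A toy illustration with made-up sizes:

    import torch

    batch, seq_len, feature_dim, total_features = 4, 20, 7, 10  # hypothetical sizes
    x = torch.randn(batch, seq_len, feature_dim)

    padding = torch.zeros(batch, seq_len, total_features - feature_dim)
    x = torch.cat([x, padding], dim=2)  # [4, 20, 10]
    x = x.permute(0, 2, 1)              # [4, 10, 20] = [batch, features, window_size]
    print(x.shape)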