working training

2025-03-29 02:18:25 +02:00
parent 0b2000e3e7
commit 2255a8363a
4 changed files with 314 additions and 154 deletions
--- a/NN/models/cnn_model_pytorch.py
+++ b/NN/models/cnn_model_pytorch.py
@@ -24,60 +24,67 @@ logger = logging.getLogger(__name__)
 class CNNPyTorch(nn.Module):
    """PyTorch CNN model for time series analysis"""
    
-    def __init__(self, input_shape, output_size=3):
+    def __init__(self, input_shape, output_size=5):
        """
-        Initialize the CNN model.
+        Initialize the enhanced CNN model.
        
        Args:
            input_shape (tuple): Shape of input data (window_size, features)
-            output_size (int): Size of output (1 for regression, 3 for classification)
+            output_size (int): Always 5 for our trading signals
        """
        super(CNNPyTorch, self).__init__()
        
        window_size, num_features = input_shape
-        
-        # Architecture parameters
-        filters = [32, 64, 128]
-        kernel_sizes = [3, 5, 7]
-        lstm_units = 100
-        dense_units = 64
+        kernel_size = 5
        dropout_rate = 0.3
        
-        # Create parallel convolutional pathways
-        self.conv_paths = nn.ModuleList()
-        
-        for f, k in zip(filters, kernel_sizes):
-            path = nn.Sequential(
-                nn.Conv1d(num_features, f, kernel_size=k, padding='same'),
-                nn.ReLU(),
-                nn.BatchNorm1d(f),
-                nn.MaxPool1d(kernel_size=2, stride=1, padding=1),
-                nn.Dropout(dropout_rate)
-            )
-            self.conv_paths.append(path)
-        
-        # Calculate output size from conv paths
-        conv_output_size = sum(filters) * window_size
-        
-        # LSTM layer
-        self.lstm = nn.LSTM(
-            input_size=sum(filters),
-            hidden_size=lstm_units,
-            batch_first=True,
-            bidirectional=True
-        )
-        
-        # Dense layers
-        self.flatten = nn.Flatten()
-        self.dense1 = nn.Sequential(
-            nn.Linear(lstm_units * 2 * window_size, dense_units),
+        # Enhanced CNN Architecture
+        self.conv_layers = nn.Sequential(
+            # Block 1
+            nn.Conv1d(num_features, 64, kernel_size, padding='same'),
+            nn.BatchNorm1d(64),
            nn.ReLU(),
-            nn.BatchNorm1d(dense_units),
-            nn.Dropout(dropout_rate)
+            
+            # Block 2  
+            nn.Conv1d(64, 128, kernel_size, padding='same'),
+            nn.BatchNorm1d(128),
+            nn.ReLU(),
+            nn.MaxPool1d(2),
+            
+            # Block 3
+            nn.Conv1d(128, 256, kernel_size, padding='same'),
+            nn.BatchNorm1d(256),
+            nn.ReLU(),
+            
+            # Block 4
+            nn.Conv1d(256, 512, kernel_size, padding='same'),
+            nn.BatchNorm1d(512),
+            nn.ReLU(),
+            nn.MaxPool1d(2)
        )
        
-        # Output layer
-        self.output = nn.Linear(dense_units, output_size)
+        # Calculate flattened size after conv and pooling
+        conv_output_size = 512 * (window_size // 4)
+        
+        # Enhanced dense layers
+        self.dense_block = nn.Sequential(
+            nn.Flatten(),
+            nn.Linear(conv_output_size, 512),
+            nn.BatchNorm1d(512),
+            nn.ReLU(),
+            nn.Dropout(dropout_rate),
+            
+            nn.Linear(512, 256),
+            nn.BatchNorm1d(256),
+            nn.ReLU(),
+            nn.Dropout(dropout_rate),
+            
+            nn.Linear(256, 128),
+            nn.BatchNorm1d(128),
+            nn.ReLU(),
+            
+            nn.Linear(128, output_size)
+        )
        
        # Activation based on output size
        if output_size == 1:
@@ -89,7 +96,7 @@ class CNNPyTorch(nn.Module):
    
    def forward(self, x):
        """
-        Forward pass through the network.
+        Forward pass through enhanced network.
        
        Args:
            x: Input tensor of shape [batch_size, window_size, features]
@@ -97,35 +104,15 @@ class CNNPyTorch(nn.Module):
        Returns:
            Output tensor of shape [batch_size, output_size]
        """
-        batch_size, window_size, num_features = x.shape
-        
        # Transpose for conv1d: [batch, features, window]
        x_t = x.transpose(1, 2)
        
-        # Process through parallel conv paths
-        conv_outputs = []
-        for path in self.conv_paths:
-            conv_outputs.append(path(x_t))
+        # Process through all CNN layers
+        conv_out = self.conv_layers(x_t)
        
-        # Concatenate conv outputs
-        conv_concat = torch.cat(conv_outputs, dim=1)
+        # Process through dense layers
+        output = self.dense_block(conv_out)
        
-        # Transpose back for LSTM: [batch, window, features]
-        conv_concat = conv_concat.transpose(1, 2)
-        
-        # LSTM processing
-        lstm_out, _ = self.lstm(conv_concat)
-        
-        # Flatten
-        flattened = self.flatten(lstm_out)
-        
-        # Dense processing
-        dense_out = self.dense1(flattened)
-        
-        # Output
-        output = self.output(dense_out)
-        
-        # Apply activation
        return self.activation(output)


@@ -137,7 +124,7 @@ class CNNModelPyTorch:
    predictions with the CNN model.
    """
    
-    def __init__(self, window_size, num_features, output_size=3, timeframes=None):
+    def __init__(self, window_size, num_features, output_size=5, timeframes=None):
        """
        Initialize the CNN model.
        
@@ -506,41 +493,27 @@ class CNNModelPyTorch:
    
    def extract_hidden_features(self, X):
        """
        Extract hidden features from the model.

        Runs X through the convolutional stack and then through every module
        of the dense block except the final output ``Linear``, so the result
        is exactly the activation that feeds the output layer.

        Note: this switches ``self.model`` to eval mode and leaves it there.

        Args:
            X: Input data convertible to a float32 tensor. Dims 1 and 2 are
                swapped before the conv layers, so it is presumably shaped
                [batch_size, window_size, features] - confirm against callers.

        Returns:
            numpy.ndarray: Hidden features (output of the penultimate dense
            layer, including its activation).
        """
        # as_tensor accepts arrays, lists and tensors alike and avoids the
        # copy-construct warning torch.tensor() emits when X is already a tensor
        X_tensor = torch.as_tensor(X, dtype=torch.float32).to(self.device)

        # Inference only: eval mode + no_grad so no autograd graph is built
        self.model.eval()
        with torch.no_grad():
            # Conv1d consumes dim 1 as channels, hence the swap of dims 1 and 2
            x_t = X_tensor.transpose(1, 2)
            features = self.model.conv_layers(x_t)

            # Drop only the last module (the output Linear). Slicing with
            # [:-2] would also strip the module right before it, which in the
            # current dense block is the final activation, not a dropout.
            for layer in self.model.dense_block[:-1]:
                features = layer(features)

        return features.cpu().numpy()