Dobromir Popov
2025-03-29 04:09:03 +02:00
parent 43803caaf1
commit 8b3db10a85
3 changed files with 307 additions and 267 deletions


@@ -11,6 +11,7 @@ import logging
 import numpy as np
 import matplotlib.pyplot as plt
 from datetime import datetime
+import math
 import torch
 import torch.nn as nn
@@ -24,79 +25,84 @@ logger = logging.getLogger(__name__)
 class CNNPyTorch(nn.Module):
     """PyTorch CNN model for time series analysis"""
-    def __init__(self, input_shape, output_size=5):
+    def __init__(self, input_shape, output_size=3):
         """
-        Initialize the enhanced CNN model.
+        Initialize the CNN model.
         Args:
             input_shape (tuple): Shape of input data (window_size, features)
-            output_size (int): Always 5 for our trading signals
+            output_size (int): Size of the output (3 for BUY/HOLD/SELL)
         """
         super(CNNPyTorch, self).__init__()
         window_size, num_features = input_shape
-        kernel_size = 5
+        kernel_size = min(5, window_size)  # Ensure kernel size doesn't exceed window size
         dropout_rate = 0.3
-        # Enhanced CNN Architecture
+        # Calculate initial channel size based on number of features
+        initial_channels = max(32, num_features * 2)  # Scale channels with features
+        # CNN Architecture
         self.conv_layers = nn.Sequential(
             # Block 1
-            nn.Conv1d(num_features, 64, kernel_size, padding='same'),
-            nn.BatchNorm1d(64),
+            nn.Conv1d(num_features, initial_channels, kernel_size, padding='same'),
+            nn.BatchNorm1d(initial_channels),
             nn.ReLU(),
             nn.Dropout(dropout_rate),
             # Block 2
-            nn.Conv1d(64, 128, kernel_size, padding='same'),
-            nn.BatchNorm1d(128),
+            nn.Conv1d(initial_channels, initial_channels * 2, kernel_size, padding='same'),
+            nn.BatchNorm1d(initial_channels * 2),
             nn.ReLU(),
             nn.MaxPool1d(2),
             nn.Dropout(dropout_rate),
             # Block 3
-            nn.Conv1d(128, 256, kernel_size, padding='same'),
-            nn.BatchNorm1d(256),
+            nn.Conv1d(initial_channels * 2, initial_channels * 4, kernel_size, padding='same'),
+            nn.BatchNorm1d(initial_channels * 4),
             nn.ReLU(),
             nn.Dropout(dropout_rate),
             # Block 4
-            nn.Conv1d(256, 512, kernel_size, padding='same'),
-            nn.BatchNorm1d(512),
+            nn.Conv1d(initial_channels * 4, initial_channels * 8, kernel_size, padding='same'),
+            nn.BatchNorm1d(initial_channels * 8),
             nn.ReLU(),
-            nn.MaxPool1d(2)
+            nn.MaxPool1d(2),
+            nn.Dropout(dropout_rate)
         )
         # Calculate flattened size after conv and pooling
-        conv_output_size = 512 * (window_size // 4)
-        # Enhanced dense layers
+        conv_output_size = (initial_channels * 8) * (window_size // 4)
+        # Dense layers with scaled sizes
+        dense_size = min(2048, conv_output_size)  # Cap dense layer size
         self.dense_block = nn.Sequential(
             nn.Flatten(),
-            nn.Linear(conv_output_size, 512),
-            nn.BatchNorm1d(512),
+            nn.Linear(conv_output_size, dense_size),
+            nn.BatchNorm1d(dense_size),
             nn.ReLU(),
             nn.Dropout(dropout_rate),
-            nn.Linear(512, 256),
-            nn.BatchNorm1d(256),
+            nn.Linear(dense_size, dense_size // 2),
+            nn.BatchNorm1d(dense_size // 2),
             nn.ReLU(),
             nn.Dropout(dropout_rate),
-            nn.Linear(256, 128),
-            nn.BatchNorm1d(128),
+            nn.Linear(dense_size // 2, dense_size // 4),
+            nn.BatchNorm1d(dense_size // 4),
             nn.ReLU(),
             nn.Dropout(dropout_rate),
-            nn.Linear(128, output_size)
+            nn.Linear(dense_size // 4, output_size)
         )
-        # Activation based on output size
-        if output_size == 1:
-            self.activation = nn.Sigmoid()  # Binary classification or regression
-        elif output_size > 1:
-            self.activation = nn.Softmax(dim=1)  # Multi-class classification
-        else:
-            self.activation = nn.Identity()  # No activation
+        # Activation for output
+        self.activation = nn.Softmax(dim=1)
     def forward(self, x):
         """
-        Forward pass through enhanced network.
+        Forward pass through the network.
         Args:
             x: Input tensor of shape [batch_size, window_size, features]
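Note on the sizing arithmetic introduced above: the two MaxPool1d(2) layers halve the time dimension twice, which is where the window_size // 4 factor comes from. A minimal sketch of the resulting sizes, using hypothetical inputs (window_size=20, num_features=10) that are not part of this commit:

    initial_channels = max(32, 10 * 2)                 # 32 channels in block 1
    conv_output_size = (32 * 8) * (20 // 4)            # 256 channels * 5 time steps = 1280
    dense_size = min(2048, conv_output_size)           # 1280, under the 2048 cap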
@@ -107,14 +113,16 @@ class CNNPyTorch(nn.Module):
         # Transpose for conv1d: [batch, features, window]
         x_t = x.transpose(1, 2)
-        # Process through all CNN layers
+        # Process through CNN layers
         conv_out = self.conv_layers(x_t)
         # Process through dense layers
-        output = self.dense_block(conv_out)
-        return self.activation(output)
+        dense_out = self.dense_block(conv_out)
+        # Apply activation
+        output = self.activation(dense_out)
+        return output
 class CNNModelPyTorch:
     """
@@ -124,14 +132,14 @@
     predictions with the CNN model.
     """
-    def __init__(self, window_size, num_features, output_size=5, timeframes=None):
+    def __init__(self, window_size, num_features, output_size=3, timeframes=None):
         """
         Initialize the CNN model.
         Args:
             window_size (int): Size of the input window
             num_features (int): Number of features in the input data
-            output_size (int): Size of the output (1 for regression, 3 for classification)
+            output_size (int): Size of the output (default: 3 for BUY/HOLD/SELL)
             timeframes (list): List of timeframes used (for logging)
         """
         # Action tracking
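For reference, a hypothetical instantiation of the wrapper under the new default; the argument values are illustrative only:

    model = CNNModelPyTorch(window_size=20, num_features=10,
                            output_size=3, timeframes=['1m', '5m', '1h'])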
@@ -171,27 +179,23 @@
             output_size=self.output_size
         ).to(self.device)
-        # Initialize optimizer
+        # Initialize optimizer with learning rate schedule
         self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
+        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
+            self.optimizer, mode='max', factor=0.5, patience=10, verbose=True
+        )
-        # Initialize loss function based on output size
-        if self.output_size == 1:
-            self.criterion = nn.BCELoss()  # Binary classification
-        elif self.output_size > 1:
-            self.criterion = nn.CrossEntropyLoss()  # Multi-class classification
-        else:
-            self.criterion = nn.MSELoss()  # Regression
+        # Initialize loss function with class weights
+        class_weights = torch.tensor([1.0, 0.5, 1.0]).to(self.device)  # Lower weight for HOLD
+        self.criterion = nn.CrossEntropyLoss(weight=class_weights)
         logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")
-    def train_epoch(self, X_train, y_train, batch_size=32):
+    def train_epoch(self, X_train, y_train, future_prices=None, batch_size=32):
         """Train for one epoch and return loss and accuracy"""
         # Convert to PyTorch tensors
         X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)
-        if self.output_size == 1:
-            y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(self.device)
-        else:
-            y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)
+        y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)
         # Create DataLoader
         train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
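Two details in this hunk are easy to miss: the scheduler uses mode='max' because it is stepped with accuracy (a quantity to maximize), and the weight vector [1.0, 0.5, 1.0] halves the penalty for HOLD. A small sketch of the weighting effect, assuming the label encoding 0=BUY, 1=HOLD, 2=SELL implied by the "Lower weight for HOLD" comment; reduction='none' is used here only to expose the per-sample weights (the committed criterion keeps the default mean reduction):

    import torch
    import torch.nn as nn

    weights = torch.tensor([1.0, 0.5, 1.0])
    criterion = nn.CrossEntropyLoss(weight=weights, reduction='none')

    logits = torch.tensor([[0.3, 0.1, 2.0]])           # a confident SELL prediction
    print(criterion(logits, torch.tensor([1])))        # HOLD target: loss scaled by 0.5
    print(criterion(logits, torch.tensor([0])))        # BUY target: full-weight loss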
@@ -210,40 +214,44 @@
                 outputs = self.model(inputs)
                 # Calculate loss
-                if self.output_size == 1:
-                    loss = self.criterion(outputs, targets.unsqueeze(1))
-                else:
-                    loss = self.criterion(outputs, targets)
+                loss = self.criterion(outputs, targets)
                 # Backward pass and optimize
                 loss.backward()
+                # Clip gradients to prevent exploding gradients
+                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
                 self.optimizer.step()
                 # Statistics
                 running_loss += loss.item()
-                if self.output_size > 1:
-                    _, predicted = torch.max(outputs, 1)
-                    total += targets.size(0)
-                    correct += (predicted == targets).sum().item()
+                _, predicted = torch.max(outputs, 1)
+                total += targets.size(0)
+                correct += (predicted == targets).sum().item()
         epoch_loss = running_loss / len(train_loader)
         epoch_acc = correct / total if total > 0 else 0
-        return epoch_loss, epoch_acc
+        # Update learning rate scheduler
+        self.scheduler.step(epoch_acc)
+        # To maintain compatibility with the updated training code, we'll return 3 values
+        # But the price_loss will be zero since we're not using that in this model
+        return epoch_loss, 0.0, epoch_acc
-    def evaluate(self, X_val, y_val):
+    def evaluate(self, X_val, y_val, future_prices=None):
         """Evaluate on validation data and return loss and accuracy"""
         # Convert to PyTorch tensors
         X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
-        if self.output_size == 1:
-            y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
-        else:
-            y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)
+        y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)
         # Create DataLoader
         val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
         val_loader = DataLoader(val_dataset, batch_size=32)
         self.model.eval()
-        val_loss = 0.0
+        running_loss = 0.0
         correct = 0
         total = 0
@@ -253,20 +261,20 @@
                 outputs = self.model(inputs)
                 # Calculate loss
-                if self.output_size == 1:
-                    loss = self.criterion(outputs, targets.unsqueeze(1))
-                else:
-                    loss = self.criterion(outputs, targets)
-                val_loss += loss.item()
+                loss = self.criterion(outputs, targets)
+                running_loss += loss.item()
                 # Calculate accuracy
-                if self.output_size > 1:
-                    _, predicted = torch.max(outputs, 1)
-                    total += targets.size(0)
-                    correct += (predicted == targets).sum().item()
+                _, predicted = torch.max(outputs, 1)
+                total += targets.size(0)
+                correct += (predicted == targets).sum().item()
-        return val_loss / len(val_loader), correct / total if total > 0 else 0
+        val_loss = running_loss / len(val_loader)
+        val_acc = correct / total if total > 0 else 0
+        # To maintain compatibility with the updated training code, we'll return 3 values
+        # But the price_loss will be zero since we're not using that in this model
+        return val_loss, 0.0, val_acc
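With this change both train_epoch and evaluate return (loss, price_loss, accuracy), with price_loss fixed at 0.0. A hypothetical driver loop under the new contract; X_train, y_train, X_val and y_val are assumed to be numpy arrays of shape [n, window_size, features] with integer class labels:

    for epoch in range(10):
        train_loss, _, train_acc = model.train_epoch(X_train, y_train)
        val_loss, _, val_acc = model.evaluate(X_val, y_val)
        print(f"epoch {epoch}: loss={train_loss:.4f} acc={train_acc:.2%} "
              f"val_loss={val_loss:.4f} val_acc={val_acc:.2%}")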
     def predict(self, X):
         """Make predictions on input data"""
@@ -275,15 +283,13 @@
         with torch.no_grad():
             outputs = self.model(X_tensor)
-            if self.output_size > 1:
-                _, predicted = torch.max(outputs, 1)
-                return predicted.cpu().numpy()
-            else:
-                return outputs.cpu().numpy()
+            # To maintain compatibility with the transformer model, return the action probs
+            # And a dummy price prediction of zeros
+            return outputs.cpu().numpy(), np.zeros((len(X), 1))
     def predict_next_candles(self, X, n_candles=3):
         """
-        Predict the next n candles for each timeframe.
+        Predict the next n candles.
         Args:
             X: Input data of shape [batch_size, window_size, features]
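predict now returns a tuple, so call sites that expected a single array need updating. A hedged usage sketch; the BUY/HOLD/SELL index order is an assumption inferred from the class-weight comment earlier in this commit:

    import numpy as np

    action_probs, price_preds = model.predict(X)       # price_preds is always zeros here
    actions = np.argmax(action_probs, axis=1)          # assumed 0=BUY, 1=HOLD, 2=SELL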
@@ -296,33 +302,14 @@
         X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
         with torch.no_grad():
-            # Get the last window of data
-            last_window = X_tensor[-1:]  # [1, window_size, features]
+            # Get predictions for the input window
+            action_probs = self.model(X_tensor)
-            # Initialize predictions
+            # For compatibility, we'll return a dictionary with the timeframes
             predictions = {}
-            # For each timeframe, predict next n candles
             for i, tf in enumerate(self.timeframes):
-                # Extract features for this timeframe
-                tf_features = last_window[:, :, i*5:(i+1)*5]  # [1, window_size, 5]
-                # Predict next n candles
-                tf_predictions = []
-                current_window = tf_features
-                for _ in range(n_candles):
-                    # Get prediction for next candle
-                    output = self.model(current_window)
-                    tf_predictions.append(output.cpu().numpy())
-                    # Update window for next prediction
-                    current_window = torch.cat([
-                        current_window[:, 1:, :],
-                        output.unsqueeze(1)
-                    ], dim=1)
-                predictions[tf] = np.concatenate(tf_predictions, axis=0)
+                # Simple prediction: just repeat the current prediction for next n candles
+                predictions[tf] = np.tile(action_probs.cpu().numpy(), (n_candles, 1))
         return predictions
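The autoregressive rollout is replaced by a flat repeat: every timeframe now receives the same probabilities tiled n_candles times. A minimal numpy illustration of the np.tile call, with made-up values:

    import numpy as np

    action_probs = np.array([[0.7, 0.2, 0.1]])         # one window's BUY/HOLD/SELL probs
    tiled = np.tile(action_probs, (3, 1))              # shape (3, 3): identical rows per future candle

For a batch of b windows the tiled array has shape (3 * b, 3), since np.tile repeats along axis 0.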