enhancements
@@ -11,6 +11,39 @@ from typing import List, Tuple
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class PricePatternAttention(nn.Module):
    """
    Attention mechanism specifically designed to focus on price patterns
    that might indicate local extrema or trend reversals
    """
    def __init__(self, input_dim, hidden_dim=64):
        super(PricePatternAttention, self).__init__()
        self.query = nn.Linear(input_dim, hidden_dim)
        self.key = nn.Linear(input_dim, hidden_dim)
        self.value = nn.Linear(input_dim, hidden_dim)
        self.scale = torch.sqrt(torch.tensor(hidden_dim, dtype=torch.float32))
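        # Scaling by sqrt(hidden_dim) follows scaled dot-product attention: it keeps
        # the variance of the query/key scores roughly constant as hidden_dim grows,
        # so the softmax over attention weights does not saturate.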

    def forward(self, x):
        """Apply attention to input sequence"""
        # x shape: [batch_size, seq_len, features]
        batch_size, seq_len, _ = x.size()

        # Project input to query, key, value
        q = self.query(x)  # [batch_size, seq_len, hidden_dim]
        k = self.key(x)    # [batch_size, seq_len, hidden_dim]
        v = self.value(x)  # [batch_size, seq_len, hidden_dim]

        # Calculate attention scores
        scores = torch.matmul(q, k.transpose(-2, -1)) / self.scale  # [batch_size, seq_len, seq_len]

        # Apply softmax to get attention weights
        attn_weights = F.softmax(scores, dim=-1)  # [batch_size, seq_len, seq_len]

        # Apply attention to values
        output = torch.matmul(attn_weights, v)  # [batch_size, seq_len, hidden_dim]

        return output, attn_weights
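
# Usage sketch (illustrative; shapes assumed from the comments in forward()):
#
#     attn = PricePatternAttention(input_dim=256, hidden_dim=64)
#     x = torch.randn(8, 20, 256)       # [batch_size, seq_len, features]
#     context, weights = attn(x)        # context: [8, 20, 64], weights: [8, 20, 20]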

class CNNModelPyTorch(nn.Module):
    """
    CNN model for trading with multiple timeframes
@@ -30,7 +63,15 @@ class CNNModelPyTorch(nn.Module):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        logger.info(f"Using device: {self.device}")

-       # Convolutional layers
        # Create model architecture
        self._create_layers()

        # Move model to device
        self.to(self.device)

    def _create_layers(self):
        """Create all model layers with current feature dimensions"""
        # Convolutional layers - use total_features as input channels
        self.conv1 = nn.Conv1d(self.total_features, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(64)

@@ -40,24 +81,49 @@ class CNNModelPyTorch(nn.Module):
        self.conv3 = nn.Conv1d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(256)

-       # Calculate size after convolutions
-       conv_output_size = window_size * 256
        # Add price pattern attention layer
        self.attention = PricePatternAttention(256)

        # Extrema detection specialized convolutional layer
        self.extrema_conv = nn.Conv1d(256, 128, kernel_size=5, padding=2)
        self.extrema_bn = nn.BatchNorm1d(128)

        # Calculate size after convolutions - adjusted for attention output
        conv_output_size = self.window_size * 256

        # Fully connected layers
        self.fc1 = nn.Linear(conv_output_size, 512)
        self.fc2 = nn.Linear(512, 256)

        # Advantage and Value streams (Dueling DQN architecture)
-       self.fc3 = nn.Linear(256, output_size)  # Advantage stream
        self.fc3 = nn.Linear(256, self.output_size)  # Advantage stream
        self.value_fc = nn.Linear(256, 1)  # Value stream

        # Additional prediction head for extrema detection (tops/bottoms)
        self.extrema_fc = nn.Linear(256, 3)  # 0=bottom, 1=top, 2=neither

        # Initialize optimizer and scheduler
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, mode='max', factor=0.5, patience=5, verbose=True
        )
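        # Note: mode='max' assumes the monitored validation metric is one where higher
        # is better (e.g. accuracy or reward); the learning rate is halved after 5
        # scheduler steps without improvement.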

    def rebuild_conv_layers(self, input_channels):
        """
        Rebuild convolutional layers for different input dimensions

-       # Move model to device
        Args:
            input_channels: Number of input channels (features) in the data
        """
        logger.info(f"Rebuilding convolutional layers for {input_channels} input channels")

        # Update total features
        self.total_features = input_channels

        # Recreate all layers with new dimensions
        self._create_layers()

        # Move layers to device
        self.to(self.device)
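
    # Behaviour note (example shapes assumed): if incoming data suddenly carries 8
    # features per bar instead of the 5 the model was built with, forward() detects
    # the mismatch and calls rebuild_conv_layers(8). This re-runs _create_layers(),
    # which re-initializes the affected weights and also creates a fresh optimizer
    # and scheduler, before moving everything back to the model's device.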

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
@@ -65,8 +131,13 @@ class CNNModelPyTorch(nn.Module):
        # Ensure input is on the correct device
        x = x.to(self.device)

        # Check and handle if input dimensions don't match model expectations
        batch_size, window_len, feature_dim = x.size()
        if feature_dim != self.total_features:
            logger.warning(f"Input features ({feature_dim}) don't match model features ({self.total_features}), rebuilding layers")
            self.rebuild_conv_layers(feature_dim)

        # Reshape input: [batch, window_size, features] -> [batch, channels, window_size]
        batch_size = x.size(0)
        x = x.permute(0, 2, 1)

        # Convolutional layers
@@ -74,6 +145,26 @@ class CNNModelPyTorch(nn.Module):
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))

        # Store conv features for extrema detection
        conv_features = x

        # Reshape for attention: [batch, channels, window_size] -> [batch, window_size, channels]
        x_attention = x.permute(0, 2, 1)

        # Apply attention
        attention_output, attention_weights = self.attention(x_attention)

        # Reshape the attention output back to [batch, channels, window_size]; it is
        # kept as an auxiliary representation and is not fused into the main pathway,
        # to avoid dimension mismatch issues
        attention_reshaped = attention_output.permute(0, 2, 1)  # [batch, channels, window_size]

        # Apply extrema detection specialized layer
        extrema_features = F.relu(self.extrema_bn(self.extrema_conv(conv_features)))

        # Continue the main pathway from the convolutional features (no residual
        # connection with the attention output, to avoid dimension mismatches)
        x = conv_features

        # Flatten
        x = x.view(batch_size, -1)

@@ -88,7 +179,11 @@ class CNNModelPyTorch(nn.Module):
        # Combine value and advantage
        q_values = value + (advantage - advantage.mean(dim=1, keepdim=True))
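        # Dueling DQN head: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)); subtracting the
        # mean advantage keeps the value/advantage decomposition identifiable, since a
        # constant could otherwise be shifted freely between the two streams.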

-       return q_values, value
        # Also compute extrema prediction from the same features
        extrema_flat = extrema_features.view(batch_size, -1)  # note: currently unused
        extrema_pred = self.extrema_fc(x)  # Use the same features for extrema prediction

        return q_values, extrema_pred

    def predict(self, X):
        """Make predictions"""
@@ -101,11 +196,15 @@ class CNNModelPyTorch(nn.Module):
        X_tensor = X.to(self.device)

        with torch.no_grad():
-           q_values, value = self(X_tensor)
            q_values, extrema_pred = self(X_tensor)
            q_values_np = q_values.cpu().numpy()
            actions = np.argmax(q_values_np, axis=1)

-       return actions, q_values_np
        # Also return extrema predictions
        extrema_np = extrema_pred.cpu().numpy()
        extrema_classes = np.argmax(extrema_np, axis=1)

        return actions, q_values_np, extrema_classes
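
    # Illustrative call (names and shapes assumed): for a batch of windows shaped
    # [batch, window_size, n_features] passed as a torch tensor X,
    #     actions, q_vals, extrema = model.predict(X)
    # returns the argmax action per sample, the raw Q-values, and the extrema class
    # per sample (0=bottom, 1=top, 2=neither).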

    def save(self, path: str):
        """Save model weights"""