added leverage slider

2025-05-30 22:33:41 +03:00
parent d870f74d0c
commit 7d8eca995e
21 changed files with 3205 additions and 2923 deletions
--- a/NN/models/enhanced_cnn.py
+++ b/NN/models/enhanced_cnn.py
@@ -110,96 +110,119 @@ class EnhancedCNN(nn.Module):
        logger.info(f"EnhancedCNN initialized with input shape: {input_shape}, actions: {n_actions}")
    
    def _build_network(self):
-        """Build the MASSIVELY enhanced neural network for 4GB VRAM budget"""
+        """Build the ULTRA MASSIVE enhanced neural network for maximum learning capacity"""
        
-        # MASSIVELY SCALED ARCHITECTURE for 4GB VRAM (up to ~50M parameters)
+        # ULTRA MASSIVE SCALED ARCHITECTURE for maximum learning (up to ~100M parameters)
        if self.channels > 1:
-            # Massive convolutional backbone with deeper residual blocks
+            # Ultra massive convolutional backbone with much deeper residual blocks
            self.conv_layers = nn.Sequential(
-                # Initial large conv block
-                nn.Conv1d(self.channels, 256, kernel_size=7, padding=3),  # Much wider initial layer
-                nn.BatchNorm1d(256),
+                # Initial ultra large conv block
+                nn.Conv1d(self.channels, 512, kernel_size=7, padding=3),  # Ultra wide initial layer
+                nn.BatchNorm1d(512),
                nn.ReLU(),
                nn.Dropout(0.1),
                
-                # First residual stage - 256 channels
-                ResidualBlock(256, 512),
-                ResidualBlock(512, 512),
-                ResidualBlock(512, 512),
+                # First residual stage - 512 to 768 channels
+                ResidualBlock(512, 768),
+                ResidualBlock(768, 768),
+                ResidualBlock(768, 768),
+                ResidualBlock(768, 768),  # Additional layer
                nn.MaxPool1d(kernel_size=2, stride=2),
                nn.Dropout(0.2),
                
-                # Second residual stage - 512 channels  
-                ResidualBlock(512, 1024),
+                # Second residual stage - 768 to 1024 channels  
+                ResidualBlock(768, 1024),
                ResidualBlock(1024, 1024),
                ResidualBlock(1024, 1024),
+                ResidualBlock(1024, 1024),  # Additional layer
                nn.MaxPool1d(kernel_size=2, stride=2),
                nn.Dropout(0.25),
                
-                # Third residual stage - 1024 channels
+                # Third residual stage - 1024 to 1536 channels
                ResidualBlock(1024, 1536),
                ResidualBlock(1536, 1536),
                ResidualBlock(1536, 1536),
+                ResidualBlock(1536, 1536),  # Additional layer
                nn.MaxPool1d(kernel_size=2, stride=2),
                nn.Dropout(0.3),
                
-                # Fourth residual stage - 1536 channels (MASSIVE)
+                # Fourth residual stage - 1536 to 2048 channels
                ResidualBlock(1536, 2048),
                ResidualBlock(2048, 2048),
                ResidualBlock(2048, 2048),
+                ResidualBlock(2048, 2048),  # Additional layer
+                nn.MaxPool1d(kernel_size=2, stride=2),
+                nn.Dropout(0.3),
+                
+                # Fifth residual stage - ULTRA MASSIVE 2048 to 3072 channels
+                ResidualBlock(2048, 3072),
+                ResidualBlock(3072, 3072),
+                ResidualBlock(3072, 3072),
+                ResidualBlock(3072, 3072),
                nn.AdaptiveAvgPool1d(1)  # Global average pooling
            )
-            # Massive feature dimension after conv layers
-            self.conv_features = 2048
+            # Ultra massive feature dimension after conv layers
+            self.conv_features = 3072
        else:
-            # For 1D vectors, use massive dense preprocessing
+            # For 1D vectors, use ultra massive dense preprocessing
            self.conv_layers = None
            self.conv_features = 0
        
-        # MASSIVE fully connected feature extraction layers
+        # ULTRA MASSIVE fully connected feature extraction layers
        if self.conv_layers is None:
-            # For 1D inputs - massive feature extraction
-            self.fc1 = nn.Linear(self.feature_dim, 2048)
-            self.features_dim = 2048
+            # For 1D inputs - ultra massive feature extraction
+            self.fc1 = nn.Linear(self.feature_dim, 3072)
+            self.features_dim = 3072
        else:
-            # For data processed by massive conv layers
-            self.fc1 = nn.Linear(self.conv_features, 2048)
-            self.features_dim = 2048
+            # For data processed by ultra massive conv layers
+            self.fc1 = nn.Linear(self.conv_features, 3072)
+            self.features_dim = 3072
        
-        # MASSIVE common feature extraction with multiple attention layers
+        # ULTRA MASSIVE common feature extraction with multiple deep layers
        self.fc_layers = nn.Sequential(
            self.fc1,
            nn.ReLU(),
            nn.Dropout(0.3),
-            nn.Linear(2048, 2048),  # Keep massive width
+            nn.Linear(3072, 3072),  # Keep ultra massive width
            nn.ReLU(),
            nn.Dropout(0.3),
-            nn.Linear(2048, 1536),  # Still very wide
+            nn.Linear(3072, 2560),  # Ultra wide hidden layer
            nn.ReLU(),
            nn.Dropout(0.3),
-            nn.Linear(1536, 1024),  # Large hidden layer
+            nn.Linear(2560, 2048),  # Still very wide
            nn.ReLU(),
            nn.Dropout(0.3),
-            nn.Linear(1024, 768),   # Final feature representation
+            nn.Linear(2048, 1536),  # Large hidden layer
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(1536, 1024),  # Final feature representation
            nn.ReLU()
        )
        
-        # Multiple attention mechanisms for different aspects
-        self.price_attention = SelfAttention(768)
-        self.volume_attention = SelfAttention(768) 
-        self.trend_attention = SelfAttention(768)
-        self.volatility_attention = SelfAttention(768)
+        # Multiple attention mechanisms for different aspects (larger capacity)
+        self.price_attention = SelfAttention(1024)      # Increased from 768
+        self.volume_attention = SelfAttention(1024) 
+        self.trend_attention = SelfAttention(1024)
+        self.volatility_attention = SelfAttention(1024)
+        self.momentum_attention = SelfAttention(1024)   # Additional attention
+        self.microstructure_attention = SelfAttention(1024)  # Additional attention
        
-        # Attention fusion layer
+        # Ultra massive attention fusion layer
        self.attention_fusion = nn.Sequential(
-            nn.Linear(768 * 4, 1024),  # Combine all attention outputs
+            nn.Linear(1024 * 6, 2048),  # Combine all 6 attention outputs
            nn.ReLU(),
            nn.Dropout(0.3),
-            nn.Linear(1024, 768)
+            nn.Linear(2048, 1536),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(1536, 1024)
        )
        
-        # MASSIVE dueling architecture with deeper networks
+        # ULTRA MASSIVE dueling architecture with much deeper networks
        self.advantage_stream = nn.Sequential(
+            nn.Linear(1024, 768),
+            nn.ReLU(),
+            nn.Dropout(0.3),
            nn.Linear(768, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
@@ -212,6 +235,9 @@ class EnhancedCNN(nn.Module):
        )
        
        self.value_stream = nn.Sequential(
+            nn.Linear(1024, 768),
+            nn.ReLU(),
+            nn.Dropout(0.3),
            nn.Linear(768, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
@@ -223,8 +249,11 @@ class EnhancedCNN(nn.Module):
            nn.Linear(128, 1)
        )
        
-        # MASSIVE extrema detection head with ensemble predictions
+        # ULTRA MASSIVE extrema detection head with deeper ensemble predictions
        self.extrema_head = nn.Sequential(
+            nn.Linear(1024, 768),
+            nn.ReLU(),
+            nn.Dropout(0.3),
            nn.Linear(768, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
@@ -236,9 +265,12 @@ class EnhancedCNN(nn.Module):
            nn.Linear(128, 3)  # 0=bottom, 1=top, 2=neither
        )
        
-        # MASSIVE multi-timeframe price prediction heads
+        # ULTRA MASSIVE multi-timeframe price prediction heads
        self.price_pred_immediate = nn.Sequential(
-            nn.Linear(768, 256),
+            nn.Linear(1024, 512),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
@@ -247,7 +279,10 @@ class EnhancedCNN(nn.Module):
        )
        
        self.price_pred_midterm = nn.Sequential(
-            nn.Linear(768, 256),
+            nn.Linear(1024, 512),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
@@ -256,7 +291,10 @@ class EnhancedCNN(nn.Module):
        )
        
        self.price_pred_longterm = nn.Sequential(
-            nn.Linear(768, 256),
+            nn.Linear(1024, 512),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
@@ -264,8 +302,11 @@ class EnhancedCNN(nn.Module):
            nn.Linear(128, 3)  # Up, Down, Sideways
        )
        
-        # MASSIVE value prediction with ensemble approaches
+        # ULTRA MASSIVE value prediction with ensemble approaches
        self.price_pred_value = nn.Sequential(
+            nn.Linear(1024, 768),
+            nn.ReLU(),
+            nn.Dropout(0.3),
            nn.Linear(768, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
@@ -280,7 +321,10 @@ class EnhancedCNN(nn.Module):
        # Additional specialized prediction heads for better accuracy
        # Volatility prediction head
        self.volatility_head = nn.Sequential(
-            nn.Linear(768, 256),
+            nn.Linear(1024, 512),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
@@ -290,7 +334,10 @@ class EnhancedCNN(nn.Module):
        
        # Support/Resistance level detection head
        self.support_resistance_head = nn.Sequential(
-            nn.Linear(768, 256),
+            nn.Linear(1024, 512),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
@@ -300,7 +347,10 @@ class EnhancedCNN(nn.Module):
        
        # Market regime classification head
        self.market_regime_head = nn.Sequential(
-            nn.Linear(768, 256),
+            nn.Linear(1024, 512),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
@@ -310,7 +360,10 @@ class EnhancedCNN(nn.Module):
        
        # Risk assessment head
        self.risk_head = nn.Sequential(
-            nn.Linear(768, 256),
+            nn.Linear(1024, 512),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
@@ -330,7 +383,7 @@ class EnhancedCNN(nn.Module):
        return False
        
    def forward(self, x):
-        """Forward pass through the MASSIVE network"""
+        """Forward pass through the ULTRA MASSIVE network"""
        batch_size = x.size(0)
        
        # Process different input shapes
@@ -349,7 +402,7 @@ class EnhancedCNN(nn.Module):
                    total_features = x_reshaped.size(1) * x_reshaped.size(2)
                    self._check_rebuild_network(total_features)
                
-                # Apply massive convolutions
+                # Apply ultra massive convolutions
                x_conv = self.conv_layers(x_reshaped)
                # Flatten: [batch, channels, 1] -> [batch, channels]
                x_flat = x_conv.view(batch_size, -1)
@@ -364,33 +417,40 @@ class EnhancedCNN(nn.Module):
            if x_flat.size(1) != self.feature_dim:
                self._check_rebuild_network(x_flat.size(1))
        
-        # Apply MASSIVE FC layers to get base features
-        features = self.fc_layers(x_flat)  # [batch, 768]
+        # Apply ULTRA MASSIVE FC layers to get base features
+        features = self.fc_layers(x_flat)  # [batch, 1024]
        
        # Apply multiple specialized attention mechanisms
-        features_3d = features.unsqueeze(1)  # [batch, 1, 768]
+        features_3d = features.unsqueeze(1)  # [batch, 1, 1024]
        
        # Get attention-refined features for different aspects
        price_features, _ = self.price_attention(features_3d)
-        price_features = price_features.squeeze(1)  # [batch, 768]
+        price_features = price_features.squeeze(1)  # [batch, 1024]
        
        volume_features, _ = self.volume_attention(features_3d)
-        volume_features = volume_features.squeeze(1)  # [batch, 768]
+        volume_features = volume_features.squeeze(1)  # [batch, 1024]
        
        trend_features, _ = self.trend_attention(features_3d)
-        trend_features = trend_features.squeeze(1)  # [batch, 768]
+        trend_features = trend_features.squeeze(1)  # [batch, 1024]
        
        volatility_features, _ = self.volatility_attention(features_3d)
-        volatility_features = volatility_features.squeeze(1)  # [batch, 768]
+        volatility_features = volatility_features.squeeze(1)  # [batch, 1024]
+        
+        momentum_features, _ = self.momentum_attention(features_3d)
+        momentum_features = momentum_features.squeeze(1)  # [batch, 1024]
+        
+        microstructure_features, _ = self.microstructure_attention(features_3d)
+        microstructure_features = microstructure_features.squeeze(1)  # [batch, 1024]
        
        # Fuse all attention outputs
        combined_attention = torch.cat([
            price_features, volume_features, 
-            trend_features, volatility_features
-        ], dim=1)  # [batch, 768*4]
+            trend_features, volatility_features,
+            momentum_features, microstructure_features
+        ], dim=1)  # [batch, 1024*6]
        
        # Apply attention fusion to get final refined features
-        features_refined = self.attention_fusion(combined_attention)  # [batch, 768]
+        features_refined = self.attention_fusion(combined_attention)  # [batch, 1024]
        
        # Calculate advantage and value (Dueling DQN architecture)
        advantage = self.advantage_stream(features_refined)
@@ -399,7 +459,7 @@ class EnhancedCNN(nn.Module):
        # Combine for Q-values (Dueling architecture)
        q_values = value + advantage - advantage.mean(dim=1, keepdim=True)
        
-        # Get massive ensemble of predictions
+        # Get ultra massive ensemble of predictions
        
        # Extrema predictions (bottom/top/neither detection)
        extrema_pred = self.extrema_head(features_refined)
@@ -435,7 +495,7 @@ class EnhancedCNN(nn.Module):
        return q_values, extrema_pred, price_predictions, features_refined, advanced_predictions
    
    def act(self, state, explore=True):
-        """Enhanced action selection with massive model predictions"""
+        """Enhanced action selection with ultra massive model predictions"""
        if explore and np.random.random() < 0.1:  # 10% random exploration
            return np.random.choice(self.n_actions)
        
@@ -471,7 +531,7 @@ class EnhancedCNN(nn.Module):
                risk_class = torch.argmax(risk, dim=1).item()
                risk_labels = ['Low Risk', 'Medium Risk', 'High Risk', 'Extreme Risk']
                
-                logger.info(f"MASSIVE Model Predictions:")
+                logger.info(f"ULTRA MASSIVE Model Predictions:")
                logger.info(f"  Volatility: {volatility_labels[volatility_class]} ({volatility[0, volatility_class]:.3f})")
                logger.info(f"  Support/Resistance: {sr_labels[sr_class]} ({sr[0, sr_class]:.3f})")
                logger.info(f"  Market Regime: {regime_labels[regime_class]} ({regime[0, regime_class]:.3f})")