fixed CNN training

models overhaul
ui fixes
2025-07-29 20:11:22 +03:00 · 2025-07-29 19:22:04 +03:00 · 2025-07-29 19:02:44 +03:00 · 2025-07-29 18:20:07 +03:00
10 changed files with 744 additions and 515 deletions
--- a/NN/models/cnn_model.py
+++ b/NN/models/cnn_model.py
@@ -1,201 +1,201 @@
-"""
-Legacy CNN Model Compatibility Layer
+# """
+# Legacy CNN Model Compatibility Layer

-This module provides compatibility redirects to the unified StandardizedCNN model.
-All legacy models (EnhancedCNNModel, CNNModelTrainer, CNNModel) have been retired
-in favor of the StandardizedCNN architecture.
-"""
+# This module provides compatibility redirects to the unified StandardizedCNN model.
+# All legacy models (EnhancedCNNModel, CNNModelTrainer, CNNModel) have been retired
+# in favor of the StandardizedCNN architecture.
+# """

-import logging
-import warnings
-from typing import Tuple, Dict, Any, Optional
-import torch
-import numpy as np
+# import logging
+# import warnings
+# from typing import Tuple, Dict, Any, Optional
+# import torch
+# import numpy as np

-# Import the standardized CNN model
-from .standardized_cnn import StandardizedCNN
+# # Import the standardized CNN model
+# from .standardized_cnn import StandardizedCNN

-logger = logging.getLogger(__name__)
+# logger = logging.getLogger(__name__)

-# Compatibility aliases and wrappers
-class EnhancedCNNModel:
-    """Legacy compatibility wrapper - redirects to StandardizedCNN"""
+# # Compatibility aliases and wrappers
+# class EnhancedCNNModel:
+#     """Legacy compatibility wrapper - redirects to StandardizedCNN"""
    
-    def __init__(self, *args, **kwargs):
-        warnings.warn(
-            "EnhancedCNNModel is deprecated. Use StandardizedCNN instead.",
-            DeprecationWarning,
-            stacklevel=2
-        )
-        # Create StandardizedCNN with default parameters
-        self.standardized_cnn = StandardizedCNN()
-        logger.warning("EnhancedCNNModel compatibility wrapper created - please migrate to StandardizedCNN")
+#     def __init__(self, *args, **kwargs):
+#         warnings.warn(
+#             "EnhancedCNNModel is deprecated. Use StandardizedCNN instead.",
+#             DeprecationWarning,
+#             stacklevel=2
+#         )
+#         # Create StandardizedCNN with default parameters
+#         self.standardized_cnn = StandardizedCNN()
+#         logger.warning("EnhancedCNNModel compatibility wrapper created - please migrate to StandardizedCNN")
    
-    def __getattr__(self, name):
-        """Delegate all method calls to StandardizedCNN"""
-        return getattr(self.standardized_cnn, name)
+#     def __getattr__(self, name):
+#         """Delegate all method calls to StandardizedCNN"""
+#         return getattr(self.standardized_cnn, name)


-class CNNModelTrainer:
-    """Legacy compatibility wrapper for CNN training"""
+# class CNNModelTrainer:
+#     """Legacy compatibility wrapper for CNN training"""
    
-    def __init__(self, model=None, *args, **kwargs):
-        warnings.warn(
-            "CNNModelTrainer is deprecated. Use StandardizedCNN.train_step() instead.",
-            DeprecationWarning,
-            stacklevel=2
-        )
-        if isinstance(model, EnhancedCNNModel):
-            self.model = model.standardized_cnn
-        else:
-            self.model = StandardizedCNN()
-        logger.warning("CNNModelTrainer compatibility wrapper created - please use StandardizedCNN.train_step()")
+#     def __init__(self, model=None, *args, **kwargs):
+#         warnings.warn(
+#             "CNNModelTrainer is deprecated. Use StandardizedCNN.train_step() instead.",
+#             DeprecationWarning,
+#             stacklevel=2
+#         )
+#         if isinstance(model, EnhancedCNNModel):
+#             self.model = model.standardized_cnn
+#         else:
+#             self.model = StandardizedCNN()
+#         logger.warning("CNNModelTrainer compatibility wrapper created - please use StandardizedCNN.train_step()")
    
-    def train_step(self, x, y, *args, **kwargs):
-        """Legacy train step wrapper"""
-        try:
-            # Convert to BaseDataInput format if needed
-            if hasattr(x, 'get_feature_vector'):
-                # Already BaseDataInput
-                base_input = x
-            else:
-                # Create mock BaseDataInput for legacy compatibility
-                from core.data_models import BaseDataInput
-                base_input = BaseDataInput()
-                # Set mock feature vector
-                if isinstance(x, torch.Tensor):
-                    feature_vector = x.flatten().cpu().numpy()
-                else:
-                    feature_vector = np.array(x).flatten()
+#     def train_step(self, x, y, *args, **kwargs):
+#         """Legacy train step wrapper"""
+#         try:
+#             # Convert to BaseDataInput format if needed
+#             if hasattr(x, 'get_feature_vector'):
+#                 # Already BaseDataInput
+#                 base_input = x
+#             else:
+#                 # Create mock BaseDataInput for legacy compatibility
+#                 from core.data_models import BaseDataInput
+#                 base_input = BaseDataInput()
+#                 # Set mock feature vector
+#                 if isinstance(x, torch.Tensor):
+#                     feature_vector = x.flatten().cpu().numpy()
+#                 else:
+#                     feature_vector = np.array(x).flatten()
                
-                # Pad or truncate to expected size
-                expected_size = self.model.expected_feature_dim
-                if len(feature_vector) < expected_size:
-                    padding = np.zeros(expected_size - len(feature_vector))
-                    feature_vector = np.concatenate([feature_vector, padding])
-                else:
-                    feature_vector = feature_vector[:expected_size]
+#                 # Pad or truncate to expected size
+#                 expected_size = self.model.expected_feature_dim
+#                 if len(feature_vector) < expected_size:
+#                     padding = np.zeros(expected_size - len(feature_vector))
+#                     feature_vector = np.concatenate([feature_vector, padding])
+#                 else:
+#                     feature_vector = feature_vector[:expected_size]
                
-                base_input._feature_vector = feature_vector
+#                 base_input._feature_vector = feature_vector
            
-            # Convert target to string format
-            if isinstance(y, torch.Tensor):
-                y_val = y.item() if y.numel() == 1 else y.argmax().item()
-            else:
-                y_val = int(y) if np.isscalar(y) else int(np.argmax(y))
+#             # Convert target to string format
+#             if isinstance(y, torch.Tensor):
+#                 y_val = y.item() if y.numel() == 1 else y.argmax().item()
+#             else:
+#                 y_val = int(y) if np.isscalar(y) else int(np.argmax(y))
            
-            target_map = {0: 'BUY', 1: 'SELL', 2: 'HOLD'}
-            target = target_map.get(y_val, 'HOLD')
+#             target_map = {0: 'BUY', 1: 'SELL', 2: 'HOLD'}
+#             target = target_map.get(y_val, 'HOLD')
            
-            # Use StandardizedCNN training
-            optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)
-            loss = self.model.train_step([base_input], [target], optimizer)
+#             # Use StandardizedCNN training
+#             optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)
+#             loss = self.model.train_step([base_input], [target], optimizer)
            
-            return {'total_loss': loss, 'main_loss': loss, 'accuracy': 0.5}
+#             return {'total_loss': loss, 'main_loss': loss, 'accuracy': 0.5}
            
-        except Exception as e:
-            logger.error(f"Legacy train_step error: {e}")
-            return {'total_loss': 0.0, 'main_loss': 0.0, 'accuracy': 0.5}
+#         except Exception as e:
+#             logger.error(f"Legacy train_step error: {e}")
+#             return {'total_loss': 0.0, 'main_loss': 0.0, 'accuracy': 0.5}


-class CNNModel:
-    """Legacy compatibility wrapper for CNN model interface"""
+# # class CNNModel:
+# #     """Legacy compatibility wrapper for CNN model interface"""
    
-    def __init__(self, input_shape=(900, 50), output_size=3, model_path=None):
-        warnings.warn(
-            "CNNModel is deprecated. Use StandardizedCNN directly.",
-            DeprecationWarning,
-            stacklevel=2
-        )
-        self.input_shape = input_shape
-        self.output_size = output_size
-        self.standardized_cnn = StandardizedCNN()
-        self.trainer = CNNModelTrainer(self.standardized_cnn)
-        logger.warning("CNNModel compatibility wrapper created - please migrate to StandardizedCNN")
+# #     def __init__(self, input_shape=(900, 50), output_size=3, model_path=None):
+# #         warnings.warn(
+# #             "CNNModel is deprecated. Use StandardizedCNN directly.",
+# #             DeprecationWarning,
+# #             stacklevel=2
+# #         )
+# #         self.input_shape = input_shape
+# #         self.output_size = output_size
+# #         self.standardized_cnn = StandardizedCNN()
+# #         self.trainer = CNNModelTrainer(self.standardized_cnn)
+# #         logger.warning("CNNModel compatibility wrapper created - please migrate to StandardizedCNN")
    
-    def build_model(self, **kwargs):
-        """Legacy build method - no-op for StandardizedCNN"""
-        return self
+# #     def build_model(self, **kwargs):
+# #         """Legacy build method - no-op for StandardizedCNN"""
+# #         return self
    
-    def predict(self, X):
-        """Legacy predict method"""
-        try:
-            # Convert input to BaseDataInput
-            from core.data_models import BaseDataInput
-            base_input = BaseDataInput()
+# #     def predict(self, X):
+# #         """Legacy predict method"""
+# #         try:
+# #             # Convert input to BaseDataInput
+# #             from core.data_models import BaseDataInput
+# #             base_input = BaseDataInput()
            
-            if isinstance(X, np.ndarray):
-                feature_vector = X.flatten()
-            else:
-                feature_vector = np.array(X).flatten()
+# #             if isinstance(X, np.ndarray):
+# #                 feature_vector = X.flatten()
+# #             else:
+# #                 feature_vector = np.array(X).flatten()
            
-            # Pad or truncate to expected size
-            expected_size = self.standardized_cnn.expected_feature_dim
-            if len(feature_vector) < expected_size:
-                padding = np.zeros(expected_size - len(feature_vector))
-                feature_vector = np.concatenate([feature_vector, padding])
-            else:
-                feature_vector = feature_vector[:expected_size]
+# #             # Pad or truncate to expected size
+# #             expected_size = self.standardized_cnn.expected_feature_dim
+# #             if len(feature_vector) < expected_size:
+# #                 padding = np.zeros(expected_size - len(feature_vector))
+# #                 feature_vector = np.concatenate([feature_vector, padding])
+# #             else:
+# #                 feature_vector = feature_vector[:expected_size]
            
-            base_input._feature_vector = feature_vector
+# #             base_input._feature_vector = feature_vector
            
-            # Get prediction from StandardizedCNN
-            result = self.standardized_cnn.predict_from_base_input(base_input)
+# #             # Get prediction from StandardizedCNN
+# #             result = self.standardized_cnn.predict_from_base_input(base_input)
            
-            # Convert to legacy format
-            action_map = {'BUY': 0, 'SELL': 1, 'HOLD': 2}
-            pred_class = np.array([action_map.get(result.predictions['action'], 2)])
-            pred_proba = np.array([result.predictions['action_probabilities']])
+# #             # Convert to legacy format
+# #             action_map = {'BUY': 0, 'SELL': 1, 'HOLD': 2}
+# #             pred_class = np.array([action_map.get(result.predictions['action'], 2)])
+# #             pred_proba = np.array([result.predictions['action_probabilities']])
            
-            return pred_class, pred_proba
+# #             return pred_class, pred_proba
            
-        except Exception as e:
-            logger.error(f"Legacy predict error: {e}")
-            # Return safe defaults
-            pred_class = np.array([2])  # HOLD
-            pred_proba = np.array([[0.33, 0.33, 0.34]])
-            return pred_class, pred_proba
+# #         except Exception as e:
+# #             logger.error(f"Legacy predict error: {e}")
+# #             # Return safe defaults
+# #             pred_class = np.array([2])  # HOLD
+# #             pred_proba = np.array([[0.33, 0.33, 0.34]])
+# #             return pred_class, pred_proba
    
-    def fit(self, X, y, **kwargs):
-        """Legacy fit method"""
-        try:
-            return self.trainer.train_step(X, y)
-        except Exception as e:
-            logger.error(f"Legacy fit error: {e}")
-            return self
+# #     def fit(self, X, y, **kwargs):
+# #         """Legacy fit method"""
+# #         try:
+# #             return self.trainer.train_step(X, y)
+# #         except Exception as e:
+# #             logger.error(f"Legacy fit error: {e}")
+# #             return self
    
-    def save(self, filepath: str):
-        """Legacy save method"""
-        try:
-            torch.save(self.standardized_cnn.state_dict(), filepath)
-            logger.info(f"StandardizedCNN saved to {filepath}")
-        except Exception as e:
-            logger.error(f"Error saving model: {e}")
+# #     def save(self, filepath: str):
+# #         """Legacy save method"""
+# #         try:
+# #             torch.save(self.standardized_cnn.state_dict(), filepath)
+# #             logger.info(f"StandardizedCNN saved to {filepath}")
+# #         except Exception as e:
+# #             logger.error(f"Error saving model: {e}")


-def create_enhanced_cnn_model(input_size: int = 60, 
-                            feature_dim: int = 50, 
-                            output_size: int = 3,
-                            base_channels: int = 256,
-                            device: str = 'cuda') -> Tuple[StandardizedCNN, CNNModelTrainer]:
-    """Legacy compatibility function - returns StandardizedCNN"""
-    warnings.warn(
-        "create_enhanced_cnn_model is deprecated. Use StandardizedCNN() directly.",
-        DeprecationWarning,
-        stacklevel=2
-    )
+# def create_enhanced_cnn_model(input_size: int = 60, 
+#                             feature_dim: int = 50, 
+#                             output_size: int = 3,
+#                             base_channels: int = 256,
+#                             device: str = 'cuda') -> Tuple[StandardizedCNN, CNNModelTrainer]:
+#     """Legacy compatibility function - returns StandardizedCNN"""
+#     warnings.warn(
+#         "create_enhanced_cnn_model is deprecated. Use StandardizedCNN() directly.",
+#         DeprecationWarning,
+#         stacklevel=2
+#     )
    
-    model = StandardizedCNN()
-    trainer = CNNModelTrainer(model)
+#     model = StandardizedCNN()
+#     trainer = CNNModelTrainer(model)
    
-    logger.warning("Legacy create_enhanced_cnn_model called - please use StandardizedCNN directly")
-    return model, trainer
+#     logger.warning("Legacy create_enhanced_cnn_model called - please use StandardizedCNN directly")
+#     return model, trainer


-# Export compatibility symbols
-__all__ = [
-    'EnhancedCNNModel',
-    'CNNModelTrainer', 
-    'CNNModel',
-    'create_enhanced_cnn_model'
-]
+# # Export compatibility symbols
+# __all__ = [
+#     'EnhancedCNNModel',
+#     'CNNModelTrainer', 
+#     # 'CNNModel',
+#     'create_enhanced_cnn_model'
+# ]
--- a/NN/models/dqn_agent.py
+++ b/NN/models/dqn_agent.py
@@ -23,11 +23,11 @@ logger = logging.getLogger(__name__)

 class DQNNetwork(nn.Module):
    """
-    Massive Deep Q-Network specifically designed for RL trading with unified BaseDataInput features
+    Configurable Deep Q-Network specifically designed for RL trading with unified BaseDataInput features
    Handles 7850 input features from multi-timeframe, multi-asset data
-    TARGET: 50M parameters for enhanced learning capacity
+    Architecture is configurable via config.yaml
    """
-    def __init__(self, input_dim: int, n_actions: int):
+    def __init__(self, input_dim: int, n_actions: int, config: dict = None):
        super(DQNNetwork, self).__init__()
        
        # Handle different input dimension formats
@@ -41,59 +41,65 @@ class DQNNetwork(nn.Module):
        
        self.n_actions = n_actions
        
-        # MASSIVE network architecture optimized for trading features
-        # Target: ~50M parameters
-        self.feature_extractor = nn.Sequential(
-            # Initial feature extraction with massive width
-            nn.Linear(self.input_size, 8192),  # 7850 -> 8192 = ~64M weights
-            nn.LayerNorm(8192),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-            
-            # Deep feature processing layers
-            nn.Linear(8192, 6144),  # 8192 -> 6144 = ~50M weights  
-            nn.LayerNorm(6144),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-            
-            nn.Linear(6144, 4096),  # 6144 -> 4096 = ~25M weights
-            nn.LayerNorm(4096),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-            
-            nn.Linear(4096, 3072),  # 4096 -> 3072 = ~12M weights
-            nn.LayerNorm(3072),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-            
-            nn.Linear(3072, 2048),  # 3072 -> 2048 = ~6M weights
-            nn.LayerNorm(2048),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-        )
+        # Get network architecture from config or use defaults
+        if config and 'network_architecture' in config:
+            arch_config = config['network_architecture']
+            feature_layers = arch_config.get('feature_layers', [4096, 3072, 2048, 1536, 1024])
+            regime_head = arch_config.get('regime_head', [512, 256])
+            price_direction_head = arch_config.get('price_direction_head', [512, 256])
+            volatility_head = arch_config.get('volatility_head', [512, 128])
+            value_head = arch_config.get('value_head', [512, 256])
+            advantage_head = arch_config.get('advantage_head', [512, 256])
+            dropout_rate = arch_config.get('dropout_rate', 0.1)
+            use_layer_norm = arch_config.get('use_layer_norm', True)
+        else:
+            # Default reduced architecture (half the original size)
+            feature_layers = [4096, 3072, 2048, 1536, 1024]
+            regime_head = [512, 256]
+            price_direction_head = [512, 256]
+            volatility_head = [512, 128]
+            value_head = [512, 256]
+            advantage_head = [512, 256]
+            dropout_rate = 0.1
+            use_layer_norm = True
+        
+        # Build configurable feature extractor
+        feature_layers_list = []
+        prev_size = self.input_size
+        
+        for layer_size in feature_layers:
+            feature_layers_list.append(nn.Linear(prev_size, layer_size))
+            if use_layer_norm:
+                feature_layers_list.append(nn.LayerNorm(layer_size))
+            feature_layers_list.append(nn.ReLU(inplace=True))
+            feature_layers_list.append(nn.Dropout(dropout_rate))
+            prev_size = layer_size
+        
+        self.feature_extractor = nn.Sequential(*feature_layers_list)
+        self.feature_size = feature_layers[-1]  # Final feature size
+        
+        # Build configurable network heads
+        def build_head_layers(input_size, layer_sizes, output_size):
+            layers = []
+            prev_size = input_size
+            for layer_size in layer_sizes:
+                layers.append(nn.Linear(prev_size, layer_size))
+                if use_layer_norm:
+                    layers.append(nn.LayerNorm(layer_size))
+                layers.append(nn.ReLU(inplace=True))
+                layers.append(nn.Dropout(dropout_rate))
+                prev_size = layer_size
+            layers.append(nn.Linear(prev_size, output_size))
+            return nn.Sequential(*layers)
        
        # Market regime detection head
-        self.regime_head = nn.Sequential(
-            nn.Linear(2048, 1024),
-            nn.LayerNorm(1024),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-            nn.Linear(1024, 512),
-            nn.LayerNorm(512), 
-            nn.ReLU(inplace=True),
-            nn.Linear(512, 4)  # trending, ranging, volatile, mixed
+        self.regime_head = build_head_layers(
+            self.feature_size, regime_head, 4  # trending, ranging, volatile, mixed
        )
        
        # Price direction prediction head - outputs direction and confidence
-        self.price_direction_head = nn.Sequential(
-            nn.Linear(2048, 1024),
-            nn.LayerNorm(1024),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-            nn.Linear(1024, 512),
-            nn.LayerNorm(512),
-            nn.ReLU(inplace=True),
-            nn.Linear(512, 2)  # [direction, confidence]
+        self.price_direction_head = build_head_layers(
+            self.feature_size, price_direction_head, 2  # [direction, confidence]
        )
        
        # Direction activation (tanh for -1 to 1)
@@ -102,38 +108,18 @@ class DQNNetwork(nn.Module):
        self.confidence_activation = nn.Sigmoid()
        
        # Volatility prediction head
-        self.volatility_head = nn.Sequential(
-            nn.Linear(2048, 1024),
-            nn.LayerNorm(1024),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-            nn.Linear(1024, 256),
-            nn.LayerNorm(256),
-            nn.ReLU(inplace=True),
-            nn.Linear(256, 4)  # predicted volatility for 4 timeframes
+        self.volatility_head = build_head_layers(
+            self.feature_size, volatility_head, 4  # predicted volatility for 4 timeframes
        )
        
        # Main Q-value head (dueling architecture)
-        self.value_head = nn.Sequential(
-            nn.Linear(2048, 1024),
-            nn.LayerNorm(1024),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-            nn.Linear(1024, 512),
-            nn.LayerNorm(512),
-            nn.ReLU(inplace=True),
-            nn.Linear(512, 1)  # State value
+        self.value_head = build_head_layers(
+            self.feature_size, value_head, 1  # Single value for dueling architecture
        )
        
-        self.advantage_head = nn.Sequential(
-            nn.Linear(2048, 1024),
-            nn.LayerNorm(1024),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-            nn.Linear(1024, 512),
-            nn.LayerNorm(512),
-            nn.ReLU(inplace=True),
-            nn.Linear(512, n_actions)  # Action advantages
+        # Advantage head (dueling architecture)
+        self.advantage_head = build_head_layers(
+            self.feature_size, advantage_head, n_actions  # Action advantages
        )
        
        # Initialize weights
@@ -248,7 +234,8 @@ class DQNAgent:
                 priority_memory: bool = True,
                 device=None,
                 model_name: str = "dqn_agent",
-                 enable_checkpoints: bool = True):
+                 enable_checkpoints: bool = True,
+                 config: dict = None):
        
        # Checkpoint management
        self.model_name = model_name
@@ -292,8 +279,8 @@ class DQNAgent:
        logger.info(f"DQN Agent using device: {self.device}")
        
        # Initialize models with RL-specific network architecture
-        self.policy_net = DQNNetwork(self.state_dim, self.n_actions).to(self.device)
-        self.target_net = DQNNetwork(self.state_dim, self.n_actions).to(self.device)
+        self.policy_net = DQNNetwork(self.state_dim, self.n_actions, config).to(self.device)
+        self.target_net = DQNNetwork(self.state_dim, self.n_actions, config).to(self.device)
        
        # Ensure models are on the correct device
        self.policy_net = self.policy_net.to(self.device)
--- a/config.yaml
+++ b/config.yaml
@@ -88,119 +88,14 @@ data:
    market_regime_detection: true
    volatility_analysis: true

-# Enhanced CNN Configuration
-cnn:
-  window_size: 20
-  features: ["open", "high", "low", "close", "volume"]
-  timeframes: ["1m", "5m", "15m", "1h", "4h", "1d"]
-  hidden_layers: [64, 128, 256]
-  dropout: 0.2
-  learning_rate: 0.001
-  batch_size: 32
-  epochs: 100
-  confidence_threshold: 0.6
-  early_stopping_patience: 10
-  model_dir: "models/enhanced_cnn"  # Ultra-fast scalping weights (500x leverage)
-  timeframe_importance:
-    "1s": 0.60   # Primary scalping signal
-    "1m": 0.20   # Short-term confirmation
-    "1h": 0.15   # Medium-term trend
-    "1d": 0.05   # Long-term direction (minimal)
-
-# Enhanced RL Agent Configuration
-rl:
-  state_size: 100  # Will be calculated dynamically based on features
-  action_space: 3  # BUY, HOLD, SELL
-  hidden_size: 256
-  epsilon: 1.0
-  epsilon_decay: 0.995
-  epsilon_min: 0.01
-  learning_rate: 0.0001
-  gamma: 0.99
-  memory_size: 10000
-  batch_size: 64
-  target_update_freq: 1000
-  buffer_size: 10000
-  model_dir: "models/enhanced_rl"
-  # Market regime adaptation
-  market_regime_weights:
-    trending: 1.2    # Higher confidence in trending markets
-    ranging: 0.8     # Lower confidence in ranging markets
-    volatile: 0.6    # Much lower confidence in volatile markets
-  # Prioritized experience replay
-  replay_alpha: 0.6  # Priority exponent
-  replay_beta: 0.4   # Importance sampling exponent
-
-# Enhanced Orchestrator Settings
-orchestrator:
-  # Model weights for decision combination
-  cnn_weight: 0.7      # Weight for CNN predictions
-  rl_weight: 0.3       # Weight for RL decisions
-  confidence_threshold: 0.45
-  confidence_threshold_close: 0.35
-  decision_frequency: 30
-  
-  # Multi-symbol coordination
-  symbol_correlation_matrix:
-    "ETH/USDT-BTC/USDT": 0.85  # ETH-BTC correlation
-    
-  # Perfect move marking
-  perfect_move_threshold: 0.02  # 2% price change to mark as significant
-  perfect_move_buffer_size: 10000
-  
-  # RL evaluation settings
-  evaluation_delay: 3600  # Evaluate actions after 1 hour
-  reward_calculation:
-    success_multiplier: 10    # Reward for correct predictions
-    failure_penalty: 5        # Penalty for wrong predictions
-    confidence_scaling: true   # Scale rewards by confidence
-
-  # Entry aggressiveness: 0.0 = very conservative (fewer, higher quality trades), 1.0 = very aggressive (more trades)
-  entry_aggressiveness: 0.5
-  # Exit aggressiveness: 0.0 = very conservative (let profits run), 1.0 = very aggressive (quick exits)
-  exit_aggressiveness: 0.5
-  
-  # Decision Fusion Configuration
-  decision_fusion:
-    enabled: true                    # Use neural network decision fusion instead of programmatic
-    mode: "neural"                   # "neural" or "programmatic"
-    input_size: 128                  # Size of input features for decision fusion network
-    hidden_size: 256                 # Hidden layer size
-    history_length: 20               # Number of recent decisions to include
-    training_interval: 10            # Train decision fusion every 10 decisions in programmatic mode
-    learning_rate: 0.001             # Learning rate for decision fusion network
-    batch_size: 32                   # Training batch size
-    min_samples_for_training: 20     # Lower threshold for faster training in programmatic mode
-
-# Training Configuration
-training:
-  learning_rate: 0.001
-  batch_size: 32
-  epochs: 100
-  validation_split: 0.2
-  early_stopping_patience: 10
-  
-  # CNN specific training
-  cnn_training_interval: 3600    # Train CNN every hour (was 6 hours)
-  min_perfect_moves: 50          # Reduced from 200 for faster learning
-  
-  # RL specific training  
-  rl_training_interval: 300      # Train RL every 5 minutes (was 1 hour)
-  min_experiences: 50            # Reduced from 100 for faster learning
-  training_steps_per_cycle: 20   # Increased from 10 for more learning
-
-  model_type: "optimized_short_term"
-  use_realtime: true
-  use_ticks: true
-  checkpoint_dir: "NN/models/saved/realtime_ticks_checkpoints"
-  save_best_model: true
-  save_final_model: false  # We only want to keep the best performing model
-  
-  # Continuous learning settings
-  continuous_learning: true
-  learning_from_trades: true
-  pattern_recognition: true
-  retrospective_learning: true
+# Model configurations have been moved to models.yml for better organization
+# See models.yml for all model-specific settings including:
+# - CNN configuration
+# - RL/DQN configuration  
+# - Orchestrator settings
+# - Training configuration
+# - Enhanced training system
+# - Real-time RL COB trader

 # Universal Trading Configuration (applies to all exchanges)
 trading:
@@ -227,69 +122,7 @@ memory:
  model_limit_gb: 4.0       # Per-model memory limit
  cleanup_interval: 1800    # Memory cleanup every 30 minutes
  
-# Enhanced Training System Configuration
-enhanced_training:
-  enabled: true                    # Enable enhanced real-time training
-  auto_start: true                 # Automatically start training when orchestrator starts
-  training_intervals:
-    cob_rl_training_interval: 1    # Train COB RL every 1 second (HIGHEST PRIORITY)
-    dqn_training_interval: 5       # Train DQN every 5 seconds
-    cnn_training_interval: 10      # Train CNN every 10 seconds
-    validation_interval: 60        # Validate every minute
-  batch_size: 64                   # Training batch size
-  memory_size: 10000              # Experience buffer size
-  min_training_samples: 100       # Minimum samples before training starts
-  adaptation_threshold: 0.1        # Performance threshold for adaptation
-  forward_looking_predictions: true # Enable forward-looking prediction validation
-  
-  # COB RL Priority Settings (since order book imbalance predicts price moves)
-  cob_rl_priority: true            # Enable COB RL as highest priority model
-  cob_rl_batch_size: 16           # Smaller batches for faster COB updates
-  cob_rl_min_samples: 5           # Lower threshold for COB training
-  
-# Real-time RL COB Trader Configuration
-realtime_rl:
-  # Model parameters for 400M parameter network (faster startup)
-  model:
-    input_size: 2000          # COB feature dimensions
-    hidden_size: 2048         # Optimized hidden layer size for 400M params
-    num_layers: 8             # Efficient transformer layers for faster training
-    learning_rate: 0.0001     # Higher learning rate for faster convergence
-    weight_decay: 0.00001     # Balanced L2 regularization
-    
-  # Inference configuration  
-  inference_interval_ms: 200  # Inference every 200ms
-  min_confidence_threshold: 0.7  # Minimum confidence for signal accumulation
-  required_confident_predictions: 3  # Need 3 confident predictions for trade
-  
-  # Training configuration
-  training_interval_s: 1.0    # Train every second
-  batch_size: 32              # Training batch size
-  replay_buffer_size: 1000    # Store last 1000 predictions for training
-  
-  # Signal accumulation
-  signal_buffer_size: 10      # Buffer size for signal accumulation
-  consensus_threshold: 3      # Need 3 signals in same direction
-  
-  # Model checkpointing
-  model_checkpoint_dir: "models/realtime_rl_cob"
-  save_interval_s: 300        # Save models every 5 minutes
-  
-  # COB integration
-  symbols: ["BTC/USDT", "ETH/USDT"]  # Symbols to trade
-  cob_feature_normalization: "robust"  # Feature normalization method
-  
-  # Reward engineering for RL
-  reward_structure:
-    correct_direction_base: 1.0     # Base reward for correct prediction
-    confidence_scaling: true        # Scale reward by confidence
-    magnitude_bonus: 0.5           # Bonus for predicting magnitude accurately
-    overconfidence_penalty: 1.5    # Penalty multiplier for wrong high-confidence predictions
-    trade_execution_multiplier: 10.0  # Higher weight for actual trade outcomes
-    
-  # Performance monitoring
-  statistics_interval_s: 60   # Print stats every minute
-  detailed_logging: true      # Enable detailed performance logging
+# Enhanced training and real-time RL configurations moved to models.yml

 # Web Dashboard
 web:
--- a/core/config.py
+++ b/core/config.py
@@ -24,16 +24,31 @@ class Config:
        self._setup_directories()
        
    def _load_config(self) -> Dict[str, Any]:
-        """Load configuration from YAML file"""
+        """Load configuration from YAML files (config.yaml + models.yml)"""
        try:
+            # Load main config
            if not self.config_path.exists():
                logger.warning(f"Config file {self.config_path} not found, using defaults")
-                return self._get_default_config()
-                
-            with open(self.config_path, 'r') as f:
-                config = yaml.safe_load(f)
-                
-            logger.info(f"Loaded configuration from {self.config_path}")
+                config = self._get_default_config()
+            else:
+                with open(self.config_path, 'r') as f:
+                    config = yaml.safe_load(f)
+                logger.info(f"Loaded main configuration from {self.config_path}")
+            
+            # Load models config
+            models_config_path = Path("models.yml")
+            if models_config_path.exists():
+                try:
+                    with open(models_config_path, 'r') as f:
+                        models_config = yaml.safe_load(f)
+                    # Merge models config into main config
+                    config.update(models_config)
+                    logger.info(f"Loaded models configuration from {models_config_path}")
+                except Exception as e:
+                    logger.warning(f"Error loading models.yml: {e}, using main config only")
+            else:
+                logger.info("models.yml not found, using main config only")
+            
            return config
            
        except Exception as e:
--- a/core/orchestrator.py
+++ b/core/orchestrator.py
@@ -605,7 +605,9 @@ class TradingOrchestrator:

                action_size = self.config.rl.get("action_space", 3)
                self.rl_agent = DQNAgent(
-                    state_shape=actual_state_size, n_actions=action_size
+                    state_shape=actual_state_size, 
+                    n_actions=action_size,
+                    config=self.config.rl
                )
                self.rl_agent.to(self.device)  # Move DQN agent to the determined device

@@ -2182,7 +2184,7 @@ class TradingOrchestrator:
            )

            # Clean up memory periodically
-            if len(self.recent_decisions[symbol]) % 200 == 0:  # Reduced from 50 to 200
+            if len(self.recent_decisions[symbol]) % 20 == 0:  # Reduced from 50 to 20
                self.model_registry.cleanup_all_models()

            return decision
@@ -2196,55 +2198,108 @@ class TradingOrchestrator:
    ):
        """Add training samples to models based on current predictions and market conditions"""
        try:
-            if not hasattr(self, "cnn_adapter") or not self.cnn_adapter:
-                return
-
            # Get recent price data to evaluate if predictions would be correct
-            recent_prices = self.data_provider.get_recent_prices(symbol, limit=10)
-            if not recent_prices or len(recent_prices) < 2:
-                return
+            # Use available methods from data provider
+            try:
+                # Try to get recent prices using get_price_at_index
+                recent_prices = []
+                for i in range(10):
+                    price = self.data_provider.get_price_at_index(symbol, i, '1m')
+                    if price is not None:
+                        recent_prices.append(price)
+                    else:
+                        break
+                
+                if len(recent_prices) < 2:
+                    # Fallback: use current price and a small assumed change
+                    price_change_pct = 0.1  # Assume small positive change
+                else:
+                    # Calculate recent price change
+                    price_change_pct = (
+                        (current_price - recent_prices[-2]) / recent_prices[-2] * 100
+                    )
+            except Exception as e:
+                logger.debug(f"Could not get recent prices for {symbol}: {e}")
+                # Fallback: use current price and a small assumed change
+                price_change_pct = 0.1  # Assume small positive change

-            # Calculate recent price change
-            price_change_pct = (
-                (current_price - recent_prices[-2]) / recent_prices[-2] * 100
-            )
+            # Get current position P&L for sophisticated reward calculation
+            current_position_pnl = self._get_current_position_pnl(symbol)
+            has_position = self._has_open_position(symbol)

-            # Add training samples for CNN predictions
+            # Add training samples for CNN predictions using sophisticated reward system
            for prediction in predictions:
                if "cnn" in prediction.model_name.lower():
-                    # Determine reward based on prediction accuracy
-                    reward = 0.0
-
-                    if prediction.action == "BUY" and price_change_pct > 0.1:
-                        reward = min(
-                            price_change_pct * 0.1, 1.0
-                        )  # Positive reward for correct BUY
-                    elif prediction.action == "SELL" and price_change_pct < -0.1:
-                        reward = min(
-                            abs(price_change_pct) * 0.1, 1.0
-                        )  # Positive reward for correct SELL
-                    elif prediction.action == "HOLD" and abs(price_change_pct) < 0.1:
-                        reward = 0.1  # Small positive reward for correct HOLD
-                    else:
-                        reward = -0.05  # Small negative reward for incorrect prediction
-
-                    # Add training sample
-                    self.cnn_adapter.add_training_sample(
-                        symbol, prediction.action, reward
-                    )
-                    logger.debug(
-                        f"Added CNN training sample: {prediction.action}, reward={reward:.3f}, price_change={price_change_pct:.2f}%"
+                    # Calculate sophisticated reward using the new PnL penalty/reward system
+                    sophisticated_reward, was_correct = self._calculate_sophisticated_reward(
+                        predicted_action=prediction.action,
+                        prediction_confidence=prediction.confidence,
+                        price_change_pct=price_change_pct,
+                        time_diff_minutes=1.0,  # Assume 1 minute for now
+                        has_price_prediction=False,
+                        symbol=symbol,
+                        has_position=has_position,
+                        current_position_pnl=current_position_pnl
                    )

-            # Trigger training if we have enough samples
-            if len(self.cnn_adapter.training_data) >= self.cnn_adapter.batch_size:
-                training_results = self.cnn_adapter.train(epochs=1)
-                logger.info(
-                    f"CNN training completed: loss={training_results.get('loss', 0):.4f}, accuracy={training_results.get('accuracy', 0):.4f}"
-                )
+                    # Create training record for the new training system
+                    training_record = {
+                        "symbol": symbol,
+                        "model_name": prediction.model_name,
+                        "action": prediction.action,
+                        "confidence": prediction.confidence,
+                        "timestamp": prediction.timestamp,
+                        "current_price": current_price,
+                        "price_change_pct": price_change_pct,
+                        "was_correct": was_correct,
+                        "sophisticated_reward": sophisticated_reward,
+                        "current_position_pnl": current_position_pnl,
+                        "has_position": has_position
+                    }
+
+                    # Use the new training system instead of old cnn_adapter
+                    if hasattr(self, "cnn_model") and self.cnn_model:
+                        # Train CNN model directly using the new system
+                        training_success = await self._train_cnn_model(
+                            model=self.cnn_model,
+                            model_name=prediction.model_name,
+                            record=training_record,
+                            prediction={"action": prediction.action, "confidence": prediction.confidence},
+                            reward=sophisticated_reward
+                        )
+                        
+                        if training_success:
+                            logger.debug(
+                                f"CNN training completed: action={prediction.action}, reward={sophisticated_reward:.3f}, "
+                                f"price_change={price_change_pct:.2f}%, was_correct={was_correct}, "
+                                f"position_pnl={current_position_pnl:.2f}"
+                            )
+                        else:
+                            logger.warning(f"CNN training failed for {prediction.model_name}")
+                    
+                    # Also try training through model registry if available
+                    elif self.model_registry and prediction.model_name in self.model_registry.models:
+                        model = self.model_registry.models[prediction.model_name]
+                        training_success = await self._train_cnn_model(
+                            model=model,
+                            model_name=prediction.model_name,
+                            record=training_record,
+                            prediction={"action": prediction.action, "confidence": prediction.confidence},
+                            reward=sophisticated_reward
+                        )
+                        
+                        if training_success:
+                            logger.debug(
+                                f"CNN training via registry completed: {prediction.model_name}, "
+                                f"reward={sophisticated_reward:.3f}, was_correct={was_correct}"
+                            )
+                        else:
+                            logger.warning(f"CNN training via registry failed for {prediction.model_name}")

        except Exception as e:
            logger.error(f"Error adding training samples from predictions: {e}")
+            import traceback
+            logger.error(f"Traceback: {traceback.format_exc()}")

    async def _get_all_predictions(self, symbol: str) -> List[Prediction]:
        """Get predictions from all registered models with input data storage"""
--- a/data/ui_state.json
+++ b/data/ui_state.json
@@ -9,15 +9,21 @@
            "training_enabled": true
        },
        "cob_rl": {
-            "inference_enabled": true,
+            "inference_enabled": false,
            "training_enabled": true
        },
        "decision_fusion": {
            "inference_enabled": false,
            "training_enabled": false
+        },
+        "transformer": {
+            "inference_enabled": false,
+            "training_enabled": true
+        },
+        "dqn_agent": {
+            "inference_enabled": false,
+            "training_enabled": true
        }
-
-
    },
-    "timestamp": "2025-07-29T15:55:43.690404"
+    "timestamp": "2025-07-29T19:17:32.971226"
 }
--- a/models.yml
+++ b/models.yml
@@ -0,0 +1,198 @@
+# Model Configurations
+# This file contains all model-specific configurations to keep the main config.yaml clean
+
+# Enhanced CNN Configuration (cnn model do not use yml config. do not change this)
+# cnn:
+#   window_size: 20
+#   features: ["open", "high", "low", "close", "volume"]
+#   timeframes: ["1s", "1m", "1h", "1d"]
+#   hidden_layers: [64, 128, 256]
+#   dropout: 0.2
+#   learning_rate: 0.001
+#   batch_size: 32
+#   epochs: 100
+#   confidence_threshold: 0.6
+#   early_stopping_patience: 10
+#   model_dir: "models/enhanced_cnn"  # Ultra-fast scalping weights (500x leverage)
+#   timeframe_importance:
+#     "1s": 0.60   # Primary scalping signal
+#     "1m": 0.20   # Short-term confirmation
+#     "1h": 0.15   # Medium-term trend
+#     "1d": 0.05   # Long-term direction (minimal)
+
+# Enhanced RL Agent Configuration
+rl:
+  state_size: 100  # Will be calculated dynamically based on features
+  action_space: 3  # BUY, HOLD, SELL
+  hidden_size: 256
+  epsilon: 1.0
+  epsilon_decay: 0.995
+  epsilon_min: 0.01
+  learning_rate: 0.0001
+  gamma: 0.99
+  memory_size: 10000
+  batch_size: 64
+  target_update_freq: 1000
+  buffer_size: 10000
+  model_dir: "models/enhanced_rl"
+  
+  # DQN Network Architecture Configuration
+  network_architecture:
+    # Feature extractor layers (reduced by half from original)
+    feature_layers: [4096, 3072, 2048, 1536, 1024]  # Reduced from [8192, 6144, 4096, 3072, 2048]
+    # Market regime detection head
+    regime_head: [512, 256]  # Reduced from [1024, 512]
+    # Price direction prediction head
+    price_direction_head: [512, 256]  # Reduced from [1024, 512]
+    # Volatility prediction head
+    volatility_head: [512, 128]  # Reduced from [1024, 256]
+    # Main Q-value head (dueling architecture)
+    value_head: [512, 256]  # Reduced from [1024, 512]
+    advantage_head: [512, 256]  # Reduced from [1024, 512]
+    # Dropout rate
+    dropout_rate: 0.1
+    # Layer normalization
+    use_layer_norm: true
+  
+  # Market regime adaptation
+  market_regime_weights:
+    trending: 1.2    # Higher confidence in trending markets
+    ranging: 0.8     # Lower confidence in ranging markets
+    volatile: 0.6    # Much lower confidence in volatile markets
+  # Prioritized experience replay
+  replay_alpha: 0.6  # Priority exponent
+  replay_beta: 0.4   # Importance sampling exponent
+
+# Real-time RL COB Trader Configuration
+realtime_rl:
+  # Model parameters for 400M parameter network (faster startup)
+  model:
+    input_size: 2000          # COB feature dimensions
+    hidden_size: 2048         # Optimized hidden layer size for 400M params
+    num_layers: 8             # Efficient transformer layers for faster training
+    learning_rate: 0.0001     # Higher learning rate for faster convergence
+    weight_decay: 0.00001     # Balanced L2 regularization
+    
+  # Inference configuration  
+  inference_interval_ms: 200  # Inference every 200ms
+  min_confidence_threshold: 0.7  # Minimum confidence for signal accumulation
+  required_confident_predictions: 3  # Need 3 confident predictions for trade
+  
+  # Training configuration
+  training_interval_s: 1.0    # Train every second
+  batch_size: 32              # Training batch size
+  replay_buffer_size: 1000    # Store last 1000 predictions for training
+  
+  # Signal accumulation
+  signal_buffer_size: 10      # Buffer size for signal accumulation
+  consensus_threshold: 3      # Need 3 signals in same direction
+  
+  # Model checkpointing
+  model_checkpoint_dir: "models/realtime_rl_cob"
+  save_interval_s: 300        # Save models every 5 minutes
+  
+  # COB integration
+  symbols: ["BTC/USDT", "ETH/USDT"]  # Symbols to trade
+  cob_feature_normalization: "robust"  # Feature normalization method
+  
+  # Reward engineering for RL
+  reward_structure:
+    correct_direction_base: 1.0     # Base reward for correct prediction
+    confidence_scaling: true        # Scale reward by confidence
+    magnitude_bonus: 0.5           # Bonus for predicting magnitude accurately
+    overconfidence_penalty: 1.5    # Penalty multiplier for wrong high-confidence predictions
+    trade_execution_multiplier: 10.0  # Higher weight for actual trade outcomes
+    
+  # Performance monitoring
+  statistics_interval_s: 60   # Print stats every minute
+  detailed_logging: true      # Enable detailed performance logging
+
+# Enhanced Orchestrator Settings
+orchestrator:
+  # Model weights for decision combination
+  cnn_weight: 0.7      # Weight for CNN predictions
+  rl_weight: 0.3       # Weight for RL decisions
+  confidence_threshold: 0.45
+  confidence_threshold_close: 0.35
+  decision_frequency: 30
+  
+  # Multi-symbol coordination
+  symbol_correlation_matrix:
+    "ETH/USDT-BTC/USDT": 0.85  # ETH-BTC correlation
+    
+  # Perfect move marking
+  perfect_move_threshold: 0.02  # 2% price change to mark as significant
+  perfect_move_buffer_size: 10000
+  
+  # RL evaluation settings
+  evaluation_delay: 3600  # Evaluate actions after 1 hour
+  reward_calculation:
+    success_multiplier: 10    # Reward for correct predictions
+    failure_penalty: 5        # Penalty for wrong predictions
+    confidence_scaling: true   # Scale rewards by confidence
+
+  # Entry aggressiveness: 0.0 = very conservative (fewer, higher quality trades), 1.0 = very aggressive (more trades)
+  entry_aggressiveness: 0.5
+  # Exit aggressiveness: 0.0 = very conservative (let profits run), 1.0 = very aggressive (quick exits)
+  exit_aggressiveness: 0.5
+  
+  # Decision Fusion Configuration
+  decision_fusion:
+    enabled: true                    # Use neural network decision fusion instead of programmatic
+    mode: "neural"                   # "neural" or "programmatic"
+    input_size: 128                  # Size of input features for decision fusion network
+    hidden_size: 256                 # Hidden layer size
+    history_length: 20               # Number of recent decisions to include
+    training_interval: 10            # Train decision fusion every 10 decisions in programmatic mode
+    learning_rate: 0.001             # Learning rate for decision fusion network
+    batch_size: 32                   # Training batch size
+    min_samples_for_training: 20     # Lower threshold for faster training in programmatic mode
+
+# Training Configuration
+training:
+  learning_rate: 0.001
+  batch_size: 32
+  epochs: 100
+  validation_split: 0.2
+  early_stopping_patience: 10
+  
+  # CNN specific training
+  cnn_training_interval: 3600    # Train CNN every hour (was 6 hours)
+  min_perfect_moves: 50          # Reduced from 200 for faster learning
+  
+  # RL specific training  
+  rl_training_interval: 300      # Train RL every 5 minutes (was 1 hour)
+  min_experiences: 50            # Reduced from 100 for faster learning
+  training_steps_per_cycle: 20   # Increased from 10 for more learning
+
+  model_type: "optimized_short_term"
+  use_realtime: true
+  use_ticks: true
+  checkpoint_dir: "NN/models/saved/realtime_ticks_checkpoints"
+  save_best_model: true
+  save_final_model: false  # We only want to keep the best performing model
+  
+  # Continuous learning settings
+  continuous_learning: true
+  adaptive_learning_rate: true
+  performance_threshold: 0.6
+
+# Enhanced Training System Configuration
+enhanced_training:
+  enabled: true                    # Enable enhanced real-time training
+  auto_start: true                 # Automatically start training when orchestrator starts
+  training_intervals:
+    cob_rl_training_interval: 1    # Train COB RL every 1 second (HIGHEST PRIORITY)
+    dqn_training_interval: 5       # Train DQN every 5 seconds
+    cnn_training_interval: 10      # Train CNN every 10 seconds
+    validation_interval: 60        # Validate every minute
+  batch_size: 64                   # Training batch size
+  memory_size: 10000              # Experience buffer size
+  min_training_samples: 100       # Minimum samples before training starts
+  adaptation_threshold: 0.1        # Performance threshold for adaptation
+  forward_looking_predictions: true # Enable forward-looking prediction validation
+  
+  # COB RL Priority Settings (since order book imbalance predicts price moves)
+  cob_rl_priority: true            # Enable COB RL as highest priority model
+  cob_rl_batch_size: 16           # Smaller batches for faster COB updates
+  cob_rl_min_samples: 5           # Lower threshold for COB training 
--- a/web/clean_dashboard.py
+++ b/web/clean_dashboard.py
@@ -328,6 +328,7 @@ class CleanTradingDashboard:
            'https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css',
            'https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css'
        ])
+                        #, suppress_callback_exceptions=True)
        
        # Suppress Dash development mode logging
        self.app.enable_dev_tools(debug=False, dev_tools_silence_routes_logging=True)
@@ -864,14 +865,32 @@ class CleanTradingDashboard:
                        available_models[model_name] = {'name': model_name, 'type': 'unknown'}
                logger.debug(f"Found {len(toggle_models)} models in toggle states")
            
+            # Apply model name mapping to match orchestrator's internal mapping
+            # This ensures component IDs match what the orchestrator expects
+            mapped_models = {}
+            model_mapping = {
+                'dqn_agent': 'dqn',
+                'enhanced_cnn': 'cnn',
+                'extrema_trainer': 'extrema_trainer',
+                'decision': 'decision_fusion',
+                'cob_rl': 'cob_rl',
+                'transformer': 'transformer'
+            }
+            
+            for model_name, model_info in available_models.items():
+                # Use mapped name if available, otherwise use original name
+                mapped_name = model_mapping.get(model_name, model_name)
+                mapped_models[mapped_name] = model_info
+                logger.debug(f"Mapped model name: {model_name} -> {mapped_name}")
+            
            # Fallback: Add known models if none found
-            if not available_models:
+            if not mapped_models:
                fallback_models = ['dqn', 'cnn', 'cob_rl', 'decision_fusion', 'transformer']
                for model_name in fallback_models:
-                    available_models[model_name] = {'name': model_name, 'type': 'fallback'}
+                    mapped_models[model_name] = {'name': model_name, 'type': 'fallback'}
                logger.warning(f"Using fallback models: {fallback_models}")
            
-            return available_models
+            return mapped_models
            
        except Exception as e:
            logger.error(f"Error getting available models: {e}")
@@ -916,13 +935,25 @@ class CleanTradingDashboard:
            enabled = bool(value and len(value) > 0)  # Convert list to boolean
            
            if self.orchestrator:
+                # Map component model name back to orchestrator's expected model name
+                reverse_mapping = {
+                    'dqn': 'dqn_agent',
+                    'cnn': 'enhanced_cnn',
+                    'decision_fusion': 'decision',
+                    'extrema_trainer': 'extrema_trainer',
+                    'cob_rl': 'cob_rl',
+                    'transformer': 'transformer'
+                }
+                
+                orchestrator_model_name = reverse_mapping.get(model_name, model_name)
+                
                # Update orchestrator toggle state
                if toggle_type == 'inference':
-                    self.orchestrator.set_model_toggle_state(model_name, inference_enabled=enabled)
+                    self.orchestrator.set_model_toggle_state(orchestrator_model_name, inference_enabled=enabled)
                elif toggle_type == 'training':
-                    self.orchestrator.set_model_toggle_state(model_name, training_enabled=enabled)
+                    self.orchestrator.set_model_toggle_state(orchestrator_model_name, training_enabled=enabled)
                
-                logger.info(f"Model {model_name} {toggle_type} toggle: {enabled}")
+                logger.info(f"Model {model_name} ({orchestrator_model_name}) {toggle_type} toggle: {enabled}")
                
                # Update dashboard state variables for backward compatibility
                self._update_dashboard_state_variable(model_name, toggle_type, enabled)
@@ -1333,18 +1364,25 @@ class CleanTradingDashboard:
                error_msg = html.P(f"COB Error: {str(e)}", className="text-danger small")
                return error_msg, error_msg
        
+        # Original training metrics callback - temporarily disabled for testing
+        # @self.app.callback(
+        #     Output('training-metrics', 'children'),
        @self.app.callback(
            Output('training-metrics', 'children'),
-            [Input('slow-interval-component', 'n_intervals')]  # OPTIMIZED: Move to 10s interval
+            [Input('slow-interval-component', 'n_intervals'),
+             Input('fast-interval-component', 'n_intervals'),  # Add fast interval for testing
+             Input('refresh-training-metrics-btn', 'n_clicks')]  # Add manual refresh button
        )
-        def update_training_metrics(n):
+        def update_training_metrics(slow_intervals, fast_intervals, n_clicks):
            """Update training metrics"""
+            logger.info(f"update_training_metrics callback triggered with slow_intervals={slow_intervals}, fast_intervals={fast_intervals}, n_clicks={n_clicks}")
            try:
                # Get toggle states from orchestrator
                toggle_states = {}
                if self.orchestrator:
                    # Get all available models dynamically
                    available_models = self._get_available_models()
+                    logger.info(f"Available models: {list(available_models.keys())}")
                    for model_name in available_models.keys():
                        toggle_states[model_name] = self.orchestrator.get_model_toggle_state(model_name)
                else:
@@ -1354,24 +1392,48 @@ class CleanTradingDashboard:
                        toggle_states[model_name] = state
                # Now using slow-interval-component (10s) - no batching needed
                
+                logger.info(f"Getting training metrics with toggle_states: {toggle_states}")
                metrics_data = self._get_training_metrics(toggle_states)
-                logger.debug(f"update_training_metrics callback: got metrics_data type={type(metrics_data)}")
+                logger.info(f"update_training_metrics callback: got metrics_data type={type(metrics_data)}")
                if metrics_data and isinstance(metrics_data, dict):
-                    logger.debug(f"Metrics data keys: {list(metrics_data.keys())}")
+                    logger.info(f"Metrics data keys: {list(metrics_data.keys())}")
                    if 'loaded_models' in metrics_data:
-                        logger.debug(f"Loaded models count: {len(metrics_data['loaded_models'])}")
-                        logger.debug(f"Loaded model names: {list(metrics_data['loaded_models'].keys())}")
+                        logger.info(f"Loaded models count: {len(metrics_data['loaded_models'])}")
+                        logger.info(f"Loaded model names: {list(metrics_data['loaded_models'].keys())}")
                    else:
                        logger.warning("No 'loaded_models' key in metrics_data!")
                else:
                    logger.warning(f"Invalid metrics_data: {metrics_data}")
-                return self.component_manager.format_training_metrics(metrics_data)
+                
+                logger.info("Formatting training metrics...")
+                formatted_metrics = self.component_manager.format_training_metrics(metrics_data)
+                logger.info(f"Formatted metrics type: {type(formatted_metrics)}, length: {len(formatted_metrics) if isinstance(formatted_metrics, list) else 'N/A'}")
+                return formatted_metrics
            except PreventUpdate:
+                logger.info("PreventUpdate raised in training metrics callback")
                raise
            except Exception as e:
                logger.error(f"Error updating training metrics: {e}")
+                import traceback
+                logger.error(f"Traceback: {traceback.format_exc()}")
                return [html.P(f"Error: {str(e)}", className="text-danger")]
        
+        # Test callback for training metrics (commented out - using real callback now)
+        # @self.app.callback(
+        #     Output('training-metrics', 'children'),
+        #     [Input('refresh-training-metrics-btn', 'n_clicks')],
+        #     prevent_initial_call=False
+        # )
+        # def test_training_metrics_callback(n_clicks):
+        #     """Test callback for training metrics"""
+        #     logger.info(f"test_training_metrics_callback triggered with n_clicks={n_clicks}")
+        #     try:
+        #         # Return a simple test message
+        #         return [html.P("Training metrics test - callback is working!", className="text-success")]
+        #     except Exception as e:
+        #         logger.error(f"Error in test callback: {e}")
+        #         return [html.P(f"Error: {str(e)}", className="text-danger")]
+        
        # Manual trading buttons
        @self.app.callback(
            Output('manual-buy-btn', 'children'),
@@ -3651,7 +3713,17 @@ class CleanTradingDashboard:
                    available_models = self._get_available_models()
                    toggle_states = {}
                    for model_name in available_models.keys():
-                        toggle_states[model_name] = self.orchestrator.get_model_toggle_state(model_name)
+                        # Map component model name to orchestrator model name for getting toggle state
+                        reverse_mapping = {
+                            'dqn': 'dqn_agent',
+                            'cnn': 'enhanced_cnn',
+                            'decision_fusion': 'decision',
+                            'extrema_trainer': 'extrema_trainer',
+                            'cob_rl': 'cob_rl',
+                            'transformer': 'transformer'
+                        }
+                        orchestrator_model_name = reverse_mapping.get(model_name, model_name)
+                        toggle_states[model_name] = self.orchestrator.get_model_toggle_state(orchestrator_model_name)
                else:
                    # Fallback to default states for known models
                    toggle_states = {
@@ -3711,8 +3783,19 @@ class CleanTradingDashboard:
                
                try:
                    if self.orchestrator:
+                        # Map component model name to orchestrator model name for getting statistics
+                        reverse_mapping = {
+                            'dqn': 'dqn_agent',
+                            'cnn': 'enhanced_cnn',
+                            'decision_fusion': 'decision',
+                            'extrema_trainer': 'extrema_trainer',
+                            'cob_rl': 'cob_rl',
+                            'transformer': 'transformer'
+                        }
+                        orchestrator_model_name = reverse_mapping.get(model_name, model_name)
+                        
                        # Use the new model statistics system
-                        model_stats = self.orchestrator.get_model_statistics(model_name.lower())
+                        model_stats = self.orchestrator.get_model_statistics(orchestrator_model_name)
                        if model_stats:
                            # Last inference time
                            timing['last_inference'] = model_stats.last_inference_time
@@ -3755,7 +3838,7 @@ class CleanTradingDashboard:
            dqn_prediction_count = len(self.recent_decisions) if signal_generation_active else 0
            
            # Get latest DQN prediction from orchestrator statistics
-            dqn_stats = orchestrator_stats.get('dqn_agent')
+            dqn_stats = orchestrator_stats.get('dqn_agent')  # Use orchestrator's internal name
            if dqn_stats and dqn_stats.predictions_history:
                # Get the most recent prediction
                latest_pred = list(dqn_stats.predictions_history)[-1]
@@ -3786,8 +3869,8 @@ class CleanTradingDashboard:
                        last_confidence = 0.68
                        last_timestamp = datetime.now().strftime('%H:%M:%S')
            
-            # Get real DQN statistics from orchestrator (try both old and new names)
-            dqn_stats = orchestrator_stats.get('dqn_agent') or orchestrator_stats.get('dqn')
+            # Get real DQN statistics from orchestrator (use orchestrator's internal name)
+            dqn_stats = orchestrator_stats.get('dqn_agent')
            dqn_current_loss = dqn_stats.current_loss if dqn_stats else None
            dqn_best_loss = dqn_stats.best_loss if dqn_stats else None
            dqn_accuracy = dqn_stats.accuracy if dqn_stats else None
@@ -3867,8 +3950,8 @@ class CleanTradingDashboard:
            cnn_state = model_states.get('cnn', {})
            cnn_timing = get_model_timing_info('CNN')
            
-            # Get real CNN statistics from orchestrator (try both old and new names)
-            cnn_stats = orchestrator_stats.get('enhanced_cnn') or orchestrator_stats.get('cnn')
+            # Get real CNN statistics from orchestrator (use orchestrator's internal name)
+            cnn_stats = orchestrator_stats.get('enhanced_cnn')
            cnn_active = cnn_stats is not None
            
            # Get latest CNN prediction from orchestrator statistics
@@ -4095,7 +4178,10 @@ class CleanTradingDashboard:
            # 4. COB RL Model Status - using orchestrator SSOT
            cob_state = model_states.get('cob_rl', {})
            cob_timing = get_model_timing_info('COB_RL')
-            cob_active = True
+            
+            # Get real COB RL statistics from orchestrator (use orchestrator's internal name)
+            cob_stats = orchestrator_stats.get('cob_rl')
+            cob_active = cob_stats is not None
            cob_predictions_count = len(self.recent_decisions) * 2
            
            # Get COB RL toggle states
@@ -4154,10 +4240,8 @@ class CleanTradingDashboard:
            decision_inference_enabled = decision_toggle_state.get("inference_enabled", True)
            decision_training_enabled = decision_toggle_state.get("training_enabled", True)
            
-            # Get real decision fusion statistics from orchestrator
-            decision_stats = None
-            if self.orchestrator and hasattr(self.orchestrator, 'model_statistics'):
-                decision_stats = self.orchestrator.model_statistics.get('decision_fusion')
+            # Get real decision fusion statistics from orchestrator (use orchestrator's internal name)
+            decision_stats = orchestrator_stats.get('decision')
            
            # Get real last prediction
            last_prediction = 'HOLD'
--- a/web/component_manager.py
+++ b/web/component_manager.py
@@ -140,7 +140,8 @@ class DashboardComponentManager:
            # Create table headers
            headers = html.Thead([
                html.Tr([
-                    html.Th("Time", className="small"),
+                    html.Th("Entry Time", className="small"),
+                    html.Th("Exit Time", className="small"),
                    html.Th("Side", className="small"),
                    html.Th("Size", className="small"),
                    html.Th("Entry", className="small"),
@@ -158,6 +159,7 @@ class DashboardComponentManager:
                if hasattr(trade, 'entry_time'):
                    # This is a trade object
                    entry_time = getattr(trade, 'entry_time', 'Unknown')
+                    exit_time = getattr(trade, 'exit_time', 'Unknown')
                    side = getattr(trade, 'side', 'UNKNOWN')
                    size = getattr(trade, 'size', 0)
                    entry_price = getattr(trade, 'entry_price', 0)
@@ -168,6 +170,7 @@ class DashboardComponentManager:
                else:
                    # This is a dictionary format
                    entry_time = trade.get('entry_time', 'Unknown')
+                    exit_time = trade.get('exit_time', 'Unknown')
                    side = trade.get('side', 'UNKNOWN')
                    size = trade.get('quantity', trade.get('size', 0))  # Try 'quantity' first, then 'size'
                    entry_price = trade.get('entry_price', 0)
@@ -176,11 +179,17 @@ class DashboardComponentManager:
                    fees = trade.get('fees', 0)
                    hold_time_seconds = trade.get('hold_time_seconds', 0.0)
                
-                # Format time
+                # Format entry time
                if isinstance(entry_time, datetime):
-                    time_str = entry_time.strftime('%H:%M:%S')
+                    entry_time_str = entry_time.strftime('%H:%M:%S')
                else:
-                    time_str = str(entry_time)
+                    entry_time_str = str(entry_time)
+                
+                # Format exit time
+                if isinstance(exit_time, datetime):
+                    exit_time_str = exit_time.strftime('%H:%M:%S')
+                else:
+                    exit_time_str = str(exit_time)
                
                # Determine P&L color
                pnl_class = "text-success" if pnl >= 0 else "text-danger"
@@ -197,7 +206,8 @@ class DashboardComponentManager:
                net_pnl = pnl - fees
                
                row = html.Tr([
-                    html.Td(time_str, className="small"),
+                    html.Td(entry_time_str, className="small"),
+                    html.Td(exit_time_str, className="small"),
                    html.Td(side, className=f"small {side_class}"),
                    html.Td(f"${position_size_usd:.2f}", className="small"),  # Show size in USD
                    html.Td(f"${entry_price:.2f}", className="small"),
@@ -714,11 +724,11 @@ class DashboardComponentManager:
        """Format training metrics for display - Enhanced with loaded models"""
        try:
            # DEBUG: Log what we're receiving
-            logger.debug(f"format_training_metrics received: {type(metrics_data)}")
+            logger.info(f"format_training_metrics received: {type(metrics_data)}")
            if metrics_data:
-                logger.debug(f"Metrics keys: {list(metrics_data.keys()) if isinstance(metrics_data, dict) else 'Not a dict'}")
+                logger.info(f"Metrics keys: {list(metrics_data.keys()) if isinstance(metrics_data, dict) else 'Not a dict'}")
                if isinstance(metrics_data, dict) and 'loaded_models' in metrics_data:
-                    logger.debug(f"Loaded models: {list(metrics_data['loaded_models'].keys())}")
+                    logger.info(f"Loaded models: {list(metrics_data['loaded_models'].keys())}")
            
            if not metrics_data or 'error' in metrics_data:
                logger.warning(f"No training data or error in metrics_data: {metrics_data}")
@@ -772,6 +782,7 @@ class DashboardComponentManager:
                        checkpoint_status = "LOADED" if model_info.get('checkpoint_loaded', False) else "FRESH"
                        
                        # Model card
+                        logger.info(f"Creating model card for {model_name} with toggles: inference={model_info.get('inference_enabled', True)}, training={model_info.get('training_enabled', True)}")
                        model_card = html.Div([
                            # Header with model name and toggle
                            html.Div([
@@ -1043,10 +1054,15 @@ class DashboardComponentManager:
                            html.Span(f"{enhanced_stats['recent_validation_score']:.3f}", className="text-primary small fw-bold")
                        ], className="mb-1"))
            
+            logger.info(f"format_training_metrics returning {len(content)} components")
+            for i, component in enumerate(content[:3]):  # Log first 3 components
+                logger.info(f"  Component {i}: {type(component)}")
            return content
            
        except Exception as e:
            logger.error(f"Error formatting training metrics: {e}")
+            import traceback
+            logger.error(f"Traceback: {traceback.format_exc()}")
            return [html.P(f"Error: {str(e)}", className="text-danger small")]
    
    def _format_cnn_pivot_prediction(self, model_info):
--- a/web/layout_manager.py
+++ b/web/layout_manager.py
@@ -17,11 +17,32 @@ class DashboardLayoutManager:
    
    def create_main_layout(self):
        """Create the main dashboard layout"""
-        return html.Div([
-            self._create_header(),
-            self._create_interval_component(),
-            self._create_main_content()
-        ], className="container-fluid")
+        try:
+            print("Creating main layout...")
+            header = self._create_header()
+            print("Header created")
+            interval_component = self._create_interval_component()
+            print("Interval component created")
+            main_content = self._create_main_content()
+            print("Main content created")
+            
+            layout = html.Div([
+                header,
+                interval_component,
+                main_content
+            ], className="container-fluid")
+            
+            print("Main layout created successfully")
+            return layout
+        except Exception as e:
+            print(f"Error creating main layout: {e}")
+            import traceback
+            traceback.print_exc()
+            # Return a simple error layout
+            return html.Div([
+                html.H1("Dashboard Error", className="text-danger"),
+                html.P(f"Error creating layout: {str(e)}", className="text-danger")
+            ])
    
    def _create_header(self):
        """Create the dashboard header"""
@@ -52,7 +73,15 @@ class DashboardLayoutManager:
            dcc.Interval(
                id='slow-interval-component',
                interval=10000,  # Update every 10 seconds (0.1 Hz) - OPTIMIZED
-                n_intervals=0
+                n_intervals=0,
+                disabled=False
+            ),
+            # Fast interval for testing (5 seconds)
+            dcc.Interval(
+                id='fast-interval-component',
+                interval=5000,  # Update every 5 seconds for testing
+                n_intervals=0,
+                disabled=False
            ),
            # WebSocket-based updates for high-frequency data (no interval needed)
            html.Div(id='websocket-updates-container', style={'display': 'none'})
@@ -357,10 +386,16 @@ class DashboardLayoutManager:
                html.Div([
                    html.Div([
                        html.Div([
-                            html.H6([
-                                html.I(className="fas fa-brain me-2"),
-                                "Models & Training Progress",
-                            ], className="card-title mb-2"),
+                            html.Div([
+                                html.H6([
+                                    html.I(className="fas fa-brain me-2"),
+                                    "Models & Training Progress",
+                                ], className="card-title mb-2"),
+                                html.Button([
+                                    html.I(className="fas fa-sync-alt me-1"),
+                                    "Refresh"
+                                ], id="refresh-training-metrics-btn", className="btn btn-sm btn-outline-primary")
+                            ], className="d-flex justify-content-between align-items-center mb-2"),
                            html.Div(
                                id="training-metrics",
                                style={"height": "300px", "overflowY": "auto"},
Author	SHA1	Message	Date
Dobromir Popov	aa2a1bf7ee	fixed CNN training	2025-07-29 20:11:22 +03:00
Dobromir Popov	b1ae557843	models overhaul	2025-07-29 19:22:04 +03:00
Dobromir Popov	0b5fa07498	ui fixes	2025-07-29 19:02:44 +03:00
Dobromir Popov	ac4068c168	suppress_callback_exceptions	2025-07-29 18:20:07 +03:00