Merge commit 'd49a473ed6f4aef55bfdd47d6370e53582be6b7b' into cleanup

Dobromir Popov
2025-10-01 00:32:19 +03:00
353 changed files with 81004 additions and 35899 deletions

View File

@@ -1,21 +0,0 @@
"""
Neural Network Models
=====================
This package contains the neural network models used in the trading system:
- CNN Model: Deep convolutional neural network for feature extraction
- DQN Agent: Deep Q-Network for reinforcement learning
- COB RL Model: Specialized RL model for order book data
- Advanced Transformer: High-performance transformer for trading
PyTorch implementation only.
"""
from NN.models.cnn_model import EnhancedCNNModel as CNNModel
from NN.models.dqn_agent import DQNAgent
from NN.models.cob_rl_model import MassiveRLNetwork, COBRLModelInterface
from NN.models.advanced_transformer_trading import AdvancedTradingTransformer, TradingTransformerConfig
from NN.models.model_interfaces import ModelInterface, CNNModelInterface, RLAgentInterface, ExtremaTrainerInterface
__all__ = ['CNNModel', 'DQNAgent', 'MassiveRLNetwork', 'COBRLModelInterface', 'AdvancedTradingTransformer', 'TradingTransformerConfig',
'ModelInterface', 'CNNModelInterface', 'RLAgentInterface', 'ExtremaTrainerInterface']
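For reference, a minimal sketch of how these (now-removed) package exports were consumed; note that CNNModel is only an alias, so both names resolve to the same class:

# Hypothetical usage sketch of the package-level exports (constructor arguments elided).
from NN.models import CNNModel, DQNAgent, COBRLModelInterface
from NN.models.cnn_model import EnhancedCNNModel

# CNNModel is an alias for EnhancedCNNModel, so both names refer to the same class:
assert CNNModel is EnhancedCNNModel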

View File

@@ -267,7 +267,17 @@ class COBRLModelInterface(ModelInterface):
logger.info(f"COB RL Model Interface initialized on {self.device}")
def to(self, device):
"""PyTorch-style device movement method"""
self.device = device
self.model = self.model.to(device)
return self
def predict(self, cob_features: np.ndarray) -> Dict[str, Any]:
"""Make prediction using the model"""
self.model.eval()
with torch.no_grad():
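The hunk is truncated above, but the added to() method follows the standard PyTorch convention of returning self, so it composes directly with predict. A minimal sketch (the 2000-dim feature vector is an assumed placeholder, not a documented input size):

# Sketch: move the interface's underlying model to GPU, then predict once.
import numpy as np
import torch
from NN.models.cob_rl_model import COBRLModelInterface

interface = COBRLModelInterface()  # constructor arguments assumed default here
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
interface = interface.to(device)   # returns self, PyTorch-style

cob_features = np.zeros(2000, dtype=np.float32)  # placeholder feature vector
result = interface.predict(cob_features)         # -> Dict[str, Any]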

File diff suppressed because it is too large

View File

@@ -3,6 +3,7 @@ import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
import time
import logging
import torch.nn.functional as F
from typing import List, Tuple, Dict, Any, Optional, Union
@@ -80,6 +81,9 @@ class EnhancedCNN(nn.Module):
self.n_actions = n_actions
self.confidence_threshold = confidence_threshold
# Training data storage
self.training_data = []
# Calculate input dimensions
if isinstance(input_shape, (list, tuple)):
if len(input_shape) == 3: # [channels, height, width]
@@ -265,8 +269,9 @@ class EnhancedCNN(nn.Module):
nn.Linear(256, 3) # 0=bottom, 1=top, 2=neither
)
# ULTRA MASSIVE price direction prediction head
# Outputs single direction and confidence values
self.price_direction_head = nn.Sequential(
nn.Linear(1024, 1024), # Increased from 512
nn.ReLU(),
nn.Dropout(0.3),
@@ -275,32 +280,13 @@ class EnhancedCNN(nn.Module):
nn.Dropout(0.3),
nn.Linear(512, 256), # Increased from 128
nn.ReLU(),
nn.Linear(256, 2) # [direction, confidence]
)
# Direction activation (tanh for -1 to 1)
self.direction_activation = nn.Tanh()
# Confidence activation (sigmoid for 0 to 1)
self.confidence_activation = nn.Sigmoid()
# ULTRA MASSIVE value prediction with ensemble approaches
self.price_pred_value = nn.Sequential(
@@ -371,21 +357,45 @@ class EnhancedCNN(nn.Module):
nn.Linear(128, 4) # Low risk, medium risk, high risk, extreme risk
)
def _memory_barrier(self, tensor: torch.Tensor) -> torch.Tensor:
"""Create a memory barrier to prevent in-place operation issues"""
return tensor.detach().clone().requires_grad_(tensor.requires_grad)
def _check_rebuild_network(self, features):
"""Check if network needs to be rebuilt for different feature dimensions"""
"""DEPRECATED: Network should have fixed architecture - no runtime rebuilding"""
if features != self.feature_dim:
logger.info(f"Rebuilding network for new feature dimension: {features} (was {self.feature_dim})")
self.feature_dim = features
self._build_network()
# Move to device after rebuilding
self.to(self.device)
return True
logger.error(f"CRITICAL: Input feature dimension mismatch! Expected {self.feature_dim}, got {features}")
logger.error("This indicates a bug in data preprocessing - input should be fixed size!")
logger.error("Network architecture should NOT change at runtime!")
raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {features}")
return False
def forward(self, x):
"""Forward pass through the ULTRA MASSIVE network"""
batch_size = x.size(0)
# Validate input dimensions to prevent zero-element tensor issues
if x.numel() == 0:
logger.error(f"Forward pass received empty tensor with shape {x.shape}")
# Return default outputs for all 5 expected values to prevent crash
default_q_values = torch.zeros(batch_size, self.n_actions, device=x.device)
default_extrema = torch.zeros(batch_size, 3, device=x.device) # bottom/top/neither
default_price_pred = torch.zeros(batch_size, 2, device=x.device) # [direction, confidence]
default_features = torch.zeros(batch_size, 1024, device=x.device)
default_advanced = torch.zeros(batch_size, 1, device=x.device)
return default_q_values, default_extrema, default_price_pred, default_features, default_advanced
# Check for zero feature dimensions
if len(x.shape) > 1 and any(dim == 0 for dim in x.shape[1:]):
logger.error(f"Forward pass received tensor with zero feature dimensions: {x.shape}")
# Return default outputs for all 5 expected values to prevent crash
default_q_values = torch.zeros(batch_size, self.n_actions, device=x.device)
default_extrema = torch.zeros(batch_size, 3, device=x.device) # bottom/top/neither
default_price_pred = torch.zeros(batch_size, 2, device=x.device) # [direction, confidence]
default_features = torch.zeros(batch_size, 1024, device=x.device)
default_advanced = torch.zeros(batch_size, 1, device=x.device)
return default_q_values, default_extrema, default_price_pred, default_features, default_advanced
# Process different input shapes
if len(x.shape) > 2:
# Handle 4D input [batch, timeframes, window, features] or 3D input [batch, timeframes, features]
@@ -397,10 +407,11 @@ class EnhancedCNN(nn.Module):
# Now x is 3D: [batch, timeframes, features]
x_reshaped = x
# Validate input dimensions (should be fixed)
total_features = x_reshaped.size(1) * x_reshaped.size(2)
if total_features != self.feature_dim:
logger.error(f"Input dimension mismatch: expected {self.feature_dim}, got {total_features}")
raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {total_features}")
# Apply ultra massive convolutions
x_conv = self.conv_layers(x_reshaped)
@@ -413,9 +424,10 @@ class EnhancedCNN(nn.Module):
# For 2D input [batch, features]
x_flat = x
# Validate input dimensions (should be fixed)
if x_flat.size(1) != self.feature_dim:
logger.error(f"Input dimension mismatch: expected {self.feature_dim}, got {x_flat.size(1)}")
raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {x_flat.size(1)}")
# Apply ULTRA MASSIVE FC layers to get base features
features = self.fc_layers(x_flat) # [batch, 1024]
@@ -464,10 +476,14 @@ class EnhancedCNN(nn.Module):
# Extrema predictions (bottom/top/neither detection)
extrema_pred = self.extrema_head(features_refined)
# Price direction predictions
price_direction_raw = self.price_direction_head(features_refined)
# Apply separate activations to direction and confidence
direction = self.direction_activation(price_direction_raw[:, 0:1]) # -1 to 1
confidence = self.confidence_activation(price_direction_raw[:, 1:2]) # 0 to 1
price_direction_pred = torch.cat([direction, confidence], dim=1) # [batch, 2]
price_values = self.price_pred_value(features_refined)
# Additional specialized predictions for enhanced accuracy
@@ -476,38 +492,42 @@ class EnhancedCNN(nn.Module):
market_regime_pred = self.market_regime_head(features_refined)
risk_pred = self.risk_head(features_refined)
# Use the price direction prediction directly (already [batch, 2])
price_direction_tensor = price_direction_pred
# Package additional predictions into a single tensor (use volatility as primary)
# For compatibility with DQN agent, we return volatility_pred as the advanced prediction tensor
advanced_pred_tensor = volatility_pred
return q_values, extrema_pred, price_direction_tensor, features_refined, advanced_pred_tensor
def act(self, state, explore=True) -> Tuple[int, float, List[float]]:
"""Enhanced action selection with ultra massive model predictions"""
if explore and np.random.random() < 0.1: # 10% random exploration
# Return the full (action, confidence, probabilities) tuple on the random path too
action = int(np.random.choice(self.n_actions))
uniform = 1.0 / self.n_actions
return action, uniform, [uniform] * self.n_actions
self.eval()
# Accept both NumPy arrays and already-built torch tensors
if isinstance(state, torch.Tensor):
state_tensor = state.detach().to(self.device)
if state_tensor.dim() == 1:
state_tensor = state_tensor.unsqueeze(0)
else:
# Convert to tensor **directly on the target device** to avoid intermediate CPU copies
state_tensor = torch.as_tensor(state, dtype=torch.float32, device=self.device)
if state_tensor.dim() == 1:
state_tensor = state_tensor.unsqueeze(0)
with torch.no_grad():
q_values, extrema_pred, price_direction_predictions, features, advanced_predictions = self(state_tensor)
# Process price direction predictions
if price_direction_predictions is not None:
self.process_price_direction_predictions(price_direction_predictions)
# Apply softmax to get action probabilities
action_probs_tensor = torch.softmax(q_values, dim=1)
action_idx = int(torch.argmax(action_probs_tensor, dim=1).item())
confidence = float(action_probs_tensor[0, action_idx].item()) # Confidence of the chosen action
action_probs = action_probs_tensor.squeeze(0).tolist() # Convert to list of floats for return
# Log advanced predictions for better decision making
if hasattr(self, '_log_predictions') and self._log_predictions:
@@ -537,7 +557,180 @@ class EnhancedCNN(nn.Module):
logger.info(f" Market Regime: {regime_labels[regime_class]} ({regime[regime_class]:.3f})")
logger.info(f" Risk Level: {risk_labels[risk_class]} ({risk[risk_class]:.3f})")
return action_idx, confidence, action_probs
def process_price_direction_predictions(self, price_direction_pred: torch.Tensor) -> Dict[str, float]:
"""
Process price direction predictions and convert to standardized format
Args:
price_direction_pred: Tensor of shape (batch_size, 2) containing [direction, confidence]
Returns:
Dict with direction (-1 to 1) and confidence (0 to 1)
"""
try:
if price_direction_pred is None or price_direction_pred.numel() == 0:
return {}
# Extract direction and confidence values
direction_value = float(price_direction_pred[0, 0].item()) # -1 to 1
confidence_value = float(price_direction_pred[0, 1].item()) # 0 to 1
processed_directions = {
'direction': direction_value,
'confidence': confidence_value
}
# Store for later access
self.last_price_direction = processed_directions
return processed_directions
except Exception as e:
logger.error(f"Error processing price direction predictions: {e}")
return {}
def get_price_direction_vector(self) -> Dict[str, float]:
"""
Get the current price direction and confidence
Returns:
Dict with direction (-1 to 1) and confidence (0 to 1)
"""
return getattr(self, 'last_price_direction', {})
def get_price_direction_summary(self) -> Dict[str, Any]:
"""
Get a summary of price direction prediction
Returns:
Dict containing direction and confidence information
"""
try:
last_direction = getattr(self, 'last_price_direction', {})
if not last_direction:
return {
'direction_value': 0.0,
'confidence_value': 0.0,
'direction_label': "SIDEWAYS",
'discrete_direction': 0,
'strength': 0.0,
'weighted_strength': 0.0
}
direction_value = last_direction['direction']
confidence_value = last_direction['confidence']
# Convert to discrete direction
if direction_value > 0.1:
direction_label = "UP"
discrete_direction = 1
elif direction_value < -0.1:
direction_label = "DOWN"
discrete_direction = -1
else:
direction_label = "SIDEWAYS"
discrete_direction = 0
return {
'direction_value': float(direction_value),
'confidence_value': float(confidence_value),
'direction_label': direction_label,
'discrete_direction': discrete_direction,
'strength': abs(float(direction_value)),
'weighted_strength': abs(float(direction_value)) * float(confidence_value)
}
except Exception as e:
logger.error(f"Error calculating price direction summary: {e}")
return {
'direction_value': 0.0,
'confidence_value': 0.0,
'direction_label': "SIDEWAYS",
'discrete_direction': 0,
'strength': 0.0,
'weighted_strength': 0.0
}
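To make the thresholding concrete, a small worked example (model is assumed to be an instantiated EnhancedCNN):

# Worked example of the summary logic above (0.42 * 0.87 = 0.3654):
raw = torch.tensor([[0.42, 0.87]])            # [direction, confidence]
model.process_price_direction_predictions(raw)
summary = model.get_price_direction_summary()
# summary: direction_label 'UP' (0.42 > 0.1), discrete_direction 1,
#          strength 0.42, weighted_strength 0.3654
# A raw direction of -0.05 would fall inside [-0.1, 0.1] -> 'SIDEWAYS', discrete 0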
def add_training_data(self, state, action, reward, position_pnl=0.0, has_position=False):
"""
Add training data to the model's training buffer with position-based reward enhancement
Args:
state: Input state
action: Action taken
reward: Base reward received
position_pnl: Current position P&L (0.0 if no position)
has_position: Whether we currently have an open position
"""
try:
# Enhance reward based on position status
enhanced_reward = self._calculate_position_enhanced_reward(
reward, action, position_pnl, has_position
)
self.training_data.append({
'state': state,
'action': action,
'reward': enhanced_reward,
'base_reward': reward, # Keep original reward for analysis
'position_pnl': position_pnl,
'has_position': has_position,
'timestamp': time.time()
})
# Keep only the last 1000 training samples
if len(self.training_data) > 1000:
self.training_data = self.training_data[-1000:]
except Exception as e:
logger.error(f"Error adding training data: {e}")
def _calculate_position_enhanced_reward(self, base_reward, action, position_pnl, has_position):
"""
Calculate position-enhanced reward to incentivize profitable trades and closing losing ones
Args:
base_reward: Original reward from price prediction accuracy
action: Action taken ('BUY', 'SELL', 'HOLD')
position_pnl: Current position P&L
has_position: Whether we have an open position
Returns:
Enhanced reward that incentivizes profitable behavior
"""
try:
enhanced_reward = base_reward
if has_position and position_pnl != 0.0:
# Position-based reward adjustments
pnl_factor = position_pnl / 100.0 # Normalize P&L to reasonable scale
if position_pnl > 0: # Profitable position
if action == "HOLD":
# Reward holding profitable positions (let winners run)
enhanced_reward += abs(pnl_factor) * 0.5
elif action in ["BUY", "SELL"]:
# Moderate reward for taking action on profitable positions
enhanced_reward += abs(pnl_factor) * 0.3
elif position_pnl < 0: # Losing position
if action == "HOLD":
# Penalty for holding losing positions (cut losses)
enhanced_reward -= abs(pnl_factor) * 0.8
elif action in ["BUY", "SELL"]:
# Reward for taking action to close losing positions
enhanced_reward += abs(pnl_factor) * 0.6
# Ensure reward doesn't become extreme
enhanced_reward = max(-5.0, min(5.0, enhanced_reward))
return enhanced_reward
except Exception as e:
logger.error(f"Error calculating position-enhanced reward: {e}")
return base_reward
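A quick numeric check of the shaping above, assuming a base reward of 0.2 and an open position at -50 P&L (so pnl_factor = -0.5):

# HOLD on the losing position: 0.2 - |-0.5| * 0.8 = -0.2 (penalized: cut losses)
# SELL on the losing position: 0.2 + |-0.5| * 0.6 =  0.5 (rewarded for closing)
# Both results lie inside the final [-5.0, 5.0] clamp.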
def save(self, path):
"""Save model weights and architecture"""

View File

@@ -1 +0,0 @@
{"best_reward": 4791516.572471984, "best_episode": 3250, "best_pnl": 826842167451289.1, "best_win_rate": 0.47368421052631576, "date": "2025-04-01 10:19:16"}

View File

@@ -1,20 +0,0 @@
{
"supervised": {
"epochs_completed": 22650,
"best_val_pnl": 0.0,
"best_epoch": 50,
"best_win_rate": 0
},
"reinforcement": {
"episodes_completed": 0,
"best_reward": -Infinity,
"best_episode": 0,
"best_win_rate": 0
},
"hybrid": {
"iterations_completed": 453,
"best_combined_score": 0.0,
"training_started": "2025-04-09T10:30:42.510856",
"last_update": "2025-04-09T10:40:02.217840"
}
}

View File

@@ -1,326 +0,0 @@
{
"epochs_completed": 8,
"best_val_pnl": 0.0,
"best_epoch": 1,
"best_win_rate": 0.0,
"training_started": "2025-04-02T10:43:58.946682",
"last_update": "2025-04-02T10:44:10.940892",
"epochs": [
{
"epoch": 1,
"train_loss": 1.0950355529785156,
"val_loss": 1.1657923062642415,
"train_acc": 0.3255208333333333,
"val_acc": 0.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:01.840889",
"data_age": 2,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 2,
"train_loss": 1.0831659038861592,
"val_loss": 1.1212460199991863,
"train_acc": 0.390625,
"val_acc": 0.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:03.134833",
"data_age": 4,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 3,
"train_loss": 1.0740693012873332,
"val_loss": 1.0992945830027263,
"train_acc": 0.4739583333333333,
"val_acc": 0.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:04.425272",
"data_age": 5,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 4,
"train_loss": 1.0747728943824768,
"val_loss": 1.0821794271469116,
"train_acc": 0.4609375,
"val_acc": 0.3229166666666667,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:05.716421",
"data_age": 6,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 5,
"train_loss": 1.0489931503931682,
"val_loss": 1.0669521888097127,
"train_acc": 0.5833333333333334,
"val_acc": 1.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:07.007935",
"data_age": 8,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 6,
"train_loss": 1.0533669590950012,
"val_loss": 1.0505590836207073,
"train_acc": 0.5104166666666666,
"val_acc": 1.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:08.296061",
"data_age": 9,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 7,
"train_loss": 1.0456886688868205,
"val_loss": 1.0351698795954387,
"train_acc": 0.5651041666666666,
"val_acc": 1.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:09.607584",
"data_age": 10,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 8,
"train_loss": 1.040040671825409,
"val_loss": 1.0227736632029216,
"train_acc": 0.6119791666666666,
"val_acc": 1.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:10.940892",
"data_age": 11,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
}
],
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"total_wins": {
"train": 0,
"val": 0
}
}

View File

@@ -1,192 +0,0 @@
{
"epochs_completed": 7,
"best_val_pnl": 0.002028853100759435,
"best_epoch": 6,
"best_win_rate": 0.5157894736842106,
"training_started": "2025-03-31T02:50:10.418670",
"last_update": "2025-03-31T02:50:15.227593",
"epochs": [
{
"epoch": 1,
"train_loss": 1.1206786036491394,
"val_loss": 1.0542699098587036,
"train_acc": 0.11197916666666667,
"val_acc": 0.25,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
},
"val": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
}
},
"timestamp": "2025-03-31T02:50:12.881423",
"data_age": 2
},
{
"epoch": 2,
"train_loss": 1.1266120672225952,
"val_loss": 1.072133183479309,
"train_acc": 0.1171875,
"val_acc": 0.25,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
},
"val": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
}
},
"timestamp": "2025-03-31T02:50:13.186840",
"data_age": 2
},
{
"epoch": 3,
"train_loss": 1.1415620843569438,
"val_loss": 1.1701548099517822,
"train_acc": 0.1015625,
"val_acc": 0.5208333333333334,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
},
"val": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
}
},
"timestamp": "2025-03-31T02:50:13.442018",
"data_age": 3
},
{
"epoch": 4,
"train_loss": 1.1331567962964375,
"val_loss": 1.070081114768982,
"train_acc": 0.09375,
"val_acc": 0.22916666666666666,
"train_pnl": 0.010650217327384765,
"val_pnl": -0.0007049481907895126,
"train_win_rate": 0.49279538904899134,
"val_win_rate": 0.40625,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.9036458333333334,
"HOLD": 0.09635416666666667
},
"val": {
"BUY": 0.0,
"SELL": 0.3333333333333333,
"HOLD": 0.6666666666666666
}
},
"timestamp": "2025-03-31T02:50:13.739899",
"data_age": 3
},
{
"epoch": 5,
"train_loss": 1.10965762535731,
"val_loss": 1.0485950708389282,
"train_acc": 0.12239583333333333,
"val_acc": 0.17708333333333334,
"train_pnl": 0.011924086862580204,
"val_pnl": 0.0,
"train_win_rate": 0.5070422535211268,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.7395833333333334,
"HOLD": 0.2604166666666667
},
"val": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
}
},
"timestamp": "2025-03-31T02:50:14.073439",
"data_age": 3
},
{
"epoch": 6,
"train_loss": 1.1272419293721516,
"val_loss": 1.084235429763794,
"train_acc": 0.1015625,
"val_acc": 0.22916666666666666,
"train_pnl": 0.014825159601390072,
"val_pnl": 0.00405770620151887,
"train_win_rate": 0.4908616187989556,
"val_win_rate": 0.5157894736842106,
"best_position_size": 2.0,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 1.0,
"HOLD": 0.0
},
"val": {
"BUY": 0.0,
"SELL": 1.0,
"HOLD": 0.0
}
},
"timestamp": "2025-03-31T02:50:14.658295",
"data_age": 4
},
{
"epoch": 7,
"train_loss": 1.1171108484268188,
"val_loss": 1.0741244554519653,
"train_acc": 0.1171875,
"val_acc": 0.22916666666666666,
"train_pnl": 0.0059474696523706605,
"val_pnl": 0.00405770620151887,
"train_win_rate": 0.4838709677419355,
"val_win_rate": 0.5157894736842106,
"best_position_size": 2.0,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.7291666666666666,
"HOLD": 0.2708333333333333
},
"val": {
"BUY": 0.0,
"SELL": 1.0,
"HOLD": 0.0
}
},
"timestamp": "2025-03-31T02:50:15.227593",
"data_age": 4
}
]
}

View File

@@ -0,0 +1,512 @@
"""
Standardized CNN Model for Multi-Modal Trading System
This module extends the existing EnhancedCNN to work with standardized BaseDataInput format
and provides ModelOutput for cross-model feeding.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import logging
from datetime import datetime
from typing import Dict, List, Optional, Any, Tuple
import sys
import os
# Add the project root to the path to import core modules
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from core.data_models import BaseDataInput, ModelOutput, create_model_output
from .enhanced_cnn import EnhancedCNN, SelfAttention, ResidualBlock
logger = logging.getLogger(__name__)
class StandardizedCNN(nn.Module):
"""
Standardized CNN Model that accepts BaseDataInput and outputs ModelOutput
Features:
- Accepts standardized BaseDataInput format
- Processes COB+OHLCV data: 300 frames (1s,1m,1h,1d) ETH + 300s 1s BTC
- Includes COB ±20 buckets and MA (1s,5s,15s,60s) of COB imbalance ±5 buckets
- Outputs BUY/SELL trading action with confidence scores
- Provides hidden states for cross-model feeding
- Integrates with checkpoint management system
"""
def __init__(self, model_name: str = "standardized_cnn_v1", confidence_threshold: float = 0.6):
"""
Initialize the standardized CNN model
Args:
model_name: Name identifier for this model instance
confidence_threshold: Minimum confidence threshold for predictions
"""
super(StandardizedCNN, self).__init__()
self.model_name = model_name
self.model_type = "cnn"
self.confidence_threshold = confidence_threshold
# Calculate expected input dimensions from BaseDataInput
self.expected_feature_dim = self._calculate_expected_features()
# Initialize the underlying enhanced CNN with calculated dimensions
self.enhanced_cnn = EnhancedCNN(
input_shape=self.expected_feature_dim,
n_actions=3, # BUY, SELL, HOLD
confidence_threshold=confidence_threshold
)
# Additional layers for processing BaseDataInput structure
self.input_processor = self._build_input_processor()
# Output processing layers
self.output_processor = self._build_output_processor()
# Optional numeric return head (predicts percent change for 1s,1m,1h,1d)
# Uses cnn_features (1024) to regress predicted returns per timeframe
self.return_head = nn.Sequential(
nn.Linear(1024, 256),
nn.ReLU(),
nn.Dropout(0.1),
nn.Linear(256, 4) # [return_1s, return_1m, return_1h, return_1d]
)
# Device management
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.to(self.device)
try:
import torch.backends.cudnn as cudnn
cudnn.benchmark = True
except Exception:
pass
logger.info(f"StandardizedCNN '{model_name}' initialized")
logger.info(f"Expected feature dimension: {self.expected_feature_dim}")
logger.info(f"Device: {self.device}")
def _calculate_expected_features(self) -> int:
"""
Calculate expected feature dimension from BaseDataInput structure
Based on actual BaseDataInput.get_feature_vector():
- OHLCV ETH: 300 frames x 4 timeframes x 5 features = 6000
- OHLCV BTC: 300 frames x 5 features = 1500
- COB features: ~184 features (actual from implementation)
- Technical indicators: 100 features (padded)
- Last predictions: 50 features (padded)
Total: ~7834 features (as measured from the actual implementation)
"""
return 7834 # Based on actual BaseDataInput.get_feature_vector() measurement
def _build_input_processor(self) -> nn.Module:
"""
Build input processing layers for BaseDataInput
Returns:
nn.Module: Input processing layers
"""
return nn.Sequential(
# Initial processing of raw BaseDataInput features
nn.Linear(self.expected_feature_dim, 4096),
nn.ReLU(),
nn.Dropout(0.2),
nn.BatchNorm1d(4096),
# Feature refinement
nn.Linear(4096, 2048),
nn.ReLU(),
nn.Dropout(0.2),
nn.BatchNorm1d(2048),
# Final feature extraction
nn.Linear(2048, 1024),
nn.ReLU(),
nn.Dropout(0.1)
)
def _build_output_processor(self) -> nn.Module:
"""
Build output processing layers for standardized ModelOutput
Returns:
nn.Module: Output processing layers
"""
return nn.Sequential(
# Process CNN outputs for standardized format
nn.Linear(1024, 512),
nn.ReLU(),
nn.Dropout(0.2),
# Final action prediction
nn.Linear(512, 3), # BUY, SELL, HOLD
nn.Softmax(dim=1)
)
def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], torch.Tensor]:
"""
Forward pass through the standardized CNN
Args:
x: Input tensor from BaseDataInput.get_feature_vector()
Returns:
Tuple of (action_probabilities, hidden_states_dict, predicted_returns)
"""
batch_size = x.size(0)
# Validate input dimensions
if x.size(1) != self.expected_feature_dim:
logger.warning(f"Input dimension mismatch: expected {self.expected_feature_dim}, got {x.size(1)}")
# Pad or truncate as needed
if x.size(1) < self.expected_feature_dim:
padding = torch.zeros(batch_size, self.expected_feature_dim - x.size(1), device=x.device)
x = torch.cat([x, padding], dim=1)
else:
x = x[:, :self.expected_feature_dim]
# Process input through input processor
processed_features = self.input_processor(x) # [batch, 1024]
# Get enhanced CNN predictions (using processed features as input)
# We need to reshape for the enhanced CNN which expects different input format
cnn_input = processed_features.unsqueeze(1) # Add sequence dimension
try:
q_values, extrema_pred, price_pred, cnn_features, advanced_pred = self.enhanced_cnn(cnn_input)
except Exception as e:
logger.warning(f"Enhanced CNN forward pass failed: {e}, using fallback")
# Fallback to direct processing
cnn_features = processed_features
q_values = torch.zeros(batch_size, 3, device=x.device)
extrema_pred = torch.zeros(batch_size, 3, device=x.device)
price_pred = torch.zeros(batch_size, 2, device=x.device) # [direction, confidence]
advanced_pred = torch.zeros(batch_size, 5, device=x.device)
# Process outputs for standardized format
action_probs = self.output_processor(cnn_features) # [batch, 3]
# Predict numeric returns per timeframe from cnn_features
predicted_returns = self.return_head(cnn_features) # [batch, 4]
# Prepare hidden states for cross-model feeding
hidden_states = {
'processed_features': processed_features.detach(),
'cnn_features': cnn_features.detach(),
'q_values': q_values.detach(),
'extrema_predictions': extrema_pred.detach(),
'price_predictions': price_pred.detach(),
'advanced_predictions': advanced_pred.detach(),
'attention_weights': torch.ones(batch_size, 1, device=x.device) # Placeholder
}
return action_probs, hidden_states, predicted_returns.detach()
def predict_from_base_input(self, base_input: BaseDataInput) -> ModelOutput:
"""
Make prediction from BaseDataInput and return standardized ModelOutput
Args:
base_input: Standardized input data
Returns:
ModelOutput: Standardized model output
"""
try:
# Convert BaseDataInput to feature vector
feature_vector = base_input.get_feature_vector()
# Convert to tensor and add batch dimension
input_tensor = torch.tensor(feature_vector, dtype=torch.float32, device=self.device).unsqueeze(0)
# Set model to evaluation mode
self.eval()
with torch.no_grad():
# Forward pass
action_probs, hidden_states, predicted_returns = self.forward(input_tensor)
# Get action and confidence
action_probs_np = action_probs.squeeze(0).cpu().numpy()
action_idx = np.argmax(action_probs_np)
confidence = float(action_probs_np[action_idx])
# Map action index to action name
action_names = ['BUY', 'SELL', 'HOLD']
action = action_names[action_idx]
# Prepare predictions dictionary
predictions = {
'action': action,
'buy_probability': float(action_probs_np[0]),
'sell_probability': float(action_probs_np[1]),
'hold_probability': float(action_probs_np[2]),
'action_probabilities': action_probs_np.tolist(),
'extrema_detected': self._interpret_extrema(hidden_states.get('extrema_predictions')),
'price_direction': self._interpret_price_direction(hidden_states.get('price_predictions')),
'market_conditions': self._interpret_advanced_predictions(hidden_states.get('advanced_predictions'))
}
# Add numeric predicted returns per timeframe if available
try:
pr = predicted_returns.squeeze(0).cpu().numpy().tolist()
# Ensure length 4; if not, safely handle
if isinstance(pr, list) and len(pr) >= 4:
predictions['predicted_returns'] = pr[:4]
predictions['predicted_return_1s'] = float(pr[0])
predictions['predicted_return_1m'] = float(pr[1])
predictions['predicted_return_1h'] = float(pr[2])
predictions['predicted_return_1d'] = float(pr[3])
except Exception:
pass
# Prepare hidden states for cross-model feeding (convert tensors to numpy)
cross_model_states = {}
for key, tensor in hidden_states.items():
if isinstance(tensor, torch.Tensor):
cross_model_states[key] = tensor.squeeze(0).cpu().numpy().tolist()
else:
cross_model_states[key] = tensor
# Create metadata
metadata = {
'model_version': '1.0',
'confidence_threshold': self.confidence_threshold,
'feature_dimension': self.expected_feature_dim,
'processing_time_ms': 0, # Could add timing if needed
'input_validation': base_input.validate()
}
# Create standardized ModelOutput
model_output = ModelOutput(
model_type=self.model_type,
model_name=self.model_name,
symbol=base_input.symbol,
timestamp=datetime.now(),
confidence=confidence,
predictions=predictions,
hidden_states=cross_model_states,
metadata=metadata
)
return model_output
except Exception as e:
logger.error(f"Error in CNN prediction: {e}")
# Return default output
return self._create_default_output(base_input.symbol)
def _interpret_extrema(self, extrema_tensor: Optional[torch.Tensor]) -> str:
"""Interpret extrema predictions"""
if extrema_tensor is None:
return "unknown"
try:
extrema_probs = torch.softmax(extrema_tensor.squeeze(0), dim=0)
extrema_idx = torch.argmax(extrema_probs).item()
extrema_labels = ['bottom', 'top', 'neither']
return extrema_labels[extrema_idx]
except Exception:
return "unknown"
def _interpret_price_direction(self, price_tensor: Optional[torch.Tensor]) -> str:
"""Interpret price direction predictions ([direction, confidence] format)"""
if price_tensor is None:
return "unknown"
try:
# EnhancedCNN now emits [direction, confidence]; index 0 is direction in (-1, 1)
direction_value = float(price_tensor.squeeze(0)[0].item())
if direction_value > 0.1:
return "up"
if direction_value < -0.1:
return "down"
return "sideways"
except Exception:
return "unknown"
def _interpret_advanced_predictions(self, advanced_tensor: Optional[torch.Tensor]) -> Dict[str, str]:
"""Interpret advanced market predictions"""
if advanced_tensor is None:
return {"volatility": "unknown", "risk": "unknown"}
try:
# Assuming advanced predictions include volatility (5 classes)
if advanced_tensor.size(-1) >= 5:
volatility_probs = torch.softmax(advanced_tensor.squeeze(0)[:5], dim=0)
volatility_idx = torch.argmax(volatility_probs).item()
volatility_labels = ['very_low', 'low', 'medium', 'high', 'very_high']
volatility = volatility_labels[volatility_idx]
else:
volatility = "unknown"
return {
"volatility": volatility,
"risk": "medium" # Placeholder
}
except Exception:
return {"volatility": "unknown", "risk": "unknown"}
def _create_default_output(self, symbol: str) -> ModelOutput:
"""Create default ModelOutput for error cases"""
return create_model_output(
model_type=self.model_type,
model_name=self.model_name,
symbol=symbol,
action='HOLD',
confidence=0.5,
metadata={'error': True, 'default_output': True}
)
def train_step(self, base_inputs: List[BaseDataInput], targets: List[str],
optimizer: torch.optim.Optimizer) -> float:
"""
Perform a single training step
Args:
base_inputs: List of BaseDataInput for training
targets: List of target actions ('BUY', 'SELL', 'HOLD')
optimizer: PyTorch optimizer
Returns:
float: Training loss
"""
self.train()
try:
# Convert inputs to tensors
feature_vectors = []
for base_input in base_inputs:
feature_vector = base_input.get_feature_vector()
feature_vectors.append(feature_vector)
input_tensor = torch.tensor(np.array(feature_vectors), dtype=torch.float32, device=self.device)
# Convert targets to tensor
action_to_idx = {'BUY': 0, 'SELL': 1, 'HOLD': 2}
target_indices = [action_to_idx.get(target, 2) for target in targets]
target_tensor = torch.tensor(target_indices, dtype=torch.long, device=self.device)
# Forward pass
action_probs, _, _ = self.forward(input_tensor)
# Calculate loss
loss = F.cross_entropy(action_probs, target_tensor)
# Backward pass
optimizer.zero_grad()
loss.backward()
optimizer.step()
return float(loss.item())
except Exception as e:
logger.error(f"Error in training step: {e}")
return float('inf')
def evaluate(self, base_inputs: List[BaseDataInput], targets: List[str]) -> Dict[str, float]:
"""
Evaluate model performance
Args:
base_inputs: List of BaseDataInput for evaluation
targets: List of target actions
Returns:
Dict containing evaluation metrics
"""
self.eval()
try:
correct = 0
total = len(base_inputs)
total_confidence = 0.0
with torch.no_grad():
for base_input, target in zip(base_inputs, targets):
model_output = self.predict_from_base_input(base_input)
predicted_action = model_output.predictions['action']
if predicted_action == target:
correct += 1
total_confidence += model_output.confidence
accuracy = correct / total if total > 0 else 0.0
avg_confidence = total_confidence / total if total > 0 else 0.0
return {
'accuracy': accuracy,
'avg_confidence': avg_confidence,
'correct_predictions': correct,
'total_predictions': total
}
except Exception as e:
logger.error(f"Error in evaluation: {e}")
return {'accuracy': 0.0, 'avg_confidence': 0.0, 'correct_predictions': 0, 'total_predictions': 0}
def save_checkpoint(self, filepath: str, metadata: Optional[Dict[str, Any]] = None):
"""
Save model checkpoint
Args:
filepath: Path to save checkpoint
metadata: Optional metadata to save with checkpoint
"""
try:
checkpoint = {
'model_state_dict': self.state_dict(),
'model_name': self.model_name,
'model_type': self.model_type,
'confidence_threshold': self.confidence_threshold,
'expected_feature_dim': self.expected_feature_dim,
'metadata': metadata or {},
'timestamp': datetime.now().isoformat()
}
torch.save(checkpoint, filepath)
logger.info(f"Checkpoint saved to {filepath}")
except Exception as e:
logger.error(f"Error saving checkpoint: {e}")
def load_checkpoint(self, filepath: str) -> bool:
"""
Load model checkpoint
Args:
filepath: Path to checkpoint file
Returns:
bool: True if loaded successfully, False otherwise
"""
try:
checkpoint = torch.load(filepath, map_location=self.device)
# Load model state
self.load_state_dict(checkpoint['model_state_dict'])
# Load configuration
self.model_name = checkpoint.get('model_name', self.model_name)
self.confidence_threshold = checkpoint.get('confidence_threshold', self.confidence_threshold)
self.expected_feature_dim = checkpoint.get('expected_feature_dim', self.expected_feature_dim)
logger.info(f"Checkpoint loaded from {filepath}")
return True
except Exception as e:
logger.error(f"Error loading checkpoint: {e}")
return False
def get_model_info(self) -> Dict[str, Any]:
"""Get model information"""
return {
'model_name': self.model_name,
'model_type': self.model_type,
'confidence_threshold': self.confidence_threshold,
'expected_feature_dim': self.expected_feature_dim,
'device': str(self.device),
'parameter_count': sum(p.numel() for p in self.parameters()),
'trainable_parameters': sum(p.numel() for p in self.parameters() if p.requires_grad)
}
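A minimal end-to-end sketch of the API above; base_input is assumed to be a populated BaseDataInput obtained from the data provider, and the checkpoint path is illustrative:

import torch

model = StandardizedCNN(model_name="standardized_cnn_v1")

# Inference path: BaseDataInput -> standardized ModelOutput
output = model.predict_from_base_input(base_input)
print(output.predictions['action'], output.confidence)

# Training path: one supervised step against a labeled action
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss = model.train_step([base_input], ['BUY'], optimizer)

# Persistence via the checkpoint helpers
model.save_checkpoint("checkpoints/standardized_cnn.pt", metadata={'loss': loss})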

View File

@@ -1,821 +0,0 @@
"""
Transformer Neural Network for timeseries analysis
This module implements a Transformer model with attention mechanisms for cryptocurrency price analysis.
It also includes a Mixture of Experts model that combines predictions from multiple models.
"""
import os
import logging
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
Input, Dense, Dropout, BatchNormalization,
Concatenate, Layer, LayerNormalization, MultiHeadAttention,
Add, GlobalAveragePooling1D, Conv1D, Reshape
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import datetime
import json
logger = logging.getLogger(__name__)
class TransformerBlock(Layer):
"""
Transformer block implementation with multi-head attention and feed-forward networks.
"""
def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
super(TransformerBlock, self).__init__()
# Store hyperparameters so get_config() can serialize the layer
self.embed_dim = embed_dim
self.num_heads = num_heads
self.ff_dim = ff_dim
self.rate = rate
self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
self.ffn = tf.keras.Sequential([
Dense(ff_dim, activation="relu"),
Dense(embed_dim),
])
self.layernorm1 = LayerNormalization(epsilon=1e-6)
self.layernorm2 = LayerNormalization(epsilon=1e-6)
self.dropout1 = Dropout(rate)
self.dropout2 = Dropout(rate)
def call(self, inputs, training=False):
attn_output = self.att(inputs, inputs)
attn_output = self.dropout1(attn_output, training=training)
out1 = self.layernorm1(inputs + attn_output)
ffn_output = self.ffn(out1)
ffn_output = self.dropout2(ffn_output, training=training)
return self.layernorm2(out1 + ffn_output)
def get_config(self):
# Serialize hyperparameters, not sublayer objects (layers are not JSON-serializable
# and __init__ rebuilds them from these values on load)
config = super().get_config()
config.update({
'embed_dim': self.embed_dim,
'num_heads': self.num_heads,
'ff_dim': self.ff_dim,
'rate': self.rate
})
return config
class PositionalEncoding(Layer):
"""
Positional encoding layer to add position information to input embeddings.
"""
def __init__(self, position, d_model):
super(PositionalEncoding, self).__init__()
self.position = position
self.d_model = d_model
self.pos_encoding = self.positional_encoding(position, d_model)
def get_angles(self, position, i, d_model):
angles = 1 / tf.pow(10000, (2 * (i // 2)) / tf.cast(d_model, tf.float32))
return position * angles
def positional_encoding(self, position, d_model):
angle_rads = self.get_angles(
position=tf.range(position, dtype=tf.float32)[:, tf.newaxis],
i=tf.range(d_model, dtype=tf.float32)[tf.newaxis, :],
d_model=d_model
)
# Apply sin to even indices in the array
sines = tf.math.sin(angle_rads[:, 0::2])
# Apply cos to odd indices in the array
cosines = tf.math.cos(angle_rads[:, 1::2])
pos_encoding = tf.concat([sines, cosines], axis=-1)
pos_encoding = pos_encoding[tf.newaxis, ...]
return tf.cast(pos_encoding, tf.float32)
def call(self, inputs):
return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]
def get_config(self):
# pos_encoding is recomputed from position and d_model in __init__, so only
# those two values need to be serialized
config = super().get_config()
config.update({
'position': self.position,
'd_model': self.d_model
})
return config
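For reference, get_angles and positional_encoding implement the standard sinusoidal scheme from the original Transformer paper (note this variant concatenates the sine and cosine halves along the feature axis rather than interleaving them):

PE_{(pos,\,2i)} = \sin\!\left(pos / 10000^{2i/d_{\mathrm{model}}}\right), \qquad
PE_{(pos,\,2i+1)} = \cos\!\left(pos / 10000^{2i/d_{\mathrm{model}}}\right)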
class TransformerModel:
"""
Transformer Neural Network for time series analysis.
This model uses self-attention mechanisms to capture relationships between
different time points in the input data.
"""
def __init__(self, ts_input_shape=(20, 5), feature_input_shape=64, output_size=1, model_dir="NN/models/saved"):
"""
Initialize the Transformer model.
Args:
ts_input_shape (tuple): Shape of time series input data (sequence_length, features)
feature_input_shape (int): Shape of additional feature input (e.g., from CNN)
output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
model_dir (str): Directory to save trained models
"""
self.ts_input_shape = ts_input_shape
self.feature_input_shape = feature_input_shape
self.output_size = output_size
self.model_dir = model_dir
self.model = None
self.history = None
# Create model directory if it doesn't exist
os.makedirs(self.model_dir, exist_ok=True)
logger.info(f"Initialized Transformer model with TS input shape {ts_input_shape}, "
f"feature input shape {feature_input_shape}, and output size {output_size}")
def build_model(self, embed_dim=32, num_heads=4, ff_dim=64, num_transformer_blocks=2, dropout_rate=0.1, learning_rate=0.001):
"""
Build the Transformer model architecture.
Args:
embed_dim (int): Embedding dimension for transformer
num_heads (int): Number of attention heads
ff_dim (int): Hidden dimension of the feed forward network
num_transformer_blocks (int): Number of transformer blocks
dropout_rate (float): Dropout rate for regularization
learning_rate (float): Learning rate for Adam optimizer
Returns:
The compiled model
"""
# Time series input
ts_inputs = Input(shape=self.ts_input_shape, name="ts_input")
# Additional feature input (e.g., from CNN)
feature_inputs = Input(shape=(self.feature_input_shape,), name="feature_input")
# Process time series with transformer
# First, project the input to the embedding dimension
x = Conv1D(embed_dim, 1, activation="relu")(ts_inputs)
# Add positional encoding
x = PositionalEncoding(self.ts_input_shape[0], embed_dim)(x)
# Add transformer blocks
for _ in range(num_transformer_blocks):
x = TransformerBlock(embed_dim, num_heads, ff_dim, dropout_rate)(x)
# Global pooling to get a single vector representation
x = GlobalAveragePooling1D()(x)
x = Dropout(dropout_rate)(x)
# Combine with additional features
combined = Concatenate()([x, feature_inputs])
# Dense layers for final classification/regression
x = Dense(64, activation="relu")(combined)
x = BatchNormalization()(x)
x = Dropout(dropout_rate)(x)
# Output layer
if self.output_size == 1:
# Binary classification (up/down)
outputs = Dense(1, activation='sigmoid', name='output')(x)
loss = 'binary_crossentropy'
metrics = ['accuracy']
elif self.output_size == 3:
# Multi-class classification (buy/hold/sell)
outputs = Dense(3, activation='softmax', name='output')(x)
loss = 'categorical_crossentropy'
metrics = ['accuracy']
else:
# Regression
outputs = Dense(self.output_size, activation='linear', name='output')(x)
loss = 'mse'
metrics = ['mae']
# Create and compile model
self.model = Model(inputs=[ts_inputs, feature_inputs], outputs=outputs)
# Compile with Adam optimizer
self.model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss=loss,
metrics=metrics
)
# Log model summary
self.model.summary(print_fn=lambda x: logger.info(x))
return self.model
def train(self, X_ts, X_features, y, batch_size=32, epochs=100, validation_split=0.2,
callbacks=None, class_weights=None):
"""
Train the Transformer model on the provided data.
Args:
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
y (numpy.ndarray): Target labels
batch_size (int): Batch size
epochs (int): Number of epochs
validation_split (float): Fraction of data to use for validation
callbacks (list): List of Keras callbacks
class_weights (dict): Class weights for imbalanced datasets
Returns:
History object containing training metrics
"""
if self.model is None:
self.build_model()
# Default callbacks if none provided
if callbacks is None:
# Create a timestamp for model checkpoints
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
callbacks = [
EarlyStopping(
monitor='val_loss',
patience=10,
restore_best_weights=True
),
ReduceLROnPlateau(
monitor='val_loss',
factor=0.5,
patience=5,
min_lr=1e-6
),
ModelCheckpoint(
filepath=os.path.join(self.model_dir, f"transformer_model_{timestamp}.h5"),
monitor='val_loss',
save_best_only=True
)
]
# Check if y needs to be one-hot encoded for multi-class
if self.output_size == 3 and len(y.shape) == 1:
y = tf.keras.utils.to_categorical(y, num_classes=3)
# Train the model
logger.info(f"Training Transformer model with {len(X_ts)} samples, batch size {batch_size}, epochs {epochs}")
self.history = self.model.fit(
[X_ts, X_features], y,
batch_size=batch_size,
epochs=epochs,
validation_split=validation_split,
callbacks=callbacks,
class_weight=class_weights,
verbose=2
)
# Save the trained model
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
model_path = os.path.join(self.model_dir, f"transformer_model_final_{timestamp}.h5")
self.model.save(model_path)
logger.info(f"Model saved to {model_path}")
# Save training history
history_path = os.path.join(self.model_dir, f"transformer_model_history_{timestamp}.json")
with open(history_path, 'w') as f:
# Convert numpy values to Python native types for JSON serialization
history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
json.dump(history_dict, f, indent=2)
return self.history
def evaluate(self, X_ts, X_features, y):
"""
Evaluate the model on test data.
Args:
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
y (numpy.ndarray): Target labels
Returns:
dict: Evaluation metrics
"""
if self.model is None:
raise ValueError("Model has not been built or trained yet")
# Convert y to one-hot encoding for multi-class
if self.output_size == 3 and len(y.shape) == 1:
y = tf.keras.utils.to_categorical(y, num_classes=3)
# Evaluate model
logger.info(f"Evaluating Transformer model on {len(X_ts)} samples")
eval_results = self.model.evaluate([X_ts, X_features], y, verbose=0)
metrics = {}
for metric, value in zip(self.model.metrics_names, eval_results):
metrics[metric] = value
logger.info(f"{metric}: {value:.4f}")
return metrics
def predict(self, X_ts, X_features=None):
"""
Make predictions on new data.
Args:
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
Returns:
tuple: (y_pred, y_proba) where:
y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
y_proba is the class probability
"""
if self.model is None:
raise ValueError("Model has not been built or trained yet")
# Ensure X_ts has the right shape
if len(X_ts.shape) == 2:
# Single sample, add batch dimension
X_ts = np.expand_dims(X_ts, axis=0)
# Ensure X_features has the right shape
if X_features is None:
# Extract features from time series data if no external features provided
X_features = self._extract_features_from_timeseries(X_ts)
elif len(X_features.shape) == 1:
# Single sample, add batch dimension
X_features = np.expand_dims(X_features, axis=0)
# Get predictions
y_proba = self.model.predict([X_ts, X_features])
# Process based on output type
if self.output_size == 1:
# Binary classification
y_pred = (y_proba > 0.5).astype(int).flatten()
return y_pred, y_proba.flatten()
elif self.output_size == 3:
# Multi-class classification
y_pred = np.argmax(y_proba, axis=1)
return y_pred, y_proba
else:
# Regression
return y_proba, y_proba
def _extract_features_from_timeseries(self, X_ts: np.ndarray) -> np.ndarray:
"""Extract meaningful features from time series data instead of using dummy zeros"""
try:
batch_size = X_ts.shape[0]
features = []
for i in range(batch_size):
sample = X_ts[i] # Shape: (timesteps, features)
# Extract statistical features from each feature dimension
sample_features = []
for feature_idx in range(sample.shape[1]):
feature_data = sample[:, feature_idx]
# Basic statistical features
sample_features.extend([
np.mean(feature_data), # Mean
np.std(feature_data), # Standard deviation
np.min(feature_data), # Minimum
np.max(feature_data), # Maximum
np.percentile(feature_data, 25), # 25th percentile
np.percentile(feature_data, 75), # 75th percentile
])
# Trend features
if len(feature_data) > 1:
# Linear trend (slope)
x = np.arange(len(feature_data))
slope = np.polyfit(x, feature_data, 1)[0]
sample_features.append(slope)
# Rate of change
rate_of_change = (feature_data[-1] - feature_data[0]) / feature_data[0] if feature_data[0] != 0 else 0
sample_features.append(rate_of_change)
else:
sample_features.extend([0.0, 0.0])
# Pad or truncate to expected feature size
while len(sample_features) < self.feature_input_shape:
sample_features.append(0.0)
sample_features = sample_features[:self.feature_input_shape]
features.append(sample_features)
return np.array(features, dtype=np.float32)
except Exception as e:
logger.error(f"Error extracting features from time series: {e}")
# Fallback to zeros if extraction fails
return np.zeros((X_ts.shape[0], self.feature_input_shape), dtype=np.float32)
def save(self, filepath=None):
"""
Save the model to disk.
Args:
filepath (str): Path to save the model
Returns:
str: Path where the model was saved
"""
if self.model is None:
raise ValueError("Model has not been built yet")
if filepath is None:
# Create a default filepath with timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
filepath = os.path.join(self.model_dir, f"transformer_model_{timestamp}.h5")
self.model.save(filepath)
logger.info(f"Model saved to {filepath}")
return filepath
def load(self, filepath):
"""
Load a saved model from disk.
Args:
filepath (str): Path to the saved model
Returns:
The loaded model
"""
# Register custom layers
custom_objects = {
'TransformerBlock': TransformerBlock,
'PositionalEncoding': PositionalEncoding
}
self.model = load_model(filepath, custom_objects=custom_objects)
logger.info(f"Model loaded from {filepath}")
return self.model
def plot_training_history(self):
"""
Plot training history (loss and metrics).
Returns:
str: Path to the saved plot
"""
if self.history is None:
raise ValueError("Model has not been trained yet")
plt.figure(figsize=(12, 5))
# Plot loss
plt.subplot(1, 2, 1)
plt.plot(self.history.history['loss'], label='Training Loss')
if 'val_loss' in self.history.history:
plt.plot(self.history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
# Plot accuracy
plt.subplot(1, 2, 2)
if 'accuracy' in self.history.history:
plt.plot(self.history.history['accuracy'], label='Training Accuracy')
if 'val_accuracy' in self.history.history:
plt.plot(self.history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
elif 'mae' in self.history.history:
plt.plot(self.history.history['mae'], label='Training MAE')
if 'val_mae' in self.history.history:
plt.plot(self.history.history['val_mae'], label='Validation MAE')
plt.title('Model MAE')
plt.ylabel('MAE')
plt.xlabel('Epoch')
plt.legend()
plt.tight_layout()
# Save figure
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
fig_path = os.path.join(self.model_dir, f"transformer_training_history_{timestamp}.png")
plt.savefig(fig_path)
plt.close()
logger.info(f"Training history plot saved to {fig_path}")
return fig_path
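A sketch of the typical TransformerModel lifecycle, using the default shapes documented above with synthetic data (the sample counts and hyperparameters are illustrative):

import numpy as np

model = TransformerModel(ts_input_shape=(20, 5), feature_input_shape=64, output_size=3)
model.build_model(embed_dim=32, num_heads=4, ff_dim=64)

X_ts = np.random.randn(256, 20, 5).astype(np.float32)
X_feat = np.random.randn(256, 64).astype(np.float32)
y = np.random.randint(0, 3, size=256)   # BUY/HOLD/SELL labels; one-hot handled in train()

model.train(X_ts, X_feat, y, batch_size=32, epochs=5)
y_pred, y_proba = model.predict(X_ts[:4], X_feat[:4])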
class MixtureOfExpertsModel:
"""
Mixture of Experts (MoE) model.
This model combines predictions from multiple expert models (such as CNN and Transformer)
using a weighted ensemble approach.
"""
def __init__(self, output_size=1, model_dir="NN/models/saved"):
"""
Initialize the MoE model.
Args:
output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
model_dir (str): Directory to save trained models
"""
self.output_size = output_size
self.model_dir = model_dir
self.model = None
self.history = None
self.experts = {}
# Create model directory if it doesn't exist
os.makedirs(self.model_dir, exist_ok=True)
logger.info(f"Initialized Mixture of Experts model with output size {output_size}")
def add_expert(self, name, model):
"""
Add an expert model to the MoE.
Args:
name (str): Name of the expert model
model: The expert model instance
Returns:
None
"""
self.experts[name] = model
logger.info(f"Added expert model '{name}' to MoE")
def build_model(self, ts_input_shape=(20, 5), expert_weights=None, learning_rate=0.001):
"""
Build the MoE model by combining expert models.
Args:
ts_input_shape (tuple): Shape of time series input data
expert_weights (dict): Weights for each expert model
learning_rate (float): Learning rate for Adam optimizer
Returns:
The compiled model
"""
# Time series input
ts_inputs = Input(shape=ts_input_shape, name="ts_input")
# Additional feature input (from CNN)
feature_inputs = Input(shape=(64,), name="feature_input") # Default size for features
# Process with each expert model
expert_outputs = []
expert_names = []
for name, expert in self.experts.items():
# Skip if expert model is not valid or doesn't have a call/predict method
if expert is None:
logger.warning(f"Expert model '{name}' is None, skipping")
continue
try:
# Different handling based on model type
if name == 'cnn':
# CNN model takes only time series input
expert_output = expert(ts_inputs)
expert_outputs.append(expert_output)
expert_names.append(name)
elif name == 'transformer':
# Transformer model takes both time series and feature inputs
expert_output = expert([ts_inputs, feature_inputs])
expert_outputs.append(expert_output)
expert_names.append(name)
else:
logger.warning(f"Unknown expert model type: {name}")
except Exception as e:
logger.error(f"Error adding expert '{name}': {str(e)}")
if not expert_outputs:
logger.error("No valid expert models found")
return None
# Use expert weighting
if expert_weights is None:
# Equal weighting
weights = [1.0 / len(expert_outputs)] * len(expert_outputs)
else:
# User-provided weights
weights = [expert_weights.get(name, 1.0 / len(expert_outputs)) for name in expert_names]
# Normalize weights
weights = [w / sum(weights) for w in weights]
# Combine expert outputs using weighted average
if len(expert_outputs) == 1:
# Only one expert, use its output directly
combined_output = expert_outputs[0]
else:
# Multiple experts, compute weighted average
weighted_outputs = [output * weight for output, weight in zip(expert_outputs, weights)]
combined_output = Add()(weighted_outputs)
# Create the MoE model
moe_model = Model(inputs=[ts_inputs, feature_inputs], outputs=combined_output)
# Compile the model
if self.output_size == 1:
# Binary classification
moe_model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss='binary_crossentropy',
metrics=['accuracy']
)
elif self.output_size == 3:
# Multi-class classification for BUY/HOLD/SELL
moe_model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss='categorical_crossentropy',
metrics=['accuracy']
)
else:
# Regression
moe_model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss='mse',
metrics=['mae']
)
self.model = moe_model
# Log model summary
self.model.summary(print_fn=lambda x: logger.info(x))
logger.info(f"Built MoE model with weights: {weights}")
return self.model
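A worked example of the weight handling above: with expert_weights = {'cnn': 3.0, 'transformer': 1.0}, normalization yields [0.75, 0.25], so the ensemble output is 0.75 * cnn_out + 0.25 * transformer_out:

# Normalization exactly as in build_model (sum is taken over the original list):
weights = [3.0, 1.0]
weights = [w / sum(weights) for w in weights]  # -> [0.75, 0.25]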
def train(self, X_ts, X_features, y, batch_size=32, epochs=100, validation_split=0.2,
callbacks=None, class_weights=None):
"""
Train the MoE model on the provided data.
Args:
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
y (numpy.ndarray): Target labels
batch_size (int): Batch size
epochs (int): Number of epochs
validation_split (float): Fraction of data to use for validation
callbacks (list): List of Keras callbacks
class_weights (dict): Class weights for imbalanced datasets
Returns:
History object containing training metrics
"""
if self.model is None:
logger.error("MoE model has not been built yet")
return None
# Default callbacks if none provided
if callbacks is None:
# Create a timestamp for model checkpoints
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
callbacks = [
EarlyStopping(
monitor='val_loss',
patience=10,
restore_best_weights=True
),
ReduceLROnPlateau(
monitor='val_loss',
factor=0.5,
patience=5,
min_lr=1e-6
),
ModelCheckpoint(
filepath=os.path.join(self.model_dir, f"moe_model_{timestamp}.h5"),
monitor='val_loss',
save_best_only=True
)
]
# Check if y needs to be one-hot encoded for multi-class
if self.output_size == 3 and len(y.shape) == 1:
y = tf.keras.utils.to_categorical(y, num_classes=3)
# Train the model
logger.info(f"Training MoE model with {len(X_ts)} samples, batch size {batch_size}, epochs {epochs}")
self.history = self.model.fit(
[X_ts, X_features], y,
batch_size=batch_size,
epochs=epochs,
validation_split=validation_split,
callbacks=callbacks,
class_weight=class_weights,
verbose=2
)
# Save the trained model
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
model_path = os.path.join(self.model_dir, f"moe_model_final_{timestamp}.h5")
self.model.save(model_path)
logger.info(f"Model saved to {model_path}")
# Save training history
history_path = os.path.join(self.model_dir, f"moe_model_history_{timestamp}.json")
with open(history_path, 'w') as f:
# Convert numpy values to Python native types for JSON serialization
history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
json.dump(history_dict, f, indent=2)
return self.history
def predict(self, X_ts, X_features=None):
"""
Make predictions on new data.
Args:
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
Returns:
tuple: (y_pred, y_proba) where:
y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
y_proba is the class probability
"""
if self.model is None:
raise ValueError("Model has not been built or trained yet")
# Ensure X_ts has the right shape
if len(X_ts.shape) == 2:
# Single sample, add batch dimension
X_ts = np.expand_dims(X_ts, axis=0)
# Ensure X_features has the right shape
if X_features is None:
# Create dummy features with zeros
X_features = np.zeros((X_ts.shape[0], 64)) # Default size
elif len(X_features.shape) == 1:
# Single sample, add batch dimension
X_features = np.expand_dims(X_features, axis=0)
# Get predictions
y_proba = self.model.predict([X_ts, X_features])
# Process based on output type
if self.output_size == 1:
# Binary classification
y_pred = (y_proba > 0.5).astype(int).flatten()
return y_pred, y_proba.flatten()
elif self.output_size == 3:
# Multi-class classification
y_pred = np.argmax(y_proba, axis=1)
return y_pred, y_proba
else:
# Regression
return y_proba, y_proba
def save(self, filepath=None):
"""
Save the model to disk.
Args:
filepath (str): Path to save the model
Returns:
str: Path where the model was saved
"""
if self.model is None:
raise ValueError("Model has not been built yet")
if filepath is None:
# Create a default filepath with timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
filepath = os.path.join(self.model_dir, f"moe_model_{timestamp}.h5")
self.model.save(filepath)
logger.info(f"Model saved to {filepath}")
return filepath
def load(self, filepath):
"""
Load a saved model from disk.
Args:
filepath (str): Path to the saved model
Returns:
The loaded model
"""
# Register custom layers
custom_objects = {
'TransformerBlock': TransformerBlock,
'PositionalEncoding': PositionalEncoding
}
self.model = load_model(filepath, custom_objects=custom_objects)
logger.info(f"Model loaded from {filepath}")
return self.model
# Example usage:
if __name__ == "__main__":
# Importing this module defines the classes; instantiate them to use the models
print("Transformer and MoE models defined. Instantiate TransformerModel or MixtureOfExpertsModel to use them.")