BIG CLEANUP
@@ -1,27 +0,0 @@
"""
Neural Network Models
=====================

This package contains the neural network models used in the trading system:

- CNN Model: Deep convolutional neural network for feature extraction
- DQN Agent: Deep Q-Network for reinforcement learning
- COB RL Model: Specialized RL model for order book data
- Advanced Transformer: High-performance transformer for trading

PyTorch implementation only.
"""

# Import core models
from NN.models.dqn_agent import DQNAgent
from NN.models.cob_rl_model import COBRLModelInterface
from NN.models.advanced_transformer_trading import AdvancedTradingTransformer, TradingTransformerConfig
from NN.models.standardized_cnn import StandardizedCNN  # Use the unified CNN model

# Import model interfaces
from NN.models.model_interfaces import ModelInterface, CNNModelInterface, RLAgentInterface, ExtremaTrainerInterface

# Export the unified StandardizedCNN as CNNModel for compatibility
CNNModel = StandardizedCNN

__all__ = ['CNNModel', 'StandardizedCNN', 'DQNAgent', 'COBRLModelInterface', 'AdvancedTradingTransformer', 'TradingTransformerConfig',
           'ModelInterface', 'CNNModelInterface', 'RLAgentInterface', 'ExtremaTrainerInterface']
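
A minimal usage sketch of the compatibility alias above (illustrative, not part of the commit): the legacy name CNNModel now resolves to StandardizedCNN, so existing call sites keep working unchanged.

from NN.models import CNNModel, StandardizedCNN

model = CNNModel()                  # identical to StandardizedCNN()
assert CNNModel is StandardizedCNN  # same class object, just re-exported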

@@ -1,201 +0,0 @@
# """
# Legacy CNN Model Compatibility Layer

# This module provides compatibility redirects to the unified StandardizedCNN model.
# All legacy models (EnhancedCNNModel, CNNModelTrainer, CNNModel) have been retired
# in favor of the StandardizedCNN architecture.
# """

# import logging
# import warnings
# from typing import Tuple, Dict, Any, Optional
# import torch
# import numpy as np

# # Import the standardized CNN model
# from .standardized_cnn import StandardizedCNN

# logger = logging.getLogger(__name__)

# # Compatibility aliases and wrappers
# class EnhancedCNNModel:
#     """Legacy compatibility wrapper - redirects to StandardizedCNN"""

#     def __init__(self, *args, **kwargs):
#         warnings.warn(
#             "EnhancedCNNModel is deprecated. Use StandardizedCNN instead.",
#             DeprecationWarning,
#             stacklevel=2
#         )
#         # Create StandardizedCNN with default parameters
#         self.standardized_cnn = StandardizedCNN()
#         logger.warning("EnhancedCNNModel compatibility wrapper created - please migrate to StandardizedCNN")

#     def __getattr__(self, name):
#         """Delegate all method calls to StandardizedCNN"""
#         return getattr(self.standardized_cnn, name)


# class CNNModelTrainer:
#     """Legacy compatibility wrapper for CNN training"""

#     def __init__(self, model=None, *args, **kwargs):
#         warnings.warn(
#             "CNNModelTrainer is deprecated. Use StandardizedCNN.train_step() instead.",
#             DeprecationWarning,
#             stacklevel=2
#         )
#         if isinstance(model, EnhancedCNNModel):
#             self.model = model.standardized_cnn
#         else:
#             self.model = StandardizedCNN()
#         logger.warning("CNNModelTrainer compatibility wrapper created - please use StandardizedCNN.train_step()")

#     def train_step(self, x, y, *args, **kwargs):
#         """Legacy train step wrapper"""
#         try:
#             # Convert to BaseDataInput format if needed
#             if hasattr(x, 'get_feature_vector'):
#                 # Already BaseDataInput
#                 base_input = x
#             else:
#                 # Create mock BaseDataInput for legacy compatibility
#                 from core.data_models import BaseDataInput
#                 base_input = BaseDataInput()
#                 # Set mock feature vector
#                 if isinstance(x, torch.Tensor):
#                     feature_vector = x.flatten().cpu().numpy()
#                 else:
#                     feature_vector = np.array(x).flatten()

#                 # Pad or truncate to expected size
#                 expected_size = self.model.expected_feature_dim
#                 if len(feature_vector) < expected_size:
#                     padding = np.zeros(expected_size - len(feature_vector))
#                     feature_vector = np.concatenate([feature_vector, padding])
#                 else:
#                     feature_vector = feature_vector[:expected_size]

#                 base_input._feature_vector = feature_vector

#             # Convert target to string format
#             if isinstance(y, torch.Tensor):
#                 y_val = y.item() if y.numel() == 1 else y.argmax().item()
#             else:
#                 y_val = int(y) if np.isscalar(y) else int(np.argmax(y))

#             target_map = {0: 'BUY', 1: 'SELL', 2: 'HOLD'}
#             target = target_map.get(y_val, 'HOLD')

#             # Use StandardizedCNN training
#             optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)
#             loss = self.model.train_step([base_input], [target], optimizer)

#             return {'total_loss': loss, 'main_loss': loss, 'accuracy': 0.5}

#         except Exception as e:
#             logger.error(f"Legacy train_step error: {e}")
#             return {'total_loss': 0.0, 'main_loss': 0.0, 'accuracy': 0.5}


# # class CNNModel:
# #     """Legacy compatibility wrapper for CNN model interface"""

# #     def __init__(self, input_shape=(900, 50), output_size=3, model_path=None):
# #         warnings.warn(
# #             "CNNModel is deprecated. Use StandardizedCNN directly.",
# #             DeprecationWarning,
# #             stacklevel=2
# #         )
# #         self.input_shape = input_shape
# #         self.output_size = output_size
# #         self.standardized_cnn = StandardizedCNN()
# #         self.trainer = CNNModelTrainer(self.standardized_cnn)
# #         logger.warning("CNNModel compatibility wrapper created - please migrate to StandardizedCNN")

# #     def build_model(self, **kwargs):
# #         """Legacy build method - no-op for StandardizedCNN"""
# #         return self

# #     def predict(self, X):
# #         """Legacy predict method"""
# #         try:
# #             # Convert input to BaseDataInput
# #             from core.data_models import BaseDataInput
# #             base_input = BaseDataInput()

# #             if isinstance(X, np.ndarray):
# #                 feature_vector = X.flatten()
# #             else:
# #                 feature_vector = np.array(X).flatten()

# #             # Pad or truncate to expected size
# #             expected_size = self.standardized_cnn.expected_feature_dim
# #             if len(feature_vector) < expected_size:
# #                 padding = np.zeros(expected_size - len(feature_vector))
# #                 feature_vector = np.concatenate([feature_vector, padding])
# #             else:
# #                 feature_vector = feature_vector[:expected_size]

# #             base_input._feature_vector = feature_vector

# #             # Get prediction from StandardizedCNN
# #             result = self.standardized_cnn.predict_from_base_input(base_input)

# #             # Convert to legacy format
# #             action_map = {'BUY': 0, 'SELL': 1, 'HOLD': 2}
# #             pred_class = np.array([action_map.get(result.predictions['action'], 2)])
# #             pred_proba = np.array([result.predictions['action_probabilities']])

# #             return pred_class, pred_proba

# #         except Exception as e:
# #             logger.error(f"Legacy predict error: {e}")
# #             # Return safe defaults
# #             pred_class = np.array([2])  # HOLD
# #             pred_proba = np.array([[0.33, 0.33, 0.34]])
# #             return pred_class, pred_proba

# #     def fit(self, X, y, **kwargs):
# #         """Legacy fit method"""
# #         try:
# #             return self.trainer.train_step(X, y)
# #         except Exception as e:
# #             logger.error(f"Legacy fit error: {e}")
# #             return self

# #     def save(self, filepath: str):
# #         """Legacy save method"""
# #         try:
# #             torch.save(self.standardized_cnn.state_dict(), filepath)
# #             logger.info(f"StandardizedCNN saved to {filepath}")
# #         except Exception as e:
# #             logger.error(f"Error saving model: {e}")


# def create_enhanced_cnn_model(input_size: int = 60,
#                               feature_dim: int = 50,
#                               output_size: int = 3,
#                               base_channels: int = 256,
#                               device: str = 'cuda') -> Tuple[StandardizedCNN, CNNModelTrainer]:
#     """Legacy compatibility function - returns StandardizedCNN"""
#     warnings.warn(
#         "create_enhanced_cnn_model is deprecated. Use StandardizedCNN() directly.",
#         DeprecationWarning,
#         stacklevel=2
#     )

#     model = StandardizedCNN()
#     trainer = CNNModelTrainer(model)

#     logger.warning("Legacy create_enhanced_cnn_model called - please use StandardizedCNN directly")
#     return model, trainer


# # Export compatibility symbols
# __all__ = [
#     'EnhancedCNNModel',
#     'CNNModelTrainer',
#     # 'CNNModel',
#     'create_enhanced_cnn_model'
# ]
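
The retired module above illustrates a common deprecation pattern: a thin wrapper that warns on construction and then delegates every attribute lookup via __getattr__. A self-contained sketch of the pattern (class names here are illustrative stand-ins, not part of the codebase):

import warnings


class NewModel:
    """Stand-in for the replacement implementation (e.g. StandardizedCNN)."""
    def predict(self, x):
        return x


class LegacyModel:
    """Deprecated alias: warns once, then delegates transparently."""
    def __init__(self, *args, **kwargs):
        warnings.warn(
            "LegacyModel is deprecated. Use NewModel instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        self._impl = NewModel(*args, **kwargs)

    def __getattr__(self, name):
        # Invoked only when normal attribute lookup fails, so delegation
        # is transparent to callers of the old API.
        return getattr(self._impl, name)


legacy = LegacyModel()             # emits a DeprecationWarning
assert legacy.predict(42) == 42    # forwarded to NewModel.predict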

@@ -1,821 +0,0 @@
"""
Transformer Neural Network for time series analysis

This module implements a Transformer model with attention mechanisms for cryptocurrency price analysis.
It also includes a Mixture of Experts model that combines predictions from multiple models.
"""

import os
import logging
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
    Input, Dense, Dropout, BatchNormalization,
    Concatenate, Layer, LayerNormalization, MultiHeadAttention,
    Add, GlobalAveragePooling1D, Conv1D, Reshape
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import datetime
import json

logger = logging.getLogger(__name__)


class TransformerBlock(Layer):
    """
    Transformer block implementation with multi-head attention and feed-forward networks.
    """
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        # Store hyperparameters so get_config() can serialize them
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(embed_dim),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=False):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        # Serialize the constructor arguments, not the sublayers themselves:
        # Keras cannot round-trip raw Layer objects through a config dict.
        config = super().get_config()
        config.update({
            'embed_dim': self.embed_dim,
            'num_heads': self.num_heads,
            'ff_dim': self.ff_dim,
            'rate': self.rate
        })
        return config
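
As a quick sanity check, a sketch (dimensions made up, not part of the original file) of pushing a random batch through the block; the block is shape-preserving, so the output shape should equal the input shape:

_block = TransformerBlock(embed_dim=32, num_heads=4, ff_dim=64)
_x = tf.random.normal((8, 20, 32))                     # (batch, timesteps, embed_dim)
assert _block(_x, training=False).shape == _x.shape    # attention + FFN preserve shape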


class PositionalEncoding(Layer):
    """
    Positional encoding layer to add position information to input embeddings.
    """
    def __init__(self, position, d_model):
        super(PositionalEncoding, self).__init__()
        self.position = position
        self.d_model = d_model
        self.pos_encoding = self.positional_encoding(position, d_model)

    def get_angles(self, position, i, d_model):
        angles = 1 / tf.pow(10000, (2 * (i // 2)) / tf.cast(d_model, tf.float32))
        return position * angles

    def positional_encoding(self, position, d_model):
        angle_rads = self.get_angles(
            position=tf.range(position, dtype=tf.float32)[:, tf.newaxis],
            i=tf.range(d_model, dtype=tf.float32)[tf.newaxis, :],
            d_model=d_model
        )

        # Apply sin to even indices in the array
        sines = tf.math.sin(angle_rads[:, 0::2])

        # Apply cos to odd indices in the array
        cosines = tf.math.cos(angle_rads[:, 1::2])

        pos_encoding = tf.concat([sines, cosines], axis=-1)
        pos_encoding = pos_encoding[tf.newaxis, ...]

        return tf.cast(pos_encoding, tf.float32)

    def call(self, inputs):
        return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]

    def get_config(self):
        # Only the constructor arguments are serialized; pos_encoding is
        # deterministic and is rebuilt from them in __init__.
        config = super().get_config()
        config.update({
            'position': self.position,
            'd_model': self.d_model
        })
        return config
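
The layer computes the standard sinusoidal encodings, PE(pos, 2i) = sin(pos / 10000^(2i/d_model)) and PE(pos, 2i+1) = cos(pos / 10000^(2i/d_model)), but concatenates the sine and cosine halves along the feature axis rather than interleaving them; either layout works, since the encoding only needs to be deterministic and unique per position. A NumPy sketch of the same computation, for illustration only:

def _sinusoidal_encoding_sketch(position: int, d_model: int) -> np.ndarray:
    """Illustrative NumPy mirror of PositionalEncoding.positional_encoding()."""
    pos = np.arange(position)[:, np.newaxis]   # (position, 1)
    i = np.arange(d_model)[np.newaxis, :]      # (1, d_model)
    angles = pos / np.power(10000, (2 * (i // 2)) / d_model)
    # sin over even-indexed columns, cos over odd-indexed ones, concatenated
    return np.concatenate([np.sin(angles[:, 0::2]), np.cos(angles[:, 1::2])], axis=-1)

assert _sinusoidal_encoding_sketch(20, 32).shape == (20, 32)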


class TransformerModel:
    """
    Transformer Neural Network for time series analysis.

    This model uses self-attention mechanisms to capture relationships between
    different time points in the input data.
    """

    def __init__(self, ts_input_shape=(20, 5), feature_input_shape=64, output_size=1, model_dir="NN/models/saved"):
        """
        Initialize the Transformer model.

        Args:
            ts_input_shape (tuple): Shape of time series input data (sequence_length, features)
            feature_input_shape (int): Size of the additional feature input (e.g., from CNN)
            output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
            model_dir (str): Directory to save trained models
        """
        self.ts_input_shape = ts_input_shape
        self.feature_input_shape = feature_input_shape
        self.output_size = output_size
        self.model_dir = model_dir
        self.model = None
        self.history = None

        # Create model directory if it doesn't exist
        os.makedirs(self.model_dir, exist_ok=True)

        logger.info(f"Initialized Transformer model with TS input shape {ts_input_shape}, "
                    f"feature input shape {feature_input_shape}, and output size {output_size}")

    def build_model(self, embed_dim=32, num_heads=4, ff_dim=64, num_transformer_blocks=2, dropout_rate=0.1, learning_rate=0.001):
        """
        Build the Transformer model architecture.

        Args:
            embed_dim (int): Embedding dimension for the transformer
            num_heads (int): Number of attention heads
            ff_dim (int): Hidden dimension of the feed-forward network
            num_transformer_blocks (int): Number of transformer blocks
            dropout_rate (float): Dropout rate for regularization
            learning_rate (float): Learning rate for the Adam optimizer

        Returns:
            The compiled model
        """
        # Time series input
        ts_inputs = Input(shape=self.ts_input_shape, name="ts_input")

        # Additional feature input (e.g., from CNN)
        feature_inputs = Input(shape=(self.feature_input_shape,), name="feature_input")

        # Process the time series with the transformer.
        # First, project the input to the embedding dimension
        x = Conv1D(embed_dim, 1, activation="relu")(ts_inputs)

        # Add positional encoding
        x = PositionalEncoding(self.ts_input_shape[0], embed_dim)(x)

        # Add transformer blocks
        for _ in range(num_transformer_blocks):
            x = TransformerBlock(embed_dim, num_heads, ff_dim, dropout_rate)(x)

        # Global pooling to get a single vector representation
        x = GlobalAveragePooling1D()(x)
        x = Dropout(dropout_rate)(x)

        # Combine with additional features
        combined = Concatenate()([x, feature_inputs])

        # Dense layers for final classification/regression
        x = Dense(64, activation="relu")(combined)
        x = BatchNormalization()(x)
        x = Dropout(dropout_rate)(x)

        # Output layer
        if self.output_size == 1:
            # Binary classification (up/down)
            outputs = Dense(1, activation='sigmoid', name='output')(x)
            loss = 'binary_crossentropy'
            metrics = ['accuracy']
        elif self.output_size == 3:
            # Multi-class classification (buy/hold/sell)
            outputs = Dense(3, activation='softmax', name='output')(x)
            loss = 'categorical_crossentropy'
            metrics = ['accuracy']
        else:
            # Regression
            outputs = Dense(self.output_size, activation='linear', name='output')(x)
            loss = 'mse'
            metrics = ['mae']

        # Create and compile the model
        self.model = Model(inputs=[ts_inputs, feature_inputs], outputs=outputs)

        # Compile with the Adam optimizer
        self.model.compile(
            optimizer=Adam(learning_rate=learning_rate),
            loss=loss,
            metrics=metrics
        )

        # Log model summary
        self.model.summary(print_fn=lambda x: logger.info(x))

        return self.model

    def train(self, X_ts, X_features, y, batch_size=32, epochs=100, validation_split=0.2,
              callbacks=None, class_weights=None):
        """
        Train the Transformer model on the provided data.

        Args:
            X_ts (numpy.ndarray): Time series input features
            X_features (numpy.ndarray): Additional input features
            y (numpy.ndarray): Target labels
            batch_size (int): Batch size
            epochs (int): Number of epochs
            validation_split (float): Fraction of data to use for validation
            callbacks (list): List of Keras callbacks
            class_weights (dict): Class weights for imbalanced datasets

        Returns:
            History object containing training metrics
        """
        if self.model is None:
            self.build_model()

        # Default callbacks if none provided
        if callbacks is None:
            # Create a timestamp for model checkpoints
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

            callbacks = [
                EarlyStopping(
                    monitor='val_loss',
                    patience=10,
                    restore_best_weights=True
                ),
                ReduceLROnPlateau(
                    monitor='val_loss',
                    factor=0.5,
                    patience=5,
                    min_lr=1e-6
                ),
                ModelCheckpoint(
                    filepath=os.path.join(self.model_dir, f"transformer_model_{timestamp}.h5"),
                    monitor='val_loss',
                    save_best_only=True
                )
            ]

        # One-hot encode y for multi-class if it is still a 1-D label vector
        if self.output_size == 3 and len(y.shape) == 1:
            y = tf.keras.utils.to_categorical(y, num_classes=3)

        # Train the model
        logger.info(f"Training Transformer model with {len(X_ts)} samples, batch size {batch_size}, epochs {epochs}")
        self.history = self.model.fit(
            [X_ts, X_features], y,
            batch_size=batch_size,
            epochs=epochs,
            validation_split=validation_split,
            callbacks=callbacks,
            class_weight=class_weights,
            verbose=2
        )

        # Save the trained model
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        model_path = os.path.join(self.model_dir, f"transformer_model_final_{timestamp}.h5")
        self.model.save(model_path)
        logger.info(f"Model saved to {model_path}")

        # Save training history
        history_path = os.path.join(self.model_dir, f"transformer_model_history_{timestamp}.json")
        with open(history_path, 'w') as f:
            # Convert numpy values to Python native types for JSON serialization
            history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
            json.dump(history_dict, f, indent=2)

        return self.history

    def evaluate(self, X_ts, X_features, y):
        """
        Evaluate the model on test data.

        Args:
            X_ts (numpy.ndarray): Time series input features
            X_features (numpy.ndarray): Additional input features
            y (numpy.ndarray): Target labels

        Returns:
            dict: Evaluation metrics
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Convert y to one-hot encoding for multi-class
        if self.output_size == 3 and len(y.shape) == 1:
            y = tf.keras.utils.to_categorical(y, num_classes=3)

        # Evaluate the model
        logger.info(f"Evaluating Transformer model on {len(X_ts)} samples")
        eval_results = self.model.evaluate([X_ts, X_features], y, verbose=0)

        metrics = {}
        for metric, value in zip(self.model.metrics_names, eval_results):
            metrics[metric] = value
            logger.info(f"{metric}: {value:.4f}")

        return metrics

    def predict(self, X_ts, X_features=None):
        """
        Make predictions on new data.

        Args:
            X_ts (numpy.ndarray): Time series input features
            X_features (numpy.ndarray): Additional input features

        Returns:
            tuple: (y_pred, y_proba) where:
                y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
                y_proba is the class probability
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Ensure X_ts has the right shape
        if len(X_ts.shape) == 2:
            # Single sample, add batch dimension
            X_ts = np.expand_dims(X_ts, axis=0)

        # Ensure X_features has the right shape
        if X_features is None:
            # Extract features from time series data if no external features provided
            X_features = self._extract_features_from_timeseries(X_ts)
        elif len(X_features.shape) == 1:
            # Single sample, add batch dimension
            X_features = np.expand_dims(X_features, axis=0)

        # Get predictions
        y_proba = self.model.predict([X_ts, X_features])

        # Process based on output type
        if self.output_size == 1:
            # Binary classification
            y_pred = (y_proba > 0.5).astype(int).flatten()
            return y_pred, y_proba.flatten()
        elif self.output_size == 3:
            # Multi-class classification
            y_pred = np.argmax(y_proba, axis=1)
            return y_pred, y_proba
        else:
            # Regression
            return y_proba, y_proba

    def _extract_features_from_timeseries(self, X_ts: np.ndarray) -> np.ndarray:
        """Extract meaningful features from time series data instead of using dummy zeros"""
        # Each input channel yields 6 summary statistics plus slope and
        # rate-of-change (8 values per channel); the flat vector is then
        # zero-padded or truncated to feature_input_shape.
        try:
            batch_size = X_ts.shape[0]
            features = []

            for i in range(batch_size):
                sample = X_ts[i]  # Shape: (timesteps, features)

                # Extract statistical features from each feature dimension
                sample_features = []

                for feature_idx in range(sample.shape[1]):
                    feature_data = sample[:, feature_idx]

                    # Basic statistical features
                    sample_features.extend([
                        np.mean(feature_data),            # Mean
                        np.std(feature_data),             # Standard deviation
                        np.min(feature_data),             # Minimum
                        np.max(feature_data),             # Maximum
                        np.percentile(feature_data, 25),  # 25th percentile
                        np.percentile(feature_data, 75),  # 75th percentile
                    ])

                    # Trend features
                    if len(feature_data) > 1:
                        # Linear trend (slope)
                        x = np.arange(len(feature_data))
                        slope = np.polyfit(x, feature_data, 1)[0]
                        sample_features.append(slope)

                        # Rate of change
                        rate_of_change = (feature_data[-1] - feature_data[0]) / feature_data[0] if feature_data[0] != 0 else 0
                        sample_features.append(rate_of_change)
                    else:
                        sample_features.extend([0.0, 0.0])

                # Pad or truncate to the expected feature size
                while len(sample_features) < self.feature_input_shape:
                    sample_features.append(0.0)
                sample_features = sample_features[:self.feature_input_shape]

                features.append(sample_features)

            return np.array(features, dtype=np.float32)

        except Exception as e:
            logger.error(f"Error extracting features from time series: {e}")
            # Fall back to zeros if extraction fails
            return np.zeros((X_ts.shape[0], self.feature_input_shape), dtype=np.float32)

    def save(self, filepath=None):
        """
        Save the model to disk.

        Args:
            filepath (str): Path to save the model

        Returns:
            str: Path where the model was saved
        """
        if self.model is None:
            raise ValueError("Model has not been built yet")

        if filepath is None:
            # Create a default filepath with timestamp
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            filepath = os.path.join(self.model_dir, f"transformer_model_{timestamp}.h5")

        self.model.save(filepath)
        logger.info(f"Model saved to {filepath}")
        return filepath

    def load(self, filepath):
        """
        Load a saved model from disk.

        Args:
            filepath (str): Path to the saved model

        Returns:
            The loaded model
        """
        # Register custom layers
        custom_objects = {
            'TransformerBlock': TransformerBlock,
            'PositionalEncoding': PositionalEncoding
        }

        self.model = load_model(filepath, custom_objects=custom_objects)
        logger.info(f"Model loaded from {filepath}")
        return self.model

    def plot_training_history(self):
        """
        Plot training history (loss and metrics).

        Returns:
            str: Path to the saved plot
        """
        if self.history is None:
            raise ValueError("Model has not been trained yet")

        plt.figure(figsize=(12, 5))

        # Plot loss
        plt.subplot(1, 2, 1)
        plt.plot(self.history.history['loss'], label='Training Loss')
        if 'val_loss' in self.history.history:
            plt.plot(self.history.history['val_loss'], label='Validation Loss')
        plt.title('Model Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()

        # Plot accuracy (or MAE for regression)
        plt.subplot(1, 2, 2)

        if 'accuracy' in self.history.history:
            plt.plot(self.history.history['accuracy'], label='Training Accuracy')
            if 'val_accuracy' in self.history.history:
                plt.plot(self.history.history['val_accuracy'], label='Validation Accuracy')
            plt.title('Model Accuracy')
            plt.ylabel('Accuracy')
        elif 'mae' in self.history.history:
            plt.plot(self.history.history['mae'], label='Training MAE')
            if 'val_mae' in self.history.history:
                plt.plot(self.history.history['val_mae'], label='Validation MAE')
            plt.title('Model MAE')
            plt.ylabel('MAE')

        plt.xlabel('Epoch')
        plt.legend()

        plt.tight_layout()

        # Save the figure
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        fig_path = os.path.join(self.model_dir, f"transformer_training_history_{timestamp}.png")
        plt.savefig(fig_path)
        plt.close()

        logger.info(f"Training history plot saved to {fig_path}")
        return fig_path
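
A minimal end-to-end sketch of the class above on synthetic data (illustrative only; dimensions and the tiny epoch count are chosen arbitrarily, and real usage would pass market data). It relies on the numpy import at the top of this module:

_tm = TransformerModel(ts_input_shape=(20, 5), feature_input_shape=64, output_size=3)
_tm.build_model(embed_dim=32, num_heads=4, ff_dim=64)

_X_ts = np.random.rand(128, 20, 5).astype(np.float32)   # (batch, timesteps, features)
_X_feat = np.random.rand(128, 64).astype(np.float32)    # (batch, feature_input_shape)
_y = np.random.randint(0, 3, size=128)                   # integer labels; train() one-hot encodes

_tm.train(_X_ts, _X_feat, _y, batch_size=16, epochs=2)   # tiny run, just to exercise the path
_pred, _proba = _tm.predict(_X_ts[:4], _X_feat[:4])
assert _pred.shape == (4,) and _proba.shape == (4, 3)    # argmax classes + softmax probabilities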


class MixtureOfExpertsModel:
    """
    Mixture of Experts (MoE) model.

    This model combines predictions from multiple expert models (such as CNN and Transformer)
    using a weighted ensemble approach.
    """

    def __init__(self, output_size=1, model_dir="NN/models/saved"):
        """
        Initialize the MoE model.

        Args:
            output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
            model_dir (str): Directory to save trained models
        """
        self.output_size = output_size
        self.model_dir = model_dir
        self.model = None
        self.history = None
        self.experts = {}

        # Create model directory if it doesn't exist
        os.makedirs(self.model_dir, exist_ok=True)

        logger.info(f"Initialized Mixture of Experts model with output size {output_size}")

    def add_expert(self, name, model):
        """
        Add an expert model to the MoE.

        Args:
            name (str): Name of the expert model
            model: The expert model instance

        Returns:
            None
        """
        self.experts[name] = model
        logger.info(f"Added expert model '{name}' to MoE")

    def build_model(self, ts_input_shape=(20, 5), expert_weights=None, learning_rate=0.001):
        """
        Build the MoE model by combining expert models.

        Args:
            ts_input_shape (tuple): Shape of time series input data
            expert_weights (dict): Weights for each expert model
            learning_rate (float): Learning rate for the Adam optimizer

        Returns:
            The compiled model
        """
        # Time series input
        ts_inputs = Input(shape=ts_input_shape, name="ts_input")

        # Additional feature input (from CNN)
        feature_inputs = Input(shape=(64,), name="feature_input")  # Default size for features

        # Process with each expert model
        expert_outputs = []
        expert_names = []

        for name, expert in self.experts.items():
            # Skip if the expert model is not valid
            if expert is None:
                logger.warning(f"Expert model '{name}' is None, skipping")
                continue

            try:
                # Different handling based on model type
                if name == 'cnn':
                    # CNN expert takes only the time series input
                    expert_output = expert(ts_inputs)
                    expert_outputs.append(expert_output)
                    expert_names.append(name)
                elif name == 'transformer':
                    # Transformer expert takes both time series and feature inputs
                    expert_output = expert([ts_inputs, feature_inputs])
                    expert_outputs.append(expert_output)
                    expert_names.append(name)
                else:
                    logger.warning(f"Unknown expert model type: {name}")
            except Exception as e:
                logger.error(f"Error adding expert '{name}': {str(e)}")

        if not expert_outputs:
            logger.error("No valid expert models found")
            return None

        # Use expert weighting
        if expert_weights is None:
            # Equal weighting
            weights = [1.0 / len(expert_outputs)] * len(expert_outputs)
        else:
            # User-provided weights
            weights = [expert_weights.get(name, 1.0 / len(expert_outputs)) for name in expert_names]
            # Normalize weights
            weights = [w / sum(weights) for w in weights]

        # Combine expert outputs using a weighted average
        if len(expert_outputs) == 1:
            # Only one expert, use its output directly
            combined_output = expert_outputs[0]
        else:
            # Multiple experts, compute the weighted average
            weighted_outputs = [output * weight for output, weight in zip(expert_outputs, weights)]
            combined_output = Add()(weighted_outputs)

        # Create the MoE model
        moe_model = Model(inputs=[ts_inputs, feature_inputs], outputs=combined_output)

        # Compile the model
        if self.output_size == 1:
            # Binary classification
            moe_model.compile(
                optimizer=Adam(learning_rate=learning_rate),
                loss='binary_crossentropy',
                metrics=['accuracy']
            )
        elif self.output_size == 3:
            # Multi-class classification for BUY/HOLD/SELL
            moe_model.compile(
                optimizer=Adam(learning_rate=learning_rate),
                loss='categorical_crossentropy',
                metrics=['accuracy']
            )
        else:
            # Regression
            moe_model.compile(
                optimizer=Adam(learning_rate=learning_rate),
                loss='mse',
                metrics=['mae']
            )

        self.model = moe_model

        # Log model summary
        self.model.summary(print_fn=lambda x: logger.info(x))

        logger.info(f"Built MoE model with weights: {weights}")
        return self.model

    def train(self, X_ts, X_features, y, batch_size=32, epochs=100, validation_split=0.2,
              callbacks=None, class_weights=None):
        """
        Train the MoE model on the provided data.

        Args:
            X_ts (numpy.ndarray): Time series input features
            X_features (numpy.ndarray): Additional input features
            y (numpy.ndarray): Target labels
            batch_size (int): Batch size
            epochs (int): Number of epochs
            validation_split (float): Fraction of data to use for validation
            callbacks (list): List of Keras callbacks
            class_weights (dict): Class weights for imbalanced datasets

        Returns:
            History object containing training metrics
        """
        if self.model is None:
            logger.error("MoE model has not been built yet")
            return None

        # Default callbacks if none provided
        if callbacks is None:
            # Create a timestamp for model checkpoints
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

            callbacks = [
                EarlyStopping(
                    monitor='val_loss',
                    patience=10,
                    restore_best_weights=True
                ),
                ReduceLROnPlateau(
                    monitor='val_loss',
                    factor=0.5,
                    patience=5,
                    min_lr=1e-6
                ),
                ModelCheckpoint(
                    filepath=os.path.join(self.model_dir, f"moe_model_{timestamp}.h5"),
                    monitor='val_loss',
                    save_best_only=True
                )
            ]

        # One-hot encode y for multi-class if it is still a 1-D label vector
        if self.output_size == 3 and len(y.shape) == 1:
            y = tf.keras.utils.to_categorical(y, num_classes=3)

        # Train the model
        logger.info(f"Training MoE model with {len(X_ts)} samples, batch size {batch_size}, epochs {epochs}")
        self.history = self.model.fit(
            [X_ts, X_features], y,
            batch_size=batch_size,
            epochs=epochs,
            validation_split=validation_split,
            callbacks=callbacks,
            class_weight=class_weights,
            verbose=2
        )

        # Save the trained model
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        model_path = os.path.join(self.model_dir, f"moe_model_final_{timestamp}.h5")
        self.model.save(model_path)
        logger.info(f"Model saved to {model_path}")

        # Save training history
        history_path = os.path.join(self.model_dir, f"moe_model_history_{timestamp}.json")
        with open(history_path, 'w') as f:
            # Convert numpy values to Python native types for JSON serialization
            history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
            json.dump(history_dict, f, indent=2)

        return self.history

    def predict(self, X_ts, X_features=None):
        """
        Make predictions on new data.

        Args:
            X_ts (numpy.ndarray): Time series input features
            X_features (numpy.ndarray): Additional input features

        Returns:
            tuple: (y_pred, y_proba) where:
                y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
                y_proba is the class probability
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Ensure X_ts has the right shape
        if len(X_ts.shape) == 2:
            # Single sample, add batch dimension
            X_ts = np.expand_dims(X_ts, axis=0)

        # Ensure X_features has the right shape
        if X_features is None:
            # Create dummy features with zeros
            X_features = np.zeros((X_ts.shape[0], 64))  # Default size
        elif len(X_features.shape) == 1:
            # Single sample, add batch dimension
            X_features = np.expand_dims(X_features, axis=0)

        # Get predictions
        y_proba = self.model.predict([X_ts, X_features])

        # Process based on output type
        if self.output_size == 1:
            # Binary classification
            y_pred = (y_proba > 0.5).astype(int).flatten()
            return y_pred, y_proba.flatten()
        elif self.output_size == 3:
            # Multi-class classification
            y_pred = np.argmax(y_proba, axis=1)
            return y_pred, y_proba
        else:
            # Regression
            return y_proba, y_proba

    def save(self, filepath=None):
        """
        Save the model to disk.

        Args:
            filepath (str): Path to save the model

        Returns:
            str: Path where the model was saved
        """
        if self.model is None:
            raise ValueError("Model has not been built yet")

        if filepath is None:
            # Create a default filepath with timestamp
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            filepath = os.path.join(self.model_dir, f"moe_model_{timestamp}.h5")

        self.model.save(filepath)
        logger.info(f"Model saved to {filepath}")
        return filepath

    def load(self, filepath):
        """
        Load a saved model from disk.

        Args:
            filepath (str): Path to the saved model

        Returns:
            The loaded model
        """
        # Register custom layers
        custom_objects = {
            'TransformerBlock': TransformerBlock,
            'PositionalEncoding': PositionalEncoding
        }

        self.model = load_model(filepath, custom_objects=custom_objects)
        logger.info(f"Model loaded from {filepath}")
        return self.model
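
A sketch of wiring the ensemble (illustrative; the expert passed in is the compiled Keras model produced by the class above, keyed by the name build_model() expects):

_transformer = TransformerModel(ts_input_shape=(20, 5), feature_input_shape=64, output_size=3)
_transformer.build_model()

_moe = MixtureOfExpertsModel(output_size=3)
_moe.add_expert('transformer', _transformer.model)   # 'transformer' experts get both inputs
_moe.build_model(ts_input_shape=(20, 5), expert_weights={'transformer': 1.0})

With several experts, the user-provided weights are normalized into a convex blend before the outputs are summed, e.g. {'cnn': 2.0, 'transformer': 1.0} becomes [2/3, 1/3].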

# Example usage:
if __name__ == "__main__":
    # This would be a complete implementation in a real system
    print("Transformer and MoE models defined, but not implemented here.")