new nn wip

Dobromir Popov 2025-03-25 13:38:25 +02:00
parent 50eb50696b
commit 0042581275
18 changed files with 3358 additions and 294 deletions

.gitignore (vendored, 1 line added)

@@ -14,3 +14,4 @@ models/trading_agent_final.pt
models/trading_agent_final.pt.backup
*.pt
*.backup
logs/

.vscode/tasks.json (vendored, new file, 38 lines)

@@ -0,0 +1,38 @@
{
"version": "2.0.0",
"tasks": [
{
"label": "Start TensorBoard",
"type": "shell",
"command": "python",
"args": [
"-m",
"tensorboard.main",
"--logdir=NN/models/saved/logs",
"--port=6006",
"--host=localhost"
],
"isBackground": true,
"problemMatcher": {
"pattern": {
"regexp": "^.*$",
"file": 1,
"location": 2,
"message": 3
},
"background": {
"activeOnStart": true,
"beginsPattern": ".*TensorBoard.*",
"endsPattern": ".*TensorBoard.*"
}
},
"presentation": {
"reveal": "always",
"panel": "new"
},
"runOptions": {
"runOn": "folderOpen"
}
}
]
}
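
For reference, the task above simply runs TensorBoard with the arguments listed in the JSON, so the same server can be started manually from the repository root:
python -m tensorboard.main --logdir=NN/models/saved/logs --port=6006 --host=localhost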

Binary file not shown.

Binary file not shown.

NN/_notes.md (new file, 13 lines)

@@ -0,0 +1,13 @@
Great. realtime.py works. Now let's examine and continue with our 500M-parameter NN in an NN folder with separate modules. The first module will be an ~100M-parameter convolutional NN, the kind historically used for image recognition with great success because it detects features at multiple levels (a deep NN). Create the NN class and an integrated RL pipeline that uses historical data to retrospectively identify buy/sell opportunities and trains the module on them. Use the data from realtime.py (add an easy-to-use realtime data interface if the existing functions are not convenient enough).
Create a new main file in the NN folder for our new MoE model. We'll use one main NN module that orchestrates data flows. The CNN module should implement its training and inference pipelines internally, but the orchestrator will receive the realtime data and forward it; use a common interface (see the interface sketch after the example commands below). A later module will be a Transformer that takes as input both the raw data from the last hidden layers of the CNN (where high-level features are learned) and the CNN's output, and produces BUY/HOLD/SELL signals as well as key support/resistance trend lines.
# Train a CNN model
python -m NN.main --mode train --symbol BTC/USDT --timeframes 1h 4h --model-type cnn --epochs 100
# Make predictions with a trained model
python -m NN.main --mode predict --symbol BTC/USDT --timeframe 1h --model-type cnn
# Run real-time analysis
python -m NN.main --mode realtime --symbol BTC/USDT --timeframe 1h --inference-interval 60
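
A minimal sketch of the common interface mentioned in the note above, assuming each module (CNN, Transformer, MoE) exposes the same train/predict/feature-extraction surface to the orchestrator. The class and method names are illustrative, not the final API:

```python
from abc import ABC, abstractmethod
from typing import Tuple
import numpy as np

class TradingModule(ABC):
    """Illustrative common interface the orchestrator could program against."""

    @abstractmethod
    def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
        """Fit the module on windowed market data."""

    @abstractmethod
    def predict(self, X) -> Tuple[np.ndarray, np.ndarray]:
        """Return (signals, probabilities); signals use 0=BUY, 1=HOLD, 2=SELL."""

    @abstractmethod
    def extract_hidden_features(self, X) -> np.ndarray:
        """Expose last-hidden-layer features for downstream modules (e.g. the Transformer)."""

    @abstractmethod
    def save(self, filepath): ...

    @abstractmethod
    def load(self, filepath): ...
```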

NN/main.py (new file, 265 lines)

@@ -0,0 +1,265 @@
#!/usr/bin/env python3
"""
Neural Network Trading System Main Module
This module serves as the main entry point for the NN trading system,
coordinating data flow between different components and implementing
training and inference pipelines.
"""
import os
import sys
import logging
import argparse
from datetime import datetime
# Create logs directory before configuring the file handler, otherwise FileHandler fails on a fresh checkout
os.makedirs('logs', exist_ok=True)
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.StreamHandler(),
logging.FileHandler(os.path.join('logs', f'nn_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'))
]
)
logger = logging.getLogger('NN')
def parse_arguments():
"""Parse command line arguments"""
parser = argparse.ArgumentParser(description='Neural Network Trading System')
parser.add_argument('--mode', type=str, choices=['train', 'predict', 'realtime'], default='train',
help='Mode to run (train, predict, realtime)')
parser.add_argument('--symbol', type=str, default='BTC/USDT',
help='Trading pair symbol')
parser.add_argument('--timeframes', type=str, nargs='+', default=['1h', '4h'],
help='Timeframes to use')
parser.add_argument('--window-size', type=int, default=20,
help='Window size for input data')
parser.add_argument('--output-size', type=int, default=3,
help='Output size (1 for binary, 3 for BUY/HOLD/SELL)')
parser.add_argument('--batch-size', type=int, default=32,
help='Batch size for training')
parser.add_argument('--epochs', type=int, default=100,
help='Number of epochs for training')
parser.add_argument('--model-type', type=str, choices=['cnn', 'transformer', 'moe'], default='cnn',
help='Model type to use')
parser.add_argument('--framework', type=str, choices=['tensorflow', 'pytorch'], default='pytorch',
help='Deep learning framework to use')
return parser.parse_args()
def main():
"""Main entry point for the NN trading system"""
# Parse arguments
args = parse_arguments()
logger.info(f"Starting NN Trading System in {args.mode} mode")
logger.info(f"Configuration: Symbol={args.symbol}, Timeframes={args.timeframes}, "
f"Window Size={args.window_size}, Output Size={args.output_size}, "
f"Model Type={args.model_type}, Framework={args.framework}")
# Import the appropriate modules based on the framework
if args.framework == 'pytorch':
try:
import torch
logger.info(f"Using PyTorch {torch.__version__}")
# Import PyTorch-based modules
from NN.utils.data_interface import DataInterface
if args.model_type == 'cnn':
from NN.models.cnn_model_pytorch import CNNModelPyTorch as Model
elif args.model_type == 'transformer':
from NN.models.transformer_model_pytorch import TransformerModelPyTorchWrapper as Model
elif args.model_type == 'moe':
from NN.models.transformer_model_pytorch import MixtureOfExpertsModelPyTorch as Model
else:
logger.error(f"Unknown model type: {args.model_type}")
return
except ImportError as e:
logger.error(f"Failed to import PyTorch modules: {str(e)}")
logger.error("Please make sure PyTorch is installed or use the TensorFlow framework.")
return
elif args.framework == 'tensorflow':
try:
import tensorflow as tf
logger.info(f"Using TensorFlow {tf.__version__}")
# Import TensorFlow-based modules
from NN.utils.data_interface import DataInterface
if args.model_type == 'cnn':
from NN.models.cnn_model import CNNModel as Model
elif args.model_type == 'transformer':
from NN.models.transformer_model import TransformerModel as Model
elif args.model_type == 'moe':
from NN.models.transformer_model import MixtureOfExpertsModel as Model
else:
logger.error(f"Unknown model type: {args.model_type}")
return
except ImportError as e:
logger.error(f"Failed to import TensorFlow modules: {str(e)}")
logger.error("Please make sure TensorFlow is installed or use the PyTorch framework.")
return
else:
logger.error(f"Unknown framework: {args.framework}")
return
# Initialize data interface
try:
logger.info("Initializing data interface...")
data_interface = DataInterface(
symbol=args.symbol,
timeframes=args.timeframes,
window_size=args.window_size,
output_size=args.output_size
)
except Exception as e:
logger.error(f"Failed to initialize data interface: {str(e)}")
return
# Initialize model
try:
logger.info(f"Initializing {args.model_type.upper()} model...")
model = Model(
window_size=args.window_size,
num_features=data_interface.get_feature_count(),
output_size=args.output_size,
timeframes=args.timeframes
)
except Exception as e:
logger.error(f"Failed to initialize model: {str(e)}")
return
# Execute the requested mode
if args.mode == 'train':
train(data_interface, model, args)
elif args.mode == 'predict':
predict(data_interface, model, args)
elif args.mode == 'realtime':
realtime(data_interface, model, args)
else:
logger.error(f"Unknown mode: {args.mode}")
return
logger.info("Neural Network Trading System finished successfully")
def train(data_interface, model, args):
"""Train the model using the data interface"""
logger.info("Starting training mode...")
try:
# Prepare training data
logger.info("Preparing training data...")
X_train, y_train, X_val, y_val = data_interface.prepare_training_data()
# Train the model
logger.info("Training model...")
model.train(
X_train, y_train,
X_val, y_val,
batch_size=args.batch_size,
epochs=args.epochs
)
# Save the model
model_path = os.path.join(
'models',
f"{args.model_type}_{args.symbol.replace('/', '_')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
)
logger.info(f"Saving model to {model_path}...")
model.save(model_path)
# Evaluate the model
logger.info("Evaluating model...")
metrics = model.evaluate(X_val, y_val)
logger.info(f"Evaluation metrics: {metrics}")
except Exception as e:
logger.error(f"Error in training mode: {str(e)}")
return
def predict(data_interface, model, args):
"""Make predictions using the trained model"""
logger.info("Starting prediction mode...")
try:
# Load the latest model
model_dir = os.path.join('models')
model_files = [f for f in os.listdir(model_dir) if f.startswith(args.model_type)]
if not model_files:
logger.error(f"No saved model found for type {args.model_type}")
return
latest_model = sorted(model_files)[-1]
model_path = os.path.join(model_dir, latest_model)
logger.info(f"Loading model from {model_path}...")
model.load(model_path)
# Prepare prediction data
logger.info("Preparing prediction data...")
X_pred = data_interface.prepare_prediction_data()
# Make predictions
logger.info("Making predictions...")
predictions = model.predict(X_pred)
# Process and display predictions
logger.info("Processing predictions...")
data_interface.process_predictions(predictions)
except Exception as e:
logger.error(f"Error in prediction mode: {str(e)}")
return
def realtime(data_interface, model, args):
"""Run the model in real-time mode"""
logger.info("Starting real-time mode...")
try:
# Import realtime module
from NN.realtime import RealtimeAnalyzer
# Load the latest model
model_dir = os.path.join('models')
model_files = [f for f in os.listdir(model_dir) if f.startswith(args.model_type)]
if not model_files:
logger.error(f"No saved model found for type {args.model_type}")
return
latest_model = sorted(model_files)[-1]
model_path = os.path.join(model_dir, latest_model)
logger.info(f"Loading model from {model_path}...")
model.load(model_path)
# Initialize realtime analyzer
logger.info("Initializing real-time analyzer...")
realtime_analyzer = RealtimeAnalyzer(
data_interface=data_interface,
model=model,
symbol=args.symbol,
timeframes=args.timeframes
)
# Start real-time analysis
logger.info("Starting real-time analysis...")
realtime_analyzer.start()
except Exception as e:
logger.error(f"Error in real-time mode: {str(e)}")
return
if __name__ == "__main__":
main()
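
main.py imports DataInterface from NN.utils.data_interface, which is not part of this diff. Inferring from the calls above, that module needs roughly the surface below; this is a hypothetical stub of the expected contract, not the actual implementation:

```python
class DataInterface:
    """Hypothetical stub mirroring the methods main.py calls on NN.utils.data_interface.DataInterface."""

    def __init__(self, symbol, timeframes, window_size=20, output_size=3):
        self.symbol = symbol
        self.timeframes = timeframes
        self.window_size = window_size
        self.output_size = output_size

    def get_feature_count(self) -> int:
        # e.g. OHLCV per timeframe; the real implementation derives this from its data source
        return 5 * len(self.timeframes)

    def prepare_training_data(self):
        # Must return (X_train, y_train, X_val, y_val) with X shaped (samples, window_size, features)
        raise NotImplementedError

    def prepare_prediction_data(self):
        # Must return X_pred shaped (samples, window_size, features) for model.predict()
        raise NotImplementedError

    def process_predictions(self, predictions):
        # Consume model outputs (e.g. log or display BUY/HOLD/SELL signals)
        raise NotImplementedError
```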

NN/models/cnn_model.py (new file, 560 lines)

@@ -0,0 +1,560 @@
"""
Convolutional Neural Network for timeseries analysis
This module implements a deep CNN model for cryptocurrency price analysis.
The model uses multiple parallel convolutional pathways and LSTM layers
to detect patterns at different time scales.
"""
import os
import logging
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
Input, Conv1D, MaxPooling1D, Dense, Dropout, BatchNormalization,
LSTM, Bidirectional, Flatten, Concatenate, GlobalAveragePooling1D,
LeakyReLU, Attention
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.metrics import AUC
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
import datetime
import json
logger = logging.getLogger(__name__)
class CNNModel:
"""
Convolutional Neural Network for time series analysis.
This model uses a multi-pathway architecture with different filter sizes
to detect patterns at different time scales, combined with LSTM layers
for temporal dependencies.
"""
def __init__(self, input_shape=(20, 5), output_size=1, model_dir="NN/models/saved"):
"""
Initialize the CNN model.
Args:
input_shape (tuple): Shape of input data (sequence_length, features)
output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
model_dir (str): Directory to save trained models
"""
self.input_shape = input_shape
self.output_size = output_size
self.model_dir = model_dir
self.model = None
self.history = None
# Create model directory if it doesn't exist
os.makedirs(self.model_dir, exist_ok=True)
logger.info(f"Initialized CNN model with input shape {input_shape} and output size {output_size}")
def build_model(self, filters=(32, 64, 128), kernel_sizes=(3, 5, 7),
dropout_rate=0.3, learning_rate=0.001):
"""
Build the CNN model architecture.
Args:
filters (tuple): Number of filters for each convolutional pathway
kernel_sizes (tuple): Kernel sizes for each convolutional pathway
dropout_rate (float): Dropout rate for regularization
learning_rate (float): Learning rate for Adam optimizer
Returns:
The compiled model
"""
# Input layer
inputs = Input(shape=self.input_shape)
# Multiple parallel convolutional pathways with different kernel sizes
# to capture patterns at different time scales
conv_layers = []
for i, (filter_size, kernel_size) in enumerate(zip(filters, kernel_sizes)):
conv_path = Conv1D(
filters=filter_size,
kernel_size=kernel_size,
padding='same',
name=f'conv1d_{i+1}'
)(inputs)
conv_path = BatchNormalization()(conv_path)
conv_path = LeakyReLU(alpha=0.1)(conv_path)
conv_path = MaxPooling1D(pool_size=2, padding='same')(conv_path)
conv_path = Dropout(dropout_rate)(conv_path)
conv_layers.append(conv_path)
# Merge convolutional pathways
if len(conv_layers) > 1:
merged = Concatenate()(conv_layers)
else:
merged = conv_layers[0]
# Add another Conv1D layer after merging
x = Conv1D(filters=filters[-1], kernel_size=3, padding='same')(merged)
x = BatchNormalization()(x)
x = LeakyReLU(alpha=0.1)(x)
x = MaxPooling1D(pool_size=2, padding='same')(x)
x = Dropout(dropout_rate)(x)
# Bidirectional LSTM for temporal dependencies
x = Bidirectional(LSTM(128, return_sequences=True))(x)
x = Dropout(dropout_rate)(x)
# Attention mechanism to focus on important time steps
x = Bidirectional(LSTM(64, return_sequences=True))(x)
# Global average pooling to reduce parameters
x = GlobalAveragePooling1D()(x)
x = Dropout(dropout_rate)(x)
# Dense layers for final classification/regression
x = Dense(64, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(dropout_rate)(x)
# Output layer
if self.output_size == 1:
# Binary classification (up/down)
outputs = Dense(1, activation='sigmoid', name='output')(x)
loss = 'binary_crossentropy'
metrics = ['accuracy', AUC()]
elif self.output_size == 3:
# Multi-class classification (buy/hold/sell)
outputs = Dense(3, activation='softmax', name='output')(x)
loss = 'categorical_crossentropy'
metrics = ['accuracy']
else:
# Regression
outputs = Dense(self.output_size, activation='linear', name='output')(x)
loss = 'mse'
metrics = ['mae']
# Create and compile model
self.model = Model(inputs=inputs, outputs=outputs)
# Compile with Adam optimizer
self.model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss=loss,
metrics=metrics
)
# Log model summary
self.model.summary(print_fn=lambda x: logger.info(x))
return self.model
def train(self, X_train, y_train, batch_size=32, epochs=100, validation_split=0.2,
callbacks=None, class_weights=None):
"""
Train the CNN model on the provided data.
Args:
X_train (numpy.ndarray): Training features
y_train (numpy.ndarray): Training targets
batch_size (int): Batch size
epochs (int): Number of epochs
validation_split (float): Fraction of data to use for validation
callbacks (list): List of Keras callbacks
class_weights (dict): Class weights for imbalanced datasets
Returns:
History object containing training metrics
"""
if self.model is None:
self.build_model()
# Default callbacks if none provided
if callbacks is None:
# Create a timestamp for model checkpoints
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
callbacks = [
EarlyStopping(
monitor='val_loss',
patience=10,
restore_best_weights=True
),
ReduceLROnPlateau(
monitor='val_loss',
factor=0.5,
patience=5,
min_lr=1e-6
),
ModelCheckpoint(
filepath=os.path.join(self.model_dir, f"cnn_model_{timestamp}.h5"),
monitor='val_loss',
save_best_only=True
)
]
# Check if y_train needs to be one-hot encoded for multi-class
if self.output_size == 3 and len(y_train.shape) == 1:
y_train = tf.keras.utils.to_categorical(y_train, num_classes=3)
# Train the model
logger.info(f"Training CNN model with {len(X_train)} samples, batch size {batch_size}, epochs {epochs}")
self.history = self.model.fit(
X_train, y_train,
batch_size=batch_size,
epochs=epochs,
validation_split=validation_split,
callbacks=callbacks,
class_weight=class_weights,
verbose=2
)
# Save the trained model
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
model_path = os.path.join(self.model_dir, f"cnn_model_final_{timestamp}.h5")
self.model.save(model_path)
logger.info(f"Model saved to {model_path}")
# Save training history
history_path = os.path.join(self.model_dir, f"cnn_model_history_{timestamp}.json")
with open(history_path, 'w') as f:
# Convert numpy values to Python native types for JSON serialization
history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
json.dump(history_dict, f, indent=2)
return self.history
def evaluate(self, X_test, y_test, plot_results=False):
"""
Evaluate the model on test data.
Args:
X_test (numpy.ndarray): Test features
y_test (numpy.ndarray): Test targets
plot_results (bool): Whether to plot evaluation results
Returns:
dict: Evaluation metrics
"""
if self.model is None:
raise ValueError("Model has not been built or trained yet")
# Convert y_test to one-hot encoding for multi-class
y_test_original = y_test.copy()
if self.output_size == 3 and len(y_test.shape) == 1:
y_test = tf.keras.utils.to_categorical(y_test, num_classes=3)
# Evaluate model
logger.info(f"Evaluating CNN model on {len(X_test)} samples")
eval_results = self.model.evaluate(X_test, y_test, verbose=0)
metrics = {}
for metric, value in zip(self.model.metrics_names, eval_results):
metrics[metric] = value
logger.info(f"{metric}: {value:.4f}")
# Get predictions
y_pred_prob = self.model.predict(X_test)
# Different processing based on output type
if self.output_size == 1:
# Binary classification
y_pred = (y_pred_prob > 0.5).astype(int).flatten()
# Classification report
report = classification_report(y_test, y_pred)
logger.info(f"Classification Report:\n{report}")
# Confusion matrix
cm = confusion_matrix(y_test, y_pred)
logger.info(f"Confusion Matrix:\n{cm}")
# ROC curve and AUC
fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
roc_auc = auc(fpr, tpr)
metrics['auc'] = roc_auc
if plot_results:
self._plot_binary_results(y_test, y_pred, y_pred_prob, fpr, tpr, roc_auc)
elif self.output_size == 3:
# Multi-class classification
y_pred = np.argmax(y_pred_prob, axis=1)
# Classification report
report = classification_report(y_test_original, y_pred)
logger.info(f"Classification Report:\n{report}")
# Confusion matrix
cm = confusion_matrix(y_test_original, y_pred)
logger.info(f"Confusion Matrix:\n{cm}")
if plot_results:
self._plot_multiclass_results(y_test_original, y_pred, y_pred_prob)
return metrics
def predict(self, X):
"""
Make predictions on new data.
Args:
X (numpy.ndarray): Input features
Returns:
tuple: (y_pred, y_proba) where:
y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
y_proba is the class probability
"""
if self.model is None:
raise ValueError("Model has not been built or trained yet")
# Ensure X has the right shape
if len(X.shape) == 2:
# Single sample, add batch dimension
X = np.expand_dims(X, axis=0)
# Get predictions
y_proba = self.model.predict(X)
# Process based on output type
if self.output_size == 1:
# Binary classification
y_pred = (y_proba > 0.5).astype(int).flatten()
return y_pred, y_proba.flatten()
elif self.output_size == 3:
# Multi-class classification
y_pred = np.argmax(y_proba, axis=1)
return y_pred, y_proba
else:
# Regression
return y_proba, y_proba
def save(self, filepath=None):
"""
Save the model to disk.
Args:
filepath (str): Path to save the model
Returns:
str: Path where the model was saved
"""
if self.model is None:
raise ValueError("Model has not been built yet")
if filepath is None:
# Create a default filepath with timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
filepath = os.path.join(self.model_dir, f"cnn_model_{timestamp}.h5")
self.model.save(filepath)
logger.info(f"Model saved to {filepath}")
return filepath
def load(self, filepath):
"""
Load a saved model from disk.
Args:
filepath (str): Path to the saved model
Returns:
The loaded model
"""
self.model = load_model(filepath)
logger.info(f"Model loaded from {filepath}")
return self.model
def extract_hidden_features(self, X):
"""
Extract features from the last hidden layer of the CNN for transfer learning.
Args:
X (numpy.ndarray): Input data
Returns:
numpy.ndarray: Extracted features
"""
if self.model is None:
raise ValueError("Model has not been built or trained yet")
# Create a new model that outputs the features from the layer before the output
feature_layer_name = self.model.layers[-2].name
feature_extractor = Model(
inputs=self.model.input,
outputs=self.model.get_layer(feature_layer_name).output
)
# Extract features
features = feature_extractor.predict(X)
return features
def _plot_binary_results(self, y_true, y_pred, y_proba, fpr, tpr, roc_auc):
"""
Plot evaluation results for binary classification.
Args:
y_true (numpy.ndarray): True labels
y_pred (numpy.ndarray): Predicted labels
y_proba (numpy.ndarray): Prediction probabilities
fpr (numpy.ndarray): False positive rates for ROC curve
tpr (numpy.ndarray): True positive rates for ROC curve
roc_auc (float): Area under ROC curve
"""
plt.figure(figsize=(15, 5))
# Confusion Matrix
plt.subplot(1, 3, 1)
cm = confusion_matrix(y_true, y_pred)
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = [0, 1]
plt.xticks(tick_marks, ['0', '1'])
plt.yticks(tick_marks, ['0', '1'])
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
# Add text annotations to confusion matrix
thresh = cm.max() / 2.
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
plt.text(j, i, format(cm[i, j], 'd'),
horizontalalignment="center",
color="white" if cm[i, j] > thresh else "black")
# Histogram of prediction probabilities
plt.subplot(1, 3, 2)
plt.hist(y_proba[y_true == 0], alpha=0.5, label='Class 0')
plt.hist(y_proba[y_true == 1], alpha=0.5, label='Class 1')
plt.title('Prediction Probabilities')
plt.xlabel('Probability of Class 1')
plt.ylabel('Count')
plt.legend()
# ROC Curve
plt.subplot(1, 3, 3)
plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc:.3f})')
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.tight_layout()
# Save figure
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
fig_path = os.path.join(self.model_dir, f"cnn_evaluation_{timestamp}.png")
plt.savefig(fig_path)
plt.close()
logger.info(f"Evaluation plots saved to {fig_path}")
def _plot_multiclass_results(self, y_true, y_pred, y_proba):
"""
Plot evaluation results for multi-class classification.
Args:
y_true (numpy.ndarray): True labels
y_pred (numpy.ndarray): Predicted labels
y_proba (numpy.ndarray): Prediction probabilities
"""
plt.figure(figsize=(12, 5))
# Confusion Matrix
plt.subplot(1, 2, 1)
cm = confusion_matrix(y_true, y_pred)
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
classes = ['BUY', 'HOLD', 'SELL'] # Assumes classes are 0, 1, 2
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes)
plt.yticks(tick_marks, classes)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
# Add text annotations to confusion matrix
thresh = cm.max() / 2.
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
plt.text(j, i, format(cm[i, j], 'd'),
horizontalalignment="center",
color="white" if cm[i, j] > thresh else "black")
# Class probability distributions
plt.subplot(1, 2, 2)
for i, cls in enumerate(classes):
plt.hist(y_proba[y_true == i, i], alpha=0.5, label=f'Class {cls}')
plt.title('Class Probability Distributions')
plt.xlabel('Probability')
plt.ylabel('Count')
plt.legend()
plt.tight_layout()
# Save figure
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
fig_path = os.path.join(self.model_dir, f"cnn_multiclass_evaluation_{timestamp}.png")
plt.savefig(fig_path)
plt.close()
logger.info(f"Multiclass evaluation plots saved to {fig_path}")
def plot_training_history(self):
"""
Plot training history (loss and metrics).
Returns:
str: Path to the saved plot
"""
if self.history is None:
raise ValueError("Model has not been trained yet")
plt.figure(figsize=(12, 5))
# Plot loss
plt.subplot(1, 2, 1)
plt.plot(self.history.history['loss'], label='Training Loss')
if 'val_loss' in self.history.history:
plt.plot(self.history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
# Plot accuracy
plt.subplot(1, 2, 2)
if 'accuracy' in self.history.history:
plt.plot(self.history.history['accuracy'], label='Training Accuracy')
if 'val_accuracy' in self.history.history:
plt.plot(self.history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
elif 'mae' in self.history.history:
plt.plot(self.history.history['mae'], label='Training MAE')
if 'val_mae' in self.history.history:
plt.plot(self.history.history['val_mae'], label='Validation MAE')
plt.title('Model MAE')
plt.ylabel('MAE')
plt.xlabel('Epoch')
plt.legend()
plt.tight_layout()
# Save figure
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
fig_path = os.path.join(self.model_dir, f"cnn_training_history_{timestamp}.png")
plt.savefig(fig_path)
plt.close()
logger.info(f"Training history plot saved to {fig_path}")
return fig_path
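
A minimal usage sketch of the Keras CNNModel above, using random placeholder data purely to illustrate the call sequence (build, train, predict, feature extraction); shapes follow the default input_shape=(20, 5):

```python
import numpy as np
from NN.models.cnn_model import CNNModel

# Placeholder data: 256 windows of 20 timesteps x 5 features, 3-class labels (BUY/HOLD/SELL)
X = np.random.rand(256, 20, 5).astype("float32")
y = np.random.randint(0, 3, size=256)

model = CNNModel(input_shape=(20, 5), output_size=3)
model.build_model(filters=(32, 64, 128), kernel_sizes=(3, 5, 7), dropout_rate=0.3)
model.train(X, y, batch_size=32, epochs=5, validation_split=0.2)

signals, probabilities = model.predict(X[:8])   # class indices and per-class probabilities
hidden = model.extract_hidden_features(X[:8])   # features for a downstream Transformer/MoE module
```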

NN/models/cnn_model_pytorch.py (new file, 546 lines)

@@ -0,0 +1,546 @@
#!/usr/bin/env python3
"""
CNN Model - PyTorch Implementation
This module implements a CNN model using PyTorch for time series analysis.
The model consists of multiple convolutional pathways and LSTM layers.
"""
import os
import logging
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Configure logging
logger = logging.getLogger(__name__)
class CNNPyTorch(nn.Module):
"""PyTorch CNN model for time series analysis"""
def __init__(self, input_shape, output_size=3):
"""
Initialize the CNN model.
Args:
input_shape (tuple): Shape of input data (window_size, features)
output_size (int): Size of output (1 for regression, 3 for classification)
"""
super(CNNPyTorch, self).__init__()
window_size, num_features = input_shape
# Architecture parameters
filters = [32, 64, 128]
kernel_sizes = [3, 5, 7]
lstm_units = 100
dense_units = 64
dropout_rate = 0.3
# Create parallel convolutional pathways
self.conv_paths = nn.ModuleList()
for f, k in zip(filters, kernel_sizes):
path = nn.Sequential(
nn.Conv1d(num_features, f, kernel_size=k, padding='same'),
nn.ReLU(),
nn.BatchNorm1d(f),
nn.MaxPool1d(kernel_size=3, stride=1, padding=1),  # length-preserving pooling so the flattened LSTM output matches dense1
nn.Dropout(dropout_rate)
)
self.conv_paths.append(path)
# Calculate output size from conv paths
conv_output_size = sum(filters) * window_size
# LSTM layer
self.lstm = nn.LSTM(
input_size=sum(filters),
hidden_size=lstm_units,
batch_first=True,
bidirectional=True
)
# Dense layers
self.flatten = nn.Flatten()
self.dense1 = nn.Sequential(
nn.Linear(lstm_units * 2 * window_size, dense_units),
nn.ReLU(),
nn.BatchNorm1d(dense_units),
nn.Dropout(dropout_rate)
)
# Output layer
self.output = nn.Linear(dense_units, output_size)
# Activation based on output size
if output_size == 1:
self.activation = nn.Sigmoid() # Binary classification or regression
elif output_size > 1:
self.activation = nn.Softmax(dim=1) # Multi-class classification
else:
self.activation = nn.Identity() # No activation
def forward(self, x):
"""
Forward pass through the network.
Args:
x: Input tensor of shape [batch_size, window_size, features]
Returns:
Output tensor of shape [batch_size, output_size]
"""
batch_size, window_size, num_features = x.shape
# Transpose for conv1d: [batch, features, window]
x_t = x.transpose(1, 2)
# Process through parallel conv paths
conv_outputs = []
for path in self.conv_paths:
conv_outputs.append(path(x_t))
# Concatenate conv outputs
conv_concat = torch.cat(conv_outputs, dim=1)
# Transpose back for LSTM: [batch, window, features]
conv_concat = conv_concat.transpose(1, 2)
# LSTM processing
lstm_out, _ = self.lstm(conv_concat)
# Flatten
flattened = self.flatten(lstm_out)
# Dense processing
dense_out = self.dense1(flattened)
# Output
output = self.output(dense_out)
# Apply activation
return self.activation(output)
class CNNModelPyTorch:
"""
CNN model wrapper class for time series analysis using PyTorch.
This class provides methods for building, training, evaluating, and making
predictions with the CNN model.
"""
def __init__(self, window_size, num_features, output_size=3, timeframes=None):
"""
Initialize the CNN model.
Args:
window_size (int): Size of the input window
num_features (int): Number of features in the input data
output_size (int): Size of the output (1 for regression, 3 for classification)
timeframes (list): List of timeframes used (for logging)
"""
self.window_size = window_size
self.num_features = num_features
self.output_size = output_size
self.timeframes = timeframes or []
# Determine device (GPU or CPU)
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device: {self.device}")
# Initialize model
self.model = None
self.build_model()
# Initialize training history
self.history = {
'loss': [],
'val_loss': [],
'accuracy': [],
'val_accuracy': []
}
def build_model(self):
"""Build the CNN model architecture"""
logger.info(f"Building PyTorch CNN model with window_size={self.window_size}, "
f"num_features={self.num_features}, output_size={self.output_size}")
self.model = CNNPyTorch(
input_shape=(self.window_size, self.num_features),
output_size=self.output_size
).to(self.device)
# Initialize optimizer
self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
# Initialize loss function based on output size
if self.output_size == 1:
self.criterion = nn.BCELoss() # Binary classification
elif self.output_size > 1:
self.criterion = nn.CrossEntropyLoss() # Multi-class classification
else:
self.criterion = nn.MSELoss() # Regression
logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")
def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
"""
Train the CNN model.
Args:
X_train: Training input data
y_train: Training target data
X_val: Validation input data
y_val: Validation target data
batch_size: Batch size for training
epochs: Number of training epochs
Returns:
Training history
"""
logger.info(f"Training PyTorch CNN model with {len(X_train)} samples, "
f"batch_size={batch_size}, epochs={epochs}")
# Convert numpy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)
# Handle different output sizes for y_train
if self.output_size == 1:
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(self.device)
else:
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)
# Create DataLoader for training data
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Create DataLoader for validation data if provided
if X_val is not None and y_val is not None:
X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
if self.output_size == 1:
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
else:
y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
else:
val_loader = None
# Training loop
for epoch in range(epochs):
# Training phase
self.model.train()
running_loss = 0.0
correct = 0
total = 0
for inputs, targets in train_loader:
# Zero the parameter gradients
self.optimizer.zero_grad()
# Forward pass
outputs = self.model(inputs)
# Calculate loss
if self.output_size == 1:
loss = self.criterion(outputs, targets.unsqueeze(1))
else:
loss = self.criterion(outputs, targets)
# Backward pass and optimize
loss.backward()
self.optimizer.step()
# Statistics
running_loss += loss.item()
if self.output_size > 1:
_, predicted = torch.max(outputs, 1)
total += targets.size(0)
correct += (predicted == targets).sum().item()
epoch_loss = running_loss / len(train_loader)
epoch_acc = correct / total if total > 0 else 0
# Validation phase
if val_loader is not None:
val_loss, val_acc = self._validate(val_loader)
logger.info(f"Epoch {epoch+1}/{epochs} - "
f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")
# Update history
self.history['loss'].append(epoch_loss)
self.history['accuracy'].append(epoch_acc)
self.history['val_loss'].append(val_loss)
self.history['val_accuracy'].append(val_acc)
else:
logger.info(f"Epoch {epoch+1}/{epochs} - "
f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f}")
# Update history without validation
self.history['loss'].append(epoch_loss)
self.history['accuracy'].append(epoch_acc)
logger.info("Training completed")
return self.history
def _validate(self, val_loader):
"""Validate the model using the validation set"""
self.model.eval()
val_loss = 0.0
correct = 0
total = 0
with torch.no_grad():
for inputs, targets in val_loader:
# Forward pass
outputs = self.model(inputs)
# Calculate loss
if self.output_size == 1:
loss = self.criterion(outputs, targets.unsqueeze(1))
else:
loss = self.criterion(outputs, targets)
val_loss += loss.item()
# Calculate accuracy
if self.output_size > 1:
_, predicted = torch.max(outputs, 1)
total += targets.size(0)
correct += (predicted == targets).sum().item()
return val_loss / len(val_loader), correct / total if total > 0 else 0
def evaluate(self, X_test, y_test):
"""
Evaluate the model on test data.
Args:
X_test: Test input data
y_test: Test target data
Returns:
dict: Evaluation metrics
"""
logger.info(f"Evaluating model on {len(X_test)} samples")
# Convert to PyTorch tensors
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(self.device)
# Get predictions
self.model.eval()
with torch.no_grad():
y_pred = self.model(X_test_tensor)
if self.output_size > 1:
_, y_pred_class = torch.max(y_pred, 1)
y_pred_class = y_pred_class.cpu().numpy()
else:
y_pred_class = (y_pred.cpu().numpy() > 0.5).astype(int).flatten()
# Calculate metrics
if self.output_size > 1:
accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class, average='weighted')
recall = recall_score(y_test, y_pred_class, average='weighted')
f1 = f1_score(y_test, y_pred_class, average='weighted')
metrics = {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1_score': f1
}
else:
accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class)
recall = recall_score(y_test, y_pred_class)
f1 = f1_score(y_test, y_pred_class)
metrics = {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1_score': f1
}
logger.info(f"Evaluation metrics: {metrics}")
return metrics
def predict(self, X):
"""
Make predictions with the model.
Args:
X: Input data
Returns:
Predictions
"""
# Convert to PyTorch tensor
X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
# Get predictions
self.model.eval()
with torch.no_grad():
predictions = self.model(X_tensor)
if self.output_size > 1:
# Multi-class classification
probs = predictions.cpu().numpy()
_, class_preds = torch.max(predictions, 1)
class_preds = class_preds.cpu().numpy()
return class_preds, probs
else:
# Binary classification or regression
preds = predictions.cpu().numpy()
if self.output_size == 1:
# Binary classification
class_preds = (preds > 0.5).astype(int)
return class_preds.flatten(), preds.flatten()
else:
# Regression
return preds.flatten(), None
def save(self, filepath):
"""
Save the model to a file.
Args:
filepath: Path to save the model
"""
# Create directory if it doesn't exist
os.makedirs(os.path.dirname(filepath), exist_ok=True)
# Save the model state
model_state = {
'model_state_dict': self.model.state_dict(),
'optimizer_state_dict': self.optimizer.state_dict(),
'history': self.history,
'window_size': self.window_size,
'num_features': self.num_features,
'output_size': self.output_size,
'timeframes': self.timeframes
}
torch.save(model_state, f"{filepath}.pt")
logger.info(f"Model saved to {filepath}.pt")
def load(self, filepath):
"""
Load the model from a file.
Args:
filepath: Path to load the model from
"""
# Check if file exists
if not os.path.exists(f"{filepath}.pt"):
logger.error(f"Model file {filepath}.pt not found")
return False
# Load the model state
model_state = torch.load(f"{filepath}.pt", map_location=self.device)
# Update model parameters
self.window_size = model_state['window_size']
self.num_features = model_state['num_features']
self.output_size = model_state['output_size']
self.timeframes = model_state['timeframes']
# Rebuild the model
self.build_model()
# Load the model state
self.model.load_state_dict(model_state['model_state_dict'])
self.optimizer.load_state_dict(model_state['optimizer_state_dict'])
self.history = model_state['history']
logger.info(f"Model loaded from {filepath}.pt")
return True
def plot_training_history(self):
"""Plot the training history"""
if not self.history['loss']:
logger.warning("No training history to plot")
return
plt.figure(figsize=(12, 4))
# Plot loss
plt.subplot(1, 2, 1)
plt.plot(self.history['loss'], label='Training Loss')
if 'val_loss' in self.history and self.history['val_loss']:
plt.plot(self.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()
# Plot accuracy
plt.subplot(1, 2, 2)
plt.plot(self.history['accuracy'], label='Training Accuracy')
if 'val_accuracy' in self.history and self.history['val_accuracy']:
plt.plot(self.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend()
# Save the plot
os.makedirs('plots', exist_ok=True)
plt.savefig(os.path.join('plots', f"cnn_history_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"))
plt.close()
logger.info("Training history plots saved to plots directory")
def extract_hidden_features(self, X):
"""
Extract hidden features from the model.
Args:
X: Input data
Returns:
Hidden features
"""
# Convert to PyTorch tensor
X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
# Forward pass through the model up to the last hidden layer
self.model.eval()
with torch.no_grad():
# Get features before the output layer
x_t = X_tensor.transpose(1, 2)
# Process through parallel conv paths
conv_outputs = []
for path in self.model.conv_paths:
conv_outputs.append(path(x_t))
# Concatenate conv outputs
conv_concat = torch.cat(conv_outputs, dim=1)
# Transpose back for LSTM
conv_concat = conv_concat.transpose(1, 2)
# LSTM processing
lstm_out, _ = self.model.lstm(conv_concat)
# Flatten
flattened = self.model.flatten(lstm_out)
# Dense processing
hidden_features = self.model.dense1(flattened)
return hidden_features.cpu().numpy()
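
Equivalently, a usage sketch of the PyTorch wrapper above, again with random placeholder data; the constructor arguments mirror how NN/main.py instantiates the model:

```python
import numpy as np
from NN.models.cnn_model_pytorch import CNNModelPyTorch

# Placeholder data: 256 windows of 20 timesteps x 5 features, 3-class labels
X = np.random.rand(256, 20, 5).astype(np.float32)
y = np.random.randint(0, 3, size=256)

model = CNNModelPyTorch(window_size=20, num_features=5, output_size=3, timeframes=["1h", "4h"])
history = model.train(X[:200], y[:200], X_val=X[200:], y_val=y[200:], batch_size=32, epochs=5)

metrics = model.evaluate(X[200:], y[200:])      # accuracy / precision / recall / f1
signals, probabilities = model.predict(X[:8])
model.save("NN/models/saved/cnn_pytorch_demo")  # written as cnn_pytorch_demo.pt
```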

NN/models/transformer_model.py (modified)

@@ -1,45 +1,38 @@
"""
Transformer Neural Network for timeseries analysis
This module implements a Transformer model with attention mechanisms for cryptocurrency price analysis.
It also includes a Mixture of Experts model that combines predictions from multiple models.
"""
import os
import sys
import logging
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
Input, Dense, Dropout, LayerNormalization, MultiHeadAttention,
GlobalAveragePooling1D, Concatenate, Add, Activation, Flatten
Input, Dense, Dropout, BatchNormalization,
Concatenate, Layer, LayerNormalization, MultiHeadAttention,
Add, GlobalAveragePooling1D, Conv1D, Reshape
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import (
EarlyStopping, ModelCheckpoint, ReduceLROnPlateau,
TensorBoard, CSVLogger
)
import matplotlib.pyplot as plt
import logging
import time
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import datetime
import json
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.StreamHandler(),
logging.FileHandler('nn_transformer_model.log')
]
)
logger = logging.getLogger(__name__)
logger = logging.getLogger('transformer_model')
class TransformerBlock(tf.keras.layers.Layer):
class TransformerBlock(Layer):
"""
Transformer block with multi-head self-attention and feed-forward network
Transformer block implementation with multi-head attention and feed-forward networks.
"""
def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
super(TransformerBlock, self).__init__()
self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
self.ffn = tf.keras.Sequential([
Dense(ff_dim, activation="relu"),
Dense(embed_dim)
Dense(embed_dim),
])
self.layernorm1 = LayerNormalization(epsilon=1e-6)
self.layernorm2 = LayerNormalization(epsilon=1e-6)
@@ -47,33 +40,86 @@ class TransformerBlock(tf.keras.layers.Layer):
self.dropout2 = Dropout(rate)
def call(self, inputs, training=False):
# Normalization and attention
attn_output = self.att(inputs, inputs)
attn_output = self.dropout1(attn_output, training=training)
out1 = self.layernorm1(inputs + attn_output)
# Feed-forward network
ffn_output = self.ffn(out1)
ffn_output = self.dropout2(ffn_output, training=training)
# Skip connection and normalization
return self.layernorm2(out1 + ffn_output)
def get_config(self):
config = super().get_config()
config.update({
'att': self.att,
'ffn': self.ffn,
'layernorm1': self.layernorm1,
'layernorm2': self.layernorm2,
'dropout1': self.dropout1,
'dropout2': self.dropout2
})
return config
class PositionalEncoding(Layer):
"""
Positional encoding layer to add position information to input embeddings.
"""
def __init__(self, position, d_model):
super(PositionalEncoding, self).__init__()
self.position = position
self.d_model = d_model
self.pos_encoding = self.positional_encoding(position, d_model)
def get_angles(self, position, i, d_model):
angles = 1 / tf.pow(10000, (2 * (i // 2)) / tf.cast(d_model, tf.float32))
return position * angles
def positional_encoding(self, position, d_model):
angle_rads = self.get_angles(
position=tf.range(position, dtype=tf.float32)[:, tf.newaxis],
i=tf.range(d_model, dtype=tf.float32)[tf.newaxis, :],
d_model=d_model
)
# Apply sin to even indices in the array
sines = tf.math.sin(angle_rads[:, 0::2])
# Apply cos to odd indices in the array
cosines = tf.math.cos(angle_rads[:, 1::2])
pos_encoding = tf.concat([sines, cosines], axis=-1)
pos_encoding = pos_encoding[tf.newaxis, ...]
return tf.cast(pos_encoding, tf.float32)
def call(self, inputs):
return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]
def get_config(self):
config = super().get_config()
config.update({
'position': self.position,
'd_model': self.d_model,
'pos_encoding': self.pos_encoding
})
return config
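# Note: PositionalEncoding implements the standard sinusoidal scheme,
#   PE(pos, 2i)   = sin(pos / 10000^(2i/d_model))
#   PE(pos, 2i+1) = cos(pos / 10000^(2i/d_model))
# except that the sine and cosine halves are concatenated along the feature axis
# rather than interleaved, a common variant with the same effect of giving each
# time step a distinct, smoothly varying code that is added to the input projection.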
class TransformerModel:
"""
Transformer-based model for financial time series analysis.
This model processes both raw time series data and high-level features from the CNN model.
Transformer Neural Network for time series analysis.
This model uses self-attention mechanisms to capture relationships between
different time points in the input data.
"""
def __init__(self, ts_input_shape=(20, 5), feature_input_shape=128, output_size=3, model_dir='NN/models/saved'):
def __init__(self, ts_input_shape=(20, 5), feature_input_shape=64, output_size=1, model_dir="NN/models/saved"):
"""
Initialize the Transformer model
Initialize the Transformer model.
Args:
ts_input_shape: Shape of time series input data (sequence_length, features)
feature_input_shape: Shape of high-level feature input (from CNN)
output_size: Number of output classes or values
model_dir: Directory to save model files
ts_input_shape (tuple): Shape of time series input data (sequence_length, features)
feature_input_shape (int): Shape of additional feature input (e.g., from CNN)
output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
model_dir (str): Directory to save trained models
"""
self.ts_input_shape = ts_input_shape
self.feature_input_shape = feature_input_shape
@@ -83,341 +129,418 @@ class TransformerModel:
self.history = None
# Create model directory if it doesn't exist
os.makedirs(model_dir, exist_ok=True)
os.makedirs(self.model_dir, exist_ok=True)
logger.info(f"Initialized TransformerModel with time series input shape {ts_input_shape}, "
logger.info(f"Initialized Transformer model with TS input shape {ts_input_shape}, "
f"feature input shape {feature_input_shape}, and output size {output_size}")
def build_model(self, embed_dim=64, num_heads=4, ff_dim=128, num_transformer_blocks=2,
dropout_rate=0.2, learning_rate=0.001):
def build_model(self, embed_dim=32, num_heads=4, ff_dim=64, num_transformer_blocks=2, dropout_rate=0.1, learning_rate=0.001):
"""
Build the Transformer model architecture
Build the Transformer model architecture.
Args:
embed_dim: Embedding dimension for the transformer
num_heads: Number of attention heads
ff_dim: Hidden layer size in the feed-forward network
num_transformer_blocks: Number of transformer blocks to stack
dropout_rate: Dropout rate for regularization
learning_rate: Learning rate for the optimizer
embed_dim (int): Embedding dimension for transformer
num_heads (int): Number of attention heads
ff_dim (int): Hidden dimension of the feed forward network
num_transformer_blocks (int): Number of transformer blocks
dropout_rate (float): Dropout rate for regularization
learning_rate (float): Learning rate for Adam optimizer
Returns:
Compiled Keras model
The compiled model
"""
# Time series input (price and volume data)
ts_inputs = Input(shape=self.ts_input_shape, name='time_series_input')
# Time series input
ts_inputs = Input(shape=self.ts_input_shape, name="ts_input")
# High-level feature input (from CNN or other sources)
feature_inputs = Input(shape=(self.feature_input_shape,), name='feature_input')
# Additional feature input (e.g., from CNN)
feature_inputs = Input(shape=(self.feature_input_shape,), name="feature_input")
# Process time series with transformer blocks
x = ts_inputs
# Process time series with transformer
# First, project the input to the embedding dimension
x = Conv1D(embed_dim, 1, activation="relu")(ts_inputs)
# Add positional encoding
x = PositionalEncoding(self.ts_input_shape[0], embed_dim)(x)
# Add transformer blocks
for _ in range(num_transformer_blocks):
x = TransformerBlock(embed_dim, num_heads, ff_dim, dropout_rate)(x)
# Global pooling to get fixed-size representation
# Global pooling to get a single vector representation
x = GlobalAveragePooling1D()(x)
x = Dropout(dropout_rate)(x)
# Combine with the high-level features
# Combine with additional features
combined = Concatenate()([x, feature_inputs])
# Dense layers
dense1 = Dense(128, activation='relu')(combined)
dropout1 = Dropout(dropout_rate)(dense1)
dense2 = Dense(64, activation='relu')(dropout1)
dropout2 = Dropout(dropout_rate)(dense2)
# Dense layers for final classification/regression
x = Dense(64, activation="relu")(combined)
x = BatchNormalization()(x)
x = Dropout(dropout_rate)(x)
# Output layer
if self.output_size == 1:
# Binary classification
outputs = Dense(1, activation='sigmoid')(dropout2)
# Binary classification (up/down)
outputs = Dense(1, activation='sigmoid', name='output')(x)
loss = 'binary_crossentropy'
metrics = ['accuracy']
elif self.output_size == 3:
# For BUY/HOLD/SELL signals (3 classes)
outputs = Dense(3, activation='softmax')(dropout2)
else:
# Regression or multi-class classification
outputs = Dense(self.output_size, activation='linear')(dropout2)
# Create and compile the model
model = Model(inputs=[ts_inputs, feature_inputs], outputs=outputs)
if self.output_size == 1:
# Binary classification
model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss='binary_crossentropy',
metrics=['accuracy']
)
elif self.output_size == 3:
# Multi-class classification for BUY/HOLD/SELL
model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss='categorical_crossentropy',
metrics=['accuracy']
)
# Multi-class classification (buy/hold/sell)
outputs = Dense(3, activation='softmax', name='output')(x)
loss = 'categorical_crossentropy'
metrics = ['accuracy']
else:
# Regression
model.compile(
outputs = Dense(self.output_size, activation='linear', name='output')(x)
loss = 'mse'
metrics = ['mae']
# Create and compile model
self.model = Model(inputs=[ts_inputs, feature_inputs], outputs=outputs)
# Compile with Adam optimizer
self.model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss='mse',
metrics=['mae']
loss=loss,
metrics=metrics
)
self.model = model
logger.info(f"Model built with {model.count_params()} parameters")
model.summary(print_fn=logger.info)
# Log model summary
self.model.summary(print_fn=lambda x: logger.info(x))
return model
return self.model
def train(self, X_ts, X_features, y, batch_size=32, epochs=100, validation_split=0.2,
early_stopping_patience=20, reduce_lr_patience=10, verbose=1):
callbacks=None, class_weights=None):
"""
Train the Transformer model
Train the Transformer model on the provided data.
Args:
X_ts: Time series input data
X_features: High-level feature input data
y: Target values
batch_size: Batch size for training
epochs: Maximum number of epochs
validation_split: Fraction of data to use for validation
early_stopping_patience: Patience for early stopping
reduce_lr_patience: Patience for learning rate reduction
verbose: Verbosity level
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
y (numpy.ndarray): Target labels
batch_size (int): Batch size
epochs (int): Number of epochs
validation_split (float): Fraction of data to use for validation
callbacks (list): List of Keras callbacks
class_weights (dict): Class weights for imbalanced datasets
Returns:
Training history
History object containing training metrics
"""
if self.model is None:
logger.warning("Model not built yet, building with default parameters")
self.build_model()
# Create a timestamp for this training run
# Default callbacks if none provided
if callbacks is None:
# Create a timestamp for model checkpoints
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
model_name = f"transformer_model_{timestamp}"
# Set up callbacks
callbacks = [
# Early stopping to prevent overfitting
EarlyStopping(
monitor='val_loss',
patience=early_stopping_patience,
restore_best_weights=True,
verbose=1
patience=10,
restore_best_weights=True
),
# Reduce learning rate when training plateaus
ReduceLROnPlateau(
monitor='val_loss',
factor=0.5,
patience=reduce_lr_patience,
min_lr=1e-6,
verbose=1
patience=5,
min_lr=1e-6
),
# Save the best model
ModelCheckpoint(
filepath=os.path.join(self.model_dir, f"{model_name}_best.h5"),
filepath=os.path.join(self.model_dir, f"transformer_model_{timestamp}.h5"),
monitor='val_loss',
save_best_only=True,
verbose=1
),
# TensorBoard logging
TensorBoard(
log_dir=os.path.join(self.model_dir, 'logs', model_name),
histogram_freq=1
),
# CSV logging
CSVLogger(
filename=os.path.join(self.model_dir, f"{model_name}_training.csv"),
separator=',',
append=False
save_best_only=True
)
]
# Train the model
logger.info(f"Starting training with {len(X_ts)} samples, {epochs} max epochs")
# Check if y needs to be one-hot encoded for multi-class
if self.output_size == 3 and len(y.shape) == 1:
y = tf.keras.utils.to_categorical(y, num_classes=3)
start_time = time.time()
history = self.model.fit(
# Train the model
logger.info(f"Training Transformer model with {len(X_ts)} samples, batch size {batch_size}, epochs {epochs}")
self.history = self.model.fit(
[X_ts, X_features], y,
batch_size=batch_size,
epochs=epochs,
validation_split=validation_split,
callbacks=callbacks,
verbose=verbose
class_weight=class_weights,
verbose=2
)
# Calculate training time
training_time = time.time() - start_time
logger.info(f"Training completed in {training_time:.2f} seconds")
# Save the final model
self.model.save(os.path.join(self.model_dir, f"{model_name}_final.h5"))
logger.info(f"Model saved to {os.path.join(self.model_dir, model_name + '_final.h5')}")
# Save the trained model
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
model_path = os.path.join(self.model_dir, f"transformer_model_final_{timestamp}.h5")
self.model.save(model_path)
logger.info(f"Model saved to {model_path}")
# Save training history
hist_df = pd.DataFrame(history.history)
hist_df.to_csv(os.path.join(self.model_dir, f"{model_name}_history.csv"), index=False)
history_path = os.path.join(self.model_dir, f"transformer_model_history_{timestamp}.json")
with open(history_path, 'w') as f:
# Convert numpy values to Python native types for JSON serialization
history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
json.dump(history_dict, f, indent=2)
self.history = history
return history
return self.history
def predict(self, X_ts, X_features, threshold=0.5):
def evaluate(self, X_ts, X_features, y):
"""
Make predictions with the model
Evaluate the model on test data.
Args:
X_ts: Time series input data
X_features: High-level feature input data
threshold: Threshold for binary classification
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
y (numpy.ndarray): Target labels
Returns:
Predicted values or classes
dict: Evaluation metrics
"""
if self.model is None:
logger.error("Model not built or trained yet")
return None
raise ValueError("Model has not been built or trained yet")
# Get raw predictions
y_pred_proba = self.model.predict([X_ts, X_features])
# Convert y to one-hot encoding for multi-class
if self.output_size == 3 and len(y.shape) == 1:
y = tf.keras.utils.to_categorical(y, num_classes=3)
# Format predictions based on output type
# Evaluate model
logger.info(f"Evaluating Transformer model on {len(X_ts)} samples")
eval_results = self.model.evaluate([X_ts, X_features], y, verbose=0)
metrics = {}
for metric, value in zip(self.model.metrics_names, eval_results):
metrics[metric] = value
logger.info(f"{metric}: {value:.4f}")
return metrics
def predict(self, X_ts, X_features=None):
"""
Make predictions on new data.
Args:
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
Returns:
tuple: (y_pred, y_proba) where:
y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
y_proba is the class probability
"""
if self.model is None:
raise ValueError("Model has not been built or trained yet")
# Ensure X_ts has the right shape
if len(X_ts.shape) == 2:
# Single sample, add batch dimension
X_ts = np.expand_dims(X_ts, axis=0)
# Ensure X_features has the right shape
if X_features is None:
# Create dummy features with zeros
X_features = np.zeros((X_ts.shape[0], self.feature_input_shape))
elif len(X_features.shape) == 1:
# Single sample, add batch dimension
X_features = np.expand_dims(X_features, axis=0)
# Get predictions
y_proba = self.model.predict([X_ts, X_features])
# Process based on output type
if self.output_size == 1:
# Binary classification
y_pred = (y_pred_proba > threshold).astype(int).flatten()
return y_pred, y_pred_proba.flatten()
y_pred = (y_proba > 0.5).astype(int).flatten()
return y_pred, y_proba.flatten()
elif self.output_size == 3:
# Multi-class (BUY/HOLD/SELL)
y_pred = np.argmax(y_pred_proba, axis=1)
return y_pred, y_pred_proba
# Multi-class classification
y_pred = np.argmax(y_proba, axis=1)
return y_pred, y_proba
else:
# Regression
return y_pred_proba
return y_proba, y_proba
def save_model(self, filepath=None):
def save(self, filepath=None):
"""
Save the model to a file
Save the model to disk.
Args:
filepath: Path to save the model to
filepath (str): Path to save the model
Returns:
Path to the saved model
str: Path where the model was saved
"""
if self.model is None:
logger.error("Model not built or trained yet")
return None
raise ValueError("Model has not been built yet")
if filepath is None:
# Create a default filepath
# Create a default filepath with timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
filepath = os.path.join(self.model_dir, f"transformer_model_{timestamp}.h5")
self.model.save(filepath)
logger.info(f"Model saved to {filepath}")
return filepath
def load(self, filepath):
"""
Load a saved model from disk.
Args:
filepath (str): Path to the saved model
Returns:
The loaded model
"""
try:
# Register custom layers
custom_objects = {
'TransformerBlock': TransformerBlock,
'PositionalEncoding': PositionalEncoding
}
self.model = load_model(filepath, custom_objects=custom_objects)
logger.info(f"Model loaded from {filepath}")
return self.model
except Exception as e:
logger.error(f"Error loading model: {str(e)}")
return None
def plot_training_history(self):
"""
Plot training history (loss and metrics).
Returns:
str: Path to the saved plot
"""
if self.history is None:
raise ValueError("Model has not been trained yet")
plt.figure(figsize=(12, 5))
# Plot loss
plt.subplot(1, 2, 1)
plt.plot(self.history.history['loss'], label='Training Loss')
if 'val_loss' in self.history.history:
plt.plot(self.history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
# Plot accuracy
plt.subplot(1, 2, 2)
if 'accuracy' in self.history.history:
plt.plot(self.history.history['accuracy'], label='Training Accuracy')
if 'val_accuracy' in self.history.history:
plt.plot(self.history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
elif 'mae' in self.history.history:
plt.plot(self.history.history['mae'], label='Training MAE')
if 'val_mae' in self.history.history:
plt.plot(self.history.history['val_mae'], label='Validation MAE')
plt.title('Model MAE')
plt.ylabel('MAE')
plt.xlabel('Epoch')
plt.legend()
plt.tight_layout()
# Save figure
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
fig_path = os.path.join(self.model_dir, f"transformer_training_history_{timestamp}.png")
plt.savefig(fig_path)
plt.close()
logger.info(f"Training history plot saved to {fig_path}")
return fig_path
class MixtureOfExpertsModel:
"""
Mixture of Experts (MoE) model.
This model combines predictions from multiple expert models (such as CNN and Transformer)
using a weighted ensemble approach.
"""
def __init__(self, output_size=1, model_dir="NN/models/saved"):
"""
Initialize the MoE model.
Args:
output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
model_dir (str): Directory to save trained models
"""
self.output_size = output_size
self.model_dir = model_dir
self.model = None
self.history = None
self.experts = {}
# Create model directory if it doesn't exist
os.makedirs(self.model_dir, exist_ok=True)
logger.info(f"Initialized Mixture of Experts model with output size {output_size}")
def add_expert(self, name, model):
"""
Add an expert model to the MoE.
Args:
name (str): Name of the expert model
model: The expert model instance
Returns:
None
"""
self.experts[name] = model
logger.info(f"Added expert model '{name}' to MoE")
def build_model(self, ts_input_shape=(20, 5), expert_weights=None, learning_rate=0.001):
"""
Build the MoE model by combining expert models.
Args:
ts_input_shape (tuple): Shape of time series input data
expert_weights (dict): Weights for each expert model
learning_rate (float): Learning rate for Adam optimizer
Returns:
The compiled model
"""
if not self.experts:
logger.error("No expert models added to MoE")
return None
# Time series input
ts_inputs = Input(shape=ts_input_shape, name="ts_input")
# Additional feature input (from CNN)
feature_inputs = Input(shape=(64,), name="feature_input")  # Default size for features
# Process with each expert model
expert_outputs = []
expert_names = []
for name, expert in self.experts.items():
# Skip if the expert model is not valid
if expert is None:
logger.warning(f"Expert model '{name}' is None, skipping")
continue
try:
# Different handling based on model type
if name == 'cnn':
# CNN model takes only the time series input
expert_output = expert(ts_inputs)
expert_outputs.append(expert_output)
expert_names.append(name)
elif name == 'transformer':
# Transformer model takes both time series and feature inputs
expert_output = expert([ts_inputs, feature_inputs])
expert_outputs.append(expert_output)
expert_names.append(name)
else:
logger.warning(f"Unknown expert model type: {name}")
except Exception as e:
logger.error(f"Error adding expert '{name}': {str(e)}")
if not expert_outputs:
logger.error("No valid expert models found")
@@ -443,7 +566,7 @@ class MixtureOfExpertsModel:
combined_output = Add()(weighted_outputs)
# Create the MoE model
moe_model = Model(inputs=[ts_inputs, feature_inputs], outputs=combined_output)
# Compile the model
if self.output_size == 1:
@@ -469,83 +592,176 @@
)
self.model = moe_model
# Log model summary
self.model.summary(print_fn=lambda x: logger.info(x))
logger.info(f"Built MoE model with weights: {weights}")
return self.model
def train(self, X_ts, X_features, y, batch_size=32, epochs=100, validation_split=0.2,
callbacks=None, class_weights=None):
"""
Make predictions with the MoE model
Train the MoE model on the provided data.
Args:
X: Input data
threshold: Threshold for binary classification
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
y (numpy.ndarray): Target labels
batch_size (int): Batch size
epochs (int): Number of epochs
validation_split (float): Fraction of data to use for validation
callbacks (list): List of Keras callbacks
class_weights (dict): Class weights for imbalanced datasets
Returns:
Predicted values or classes
History object containing training metrics
"""
if self.model is None:
logger.error("MoE model not built yet")
logger.error("MoE model has not been built yet")
return None
# Get raw predictions
y_pred_proba = self.model.predict(X)
# Default callbacks if none provided
if callbacks is None:
# Create a timestamp for model checkpoints
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
# Format predictions based on output type
callbacks = [
EarlyStopping(
monitor='val_loss',
patience=10,
restore_best_weights=True
),
ReduceLROnPlateau(
monitor='val_loss',
factor=0.5,
patience=5,
min_lr=1e-6
),
ModelCheckpoint(
filepath=os.path.join(self.model_dir, f"moe_model_{timestamp}.h5"),
monitor='val_loss',
save_best_only=True
)
]
# Check if y needs to be one-hot encoded for multi-class
if self.output_size == 3 and len(y.shape) == 1:
y = tf.keras.utils.to_categorical(y, num_classes=3)
# Train the model
logger.info(f"Training MoE model with {len(X_ts)} samples, batch size {batch_size}, epochs {epochs}")
self.history = self.model.fit(
[X_ts, X_features], y,
batch_size=batch_size,
epochs=epochs,
validation_split=validation_split,
callbacks=callbacks,
class_weight=class_weights,
verbose=2
)
# Save the trained model
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
model_path = os.path.join(self.model_dir, f"moe_model_final_{timestamp}.h5")
self.model.save(model_path)
logger.info(f"Model saved to {model_path}")
# Save training history
history_path = os.path.join(self.model_dir, f"moe_model_history_{timestamp}.json")
with open(history_path, 'w') as f:
# Convert numpy values to Python native types for JSON serialization
history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
json.dump(history_dict, f, indent=2)
return self.history
def predict(self, X_ts, X_features=None):
"""
Make predictions on new data.
Args:
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
Returns:
tuple: (y_pred, y_proba) where:
y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
y_proba is the class probability
"""
if self.model is None:
raise ValueError("Model has not been built or trained yet")
# Ensure X_ts has the right shape
if len(X_ts.shape) == 2:
# Single sample, add batch dimension
X_ts = np.expand_dims(X_ts, axis=0)
# Ensure X_features has the right shape
if X_features is None:
# Create dummy features with zeros
X_features = np.zeros((X_ts.shape[0], 64)) # Default size
elif len(X_features.shape) == 1:
# Single sample, add batch dimension
X_features = np.expand_dims(X_features, axis=0)
# Get predictions
y_proba = self.model.predict([X_ts, X_features])
# Process based on output type
if self.output_size == 1:
# Binary classification
y_pred = (y_proba > 0.5).astype(int).flatten()
return y_pred, y_proba.flatten()
elif self.output_size == 3:
# Multi-class classification (BUY/HOLD/SELL)
y_pred = np.argmax(y_proba, axis=1)
return y_pred, y_proba
else:
# Regression
return y_proba, y_proba
def save(self, filepath=None):
"""
Save the model to disk.
Args:
filepath (str): Path to save the model
Returns:
str: Path where the model was saved
"""
if self.model is None:
raise ValueError("Model has not been built yet")
if filepath is None:
# Create a default filepath with timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
filepath = os.path.join(self.model_dir, f"moe_model_{timestamp}.h5")
self.model.save(filepath)
logger.info(f"Model saved to {filepath}")
return filepath
def load(self, filepath):
"""
Load a saved model from disk.
Args:
filepath (str): Path to the saved model
Returns:
The loaded model
"""
try:
# Register custom layers
custom_objects = {
'TransformerBlock': TransformerBlock,
'PositionalEncoding': PositionalEncoding
}
self.model = load_model(filepath, custom_objects=custom_objects)
logger.info(f"Model loaded from {filepath}")
return self.model
except Exception as e:
logger.error(f"Error loading MoE model: {str(e)}")
return None
# Example usage:
if __name__ == "__main__":
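    # Illustrative sketch only -- the body of this example block is not shown in the diff.
    # It exercises the Keras MixtureOfExpertsModel above with tiny stand-in experts; the
    # stand-in models, shapes, and random data below are assumptions, not the author's code.
    from tensorflow.keras.layers import Concatenate  # in case it is not already imported above

    # Stand-in "CNN" expert: maps a (20, 5) window straight to 3 class probabilities
    cnn_in = Input(shape=(20, 5))
    dummy_cnn = Model(cnn_in, Dense(3, activation='softmax')(Flatten()(cnn_in)))

    # Stand-in "transformer" expert: consumes the window plus a 64-dim feature vector
    ts_in = Input(shape=(20, 5))
    feat_in = Input(shape=(64,))
    merged = Concatenate()([Flatten()(ts_in), feat_in])
    dummy_transformer = Model([ts_in, feat_in], Dense(3, activation='softmax')(merged))

    moe = MixtureOfExpertsModel(output_size=3)
    moe.add_expert('cnn', dummy_cnn)
    moe.add_expert('transformer', dummy_transformer)
    moe.build_model(ts_input_shape=(20, 5))

    # Random data used only to show the expected shapes
    X_ts = np.random.rand(64, 20, 5)
    X_features = np.random.rand(64, 64)  # matches the 64-dim feature input in build_model
    y = np.random.randint(0, 3, size=64)

    moe.train(X_ts, X_features, y, batch_size=16, epochs=2)
    y_pred, y_proba = moe.predict(X_ts[:4], X_features[:4])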

View File

@@ -0,0 +1,653 @@
#!/usr/bin/env python3
"""
Transformer Model - PyTorch Implementation
This module implements a Transformer model using PyTorch for time series analysis.
The model consists of a Transformer encoder and a Mixture of Experts model.
"""
import os
import logging
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Configure logging
logger = logging.getLogger(__name__)
class TransformerBlock(nn.Module):
"""Transformer Block with self-attention mechanism"""
def __init__(self, input_dim, num_heads=4, ff_dim=64, dropout=0.1):
super(TransformerBlock, self).__init__()
self.attention = nn.MultiheadAttention(
embed_dim=input_dim,
num_heads=num_heads,
dropout=dropout,
batch_first=True
)
self.feed_forward = nn.Sequential(
nn.Linear(input_dim, ff_dim),
nn.ReLU(),
nn.Linear(ff_dim, input_dim)
)
self.layernorm1 = nn.LayerNorm(input_dim)
self.layernorm2 = nn.LayerNorm(input_dim)
self.dropout1 = nn.Dropout(dropout)
self.dropout2 = nn.Dropout(dropout)
def forward(self, x):
# Self-attention
attn_output, _ = self.attention(x, x, x)
x = x + self.dropout1(attn_output)
x = self.layernorm1(x)
# Feed forward
ff_output = self.feed_forward(x)
x = x + self.dropout2(ff_output)
x = self.layernorm2(x)
return x
class TransformerModelPyTorch(nn.Module):
"""PyTorch Transformer model for time series analysis"""
def __init__(self, input_shape, output_size=3, num_heads=4, ff_dim=64, num_transformer_blocks=2):
"""
Initialize the Transformer model.
Args:
input_shape (tuple): Shape of input data (window_size, features)
output_size (int): Size of output (1 for regression, 3 for classification)
num_heads (int): Number of attention heads
ff_dim (int): Feed forward dimension
num_transformer_blocks (int): Number of transformer blocks
"""
super(TransformerModelPyTorch, self).__init__()
window_size, num_features = input_shape
# Positional encoding
self.pos_encoding = nn.Parameter(
torch.zeros(1, window_size, num_features),
requires_grad=True
)
# Transformer blocks
self.transformer_blocks = nn.ModuleList([
TransformerBlock(
input_dim=num_features,
num_heads=num_heads,
ff_dim=ff_dim
) for _ in range(num_transformer_blocks)
])
# Global average pooling
self.global_avg_pool = nn.AdaptiveAvgPool1d(1)
# Dense layers
self.dense = nn.Sequential(
nn.Linear(num_features, 64),
nn.ReLU(),
nn.BatchNorm1d(64),
nn.Dropout(0.3),
nn.Linear(64, output_size)
)
# Activation based on output size
if output_size == 1:
self.activation = nn.Sigmoid() # Binary classification or regression
elif output_size > 1:
self.activation = nn.Softmax(dim=1) # Multi-class classification
else:
self.activation = nn.Identity() # No activation
def forward(self, x):
"""
Forward pass through the network.
Args:
x: Input tensor of shape [batch_size, window_size, features]
Returns:
Output tensor of shape [batch_size, output_size]
"""
# Add positional encoding
x = x + self.pos_encoding
# Apply transformer blocks
for transformer_block in self.transformer_blocks:
x = transformer_block(x)
# Global average pooling
x = x.transpose(1, 2) # [batch, features, window]
x = self.global_avg_pool(x) # [batch, features, 1]
x = x.squeeze(-1) # [batch, features]
# Dense layers
x = self.dense(x)
# Apply activation
return self.activation(x)
class TransformerModelPyTorchWrapper:
"""
Transformer model wrapper class for time series analysis using PyTorch.
This class provides methods for building, training, evaluating, and making
predictions with the Transformer model.
"""
def __init__(self, window_size, num_features, output_size=3, timeframes=None):
"""
Initialize the Transformer model.
Args:
window_size (int): Size of the input window
num_features (int): Number of features in the input data
output_size (int): Size of the output (1 for regression, 3 for classification)
timeframes (list): List of timeframes used (for logging)
"""
self.window_size = window_size
self.num_features = num_features
self.output_size = output_size
self.timeframes = timeframes or []
# Determine device (GPU or CPU)
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device: {self.device}")
# Initialize model
self.model = None
self.build_model()
# Initialize training history
self.history = {
'loss': [],
'val_loss': [],
'accuracy': [],
'val_accuracy': []
}
def build_model(self):
"""Build the Transformer model architecture"""
logger.info(f"Building PyTorch Transformer model with window_size={self.window_size}, "
f"num_features={self.num_features}, output_size={self.output_size}")
self.model = TransformerModelPyTorch(
input_shape=(self.window_size, self.num_features),
output_size=self.output_size
).to(self.device)
# Initialize optimizer
self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
# Initialize loss function based on output size
if self.output_size == 1:
self.criterion = nn.BCELoss() # Binary classification
elif self.output_size > 1:
self.criterion = nn.CrossEntropyLoss() # Multi-class classification
else:
self.criterion = nn.MSELoss() # Regression
logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")
def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
"""
Train the Transformer model.
Args:
X_train: Training input data
y_train: Training target data
X_val: Validation input data
y_val: Validation target data
batch_size: Batch size for training
epochs: Number of training epochs
Returns:
Training history
"""
logger.info(f"Training PyTorch Transformer model with {len(X_train)} samples, "
f"batch_size={batch_size}, epochs={epochs}")
# Convert numpy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)
# Handle different output sizes for y_train
if self.output_size == 1:
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(self.device)
else:
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)
# Create DataLoader for training data
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Create DataLoader for validation data if provided
if X_val is not None and y_val is not None:
X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
if self.output_size == 1:
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
else:
y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
else:
val_loader = None
# Training loop
for epoch in range(epochs):
# Training phase
self.model.train()
running_loss = 0.0
correct = 0
total = 0
for inputs, targets in train_loader:
# Zero the parameter gradients
self.optimizer.zero_grad()
# Forward pass
outputs = self.model(inputs)
# Calculate loss
if self.output_size == 1:
loss = self.criterion(outputs, targets.unsqueeze(1))
else:
loss = self.criterion(outputs, targets)
# Backward pass and optimize
loss.backward()
self.optimizer.step()
# Statistics
running_loss += loss.item()
if self.output_size > 1:
_, predicted = torch.max(outputs, 1)
total += targets.size(0)
correct += (predicted == targets).sum().item()
epoch_loss = running_loss / len(train_loader)
epoch_acc = correct / total if total > 0 else 0
# Validation phase
if val_loader is not None:
val_loss, val_acc = self._validate(val_loader)
logger.info(f"Epoch {epoch+1}/{epochs} - "
f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")
# Update history
self.history['loss'].append(epoch_loss)
self.history['accuracy'].append(epoch_acc)
self.history['val_loss'].append(val_loss)
self.history['val_accuracy'].append(val_acc)
else:
logger.info(f"Epoch {epoch+1}/{epochs} - "
f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f}")
# Update history without validation
self.history['loss'].append(epoch_loss)
self.history['accuracy'].append(epoch_acc)
logger.info("Training completed")
return self.history
def _validate(self, val_loader):
"""Validate the model using the validation set"""
self.model.eval()
val_loss = 0.0
correct = 0
total = 0
with torch.no_grad():
for inputs, targets in val_loader:
# Forward pass
outputs = self.model(inputs)
# Calculate loss
if self.output_size == 1:
loss = self.criterion(outputs, targets.unsqueeze(1))
else:
loss = self.criterion(outputs, targets)
val_loss += loss.item()
# Calculate accuracy
if self.output_size > 1:
_, predicted = torch.max(outputs, 1)
total += targets.size(0)
correct += (predicted == targets).sum().item()
return val_loss / len(val_loader), correct / total if total > 0 else 0
def evaluate(self, X_test, y_test):
"""
Evaluate the model on test data.
Args:
X_test: Test input data
y_test: Test target data
Returns:
dict: Evaluation metrics
"""
logger.info(f"Evaluating model on {len(X_test)} samples")
# Convert to PyTorch tensors
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(self.device)
# Get predictions
self.model.eval()
with torch.no_grad():
y_pred = self.model(X_test_tensor)
if self.output_size > 1:
_, y_pred_class = torch.max(y_pred, 1)
y_pred_class = y_pred_class.cpu().numpy()
else:
y_pred_class = (y_pred.cpu().numpy() > 0.5).astype(int).flatten()
# Calculate metrics
if self.output_size > 1:
accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class, average='weighted')
recall = recall_score(y_test, y_pred_class, average='weighted')
f1 = f1_score(y_test, y_pred_class, average='weighted')
metrics = {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1_score': f1
}
else:
accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class)
recall = recall_score(y_test, y_pred_class)
f1 = f1_score(y_test, y_pred_class)
metrics = {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1_score': f1
}
logger.info(f"Evaluation metrics: {metrics}")
return metrics
def predict(self, X):
"""
Make predictions with the model.
Args:
X: Input data
Returns:
Predictions
"""
# Convert to PyTorch tensor
X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
# Get predictions
self.model.eval()
with torch.no_grad():
predictions = self.model(X_tensor)
if self.output_size > 1:
# Multi-class classification
probs = predictions.cpu().numpy()
_, class_preds = torch.max(predictions, 1)
class_preds = class_preds.cpu().numpy()
return class_preds, probs
else:
# Binary classification or regression
preds = predictions.cpu().numpy()
if self.output_size == 1:
# Binary classification
class_preds = (preds > 0.5).astype(int)
return class_preds.flatten(), preds.flatten()
else:
# Regression
return preds.flatten(), None
def save(self, filepath):
"""
Save the model to a file.
Args:
filepath: Path to save the model
"""
# Create directory if it doesn't exist
os.makedirs(os.path.dirname(filepath), exist_ok=True)
# Save the model state
model_state = {
'model_state_dict': self.model.state_dict(),
'optimizer_state_dict': self.optimizer.state_dict(),
'history': self.history,
'window_size': self.window_size,
'num_features': self.num_features,
'output_size': self.output_size,
'timeframes': self.timeframes
}
torch.save(model_state, f"{filepath}.pt")
logger.info(f"Model saved to {filepath}.pt")
def load(self, filepath):
"""
Load the model from a file.
Args:
filepath: Path to load the model from
"""
# Check if file exists
if not os.path.exists(f"{filepath}.pt"):
logger.error(f"Model file {filepath}.pt not found")
return False
# Load the model state
model_state = torch.load(f"{filepath}.pt", map_location=self.device)
# Update model parameters
self.window_size = model_state['window_size']
self.num_features = model_state['num_features']
self.output_size = model_state['output_size']
self.timeframes = model_state['timeframes']
# Rebuild the model
self.build_model()
# Load the model state
self.model.load_state_dict(model_state['model_state_dict'])
self.optimizer.load_state_dict(model_state['optimizer_state_dict'])
self.history = model_state['history']
logger.info(f"Model loaded from {filepath}.pt")
return True
class MixtureOfExpertsModelPyTorch:
"""
Mixture of Experts model implementation using PyTorch.
This model combines predictions from multiple models (experts) using a
learned weighting scheme.
"""
def __init__(self, output_size=3, timeframes=None):
"""
Initialize the Mixture of Experts model.
Args:
output_size (int): Size of the output (1 for regression, 3 for classification)
timeframes (list): List of timeframes used (for logging)
"""
self.output_size = output_size
self.timeframes = timeframes or []
self.experts = {}
self.expert_weights = {}
# Determine device (GPU or CPU)
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device: {self.device}")
# Initialize model and training history
self.model = None
self.history = {
'loss': [],
'val_loss': [],
'accuracy': [],
'val_accuracy': []
}
def add_expert(self, name, model):
"""
Add an expert model.
Args:
name (str): Name of the expert
model: Expert model
"""
self.experts[name] = model
logger.info(f"Added expert: {name}")
def predict(self, X):
"""
Make predictions using all experts and combine them.
Args:
X: Input data
Returns:
Combined predictions
"""
if not self.experts:
logger.error("No experts added to the MoE model")
return None
# Get probability outputs from each expert (class labels cannot be meaningfully averaged)
expert_predictions = {}
for name, expert in self.experts.items():
_, proba = expert.predict(X)
expert_predictions[name] = proba
# Combine predictions based on weights
final_pred = None
for name, pred in expert_predictions.items():
weight = self.expert_weights.get(name, 1.0 / len(self.experts))
if final_pred is None:
final_pred = weight * pred
else:
final_pred += weight * pred
# For classification, convert to class indices
if self.output_size > 1:
# Get class with highest probability
class_pred = np.argmax(final_pred, axis=1)
return class_pred, final_pred
else:
# Binary classification
class_pred = (final_pred > 0.5).astype(int)
return class_pred, final_pred
def evaluate(self, X_test, y_test):
"""
Evaluate the model on test data.
Args:
X_test: Test input data
y_test: Test target data
Returns:
dict: Evaluation metrics
"""
logger.info(f"Evaluating MoE model on {len(X_test)} samples")
# Get predictions
y_pred_class, _ = self.predict(X_test)
# Calculate metrics
if self.output_size > 1:
accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class, average='weighted')
recall = recall_score(y_test, y_pred_class, average='weighted')
f1 = f1_score(y_test, y_pred_class, average='weighted')
metrics = {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1_score': f1
}
else:
accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class)
recall = recall_score(y_test, y_pred_class)
f1 = f1_score(y_test, y_pred_class)
metrics = {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1_score': f1
}
logger.info(f"MoE evaluation metrics: {metrics}")
return metrics
def save(self, filepath):
"""
Save the model weights to a file.
Args:
filepath: Path to save the model
"""
# Create directory if it doesn't exist
os.makedirs(os.path.dirname(filepath), exist_ok=True)
# Save the model state
model_state = {
'expert_weights': self.expert_weights,
'output_size': self.output_size,
'timeframes': self.timeframes
}
torch.save(model_state, f"{filepath}_moe.pt")
logger.info(f"MoE model saved to {filepath}_moe.pt")
def load(self, filepath):
"""
Load the model from a file.
Args:
filepath: Path to load the model from
"""
# Check if file exists
if not os.path.exists(f"{filepath}_moe.pt"):
logger.error(f"MoE model file {filepath}_moe.pt not found")
return False
# Load the model state
model_state = torch.load(f"{filepath}_moe.pt", map_location=self.device)
# Update model parameters
self.expert_weights = model_state['expert_weights']
self.output_size = model_state['output_size']
self.timeframes = model_state['timeframes']
logger.info(f"MoE model loaded from {filepath}_moe.pt")
return True
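# Hedged usage sketch (added for illustration, not part of the original file): wiring the
# PyTorch wrapper and MoE classes above together. Shapes, sample data, and the save path
# are assumptions made for the example only.
if __name__ == "__main__":
    window_size, num_features = 20, 5
    X = np.random.rand(256, window_size, num_features).astype(np.float32)
    y = np.random.randint(0, 3, size=256)  # BUY/HOLD/SELL labels

    transformer = TransformerModelPyTorchWrapper(
        window_size=window_size,
        num_features=num_features,
        output_size=3,
        timeframes=['1h', '4h']
    )
    transformer.train(X[:200], y[:200], X_val=X[200:], y_val=y[200:], batch_size=32, epochs=5)
    print(transformer.evaluate(X[200:], y[200:]))

    moe = MixtureOfExpertsModelPyTorch(output_size=3, timeframes=['1h', '4h'])
    moe.add_expert('transformer', transformer)
    class_pred, proba = moe.predict(X[200:])
    moe.save('NN/models/saved/demo_pytorch')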

View File

@@ -1,13 +1,22 @@
# Main dependencies
numpy>=1.19.5
pandas>=1.3.0
matplotlib>=3.4.2
scikit-learn>=0.24.2

# PyTorch (primary framework)
torch
torchvision

# TensorFlow (optional)
# tensorflow>=2.5.0
# tensorflow-addons>=0.13.0

# Additional dependencies
plotly
h5py
tqdm
pyyaml
tensorboard
ccxt
requests

88
NN/start_tensorboard.py Normal file
View File

@@ -0,0 +1,88 @@
#!/usr/bin/env python
"""
Start TensorBoard for monitoring neural network training
"""
import os
import sys
import subprocess
import webbrowser
from time import sleep
def start_tensorboard(logdir="NN/models/saved/logs", port=6006, open_browser=True):
"""
Start TensorBoard in a subprocess
Args:
logdir: Directory containing TensorBoard logs
port: Port to run TensorBoard on
open_browser: Whether to open a browser automatically
"""
# Make sure the log directory exists
os.makedirs(logdir, exist_ok=True)
# Create command
cmd = [
sys.executable,
"-m",
"tensorboard.main",
f"--logdir={logdir}",
f"--port={port}",
"--bind_all"
]
print(f"Starting TensorBoard with logs from {logdir} on port {port}")
print(f"Command: {' '.join(cmd)}")
# Start TensorBoard in a subprocess
process = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
universal_newlines=True
)
# Wait for TensorBoard to start up
for line in process.stdout:
print(line.strip())
if "TensorBoard" in line and "http://" in line:
# TensorBoard is running, extract the URL
url = None
for part in line.split():
if part.startswith(("http://", "https://")):
url = part
break
# Open browser if requested and URL found
if open_browser and url:
print(f"Opening TensorBoard in browser: {url}")
webbrowser.open(url)
break
# Return the process for the caller to manage
return process
if __name__ == "__main__":
import argparse
# Parse command line arguments
parser = argparse.ArgumentParser(description="Start TensorBoard for NN training visualization")
parser.add_argument("--logdir", default="NN/models/saved/logs", help="Directory containing TensorBoard logs")
parser.add_argument("--port", type=int, default=6006, help="Port to run TensorBoard on")
parser.add_argument("--no-browser", action="store_true", help="Don't open browser automatically")
args = parser.parse_args()
# Start TensorBoard
process = start_tensorboard(args.logdir, args.port, not args.no_browser)
try:
# Keep the script running until Ctrl+C
print("TensorBoard is running. Press Ctrl+C to stop.")
while True:
sleep(1)
except KeyboardInterrupt:
print("Stopping TensorBoard...")
process.terminate()
process.wait()
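# Example invocation (assumed to be run from the repository root):
#   python NN/start_tensorboard.py --logdir NN/models/saved/logs --port 6006 --no-browser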

Binary file not shown.

Binary file not shown.

390
NN/utils/data_interface.py Normal file
View File

@@ -0,0 +1,390 @@
"""
Data Interface for Neural Network Trading System
This module provides functionality to fetch, process, and prepare data for the neural network models.
"""
import os
import logging
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import json
import pickle
from sklearn.preprocessing import MinMaxScaler
logger = logging.getLogger(__name__)
class DataInterface:
"""
Handles data collection, processing, and preparation for neural network models.
This class is responsible for:
1. Fetching historical data
2. Preprocessing data for neural network input
3. Generating training datasets
4. Handling real-time data integration
"""
def __init__(self, symbol="BTC/USDT", timeframes=None, data_dir="NN/data"):
"""
Initialize the data interface.
Args:
symbol (str): Trading pair symbol (e.g., "BTC/USDT")
timeframes (list): List of timeframes to use (e.g., ['1m', '5m', '1h', '4h', '1d'])
data_dir (str): Directory to store/load datasets
"""
self.symbol = symbol
self.timeframes = timeframes or ['1h', '4h', '1d']
self.data_dir = data_dir
self.scalers = {} # Store scalers for each timeframe
# Create data directory if it doesn't exist
os.makedirs(self.data_dir, exist_ok=True)
# Initialize empty dataframes for each timeframe
self.dataframes = {tf: None for tf in self.timeframes}
logger.info(f"DataInterface initialized for {symbol} with timeframes {timeframes}")
def get_historical_data(self, timeframe='1h', n_candles=1000, use_cache=True):
"""
Fetch historical price data for a given timeframe.
Args:
timeframe (str): Timeframe to fetch data for
n_candles (int): Number of candles to fetch
use_cache (bool): Whether to use cached data if available
Returns:
pd.DataFrame: DataFrame with OHLCV data
"""
cache_file = os.path.join(self.data_dir, f"{self.symbol.replace('/', '_')}_{timeframe}.csv")
# Check if cached data exists and is recent
if use_cache and os.path.exists(cache_file):
try:
df = pd.read_csv(cache_file, parse_dates=['timestamp'])
# If we have enough data and it's recent, use it
if len(df) >= n_candles:
logger.info(f"Using cached data for {self.symbol} {timeframe} ({len(df)} candles)")
self.dataframes[timeframe] = df
return df.tail(n_candles)
except Exception as e:
logger.error(f"Error reading cached data: {str(e)}")
# If we get here, we need to fetch data
# For now, we'll use a placeholder for fetching data from an exchange
try:
# In a real implementation, we would fetch data from an exchange or API here
# For this example, we'll create dummy data if we can't load from cache
logger.info(f"Fetching historical data for {self.symbol} {timeframe}")
# Placeholder for real data fetching
# In a real implementation, this would be replaced with API calls
self._fetch_data_from_exchange(timeframe, n_candles)
# Save to cache
if self.dataframes[timeframe] is not None:
self.dataframes[timeframe].to_csv(cache_file, index=False)
return self.dataframes[timeframe]
else:
# Create dummy data as fallback
logger.warning(f"Could not fetch data for {self.symbol} {timeframe}, using dummy data")
df = self._create_dummy_data(timeframe, n_candles)
self.dataframes[timeframe] = df
return df
except Exception as e:
logger.error(f"Error fetching data: {str(e)}")
return None
def _fetch_data_from_exchange(self, timeframe, n_candles):
"""
Placeholder method for fetching data from an exchange.
In a real implementation, this would connect to an exchange API.
"""
# This is a placeholder - in a real implementation this would make API calls
# to a cryptocurrency exchange to fetch OHLCV data
# For now, just generate dummy data
self.dataframes[timeframe] = self._create_dummy_data(timeframe, n_candles)
def _create_dummy_data(self, timeframe, n_candles):
"""
Create dummy OHLCV data for testing purposes.
Args:
timeframe (str): Timeframe to create data for
n_candles (int): Number of candles to create
Returns:
pd.DataFrame: DataFrame with dummy OHLCV data
"""
# Map timeframe to seconds
tf_seconds = {
'1m': 60,
'5m': 300,
'15m': 900,
'1h': 3600,
'4h': 14400,
'1d': 86400
}
seconds = tf_seconds.get(timeframe, 3600) # Default to 1h
# Create timestamps
end_time = datetime.now()
timestamps = [end_time - timedelta(seconds=seconds * i) for i in range(n_candles)]
timestamps.reverse() # Oldest first
# Generate random price data with realistic patterns
np.random.seed(42) # For reproducibility
# Start price
price = 50000 # For BTC/USDT
prices = []
volumes = []
for i in range(n_candles):
# Random walk with drift and volatility based on timeframe
drift = 0.0001 * (seconds / 3600)  # Larger drift for larger timeframes (scaled per hour so daily candles stay realistic)
volatility = 0.01 * np.sqrt(seconds / 3600) # Scale volatility by sqrt of time
# Daily/weekly patterns
if timeframe in ['1d', '4h']:
# Add some cyclical patterns
cycle = np.sin(i / 7 * np.pi) * 0.02 # Weekly cycle
else:
cycle = np.sin(i / 24 * np.pi) * 0.01 # Daily cycle
# Calculate price change with random walk + cycles
price_change = price * (drift + volatility * np.random.randn() + cycle)
price += price_change
# Generate OHLC from the price
open_price = price
high_price = price * (1 + abs(0.005 * np.random.randn()))
low_price = price * (1 - abs(0.005 * np.random.randn()))
close_price = price * (1 + 0.002 * np.random.randn())
# Ensure high >= open, close, low and low <= open, close
high_price = max(high_price, open_price, close_price)
low_price = min(low_price, open_price, close_price)
# Generate volume (higher for larger price movements)
volume = abs(price_change) * (10000 + 5000 * np.random.rand())
prices.append((open_price, high_price, low_price, close_price))
volumes.append(volume)
# Update price for next iteration
price = close_price
# Create DataFrame
df = pd.DataFrame(
[(t, o, h, l, c, v) for t, (o, h, l, c), v in zip(timestamps, prices, volumes)],
columns=['timestamp', 'open', 'high', 'low', 'close', 'volume']
)
return df
def prepare_nn_input(self, timeframes=None, n_candles=500, window_size=20):
"""
Prepare input data for neural network models.
Args:
timeframes (list): List of timeframes to use
n_candles (int): Number of candles to fetch for each timeframe
window_size (int): Size of the sliding window for feature creation
Returns:
tuple: (X, y, timestamps) where:
X is the input features array with shape (n_samples, window_size, n_features)
y is the target array with shape (n_samples,)
timestamps is an array of timestamps for each sample
"""
if timeframes is None:
timeframes = self.timeframes
# Get data for all requested timeframes
dfs = {}
for tf in timeframes:
df = self.get_historical_data(timeframe=tf, n_candles=n_candles)
if df is not None and not df.empty:
dfs[tf] = df
if not dfs:
logger.error("No data available for feature creation")
return None, None, None
# For simplicity, we'll use just one timeframe for now
# In a more complex implementation, we would merge multiple timeframes
primary_tf = timeframes[0]
if primary_tf not in dfs:
logger.error(f"Primary timeframe {primary_tf} not available")
return None, None, None
df = dfs[primary_tf]
# Create features
X, y, timestamps = self._create_features(df, window_size)
return X, y, timestamps
def _create_features(self, df, window_size):
"""
Create features from OHLCV data using a sliding window approach.
Args:
df (pd.DataFrame): DataFrame with OHLCV data
window_size (int): Size of the sliding window
Returns:
tuple: (X, y, timestamps) where:
X is the input features array
y is the target array
timestamps is an array of timestamps for each sample
"""
# Extract OHLCV columns
ohlcv = df[['open', 'high', 'low', 'close', 'volume']].values
# Scale the data
scaler = MinMaxScaler()
ohlcv_scaled = scaler.fit_transform(ohlcv)
# Store the scaler for later use
timeframe = next((tf for tf in self.timeframes if self.dataframes.get(tf) is not None and
self.dataframes[tf].equals(df)), 'unknown')
self.scalers[timeframe] = scaler
# Create sliding windows
X = []
y = []
timestamps = []
for i in range(len(ohlcv_scaled) - window_size):
# Input: window_size candles of OHLCV data
X.append(ohlcv_scaled[i:i+window_size])
# Target: binary classification - price goes up (1) or down (0)
# 1 if close price increases in the next candle, 0 otherwise
price_change = ohlcv[i+window_size, 3] - ohlcv[i+window_size-1, 3]
y.append(1 if price_change > 0 else 0)
# Store timestamp for reference
timestamps.append(df['timestamp'].iloc[i+window_size])
return np.array(X), np.array(y), np.array(timestamps)
def generate_training_dataset(self, timeframes=None, n_candles=1000, window_size=20):
"""
Generate and save a training dataset for neural network models.
Args:
timeframes (list): List of timeframes to use
n_candles (int): Number of candles to fetch for each timeframe
window_size (int): Size of the sliding window for feature creation
Returns:
dict: Dictionary of dataset file paths
"""
if timeframes is None:
timeframes = self.timeframes
# Prepare inputs
X, y, timestamps = self.prepare_nn_input(timeframes, n_candles, window_size)
if X is None or y is None:
logger.error("Failed to prepare input data for dataset")
return None
# Prepare output paths
timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
dataset_name = f"{self.symbol.replace('/', '_')}_{'_'.join(timeframes)}_{timestamp_str}"
X_path = os.path.join(self.data_dir, f"{dataset_name}_X.npy")
y_path = os.path.join(self.data_dir, f"{dataset_name}_y.npy")
timestamps_path = os.path.join(self.data_dir, f"{dataset_name}_timestamps.npy")
metadata_path = os.path.join(self.data_dir, f"{dataset_name}_metadata.json")
# Save arrays
np.save(X_path, X)
np.save(y_path, y)
np.save(timestamps_path, timestamps)
# Save metadata
metadata = {
'symbol': self.symbol,
'timeframes': timeframes,
'window_size': window_size,
'n_samples': len(X),
'feature_shape': X.shape[1:],
'created_at': datetime.now().isoformat(),
'dataset_name': dataset_name
}
with open(metadata_path, 'w') as f:
json.dump(metadata, f, indent=2)
# Save scalers
scaler_path = os.path.join(self.data_dir, f"{dataset_name}_scalers.pkl")
with open(scaler_path, 'wb') as f:
pickle.dump(self.scalers, f)
# Return dataset info
dataset_info = {
'X_path': X_path,
'y_path': y_path,
'timestamps_path': timestamps_path,
'metadata_path': metadata_path,
'scaler_path': scaler_path
}
logger.info(f"Dataset generated and saved: {dataset_name}")
return dataset_info
def prepare_realtime_input(self, timeframe='1h', n_candles=30, window_size=20):
"""
Prepare a single input sample from the most recent data for real-time inference.
Args:
timeframe (str): Timeframe to use
n_candles (int): Number of recent candles to fetch
window_size (int): Size of the sliding window
Returns:
tuple: (X, timestamp) where:
X is the input features array with shape (1, window_size, n_features)
timestamp is the timestamp of the most recent candle
"""
# Get recent data
df = self.get_historical_data(timeframe=timeframe, n_candles=n_candles, use_cache=False)
if df is None or len(df) < window_size:
logger.error(f"Not enough data for inference (need at least {window_size} candles)")
return None, None
# Extract features from the most recent window
ohlcv = df[['open', 'high', 'low', 'close', 'volume']].tail(window_size).values
# Scale the data
if timeframe in self.scalers:
# Use existing scaler
scaler = self.scalers[timeframe]
else:
# Create new scaler
scaler = MinMaxScaler()
# Fit on all available data
all_data = df[['open', 'high', 'low', 'close', 'volume']].values
scaler.fit(all_data)
self.scalers[timeframe] = scaler
ohlcv_scaled = scaler.transform(ohlcv)
# Reshape to (1, window_size, n_features)
X = np.array([ohlcv_scaled])
# Get timestamp of the most recent candle
timestamp = df['timestamp'].iloc[-1]
return X, timestamp
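# Hedged usage sketch (added for illustration, not part of the original file): the typical
# DataInterface flow -- build a windowed training set, persist it, then prepare one live sample.
if __name__ == "__main__":
    di = DataInterface(symbol="BTC/USDT", timeframes=['1h', '4h'])
    X, y, timestamps = di.prepare_nn_input(timeframes=['1h'], n_candles=500, window_size=20)
    print(X.shape, y.shape)  # e.g. (480, 20, 5) (480,)
    dataset_info = di.generate_training_dataset(timeframes=['1h'], n_candles=1000, window_size=20)
    X_live, ts = di.prepare_realtime_input(timeframe='1h', n_candles=30, window_size=20)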

232
run_nn.py Normal file
View File

@@ -0,0 +1,232 @@
#!/usr/bin/env python3
"""
Neural Network Training Runner Script
This script runs the Neural Network Trading System with the existing conda environment.
It detects which deep learning framework is available (TensorFlow or PyTorch) and
adjusts the implementation accordingly.
"""
import os
import sys
import subprocess
import argparse
import logging
from pathlib import Path
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('nn_runner')
def detect_framework():
"""Detect which deep learning framework is available in the environment"""
try:
import torch
torch_version = torch.__version__
logger.info(f"PyTorch {torch_version} detected")
return "pytorch", torch_version
except ImportError:
logger.warning("PyTorch not found in environment")
try:
import tensorflow as tf
tf_version = tf.__version__
logger.info(f"TensorFlow {tf_version} detected")
return "tensorflow", tf_version
except ImportError:
logger.error("Neither PyTorch nor TensorFlow is available in the environment")
return None, None
def check_dependencies():
"""Check for required dependencies and return if they are met"""
required_packages = ["numpy", "pandas", "matplotlib", "scikit-learn"]
missing_packages = []
for package in required_packages:
try:
__import__(package)
except ImportError:
missing_packages.append(package)
if missing_packages:
logger.warning(f"Missing required packages: {', '.join(missing_packages)}")
return False
return True
def create_run_command(args, framework):
"""Create the command to run the neural network based on the available framework"""
cmd = ["python", "-m", "NN.main"]
# Add mode
cmd.extend(["--mode", args.mode])
# Add symbol
if args.symbol:
cmd.extend(["--symbol", args.symbol])
# Add timeframes
if args.timeframes:
cmd.extend(["--timeframes"] + args.timeframes)
# Add window size
if args.window_size:
cmd.extend(["--window-size", str(args.window_size)])
# Add output size
if args.output_size:
cmd.extend(["--output-size", str(args.output_size)])
# Add batch size
if args.batch_size:
cmd.extend(["--batch-size", str(args.batch_size)])
# Add epochs
if args.epochs:
cmd.extend(["--epochs", str(args.epochs)])
# Add model type
if args.model_type:
cmd.extend(["--model-type", args.model_type])
# Add framework-specific flag
cmd.extend(["--framework", framework])
return cmd
def parse_arguments():
"""Parse command line arguments"""
parser = argparse.ArgumentParser(description='Neural Network Trading System Runner')
parser.add_argument('--mode', type=str, choices=['train', 'predict', 'realtime'], default='train',
help='Mode to run (train, predict, realtime)')
parser.add_argument('--symbol', type=str, default='BTC/USDT',
help='Trading pair symbol')
parser.add_argument('--timeframes', type=str, nargs='+', default=['1h', '4h'],
help='Timeframes to use')
parser.add_argument('--window-size', type=int, default=20,
help='Window size for input data')
parser.add_argument('--output-size', type=int, default=3,
help='Output size (1 for binary, 3 for BUY/HOLD/SELL)')
parser.add_argument('--batch-size', type=int, default=32,
help='Batch size for training')
parser.add_argument('--epochs', type=int, default=100,
help='Number of epochs for training')
parser.add_argument('--model-type', type=str, choices=['cnn', 'transformer', 'moe'], default='cnn',
help='Model type to use')
parser.add_argument('--conda-env', type=str, default='gpt-gpu',
help='Name of conda environment to use')
parser.add_argument('--no-conda', action='store_true',
help='Do not use conda environment activation')
parser.add_argument('--framework', type=str, choices=['tensorflow', 'pytorch'], default='pytorch',
help='Deep learning framework to use (default: pytorch)')
return parser.parse_args()
def main():
# Parse arguments
args = parse_arguments()
# Check if we should run with conda
if not args.no_conda and args.conda_env:
# Create conda activation command
if sys.platform == 'win32':
conda_cmd = f"conda activate {args.conda_env} && "
else:
conda_cmd = f"source activate {args.conda_env} && "
logger.info(f"Running with conda environment: {args.conda_env}")
# Create the run script
script_path = Path("run_nn_in_conda.bat" if sys.platform == 'win32' else "run_nn_in_conda.sh")
with open(script_path, 'w') as f:
if sys.platform == 'win32':
f.write("@echo off\n")
f.write(f"call conda activate {args.conda_env}\n")
f.write(f"python -m NN.main --mode {args.mode} --symbol {args.symbol}")
if args.timeframes:
f.write(f" --timeframes {' '.join(args.timeframes)}")
if args.window_size:
f.write(f" --window-size {args.window_size}")
if args.output_size:
f.write(f" --output-size {args.output_size}")
if args.batch_size:
f.write(f" --batch-size {args.batch_size}")
if args.epochs:
f.write(f" --epochs {args.epochs}")
if args.model_type:
f.write(f" --model-type {args.model_type}")
else:
f.write("#!/bin/bash\n")
f.write(f"source activate {args.conda_env}\n")
f.write(f"python -m NN.main --mode {args.mode} --symbol {args.symbol}")
if args.timeframes:
f.write(f" --timeframes {' '.join(args.timeframes)}")
if args.window_size:
f.write(f" --window-size {args.window_size}")
if args.output_size:
f.write(f" --output-size {args.output_size}")
if args.batch_size:
f.write(f" --batch-size {args.batch_size}")
if args.epochs:
f.write(f" --epochs {args.epochs}")
if args.model_type:
f.write(f" --model-type {args.model_type}")
# Make script executable on Unix
if sys.platform != 'win32':
os.chmod(script_path, 0o755)
# Run the script
logger.info(f"Created script: {script_path}")
logger.info("Run this script to execute the neural network with the conda environment")
if sys.platform == 'win32':
print("\nTo run the neural network, execute the following command:")
print(f" {script_path}")
else:
print("\nTo run the neural network, execute the following command:")
print(f" ./{script_path}")
else:
# Run directly without conda
# First detect available framework
framework, version = detect_framework()
if framework is None:
logger.error("Cannot run Neural Network - no deep learning framework available")
return
# Check dependencies
if not check_dependencies():
logger.error("Missing required dependencies - please install them first")
return
# Create command
cmd = create_run_command(args, framework)
# Run command
logger.info(f"Running command: {' '.join(cmd)}")
try:
subprocess.run(cmd, check=True)
except subprocess.CalledProcessError as e:
logger.error(f"Error running neural network: {str(e)}")
except Exception as e:
logger.error(f"Error: {str(e)}")
if __name__ == "__main__":
main()
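# Example invocations (assumptions about the local setup, shown for illustration only):
#   python run_nn.py --mode train --model-type cnn --symbol BTC/USDT --epochs 100 --conda-env gpt-gpu
#   python run_nn.py --mode realtime --model-type transformer --no-conda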

3
run_nn_in_conda.bat Normal file
View File

@@ -0,0 +1,3 @@
@echo off
call conda activate gpt-gpu
python -m NN.main --mode train --symbol BTC/USDT --timeframes 1h 4h --window-size 20 --output-size 3 --batch-size 32 --epochs 100 --model-type cnn --framework pytorch

50
run_pytorch_nn.bat Normal file
View File

@@ -0,0 +1,50 @@
@echo off
echo ============================================================
echo Neural Network Trading System - PyTorch Implementation
echo ============================================================
call conda activate gpt-gpu
REM Parse command-line arguments
set MODE=train
set MODEL_TYPE=cnn
set SYMBOL=BTC/USDT
set EPOCHS=100
:parse
if "%~1"=="" goto endparse
if /i "%~1"=="--mode" (
set MODE=%~2
shift
shift
goto parse
)
if /i "%~1"=="--model" (
set MODEL_TYPE=%~2
shift
shift
goto parse
)
if /i "%~1"=="--symbol" (
set SYMBOL=%~2
shift
shift
goto parse
)
if /i "%~1"=="--epochs" (
set EPOCHS=%~2
shift
shift
goto parse
)
shift
goto parse
:endparse
echo Running Neural Network in %MODE% mode with %MODEL_TYPE% model for %SYMBOL% for %EPOCHS% epochs
python -m NN.main --mode %MODE% --symbol %SYMBOL% --timeframes 1h 4h --window-size 20 --output-size 3 --batch-size 32 --epochs %EPOCHS% --model-type %MODEL_TYPE% --framework pytorch
echo ============================================================
echo Run completed.
echo ============================================================
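REM Illustrative invocation (not part of the original script); any of the parsed flags above can be overridden:
REM   run_pytorch_nn.bat --mode train --model transformer --symbol ETH/USDT --epochs 50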