new nn wip

parent 50eb50696b
commit 0042581275

.gitignore (vendored): 1 addition
@@ -14,3 +14,4 @@ models/trading_agent_final.pt
 models/trading_agent_final.pt.backup
 *.pt
 *.backup
+logs/
.vscode/tasks.json (vendored, new file): 38 additions

@@ -0,0 +1,38 @@
{
    "version": "2.0.0",
    "tasks": [
        {
            "label": "Start TensorBoard",
            "type": "shell",
            "command": "python",
            "args": [
                "-m",
                "tensorboard.main",
                "--logdir=NN/models/saved/logs",
                "--port=6006",
                "--host=localhost"
            ],
            "isBackground": true,
            "problemMatcher": {
                "pattern": {
                    "regexp": "^.*$",
                    "file": 1,
                    "location": 2,
                    "message": 3
                },
                "background": {
                    "activeOnStart": true,
                    "beginsPattern": ".*TensorBoard.*",
                    "endsPattern": ".*TensorBoard.*"
                }
            },
            "presentation": {
                "reveal": "always",
                "panel": "new"
            },
            "runOptions": {
                "runOn": "folderOpen"
            }
        }
    ]
}
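This task starts TensorBoard in the background when the folder is opened; it is equivalent to running python -m tensorboard.main --logdir=NN/models/saved/logs --port=6006 --host=localhost from the workspace root. The catch-all problemMatcher pattern and the begins/ends patterns only tell VS Code when the background process counts as started; they do not parse errors.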
NN/__pycache__/__init__.cpython-312.pyc (new binary file): not shown
NN/__pycache__/main.cpython-312.pyc (new binary file): not shown
NN/_notes.md (new file): 13 additions

@@ -0,0 +1,13 @@
Great, realtime.py works. Now let's examine and continue with our 500M-parameter NN in an NN folder with separate modules. The first module will be a roughly 100M-parameter convolutional NN, the deep architecture historically used with great success for image recognition because it detects features at multiple levels. Create the NN class and an integrated RL pipeline that uses historical data to retrospectively identify buy/sell opportunities and trains the module on them. Use the data from realtime.py (add an easy-to-use realtime data interface if the existing functions are not convenient enough).

Create a new main file in the NN folder for our new MoE model. We'll use one main NN module that orchestrates data flows. Our CNN module should implement its training and inference pipelines internally, but the orchestrator will fetch the realtime data and forward it; use a common interface. A later module will be a Transformer that takes as input raw data from the last hidden layers of the CNN, where high-level features are learned, together with the CNN output: BUY/HOLD/SELL signals and key support/resistance trend lines.


# Train a CNN model
python -m NN.main --mode train --symbol BTC/USDT --timeframes 1h 4h --model-type cnn --epochs 100

# Make predictions with a trained model
python -m NN.main --mode predict --symbol BTC/USDT --timeframe 1h --model-type cnn

# Run real-time analysis
python -m NN.main --mode realtime --symbol BTC/USDT --timeframe 1h --inference-interval 60
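The notes above describe a common interface shared by the orchestrator, the CNN module, and a later Transformer module. A minimal sketch of what that interface could look like, with names chosen here for illustration only (they are not part of this commit), is:

from abc import ABC, abstractmethod
import numpy as np

class TradingModule(ABC):
    """Common interface the orchestrator can call on any expert module (illustrative)."""

    @abstractmethod
    def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
        """Fit the module on windowed market data."""

    @abstractmethod
    def predict(self, X):
        """Return (actions, probabilities), with actions in {0: BUY, 1: HOLD, 2: SELL}."""

    def extract_hidden_features(self, X) -> np.ndarray:
        # Optional: expose last-hidden-layer features so a downstream Transformer
        # expert can consume them, as the notes describe.
        raise NotImplementedError

The concrete classes in this commit (CNNModel, CNNModelPyTorch) already expose train, predict, and extract_hidden_features with roughly this shape.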
NN/main.py (new file): 265 additions

@@ -0,0 +1,265 @@
#!/usr/bin/env python3
"""
Neural Network Trading System Main Module

This module serves as the main entry point for the NN trading system,
coordinating data flow between different components and implementing
training and inference pipelines.
"""

import os
import sys
import logging
import argparse
from datetime import datetime

# Create logs directory if it doesn't exist (must exist before the FileHandler below is created)
os.makedirs('logs', exist_ok=True)

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler(os.path.join('logs', f'nn_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'))
    ]
)

logger = logging.getLogger('NN')

def parse_arguments():
    """Parse command line arguments"""
    parser = argparse.ArgumentParser(description='Neural Network Trading System')

    parser.add_argument('--mode', type=str, choices=['train', 'predict', 'realtime'], default='train',
                        help='Mode to run (train, predict, realtime)')
    parser.add_argument('--symbol', type=str, default='BTC/USDT',
                        help='Trading pair symbol')
    parser.add_argument('--timeframes', type=str, nargs='+', default=['1h', '4h'],
                        help='Timeframes to use')
    parser.add_argument('--window-size', type=int, default=20,
                        help='Window size for input data')
    parser.add_argument('--output-size', type=int, default=3,
                        help='Output size (1 for binary, 3 for BUY/HOLD/SELL)')
    parser.add_argument('--batch-size', type=int, default=32,
                        help='Batch size for training')
    parser.add_argument('--epochs', type=int, default=100,
                        help='Number of epochs for training')
    parser.add_argument('--model-type', type=str, choices=['cnn', 'transformer', 'moe'], default='cnn',
                        help='Model type to use')
    parser.add_argument('--framework', type=str, choices=['tensorflow', 'pytorch'], default='pytorch',
                        help='Deep learning framework to use')

    return parser.parse_args()

def main():
    """Main entry point for the NN trading system"""
    # Parse arguments
    args = parse_arguments()

    logger.info(f"Starting NN Trading System in {args.mode} mode")
    logger.info(f"Configuration: Symbol={args.symbol}, Timeframes={args.timeframes}, "
                f"Window Size={args.window_size}, Output Size={args.output_size}, "
                f"Model Type={args.model_type}, Framework={args.framework}")

    # Import the appropriate modules based on the framework
    if args.framework == 'pytorch':
        try:
            import torch
            logger.info(f"Using PyTorch {torch.__version__}")

            # Import PyTorch-based modules
            from NN.utils.data_interface import DataInterface

            if args.model_type == 'cnn':
                from NN.models.cnn_model_pytorch import CNNModelPyTorch as Model
            elif args.model_type == 'transformer':
                from NN.models.transformer_model_pytorch import TransformerModelPyTorchWrapper as Model
            elif args.model_type == 'moe':
                from NN.models.transformer_model_pytorch import MixtureOfExpertsModelPyTorch as Model
            else:
                logger.error(f"Unknown model type: {args.model_type}")
                return

        except ImportError as e:
            logger.error(f"Failed to import PyTorch modules: {str(e)}")
            logger.error("Please make sure PyTorch is installed or use the TensorFlow framework.")
            return

    elif args.framework == 'tensorflow':
        try:
            import tensorflow as tf
            logger.info(f"Using TensorFlow {tf.__version__}")

            # Import TensorFlow-based modules
            from NN.utils.data_interface import DataInterface

            if args.model_type == 'cnn':
                from NN.models.cnn_model import CNNModel as Model
            elif args.model_type == 'transformer':
                from NN.models.transformer_model import TransformerModel as Model
            elif args.model_type == 'moe':
                from NN.models.transformer_model import MixtureOfExpertsModel as Model
            else:
                logger.error(f"Unknown model type: {args.model_type}")
                return

        except ImportError as e:
            logger.error(f"Failed to import TensorFlow modules: {str(e)}")
            logger.error("Please make sure TensorFlow is installed or use the PyTorch framework.")
            return
    else:
        logger.error(f"Unknown framework: {args.framework}")
        return

    # Initialize data interface
    try:
        logger.info("Initializing data interface...")
        data_interface = DataInterface(
            symbol=args.symbol,
            timeframes=args.timeframes,
            window_size=args.window_size,
            output_size=args.output_size
        )
    except Exception as e:
        logger.error(f"Failed to initialize data interface: {str(e)}")
        return

    # Initialize model
    try:
        logger.info(f"Initializing {args.model_type.upper()} model...")
        model = Model(
            window_size=args.window_size,
            num_features=data_interface.get_feature_count(),
            output_size=args.output_size,
            timeframes=args.timeframes
        )
    except Exception as e:
        logger.error(f"Failed to initialize model: {str(e)}")
        return

    # Execute the requested mode
    if args.mode == 'train':
        train(data_interface, model, args)
    elif args.mode == 'predict':
        predict(data_interface, model, args)
    elif args.mode == 'realtime':
        realtime(data_interface, model, args)
    else:
        logger.error(f"Unknown mode: {args.mode}")
        return

    logger.info("Neural Network Trading System finished successfully")

def train(data_interface, model, args):
    """Train the model using the data interface"""
    logger.info("Starting training mode...")

    try:
        # Prepare training data
        logger.info("Preparing training data...")
        X_train, y_train, X_val, y_val = data_interface.prepare_training_data()

        # Train the model
        logger.info("Training model...")
        model.train(
            X_train, y_train,
            X_val, y_val,
            batch_size=args.batch_size,
            epochs=args.epochs
        )

        # Save the model
        model_path = os.path.join(
            'models',
            f"{args.model_type}_{args.symbol.replace('/', '_')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        )
        logger.info(f"Saving model to {model_path}...")
        model.save(model_path)

        # Evaluate the model
        logger.info("Evaluating model...")
        metrics = model.evaluate(X_val, y_val)
        logger.info(f"Evaluation metrics: {metrics}")

    except Exception as e:
        logger.error(f"Error in training mode: {str(e)}")
        return

def predict(data_interface, model, args):
    """Make predictions using the trained model"""
    logger.info("Starting prediction mode...")

    try:
        # Load the latest model
        model_dir = os.path.join('models')
        model_files = [f for f in os.listdir(model_dir) if f.startswith(args.model_type)]

        if not model_files:
            logger.error(f"No saved model found for type {args.model_type}")
            return

        latest_model = sorted(model_files)[-1]
        model_path = os.path.join(model_dir, latest_model)

        logger.info(f"Loading model from {model_path}...")
        model.load(model_path)

        # Prepare prediction data
        logger.info("Preparing prediction data...")
        X_pred = data_interface.prepare_prediction_data()

        # Make predictions
        logger.info("Making predictions...")
        predictions = model.predict(X_pred)

        # Process and display predictions
        logger.info("Processing predictions...")
        data_interface.process_predictions(predictions)

    except Exception as e:
        logger.error(f"Error in prediction mode: {str(e)}")
        return

def realtime(data_interface, model, args):
    """Run the model in real-time mode"""
    logger.info("Starting real-time mode...")

    try:
        # Import realtime module
        from NN.realtime import RealtimeAnalyzer

        # Load the latest model
        model_dir = os.path.join('models')
        model_files = [f for f in os.listdir(model_dir) if f.startswith(args.model_type)]

        if not model_files:
            logger.error(f"No saved model found for type {args.model_type}")
            return

        latest_model = sorted(model_files)[-1]
        model_path = os.path.join(model_dir, latest_model)

        logger.info(f"Loading model from {model_path}...")
        model.load(model_path)

        # Initialize realtime analyzer
        logger.info("Initializing real-time analyzer...")
        realtime_analyzer = RealtimeAnalyzer(
            data_interface=data_interface,
            model=model,
            symbol=args.symbol,
            timeframes=args.timeframes
        )

        # Start real-time analysis
        logger.info("Starting real-time analysis...")
        realtime_analyzer.start()

    except Exception as e:
        logger.error(f"Error in real-time mode: {str(e)}")
        return

if __name__ == "__main__":
    main()
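NN/main.py relies on NN.utils.data_interface.DataInterface, which is not part of this diff. A stub with the surface that the call sites above assume might look like the following sketch; the constructor arguments, method names, and return shapes are inferred from those calls, not taken from the real implementation:

import numpy as np

class DataInterface:
    """Hypothetical stand-in matching the calls made from NN/main.py."""

    def __init__(self, symbol, timeframes, window_size=20, output_size=3):
        self.symbol = symbol
        self.timeframes = timeframes
        self.window_size = window_size
        self.output_size = output_size

    def get_feature_count(self) -> int:
        # e.g. OHLCV per timeframe; the real implementation decides this
        return 5 * len(self.timeframes)

    def prepare_training_data(self):
        # expected to return X_train, y_train, X_val, y_val as numpy arrays
        raise NotImplementedError

    def prepare_prediction_data(self) -> np.ndarray:
        # windowed features of shape (n_samples, window_size, n_features)
        raise NotImplementedError

    def process_predictions(self, predictions):
        # display or persist model output
        raise NotImplementedError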
NN/models/cnn_model.py (new file): 560 additions

@@ -0,0 +1,560 @@
"""
Convolutional Neural Network for timeseries analysis

This module implements a deep CNN model for cryptocurrency price analysis.
The model uses multiple parallel convolutional pathways and LSTM layers
to detect patterns at different time scales.
"""

import os
import logging
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
    Input, Conv1D, MaxPooling1D, Dense, Dropout, BatchNormalization,
    LSTM, Bidirectional, Flatten, Concatenate, GlobalAveragePooling1D,
    LeakyReLU, Attention
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.metrics import AUC
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
import datetime
import json

logger = logging.getLogger(__name__)

class CNNModel:
    """
    Convolutional Neural Network for time series analysis.

    This model uses a multi-pathway architecture with different filter sizes
    to detect patterns at different time scales, combined with LSTM layers
    for temporal dependencies.
    """

    def __init__(self, input_shape=(20, 5), output_size=1, model_dir="NN/models/saved"):
        """
        Initialize the CNN model.

        Args:
            input_shape (tuple): Shape of input data (sequence_length, features)
            output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
            model_dir (str): Directory to save trained models
        """
        self.input_shape = input_shape
        self.output_size = output_size
        self.model_dir = model_dir
        self.model = None
        self.history = None

        # Create model directory if it doesn't exist
        os.makedirs(self.model_dir, exist_ok=True)

        logger.info(f"Initialized CNN model with input shape {input_shape} and output size {output_size}")

    def build_model(self, filters=(32, 64, 128), kernel_sizes=(3, 5, 7),
                    dropout_rate=0.3, learning_rate=0.001):
        """
        Build the CNN model architecture.

        Args:
            filters (tuple): Number of filters for each convolutional pathway
            kernel_sizes (tuple): Kernel sizes for each convolutional pathway
            dropout_rate (float): Dropout rate for regularization
            learning_rate (float): Learning rate for Adam optimizer

        Returns:
            The compiled model
        """
        # Input layer
        inputs = Input(shape=self.input_shape)

        # Multiple parallel convolutional pathways with different kernel sizes
        # to capture patterns at different time scales
        conv_layers = []

        for i, (filter_size, kernel_size) in enumerate(zip(filters, kernel_sizes)):
            conv_path = Conv1D(
                filters=filter_size,
                kernel_size=kernel_size,
                padding='same',
                name=f'conv1d_{i+1}'
            )(inputs)
            conv_path = BatchNormalization()(conv_path)
            conv_path = LeakyReLU(alpha=0.1)(conv_path)
            conv_path = MaxPooling1D(pool_size=2, padding='same')(conv_path)
            conv_path = Dropout(dropout_rate)(conv_path)
            conv_layers.append(conv_path)

        # Merge convolutional pathways
        if len(conv_layers) > 1:
            merged = Concatenate()(conv_layers)
        else:
            merged = conv_layers[0]

        # Add another Conv1D layer after merging
        x = Conv1D(filters=filters[-1], kernel_size=3, padding='same')(merged)
        x = BatchNormalization()(x)
        x = LeakyReLU(alpha=0.1)(x)
        x = MaxPooling1D(pool_size=2, padding='same')(x)
        x = Dropout(dropout_rate)(x)

        # Bidirectional LSTM for temporal dependencies
        x = Bidirectional(LSTM(128, return_sequences=True))(x)
        x = Dropout(dropout_rate)(x)

        # Attention mechanism to focus on important time steps
        x = Bidirectional(LSTM(64, return_sequences=True))(x)

        # Global average pooling to reduce parameters
        x = GlobalAveragePooling1D()(x)
        x = Dropout(dropout_rate)(x)

        # Dense layers for final classification/regression
        x = Dense(64, activation='relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(dropout_rate)(x)

        # Output layer
        if self.output_size == 1:
            # Binary classification (up/down)
            outputs = Dense(1, activation='sigmoid', name='output')(x)
            loss = 'binary_crossentropy'
            metrics = ['accuracy', AUC()]
        elif self.output_size == 3:
            # Multi-class classification (buy/hold/sell)
            outputs = Dense(3, activation='softmax', name='output')(x)
            loss = 'categorical_crossentropy'
            metrics = ['accuracy']
        else:
            # Regression
            outputs = Dense(self.output_size, activation='linear', name='output')(x)
            loss = 'mse'
            metrics = ['mae']

        # Create and compile model
        self.model = Model(inputs=inputs, outputs=outputs)

        # Compile with Adam optimizer
        self.model.compile(
            optimizer=Adam(learning_rate=learning_rate),
            loss=loss,
            metrics=metrics
        )

        # Log model summary
        self.model.summary(print_fn=lambda x: logger.info(x))

        return self.model

    def train(self, X_train, y_train, batch_size=32, epochs=100, validation_split=0.2,
              callbacks=None, class_weights=None):
        """
        Train the CNN model on the provided data.

        Args:
            X_train (numpy.ndarray): Training features
            y_train (numpy.ndarray): Training targets
            batch_size (int): Batch size
            epochs (int): Number of epochs
            validation_split (float): Fraction of data to use for validation
            callbacks (list): List of Keras callbacks
            class_weights (dict): Class weights for imbalanced datasets

        Returns:
            History object containing training metrics
        """
        if self.model is None:
            self.build_model()

        # Default callbacks if none provided
        if callbacks is None:
            # Create a timestamp for model checkpoints
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

            callbacks = [
                EarlyStopping(
                    monitor='val_loss',
                    patience=10,
                    restore_best_weights=True
                ),
                ReduceLROnPlateau(
                    monitor='val_loss',
                    factor=0.5,
                    patience=5,
                    min_lr=1e-6
                ),
                ModelCheckpoint(
                    filepath=os.path.join(self.model_dir, f"cnn_model_{timestamp}.h5"),
                    monitor='val_loss',
                    save_best_only=True
                )
            ]

        # Check if y_train needs to be one-hot encoded for multi-class
        if self.output_size == 3 and len(y_train.shape) == 1:
            y_train = tf.keras.utils.to_categorical(y_train, num_classes=3)

        # Train the model
        logger.info(f"Training CNN model with {len(X_train)} samples, batch size {batch_size}, epochs {epochs}")
        self.history = self.model.fit(
            X_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_split=validation_split,
            callbacks=callbacks,
            class_weight=class_weights,
            verbose=2
        )

        # Save the trained model
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        model_path = os.path.join(self.model_dir, f"cnn_model_final_{timestamp}.h5")
        self.model.save(model_path)
        logger.info(f"Model saved to {model_path}")

        # Save training history
        history_path = os.path.join(self.model_dir, f"cnn_model_history_{timestamp}.json")
        with open(history_path, 'w') as f:
            # Convert numpy values to Python native types for JSON serialization
            history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
            json.dump(history_dict, f, indent=2)

        return self.history

    def evaluate(self, X_test, y_test, plot_results=False):
        """
        Evaluate the model on test data.

        Args:
            X_test (numpy.ndarray): Test features
            y_test (numpy.ndarray): Test targets
            plot_results (bool): Whether to plot evaluation results

        Returns:
            dict: Evaluation metrics
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Convert y_test to one-hot encoding for multi-class
        y_test_original = y_test.copy()
        if self.output_size == 3 and len(y_test.shape) == 1:
            y_test = tf.keras.utils.to_categorical(y_test, num_classes=3)

        # Evaluate model
        logger.info(f"Evaluating CNN model on {len(X_test)} samples")
        eval_results = self.model.evaluate(X_test, y_test, verbose=0)

        metrics = {}
        for metric, value in zip(self.model.metrics_names, eval_results):
            metrics[metric] = value
            logger.info(f"{metric}: {value:.4f}")

        # Get predictions
        y_pred_prob = self.model.predict(X_test)

        # Different processing based on output type
        if self.output_size == 1:
            # Binary classification
            y_pred = (y_pred_prob > 0.5).astype(int).flatten()

            # Classification report
            report = classification_report(y_test, y_pred)
            logger.info(f"Classification Report:\n{report}")

            # Confusion matrix
            cm = confusion_matrix(y_test, y_pred)
            logger.info(f"Confusion Matrix:\n{cm}")

            # ROC curve and AUC
            fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
            roc_auc = auc(fpr, tpr)
            metrics['auc'] = roc_auc

            if plot_results:
                self._plot_binary_results(y_test, y_pred, y_pred_prob, fpr, tpr, roc_auc)

        elif self.output_size == 3:
            # Multi-class classification
            y_pred = np.argmax(y_pred_prob, axis=1)

            # Classification report
            report = classification_report(y_test_original, y_pred)
            logger.info(f"Classification Report:\n{report}")

            # Confusion matrix
            cm = confusion_matrix(y_test_original, y_pred)
            logger.info(f"Confusion Matrix:\n{cm}")

            if plot_results:
                self._plot_multiclass_results(y_test_original, y_pred, y_pred_prob)

        return metrics

    def predict(self, X):
        """
        Make predictions on new data.

        Args:
            X (numpy.ndarray): Input features

        Returns:
            tuple: (y_pred, y_proba) where:
                y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
                y_proba is the class probability
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Ensure X has the right shape
        if len(X.shape) == 2:
            # Single sample, add batch dimension
            X = np.expand_dims(X, axis=0)

        # Get predictions
        y_proba = self.model.predict(X)

        # Process based on output type
        if self.output_size == 1:
            # Binary classification
            y_pred = (y_proba > 0.5).astype(int).flatten()
            return y_pred, y_proba.flatten()
        elif self.output_size == 3:
            # Multi-class classification
            y_pred = np.argmax(y_proba, axis=1)
            return y_pred, y_proba
        else:
            # Regression
            return y_proba, y_proba

    def save(self, filepath=None):
        """
        Save the model to disk.

        Args:
            filepath (str): Path to save the model

        Returns:
            str: Path where the model was saved
        """
        if self.model is None:
            raise ValueError("Model has not been built yet")

        if filepath is None:
            # Create a default filepath with timestamp
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            filepath = os.path.join(self.model_dir, f"cnn_model_{timestamp}.h5")

        self.model.save(filepath)
        logger.info(f"Model saved to {filepath}")
        return filepath

    def load(self, filepath):
        """
        Load a saved model from disk.

        Args:
            filepath (str): Path to the saved model

        Returns:
            The loaded model
        """
        self.model = load_model(filepath)
        logger.info(f"Model loaded from {filepath}")
        return self.model

    def extract_hidden_features(self, X):
        """
        Extract features from the last hidden layer of the CNN for transfer learning.

        Args:
            X (numpy.ndarray): Input data

        Returns:
            numpy.ndarray: Extracted features
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Create a new model that outputs the features from the layer before the output
        feature_layer_name = self.model.layers[-2].name
        feature_extractor = Model(
            inputs=self.model.input,
            outputs=self.model.get_layer(feature_layer_name).output
        )

        # Extract features
        features = feature_extractor.predict(X)

        return features

    def _plot_binary_results(self, y_true, y_pred, y_proba, fpr, tpr, roc_auc):
        """
        Plot evaluation results for binary classification.

        Args:
            y_true (numpy.ndarray): True labels
            y_pred (numpy.ndarray): Predicted labels
            y_proba (numpy.ndarray): Prediction probabilities
            fpr (numpy.ndarray): False positive rates for ROC curve
            tpr (numpy.ndarray): True positive rates for ROC curve
            roc_auc (float): Area under ROC curve
        """
        plt.figure(figsize=(15, 5))

        # Confusion Matrix
        plt.subplot(1, 3, 1)
        cm = confusion_matrix(y_true, y_pred)
        plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
        plt.title('Confusion Matrix')
        plt.colorbar()
        tick_marks = [0, 1]
        plt.xticks(tick_marks, ['0', '1'])
        plt.yticks(tick_marks, ['0', '1'])
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')

        # Add text annotations to confusion matrix
        thresh = cm.max() / 2.
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                plt.text(j, i, format(cm[i, j], 'd'),
                         horizontalalignment="center",
                         color="white" if cm[i, j] > thresh else "black")

        # Histogram of prediction probabilities
        plt.subplot(1, 3, 2)
        plt.hist(y_proba[y_true == 0], alpha=0.5, label='Class 0')
        plt.hist(y_proba[y_true == 1], alpha=0.5, label='Class 1')
        plt.title('Prediction Probabilities')
        plt.xlabel('Probability of Class 1')
        plt.ylabel('Count')
        plt.legend()

        # ROC Curve
        plt.subplot(1, 3, 3)
        plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc:.3f})')
        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic')
        plt.legend(loc="lower right")

        plt.tight_layout()

        # Save figure
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        fig_path = os.path.join(self.model_dir, f"cnn_evaluation_{timestamp}.png")
        plt.savefig(fig_path)
        plt.close()

        logger.info(f"Evaluation plots saved to {fig_path}")

    def _plot_multiclass_results(self, y_true, y_pred, y_proba):
        """
        Plot evaluation results for multi-class classification.

        Args:
            y_true (numpy.ndarray): True labels
            y_pred (numpy.ndarray): Predicted labels
            y_proba (numpy.ndarray): Prediction probabilities
        """
        plt.figure(figsize=(12, 5))

        # Confusion Matrix
        plt.subplot(1, 2, 1)
        cm = confusion_matrix(y_true, y_pred)
        plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
        plt.title('Confusion Matrix')
        plt.colorbar()
        classes = ['BUY', 'HOLD', 'SELL']  # Assumes classes are 0, 1, 2
        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes)
        plt.yticks(tick_marks, classes)
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')

        # Add text annotations to confusion matrix
        thresh = cm.max() / 2.
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                plt.text(j, i, format(cm[i, j], 'd'),
                         horizontalalignment="center",
                         color="white" if cm[i, j] > thresh else "black")

        # Class probability distributions
        plt.subplot(1, 2, 2)
        for i, cls in enumerate(classes):
            plt.hist(y_proba[y_true == i, i], alpha=0.5, label=f'Class {cls}')
        plt.title('Class Probability Distributions')
        plt.xlabel('Probability')
        plt.ylabel('Count')
        plt.legend()

        plt.tight_layout()

        # Save figure
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        fig_path = os.path.join(self.model_dir, f"cnn_multiclass_evaluation_{timestamp}.png")
        plt.savefig(fig_path)
        plt.close()

        logger.info(f"Multiclass evaluation plots saved to {fig_path}")

    def plot_training_history(self):
        """
        Plot training history (loss and metrics).

        Returns:
            str: Path to the saved plot
        """
        if self.history is None:
            raise ValueError("Model has not been trained yet")

        plt.figure(figsize=(12, 5))

        # Plot loss
        plt.subplot(1, 2, 1)
        plt.plot(self.history.history['loss'], label='Training Loss')
        if 'val_loss' in self.history.history:
            plt.plot(self.history.history['val_loss'], label='Validation Loss')
        plt.title('Model Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()

        # Plot accuracy
        plt.subplot(1, 2, 2)

        if 'accuracy' in self.history.history:
            plt.plot(self.history.history['accuracy'], label='Training Accuracy')
            if 'val_accuracy' in self.history.history:
                plt.plot(self.history.history['val_accuracy'], label='Validation Accuracy')
            plt.title('Model Accuracy')
            plt.ylabel('Accuracy')
        elif 'mae' in self.history.history:
            plt.plot(self.history.history['mae'], label='Training MAE')
            if 'val_mae' in self.history.history:
                plt.plot(self.history.history['val_mae'], label='Validation MAE')
            plt.title('Model MAE')
            plt.ylabel('MAE')

        plt.xlabel('Epoch')
        plt.legend()

        plt.tight_layout()

        # Save figure
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        fig_path = os.path.join(self.model_dir, f"cnn_training_history_{timestamp}.png")
        plt.savefig(fig_path)
        plt.close()

        logger.info(f"Training history plot saved to {fig_path}")
        return fig_path
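A short usage sketch for the Keras CNNModel above, assuming you already have windowed numpy arrays (the random data and small epoch count here are only for illustration):

import numpy as np
from NN.models.cnn_model import CNNModel

# Toy data matching the default input shape (20 timesteps, 5 features), 3 classes
X = np.random.rand(256, 20, 5).astype(np.float32)
y = np.random.randint(0, 3, size=256)

model = CNNModel(input_shape=(20, 5), output_size=3)
model.build_model()                      # multi-pathway CNN + BiLSTM head
model.train(X, y, batch_size=32, epochs=2)

y_pred, y_proba = model.predict(X[:8])   # class ids and softmax probabilities
features = model.extract_hidden_features(X[:8])  # inputs for a later Transformer expert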
NN/models/cnn_model_pytorch.py (new file): 546 additions

@@ -0,0 +1,546 @@
#!/usr/bin/env python3
"""
CNN Model - PyTorch Implementation

This module implements a CNN model using PyTorch for time series analysis.
The model consists of multiple convolutional pathways and LSTM layers.
"""

import os
import logging
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Configure logging
logger = logging.getLogger(__name__)

class CNNPyTorch(nn.Module):
    """PyTorch CNN model for time series analysis"""

    def __init__(self, input_shape, output_size=3):
        """
        Initialize the CNN model.

        Args:
            input_shape (tuple): Shape of input data (window_size, features)
            output_size (int): Size of output (1 for regression, 3 for classification)
        """
        super(CNNPyTorch, self).__init__()

        window_size, num_features = input_shape

        # Architecture parameters
        filters = [32, 64, 128]
        kernel_sizes = [3, 5, 7]
        lstm_units = 100
        dense_units = 64
        dropout_rate = 0.3

        # Create parallel convolutional pathways
        self.conv_paths = nn.ModuleList()

        for f, k in zip(filters, kernel_sizes):
            path = nn.Sequential(
                nn.Conv1d(num_features, f, kernel_size=k, padding='same'),
                nn.ReLU(),
                nn.BatchNorm1d(f),
                # kernel_size=3 with stride=1 and padding=1 keeps the sequence length
                # at window_size, so the dense layer size below stays consistent
                nn.MaxPool1d(kernel_size=3, stride=1, padding=1),
                nn.Dropout(dropout_rate)
            )
            self.conv_paths.append(path)

        # Calculate output size from conv paths
        conv_output_size = sum(filters) * window_size

        # LSTM layer
        self.lstm = nn.LSTM(
            input_size=sum(filters),
            hidden_size=lstm_units,
            batch_first=True,
            bidirectional=True
        )

        # Dense layers
        self.flatten = nn.Flatten()
        self.dense1 = nn.Sequential(
            nn.Linear(lstm_units * 2 * window_size, dense_units),
            nn.ReLU(),
            nn.BatchNorm1d(dense_units),
            nn.Dropout(dropout_rate)
        )

        # Output layer
        self.output = nn.Linear(dense_units, output_size)

        # Activation based on output size
        if output_size == 1:
            self.activation = nn.Sigmoid()  # Binary classification or regression
        elif output_size > 1:
            self.activation = nn.Softmax(dim=1)  # Multi-class classification
        else:
            self.activation = nn.Identity()  # No activation

    def forward(self, x):
        """
        Forward pass through the network.

        Args:
            x: Input tensor of shape [batch_size, window_size, features]

        Returns:
            Output tensor of shape [batch_size, output_size]
        """
        batch_size, window_size, num_features = x.shape

        # Transpose for conv1d: [batch, features, window]
        x_t = x.transpose(1, 2)

        # Process through parallel conv paths
        conv_outputs = []
        for path in self.conv_paths:
            conv_outputs.append(path(x_t))

        # Concatenate conv outputs
        conv_concat = torch.cat(conv_outputs, dim=1)

        # Transpose back for LSTM: [batch, window, features]
        conv_concat = conv_concat.transpose(1, 2)

        # LSTM processing
        lstm_out, _ = self.lstm(conv_concat)

        # Flatten
        flattened = self.flatten(lstm_out)

        # Dense processing
        dense_out = self.dense1(flattened)

        # Output
        output = self.output(dense_out)

        # Apply activation
        return self.activation(output)


class CNNModelPyTorch:
    """
    CNN model wrapper class for time series analysis using PyTorch.

    This class provides methods for building, training, evaluating, and making
    predictions with the CNN model.
    """

    def __init__(self, window_size, num_features, output_size=3, timeframes=None):
        """
        Initialize the CNN model.

        Args:
            window_size (int): Size of the input window
            num_features (int): Number of features in the input data
            output_size (int): Size of the output (1 for regression, 3 for classification)
            timeframes (list): List of timeframes used (for logging)
        """
        self.window_size = window_size
        self.num_features = num_features
        self.output_size = output_size
        self.timeframes = timeframes or []

        # Determine device (GPU or CPU)
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {self.device}")

        # Initialize model
        self.model = None
        self.build_model()

        # Initialize training history
        self.history = {
            'loss': [],
            'val_loss': [],
            'accuracy': [],
            'val_accuracy': []
        }

    def build_model(self):
        """Build the CNN model architecture"""
        logger.info(f"Building PyTorch CNN model with window_size={self.window_size}, "
                    f"num_features={self.num_features}, output_size={self.output_size}")

        self.model = CNNPyTorch(
            input_shape=(self.window_size, self.num_features),
            output_size=self.output_size
        ).to(self.device)

        # Initialize optimizer
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)

        # Initialize loss function based on output size
        if self.output_size == 1:
            self.criterion = nn.BCELoss()  # Binary classification
        elif self.output_size > 1:
            self.criterion = nn.CrossEntropyLoss()  # Multi-class classification
        else:
            self.criterion = nn.MSELoss()  # Regression

        logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")

    def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
        """
        Train the CNN model.

        Args:
            X_train: Training input data
            y_train: Training target data
            X_val: Validation input data
            y_val: Validation target data
            batch_size: Batch size for training
            epochs: Number of training epochs

        Returns:
            Training history
        """
        logger.info(f"Training PyTorch CNN model with {len(X_train)} samples, "
                    f"batch_size={batch_size}, epochs={epochs}")

        # Convert numpy arrays to PyTorch tensors
        X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)

        # Handle different output sizes for y_train
        if self.output_size == 1:
            y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(self.device)
        else:
            y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)

        # Create DataLoader for training data
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Create DataLoader for validation data if provided
        if X_val is not None and y_val is not None:
            X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
            if self.output_size == 1:
                y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
            else:
                y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)

            val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
            val_loader = DataLoader(val_dataset, batch_size=batch_size)
        else:
            val_loader = None

        # Training loop
        for epoch in range(epochs):
            # Training phase
            self.model.train()
            running_loss = 0.0
            correct = 0
            total = 0

            for inputs, targets in train_loader:
                # Zero the parameter gradients
                self.optimizer.zero_grad()

                # Forward pass
                outputs = self.model(inputs)

                # Calculate loss
                if self.output_size == 1:
                    loss = self.criterion(outputs, targets.unsqueeze(1))
                else:
                    loss = self.criterion(outputs, targets)

                # Backward pass and optimize
                loss.backward()
                self.optimizer.step()

                # Statistics
                running_loss += loss.item()
                if self.output_size > 1:
                    _, predicted = torch.max(outputs, 1)
                    total += targets.size(0)
                    correct += (predicted == targets).sum().item()

            epoch_loss = running_loss / len(train_loader)
            epoch_acc = correct / total if total > 0 else 0

            # Validation phase
            if val_loader is not None:
                val_loss, val_acc = self._validate(val_loader)

                logger.info(f"Epoch {epoch+1}/{epochs} - "
                            f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
                            f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")

                # Update history
                self.history['loss'].append(epoch_loss)
                self.history['accuracy'].append(epoch_acc)
                self.history['val_loss'].append(val_loss)
                self.history['val_accuracy'].append(val_acc)
            else:
                logger.info(f"Epoch {epoch+1}/{epochs} - "
                            f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f}")

                # Update history without validation
                self.history['loss'].append(epoch_loss)
                self.history['accuracy'].append(epoch_acc)

        logger.info("Training completed")
        return self.history

    def _validate(self, val_loader):
        """Validate the model using the validation set"""
        self.model.eval()
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, targets in val_loader:
                # Forward pass
                outputs = self.model(inputs)

                # Calculate loss
                if self.output_size == 1:
                    loss = self.criterion(outputs, targets.unsqueeze(1))
                else:
                    loss = self.criterion(outputs, targets)

                val_loss += loss.item()

                # Calculate accuracy
                if self.output_size > 1:
                    _, predicted = torch.max(outputs, 1)
                    total += targets.size(0)
                    correct += (predicted == targets).sum().item()

        return val_loss / len(val_loader), correct / total if total > 0 else 0

    def evaluate(self, X_test, y_test):
        """
        Evaluate the model on test data.

        Args:
            X_test: Test input data
            y_test: Test target data

        Returns:
            dict: Evaluation metrics
        """
        logger.info(f"Evaluating model on {len(X_test)} samples")

        # Convert to PyTorch tensors
        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(self.device)

        # Get predictions
        self.model.eval()
        with torch.no_grad():
            y_pred = self.model(X_test_tensor)

            if self.output_size > 1:
                _, y_pred_class = torch.max(y_pred, 1)
                y_pred_class = y_pred_class.cpu().numpy()
            else:
                y_pred_class = (y_pred.cpu().numpy() > 0.5).astype(int).flatten()

        # Calculate metrics
        if self.output_size > 1:
            accuracy = accuracy_score(y_test, y_pred_class)
            precision = precision_score(y_test, y_pred_class, average='weighted')
            recall = recall_score(y_test, y_pred_class, average='weighted')
            f1 = f1_score(y_test, y_pred_class, average='weighted')

            metrics = {
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1_score': f1
            }
        else:
            accuracy = accuracy_score(y_test, y_pred_class)
            precision = precision_score(y_test, y_pred_class)
            recall = recall_score(y_test, y_pred_class)
            f1 = f1_score(y_test, y_pred_class)

            metrics = {
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1_score': f1
            }

        logger.info(f"Evaluation metrics: {metrics}")
        return metrics

    def predict(self, X):
        """
        Make predictions with the model.

        Args:
            X: Input data

        Returns:
            Predictions
        """
        # Convert to PyTorch tensor
        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)

        # Get predictions
        self.model.eval()
        with torch.no_grad():
            predictions = self.model(X_tensor)

            if self.output_size > 1:
                # Multi-class classification
                probs = predictions.cpu().numpy()
                _, class_preds = torch.max(predictions, 1)
                class_preds = class_preds.cpu().numpy()
                return class_preds, probs
            else:
                # Binary classification or regression
                preds = predictions.cpu().numpy()
                if self.output_size == 1:
                    # Binary classification
                    class_preds = (preds > 0.5).astype(int)
                    return class_preds.flatten(), preds.flatten()
                else:
                    # Regression
                    return preds.flatten(), None

    def save(self, filepath):
        """
        Save the model to a file.

        Args:
            filepath: Path to save the model
        """
        # Create directory if it doesn't exist
        os.makedirs(os.path.dirname(filepath), exist_ok=True)

        # Save the model state
        model_state = {
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'history': self.history,
            'window_size': self.window_size,
            'num_features': self.num_features,
            'output_size': self.output_size,
            'timeframes': self.timeframes
        }

        torch.save(model_state, f"{filepath}.pt")
        logger.info(f"Model saved to {filepath}.pt")

    def load(self, filepath):
        """
        Load the model from a file.

        Args:
            filepath: Path to load the model from
        """
        # Check if file exists
        if not os.path.exists(f"{filepath}.pt"):
            logger.error(f"Model file {filepath}.pt not found")
            return False

        # Load the model state
        model_state = torch.load(f"{filepath}.pt", map_location=self.device)

        # Update model parameters
        self.window_size = model_state['window_size']
        self.num_features = model_state['num_features']
        self.output_size = model_state['output_size']
        self.timeframes = model_state['timeframes']

        # Rebuild the model
        self.build_model()

        # Load the model state
        self.model.load_state_dict(model_state['model_state_dict'])
        self.optimizer.load_state_dict(model_state['optimizer_state_dict'])
        self.history = model_state['history']

        logger.info(f"Model loaded from {filepath}.pt")
        return True

    def plot_training_history(self):
        """Plot the training history"""
        if not self.history['loss']:
            logger.warning("No training history to plot")
            return

        plt.figure(figsize=(12, 4))

        # Plot loss
        plt.subplot(1, 2, 1)
        plt.plot(self.history['loss'], label='Training Loss')
        if 'val_loss' in self.history and self.history['val_loss']:
            plt.plot(self.history['val_loss'], label='Validation Loss')
        plt.title('Model Loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend()

        # Plot accuracy
        plt.subplot(1, 2, 2)
        plt.plot(self.history['accuracy'], label='Training Accuracy')
        if 'val_accuracy' in self.history and self.history['val_accuracy']:
            plt.plot(self.history['val_accuracy'], label='Validation Accuracy')
        plt.title('Model Accuracy')
        plt.ylabel('Accuracy')
        plt.xlabel('Epoch')
        plt.legend()

        # Save the plot
        os.makedirs('plots', exist_ok=True)
        plt.savefig(os.path.join('plots', f"cnn_history_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"))
        plt.close()

        logger.info("Training history plots saved to plots directory")

    def extract_hidden_features(self, X):
        """
        Extract hidden features from the model.

        Args:
            X: Input data

        Returns:
            Hidden features
        """
        # Convert to PyTorch tensor
        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)

        # Forward pass through the model up to the last hidden layer
        self.model.eval()
        with torch.no_grad():
            # Get features before the output layer
            x_t = X_tensor.transpose(1, 2)

            # Process through parallel conv paths
            conv_outputs = []
            for path in self.model.conv_paths:
                conv_outputs.append(path(x_t))

            # Concatenate conv outputs
            conv_concat = torch.cat(conv_outputs, dim=1)

            # Transpose back for LSTM
            conv_concat = conv_concat.transpose(1, 2)

            # LSTM processing
            lstm_out, _ = self.model.lstm(conv_concat)

            # Flatten
            flattened = self.model.flatten(lstm_out)

            # Dense processing
            hidden_features = self.model.dense1(flattened)

        return hidden_features.cpu().numpy()
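A brief usage sketch of the PyTorch wrapper above, assuming windowed numpy inputs (values and paths are illustrative only):

import numpy as np
from NN.models.cnn_model_pytorch import CNNModelPyTorch

X = np.random.rand(128, 20, 5).astype(np.float32)
y = np.random.randint(0, 3, size=128)            # 0=BUY, 1=HOLD, 2=SELL

model = CNNModelPyTorch(window_size=20, num_features=5, output_size=3, timeframes=['1h', '4h'])
model.train(X, y, batch_size=32, epochs=2)

actions, probs = model.predict(X[:8])            # class ids and per-class probabilities
model.save('NN/models/saved/cnn_pytorch_demo')   # writes cnn_pytorch_demo.pt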
File diff suppressed because it is too large
653
NN/models/transformer_model_pytorch.py
Normal file
653
NN/models/transformer_model_pytorch.py
Normal file
@ -0,0 +1,653 @@
#!/usr/bin/env python3
"""
Transformer Model - PyTorch Implementation

This module implements a Transformer model using PyTorch for time series analysis.
The model consists of a Transformer encoder and a Mixture of Experts model.
"""

import os
import logging
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Configure logging
logger = logging.getLogger(__name__)


class TransformerBlock(nn.Module):
    """Transformer Block with self-attention mechanism"""

    def __init__(self, input_dim, num_heads=4, ff_dim=64, dropout=0.1):
        super(TransformerBlock, self).__init__()

        self.attention = nn.MultiheadAttention(
            embed_dim=input_dim,
            num_heads=num_heads,
            dropout=dropout,
            batch_first=True
        )

        self.feed_forward = nn.Sequential(
            nn.Linear(input_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, input_dim)
        )

        self.layernorm1 = nn.LayerNorm(input_dim)
        self.layernorm2 = nn.LayerNorm(input_dim)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, x):
        # Self-attention
        attn_output, _ = self.attention(x, x, x)
        x = x + self.dropout1(attn_output)
        x = self.layernorm1(x)

        # Feed forward
        ff_output = self.feed_forward(x)
        x = x + self.dropout2(ff_output)
        x = self.layernorm2(x)

        return x


class TransformerModelPyTorch(nn.Module):
    """PyTorch Transformer model for time series analysis"""

    def __init__(self, input_shape, output_size=3, num_heads=4, ff_dim=64, num_transformer_blocks=2):
        """
        Initialize the Transformer model.

        Args:
            input_shape (tuple): Shape of input data (window_size, features)
            output_size (int): Size of output (1 for regression, 3 for classification)
            num_heads (int): Number of attention heads
            ff_dim (int): Feed forward dimension
            num_transformer_blocks (int): Number of transformer blocks
        """
        super(TransformerModelPyTorch, self).__init__()

        window_size, num_features = input_shape

        # Positional encoding
        self.pos_encoding = nn.Parameter(
            torch.zeros(1, window_size, num_features),
            requires_grad=True
        )

        # Transformer blocks
        self.transformer_blocks = nn.ModuleList([
            TransformerBlock(
                input_dim=num_features,
                num_heads=num_heads,
                ff_dim=ff_dim
            ) for _ in range(num_transformer_blocks)
        ])

        # Global average pooling
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)

        # Dense layers
        self.dense = nn.Sequential(
            nn.Linear(num_features, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Dropout(0.3),
            nn.Linear(64, output_size)
        )

        # Activation based on output size
        if output_size == 1:
            self.activation = nn.Sigmoid()  # Binary classification or regression
        elif output_size > 1:
            self.activation = nn.Softmax(dim=1)  # Multi-class classification
        else:
            self.activation = nn.Identity()  # No activation

    def forward(self, x):
        """
        Forward pass through the network.

        Args:
            x: Input tensor of shape [batch_size, window_size, features]

        Returns:
            Output tensor of shape [batch_size, output_size]
        """
        # Add positional encoding
        x = x + self.pos_encoding

        # Apply transformer blocks
        for transformer_block in self.transformer_blocks:
            x = transformer_block(x)

        # Global average pooling
        x = x.transpose(1, 2)        # [batch, features, window]
        x = self.global_avg_pool(x)  # [batch, features, 1]
        x = x.squeeze(-1)            # [batch, features]

        # Dense layers
        x = self.dense(x)

        # Apply activation
        return self.activation(x)


class TransformerModelPyTorchWrapper:
    """
    Transformer model wrapper class for time series analysis using PyTorch.

    This class provides methods for building, training, evaluating, and making
    predictions with the Transformer model.
    """

    def __init__(self, window_size, num_features, output_size=3, timeframes=None):
        """
        Initialize the Transformer model.

        Args:
            window_size (int): Size of the input window
            num_features (int): Number of features in the input data
            output_size (int): Size of the output (1 for regression, 3 for classification)
            timeframes (list): List of timeframes used (for logging)
        """
        self.window_size = window_size
        self.num_features = num_features
        self.output_size = output_size
        self.timeframes = timeframes or []

        # Determine device (GPU or CPU)
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {self.device}")

        # Initialize model
        self.model = None
        self.build_model()

        # Initialize training history
        self.history = {
            'loss': [],
            'val_loss': [],
            'accuracy': [],
            'val_accuracy': []
        }

    def build_model(self):
        """Build the Transformer model architecture"""
        logger.info(f"Building PyTorch Transformer model with window_size={self.window_size}, "
                    f"num_features={self.num_features}, output_size={self.output_size}")

        self.model = TransformerModelPyTorch(
            input_shape=(self.window_size, self.num_features),
            output_size=self.output_size
        ).to(self.device)

        # Initialize optimizer
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)

        # Initialize loss function based on output size
        if self.output_size == 1:
            self.criterion = nn.BCELoss()  # Binary classification
        elif self.output_size > 1:
            self.criterion = nn.CrossEntropyLoss()  # Multi-class classification
        else:
            self.criterion = nn.MSELoss()  # Regression

        logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")

    def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
        """
        Train the Transformer model.

        Args:
            X_train: Training input data
            y_train: Training target data
            X_val: Validation input data
            y_val: Validation target data
            batch_size: Batch size for training
            epochs: Number of training epochs

        Returns:
            Training history
        """
        logger.info(f"Training PyTorch Transformer model with {len(X_train)} samples, "
                    f"batch_size={batch_size}, epochs={epochs}")

        # Convert numpy arrays to PyTorch tensors
        X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)

        # Handle different output sizes for y_train
        if self.output_size == 1:
            y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(self.device)
        else:
            y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)

        # Create DataLoader for training data
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Create DataLoader for validation data if provided
        if X_val is not None and y_val is not None:
            X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
            if self.output_size == 1:
                y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
            else:
                y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)

            val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
            val_loader = DataLoader(val_dataset, batch_size=batch_size)
        else:
            val_loader = None

        # Training loop
        for epoch in range(epochs):
            # Training phase
            self.model.train()
            running_loss = 0.0
            correct = 0
            total = 0

            for inputs, targets in train_loader:
                # Zero the parameter gradients
                self.optimizer.zero_grad()

                # Forward pass
                outputs = self.model(inputs)

                # Calculate loss
                if self.output_size == 1:
                    loss = self.criterion(outputs, targets.unsqueeze(1))
                else:
                    loss = self.criterion(outputs, targets)

                # Backward pass and optimize
                loss.backward()
                self.optimizer.step()

                # Statistics
                running_loss += loss.item()
                if self.output_size > 1:
                    _, predicted = torch.max(outputs, 1)
                    total += targets.size(0)
                    correct += (predicted == targets).sum().item()

            epoch_loss = running_loss / len(train_loader)
            epoch_acc = correct / total if total > 0 else 0

            # Validation phase
            if val_loader is not None:
                val_loss, val_acc = self._validate(val_loader)

                logger.info(f"Epoch {epoch+1}/{epochs} - "
                            f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
                            f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")

                # Update history
                self.history['loss'].append(epoch_loss)
                self.history['accuracy'].append(epoch_acc)
                self.history['val_loss'].append(val_loss)
                self.history['val_accuracy'].append(val_acc)
            else:
                logger.info(f"Epoch {epoch+1}/{epochs} - "
                            f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f}")

                # Update history without validation
                self.history['loss'].append(epoch_loss)
                self.history['accuracy'].append(epoch_acc)

        logger.info("Training completed")
        return self.history

    def _validate(self, val_loader):
        """Validate the model using the validation set"""
        self.model.eval()
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, targets in val_loader:
                # Forward pass
                outputs = self.model(inputs)

                # Calculate loss
                if self.output_size == 1:
                    loss = self.criterion(outputs, targets.unsqueeze(1))
                else:
                    loss = self.criterion(outputs, targets)

                val_loss += loss.item()

                # Calculate accuracy
                if self.output_size > 1:
                    _, predicted = torch.max(outputs, 1)
                    total += targets.size(0)
                    correct += (predicted == targets).sum().item()

        return val_loss / len(val_loader), correct / total if total > 0 else 0

    def evaluate(self, X_test, y_test):
        """
        Evaluate the model on test data.

        Args:
            X_test: Test input data
            y_test: Test target data

        Returns:
            dict: Evaluation metrics
        """
        logger.info(f"Evaluating model on {len(X_test)} samples")

        # Convert to PyTorch tensors
        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(self.device)

        # Get predictions
        self.model.eval()
        with torch.no_grad():
            y_pred = self.model(X_test_tensor)

            if self.output_size > 1:
                _, y_pred_class = torch.max(y_pred, 1)
                y_pred_class = y_pred_class.cpu().numpy()
            else:
                y_pred_class = (y_pred.cpu().numpy() > 0.5).astype(int).flatten()

        # Calculate metrics
        if self.output_size > 1:
            accuracy = accuracy_score(y_test, y_pred_class)
            precision = precision_score(y_test, y_pred_class, average='weighted')
            recall = recall_score(y_test, y_pred_class, average='weighted')
            f1 = f1_score(y_test, y_pred_class, average='weighted')

            metrics = {
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1_score': f1
            }
        else:
            accuracy = accuracy_score(y_test, y_pred_class)
            precision = precision_score(y_test, y_pred_class)
            recall = recall_score(y_test, y_pred_class)
            f1 = f1_score(y_test, y_pred_class)

            metrics = {
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1_score': f1
            }

        logger.info(f"Evaluation metrics: {metrics}")
        return metrics

    def predict(self, X):
        """
        Make predictions with the model.

        Args:
            X: Input data

        Returns:
            Predictions
        """
        # Convert to PyTorch tensor
        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)

        # Get predictions
        self.model.eval()
        with torch.no_grad():
            predictions = self.model(X_tensor)

            if self.output_size > 1:
                # Multi-class classification
                probs = predictions.cpu().numpy()
                _, class_preds = torch.max(predictions, 1)
                class_preds = class_preds.cpu().numpy()
                return class_preds, probs
            else:
                # Binary classification or regression
                preds = predictions.cpu().numpy()
                if self.output_size == 1:
                    # Binary classification
                    class_preds = (preds > 0.5).astype(int)
                    return class_preds.flatten(), preds.flatten()
                else:
                    # Regression
                    return preds.flatten(), None

    def save(self, filepath):
        """
        Save the model to a file.

        Args:
            filepath: Path to save the model
        """
        # Create directory if it doesn't exist
        os.makedirs(os.path.dirname(filepath), exist_ok=True)

        # Save the model state
        model_state = {
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'history': self.history,
            'window_size': self.window_size,
            'num_features': self.num_features,
            'output_size': self.output_size,
            'timeframes': self.timeframes
        }

        torch.save(model_state, f"{filepath}.pt")
        logger.info(f"Model saved to {filepath}.pt")

    def load(self, filepath):
        """
        Load the model from a file.

        Args:
            filepath: Path to load the model from
        """
        # Check if file exists
        if not os.path.exists(f"{filepath}.pt"):
            logger.error(f"Model file {filepath}.pt not found")
            return False

        # Load the model state
        model_state = torch.load(f"{filepath}.pt", map_location=self.device)

        # Update model parameters
        self.window_size = model_state['window_size']
        self.num_features = model_state['num_features']
        self.output_size = model_state['output_size']
        self.timeframes = model_state['timeframes']

        # Rebuild the model
        self.build_model()

        # Load the model state
        self.model.load_state_dict(model_state['model_state_dict'])
        self.optimizer.load_state_dict(model_state['optimizer_state_dict'])
        self.history = model_state['history']

        logger.info(f"Model loaded from {filepath}.pt")
        return True


class MixtureOfExpertsModelPyTorch:
    """
    Mixture of Experts model implementation using PyTorch.

    This model combines predictions from multiple models (experts) using a
    learned weighting scheme.
    """

    def __init__(self, output_size=3, timeframes=None):
        """
        Initialize the Mixture of Experts model.

        Args:
            output_size (int): Size of the output (1 for regression, 3 for classification)
            timeframes (list): List of timeframes used (for logging)
        """
        self.output_size = output_size
        self.timeframes = timeframes or []
        self.experts = {}
        self.expert_weights = {}

        # Determine device (GPU or CPU)
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {self.device}")

        # Initialize model and training history
        self.model = None
        self.history = {
            'loss': [],
            'val_loss': [],
            'accuracy': [],
            'val_accuracy': []
        }

    def add_expert(self, name, model):
        """
        Add an expert model.

        Args:
            name (str): Name of the expert
            model: Expert model
        """
        self.experts[name] = model
        logger.info(f"Added expert: {name}")

    def predict(self, X):
        """
        Make predictions using all experts and combine them.

        Args:
            X: Input data

        Returns:
            Combined predictions
        """
        if not self.experts:
            logger.error("No experts added to the MoE model")
            return None

        # Get predictions from each expert
        expert_predictions = {}
        for name, expert in self.experts.items():
            pred, _ = expert.predict(X)
            expert_predictions[name] = pred

        # Combine predictions based on weights
        final_pred = None
        for name, pred in expert_predictions.items():
            weight = self.expert_weights.get(name, 1.0 / len(self.experts))
            if final_pred is None:
                final_pred = weight * pred
            else:
                final_pred += weight * pred

        # For classification, convert to class indices
        if self.output_size > 1:
            # Get class with highest probability
            class_pred = np.argmax(final_pred, axis=1)
            return class_pred, final_pred
        else:
            # Binary classification
            class_pred = (final_pred > 0.5).astype(int)
            return class_pred, final_pred

    def evaluate(self, X_test, y_test):
        """
        Evaluate the model on test data.

        Args:
            X_test: Test input data
            y_test: Test target data

        Returns:
            dict: Evaluation metrics
        """
        logger.info(f"Evaluating MoE model on {len(X_test)} samples")

        # Get predictions
        y_pred_class, _ = self.predict(X_test)

        # Calculate metrics
        if self.output_size > 1:
            accuracy = accuracy_score(y_test, y_pred_class)
            precision = precision_score(y_test, y_pred_class, average='weighted')
            recall = recall_score(y_test, y_pred_class, average='weighted')
            f1 = f1_score(y_test, y_pred_class, average='weighted')

            metrics = {
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1_score': f1
            }
        else:
            accuracy = accuracy_score(y_test, y_pred_class)
            precision = precision_score(y_test, y_pred_class)
            recall = recall_score(y_test, y_pred_class)
            f1 = f1_score(y_test, y_pred_class)

            metrics = {
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1_score': f1
            }

        logger.info(f"MoE evaluation metrics: {metrics}")
        return metrics

    def save(self, filepath):
        """
        Save the model weights to a file.

        Args:
            filepath: Path to save the model
        """
        # Create directory if it doesn't exist
        os.makedirs(os.path.dirname(filepath), exist_ok=True)

        # Save the model state
        model_state = {
            'expert_weights': self.expert_weights,
            'output_size': self.output_size,
            'timeframes': self.timeframes
        }

        torch.save(model_state, f"{filepath}_moe.pt")
        logger.info(f"MoE model saved to {filepath}_moe.pt")

    def load(self, filepath):
        """
        Load the model from a file.

        Args:
            filepath: Path to load the model from
        """
        # Check if file exists
        if not os.path.exists(f"{filepath}_moe.pt"):
            logger.error(f"MoE model file {filepath}_moe.pt not found")
            return False

        # Load the model state
        model_state = torch.load(f"{filepath}_moe.pt", map_location=self.device)

        # Update model parameters
        self.expert_weights = model_state['expert_weights']
        self.output_size = model_state['output_size']
        self.timeframes = model_state['timeframes']

        logger.info(f"MoE model loaded from {filepath}_moe.pt")
        return True
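A minimal usage sketch of the wrapper above. Array shapes follow the docstrings; the sample count, train/val split, and save path are illustrative assumptions only:

# Usage sketch with assumed sizes and an illustrative save path
import numpy as np
from NN.models.transformer_model_pytorch import TransformerModelPyTorchWrapper

window_size, num_features = 20, 5
X = np.random.rand(200, window_size, num_features).astype(np.float32)  # dummy OHLCV windows
y = np.random.randint(0, 3, size=200)                                  # BUY/HOLD/SELL class labels

model = TransformerModelPyTorchWrapper(window_size=window_size,
                                        num_features=num_features,
                                        output_size=3,
                                        timeframes=['1h', '4h'])
model.train(X[:160], y[:160], X_val=X[160:], y_val=y[160:], batch_size=32, epochs=5)
class_preds, probs = model.predict(X[160:])
model.save("NN/models/saved/transformer_demo")  # assumed output location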
@@ -1,13 +1,22 @@
-tensorflow>=2.5.0
+# Main dependencies
 numpy>=1.19.5
 pandas>=1.3.0
 matplotlib>=3.4.2
 scikit-learn>=0.24.2
-tensorflow-addons>=0.13.0
-plotly>=5.1.0
-h5py>=3.1.0
-tqdm>=4.61.1
-pyyaml>=5.4.1
-tensorboard>=2.5.0
-ccxt>=1.50.0
-requests>=2.25.1
+
+# PyTorch (primary framework)
+torch
+torchvision
+
+# TensorFlow (optional)
+# tensorflow>=2.5.0
+# tensorflow-addons>=0.13.0
+
+# Additional dependencies
+plotly
+h5py
+tqdm
+pyyaml
+tensorboard
+ccxt
+requests
88 NN/start_tensorboard.py Normal file
@@ -0,0 +1,88 @@
#!/usr/bin/env python
"""
Start TensorBoard for monitoring neural network training
"""

import os
import sys
import subprocess
import webbrowser
from time import sleep

def start_tensorboard(logdir="NN/models/saved/logs", port=6006, open_browser=True):
    """
    Start TensorBoard in a subprocess

    Args:
        logdir: Directory containing TensorBoard logs
        port: Port to run TensorBoard on
        open_browser: Whether to open a browser automatically
    """
    # Make sure the log directory exists
    os.makedirs(logdir, exist_ok=True)

    # Create command
    cmd = [
        sys.executable,
        "-m",
        "tensorboard.main",
        f"--logdir={logdir}",
        f"--port={port}",
        "--bind_all"
    ]

    print(f"Starting TensorBoard with logs from {logdir} on port {port}")
    print(f"Command: {' '.join(cmd)}")

    # Start TensorBoard in a subprocess
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True
    )

    # Wait for TensorBoard to start up
    for line in process.stdout:
        print(line.strip())
        if "TensorBoard" in line and "http://" in line:
            # TensorBoard is running, extract the URL
            url = None
            for part in line.split():
                if part.startswith(("http://", "https://")):
                    url = part
                    break

            # Open browser if requested and URL found
            if open_browser and url:
                print(f"Opening TensorBoard in browser: {url}")
                webbrowser.open(url)

            break

    # Return the process for the caller to manage
    return process

if __name__ == "__main__":
    import argparse

    # Parse command line arguments
    parser = argparse.ArgumentParser(description="Start TensorBoard for NN training visualization")
    parser.add_argument("--logdir", default="NN/models/saved/logs", help="Directory containing TensorBoard logs")
    parser.add_argument("--port", type=int, default=6006, help="Port to run TensorBoard on")
    parser.add_argument("--no-browser", action="store_true", help="Don't open browser automatically")

    args = parser.parse_args()

    # Start TensorBoard
    process = start_tensorboard(args.logdir, args.port, not args.no_browser)

    try:
        # Keep the script running until Ctrl+C
        print("TensorBoard is running. Press Ctrl+C to stop.")
        while True:
            sleep(1)
    except KeyboardInterrupt:
        print("Stopping TensorBoard...")
        process.terminate()
        process.wait()
BIN NN/utils/__pycache__/__init__.cpython-312.pyc Normal file
Binary file not shown.
BIN NN/utils/__pycache__/data_interface.cpython-312.pyc Normal file
Binary file not shown.

390 NN/utils/data_interface.py Normal file
@@ -0,0 +1,390 @@
"""
Data Interface for Neural Network Trading System

This module provides functionality to fetch, process, and prepare data for the neural network models.
"""

import os
import logging
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import json
import pickle
from sklearn.preprocessing import MinMaxScaler

logger = logging.getLogger(__name__)

class DataInterface:
    """
    Handles data collection, processing, and preparation for neural network models.

    This class is responsible for:
    1. Fetching historical data
    2. Preprocessing data for neural network input
    3. Generating training datasets
    4. Handling real-time data integration
    """

    def __init__(self, symbol="BTC/USDT", timeframes=None, data_dir="NN/data"):
        """
        Initialize the data interface.

        Args:
            symbol (str): Trading pair symbol (e.g., "BTC/USDT")
            timeframes (list): List of timeframes to use (e.g., ['1m', '5m', '1h', '4h', '1d'])
            data_dir (str): Directory to store/load datasets
        """
        self.symbol = symbol
        self.timeframes = timeframes or ['1h', '4h', '1d']
        self.data_dir = data_dir
        self.scalers = {}  # Store scalers for each timeframe

        # Create data directory if it doesn't exist
        os.makedirs(self.data_dir, exist_ok=True)

        # Initialize empty dataframes for each timeframe
        self.dataframes = {tf: None for tf in self.timeframes}

        logger.info(f"DataInterface initialized for {symbol} with timeframes {timeframes}")

    def get_historical_data(self, timeframe='1h', n_candles=1000, use_cache=True):
        """
        Fetch historical price data for a given timeframe.

        Args:
            timeframe (str): Timeframe to fetch data for
            n_candles (int): Number of candles to fetch
            use_cache (bool): Whether to use cached data if available

        Returns:
            pd.DataFrame: DataFrame with OHLCV data
        """
        cache_file = os.path.join(self.data_dir, f"{self.symbol.replace('/', '_')}_{timeframe}.csv")

        # Check if cached data exists and is recent
        if use_cache and os.path.exists(cache_file):
            try:
                df = pd.read_csv(cache_file, parse_dates=['timestamp'])
                # If we have enough data and it's recent, use it
                if len(df) >= n_candles:
                    logger.info(f"Using cached data for {self.symbol} {timeframe} ({len(df)} candles)")
                    self.dataframes[timeframe] = df
                    return df.tail(n_candles)
            except Exception as e:
                logger.error(f"Error reading cached data: {str(e)}")

        # If we get here, we need to fetch data
        # For now, we'll use a placeholder for fetching data from an exchange
        try:
            # In a real implementation, we would fetch data from an exchange or API here
            # For this example, we'll create dummy data if we can't load from cache
            logger.info(f"Fetching historical data for {self.symbol} {timeframe}")

            # Placeholder for real data fetching
            # In a real implementation, this would be replaced with API calls
            self._fetch_data_from_exchange(timeframe, n_candles)

            # Save to cache
            if self.dataframes[timeframe] is not None:
                self.dataframes[timeframe].to_csv(cache_file, index=False)
                return self.dataframes[timeframe]
            else:
                # Create dummy data as fallback
                logger.warning(f"Could not fetch data for {self.symbol} {timeframe}, using dummy data")
                df = self._create_dummy_data(timeframe, n_candles)
                self.dataframes[timeframe] = df
                return df
        except Exception as e:
            logger.error(f"Error fetching data: {str(e)}")
            return None

    def _fetch_data_from_exchange(self, timeframe, n_candles):
        """
        Placeholder method for fetching data from an exchange.
        In a real implementation, this would connect to an exchange API.
        """
        # This is a placeholder - in a real implementation this would make API calls
        # to a cryptocurrency exchange to fetch OHLCV data

        # For now, just generate dummy data
        self.dataframes[timeframe] = self._create_dummy_data(timeframe, n_candles)

    def _create_dummy_data(self, timeframe, n_candles):
        """
        Create dummy OHLCV data for testing purposes.

        Args:
            timeframe (str): Timeframe to create data for
            n_candles (int): Number of candles to create

        Returns:
            pd.DataFrame: DataFrame with dummy OHLCV data
        """
        # Map timeframe to seconds
        tf_seconds = {
            '1m': 60,
            '5m': 300,
            '15m': 900,
            '1h': 3600,
            '4h': 14400,
            '1d': 86400
        }
        seconds = tf_seconds.get(timeframe, 3600)  # Default to 1h

        # Create timestamps
        end_time = datetime.now()
        timestamps = [end_time - timedelta(seconds=seconds * i) for i in range(n_candles)]
        timestamps.reverse()  # Oldest first

        # Generate random price data with realistic patterns
        np.random.seed(42)  # For reproducibility

        # Start price
        price = 50000  # For BTC/USDT
        prices = []
        volumes = []

        for i in range(n_candles):
            # Random walk with drift and volatility based on timeframe
            drift = 0.0001 * seconds  # Larger drift for larger timeframes
            volatility = 0.01 * np.sqrt(seconds / 3600)  # Scale volatility by sqrt of time

            # Daily/weekly patterns
            if timeframe in ['1d', '4h']:
                # Add some cyclical patterns
                cycle = np.sin(i / 7 * np.pi) * 0.02  # Weekly cycle
            else:
                cycle = np.sin(i / 24 * np.pi) * 0.01  # Daily cycle

            # Calculate price change with random walk + cycles
            price_change = price * (drift + volatility * np.random.randn() + cycle)
            price += price_change

            # Generate OHLC from the price
            open_price = price
            high_price = price * (1 + abs(0.005 * np.random.randn()))
            low_price = price * (1 - abs(0.005 * np.random.randn()))
            close_price = price * (1 + 0.002 * np.random.randn())

            # Ensure high >= open, close, low and low <= open, close
            high_price = max(high_price, open_price, close_price)
            low_price = min(low_price, open_price, close_price)

            # Generate volume (higher for larger price movements)
            volume = abs(price_change) * (10000 + 5000 * np.random.rand())

            prices.append((open_price, high_price, low_price, close_price))
            volumes.append(volume)

            # Update price for next iteration
            price = close_price

        # Create DataFrame
        df = pd.DataFrame(
            [(t, o, h, l, c, v) for t, (o, h, l, c), v in zip(timestamps, prices, volumes)],
            columns=['timestamp', 'open', 'high', 'low', 'close', 'volume']
        )

        return df

    def prepare_nn_input(self, timeframes=None, n_candles=500, window_size=20):
        """
        Prepare input data for neural network models.

        Args:
            timeframes (list): List of timeframes to use
            n_candles (int): Number of candles to fetch for each timeframe
            window_size (int): Size of the sliding window for feature creation

        Returns:
            tuple: (X, y, timestamps) where:
                X is the input features array with shape (n_samples, window_size, n_features)
                y is the target array with shape (n_samples,)
                timestamps is an array of timestamps for each sample
        """
        if timeframes is None:
            timeframes = self.timeframes

        # Get data for all requested timeframes
        dfs = {}
        for tf in timeframes:
            df = self.get_historical_data(timeframe=tf, n_candles=n_candles)
            if df is not None and not df.empty:
                dfs[tf] = df

        if not dfs:
            logger.error("No data available for feature creation")
            return None, None, None

        # For simplicity, we'll use just one timeframe for now
        # In a more complex implementation, we would merge multiple timeframes
        primary_tf = timeframes[0]
        if primary_tf not in dfs:
            logger.error(f"Primary timeframe {primary_tf} not available")
            return None, None, None

        df = dfs[primary_tf]

        # Create features
        X, y, timestamps = self._create_features(df, window_size)

        return X, y, timestamps

    def _create_features(self, df, window_size):
        """
        Create features from OHLCV data using a sliding window approach.

        Args:
            df (pd.DataFrame): DataFrame with OHLCV data
            window_size (int): Size of the sliding window

        Returns:
            tuple: (X, y, timestamps) where:
                X is the input features array
                y is the target array
                timestamps is an array of timestamps for each sample
        """
        # Extract OHLCV columns
        ohlcv = df[['open', 'high', 'low', 'close', 'volume']].values

        # Scale the data
        scaler = MinMaxScaler()
        ohlcv_scaled = scaler.fit_transform(ohlcv)

        # Store the scaler for later use
        timeframe = next((tf for tf in self.timeframes if self.dataframes.get(tf) is not None and
                          self.dataframes[tf].equals(df)), 'unknown')
        self.scalers[timeframe] = scaler

        # Create sliding windows
        X = []
        y = []
        timestamps = []

        for i in range(len(ohlcv_scaled) - window_size):
            # Input: window_size candles of OHLCV data
            X.append(ohlcv_scaled[i:i+window_size])

            # Target: binary classification - price goes up (1) or down (0)
            # 1 if close price increases in the next candle, 0 otherwise
            price_change = ohlcv[i+window_size, 3] - ohlcv[i+window_size-1, 3]
            y.append(1 if price_change > 0 else 0)

            # Store timestamp for reference
            timestamps.append(df['timestamp'].iloc[i+window_size])

        return np.array(X), np.array(y), np.array(timestamps)

    def generate_training_dataset(self, timeframes=None, n_candles=1000, window_size=20):
        """
        Generate and save a training dataset for neural network models.

        Args:
            timeframes (list): List of timeframes to use
            n_candles (int): Number of candles to fetch for each timeframe
            window_size (int): Size of the sliding window for feature creation

        Returns:
            dict: Dictionary of dataset file paths
        """
        if timeframes is None:
            timeframes = self.timeframes

        # Prepare inputs
        X, y, timestamps = self.prepare_nn_input(timeframes, n_candles, window_size)

        if X is None or y is None:
            logger.error("Failed to prepare input data for dataset")
            return None

        # Prepare output paths
        timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
        dataset_name = f"{self.symbol.replace('/', '_')}_{'_'.join(timeframes)}_{timestamp_str}"

        X_path = os.path.join(self.data_dir, f"{dataset_name}_X.npy")
        y_path = os.path.join(self.data_dir, f"{dataset_name}_y.npy")
        timestamps_path = os.path.join(self.data_dir, f"{dataset_name}_timestamps.npy")
        metadata_path = os.path.join(self.data_dir, f"{dataset_name}_metadata.json")

        # Save arrays
        np.save(X_path, X)
        np.save(y_path, y)
        np.save(timestamps_path, timestamps)

        # Save metadata
        metadata = {
            'symbol': self.symbol,
            'timeframes': timeframes,
            'window_size': window_size,
            'n_samples': len(X),
            'feature_shape': X.shape[1:],
            'created_at': datetime.now().isoformat(),
            'dataset_name': dataset_name
        }

        with open(metadata_path, 'w') as f:
            json.dump(metadata, f, indent=2)

        # Save scalers
        scaler_path = os.path.join(self.data_dir, f"{dataset_name}_scalers.pkl")
        with open(scaler_path, 'wb') as f:
            pickle.dump(self.scalers, f)

        # Return dataset info
        dataset_info = {
            'X_path': X_path,
            'y_path': y_path,
            'timestamps_path': timestamps_path,
            'metadata_path': metadata_path,
            'scaler_path': scaler_path
        }

        logger.info(f"Dataset generated and saved: {dataset_name}")
        return dataset_info

    def prepare_realtime_input(self, timeframe='1h', n_candles=30, window_size=20):
        """
        Prepare a single input sample from the most recent data for real-time inference.

        Args:
            timeframe (str): Timeframe to use
            n_candles (int): Number of recent candles to fetch
            window_size (int): Size of the sliding window

        Returns:
            tuple: (X, timestamp) where:
                X is the input features array with shape (1, window_size, n_features)
                timestamp is the timestamp of the most recent candle
        """
        # Get recent data
        df = self.get_historical_data(timeframe=timeframe, n_candles=n_candles, use_cache=False)

        if df is None or len(df) < window_size:
            logger.error(f"Not enough data for inference (need at least {window_size} candles)")
            return None, None

        # Extract features from the most recent window
        ohlcv = df[['open', 'high', 'low', 'close', 'volume']].tail(window_size).values

        # Scale the data
        if timeframe in self.scalers:
            # Use existing scaler
            scaler = self.scalers[timeframe]
        else:
            # Create new scaler
            scaler = MinMaxScaler()
            # Fit on all available data
            all_data = df[['open', 'high', 'low', 'close', 'volume']].values
            scaler.fit(all_data)
            self.scalers[timeframe] = scaler

        ohlcv_scaled = scaler.transform(ohlcv)

        # Reshape to (1, window_size, n_features)
        X = np.array([ohlcv_scaled])

        # Get timestamp of the most recent candle
        timestamp = df['timestamp'].iloc[-1]

        return X, timestamp
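A minimal sketch of driving this interface end to end. It relies on the dummy-data fallback above, so it runs offline; the shapes noted in the comments are taken from the docstrings and the parameter values are illustrative:

# Usage sketch for DataInterface (dummy-data fallback, illustrative parameters)
from NN.utils.data_interface import DataInterface

di = DataInterface(symbol="BTC/USDT", timeframes=['1h', '4h'])

# Windowed training arrays: X ~ (n_samples, 20, 5), y ~ (n_samples,)
X, y, timestamps = di.prepare_nn_input(timeframes=['1h'], n_candles=500, window_size=20)

# Persist a reusable dataset (arrays, metadata, scalers) under NN/data
dataset_info = di.generate_training_dataset(timeframes=['1h'], n_candles=1000, window_size=20)

# Single most-recent window for real-time inference: X_live ~ (1, 20, 5)
X_live, last_ts = di.prepare_realtime_input(timeframe='1h', n_candles=30, window_size=20)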
232 run_nn.py Normal file
@@ -0,0 +1,232 @@
#!/usr/bin/env python3
"""
Neural Network Training Runner Script

This script runs the Neural Network Trading System with the existing conda environment.
It detects which deep learning framework is available (TensorFlow or PyTorch) and
adjusts the implementation accordingly.
"""

import os
import sys
import subprocess
import argparse
import logging
from pathlib import Path

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('nn_runner')

def detect_framework():
    """Detect which deep learning framework is available in the environment"""
    try:
        import torch
        torch_version = torch.__version__
        logger.info(f"PyTorch {torch_version} detected")
        return "pytorch", torch_version
    except ImportError:
        logger.warning("PyTorch not found in environment")
        try:
            import tensorflow as tf
            tf_version = tf.__version__
            logger.info(f"TensorFlow {tf_version} detected")
            return "tensorflow", tf_version
        except ImportError:
            logger.error("Neither PyTorch nor TensorFlow is available in the environment")
            return None, None

def check_dependencies():
    """Check for required dependencies and return if they are met"""
    required_packages = ["numpy", "pandas", "matplotlib", "scikit-learn"]
    missing_packages = []

    for package in required_packages:
        try:
            __import__(package)
        except ImportError:
            missing_packages.append(package)

    if missing_packages:
        logger.warning(f"Missing required packages: {', '.join(missing_packages)}")
        return False

    return True

def create_run_command(args, framework):
    """Create the command to run the neural network based on the available framework"""
    cmd = ["python", "-m", "NN.main"]

    # Add mode
    cmd.extend(["--mode", args.mode])

    # Add symbol
    if args.symbol:
        cmd.extend(["--symbol", args.symbol])

    # Add timeframes
    if args.timeframes:
        cmd.extend(["--timeframes"] + args.timeframes)

    # Add window size
    if args.window_size:
        cmd.extend(["--window-size", str(args.window_size)])

    # Add output size
    if args.output_size:
        cmd.extend(["--output-size", str(args.output_size)])

    # Add batch size
    if args.batch_size:
        cmd.extend(["--batch-size", str(args.batch_size)])

    # Add epochs
    if args.epochs:
        cmd.extend(["--epochs", str(args.epochs)])

    # Add model type
    if args.model_type:
        cmd.extend(["--model-type", args.model_type])

    # Add framework-specific flag
    cmd.extend(["--framework", framework])

    return cmd

def parse_arguments():
    """Parse command line arguments"""
    parser = argparse.ArgumentParser(description='Neural Network Trading System Runner')

    parser.add_argument('--mode', type=str, choices=['train', 'predict', 'realtime'], default='train',
                        help='Mode to run (train, predict, realtime)')
    parser.add_argument('--symbol', type=str, default='BTC/USDT',
                        help='Trading pair symbol')
    parser.add_argument('--timeframes', type=str, nargs='+', default=['1h', '4h'],
                        help='Timeframes to use')
    parser.add_argument('--window-size', type=int, default=20,
                        help='Window size for input data')
    parser.add_argument('--output-size', type=int, default=3,
                        help='Output size (1 for binary, 3 for BUY/HOLD/SELL)')
    parser.add_argument('--batch-size', type=int, default=32,
                        help='Batch size for training')
    parser.add_argument('--epochs', type=int, default=100,
                        help='Number of epochs for training')
    parser.add_argument('--model-type', type=str, choices=['cnn', 'transformer', 'moe'], default='cnn',
                        help='Model type to use')
    parser.add_argument('--conda-env', type=str, default='gpt-gpu',
                        help='Name of conda environment to use')
    parser.add_argument('--no-conda', action='store_true',
                        help='Do not use conda environment activation')
    parser.add_argument('--framework', type=str, choices=['tensorflow', 'pytorch'], default='pytorch',
                        help='Deep learning framework to use (default: pytorch)')

    return parser.parse_args()

def main():
    # Parse arguments
    args = parse_arguments()

    # Check if we should run with conda
    if not args.no_conda and args.conda_env:
        # Create conda activation command
        if sys.platform == 'win32':
            conda_cmd = f"conda activate {args.conda_env} && "
        else:
            conda_cmd = f"source activate {args.conda_env} && "

        logger.info(f"Running with conda environment: {args.conda_env}")

        # Create the run script
        script_path = Path("run_nn_in_conda.bat" if sys.platform == 'win32' else "run_nn_in_conda.sh")

        with open(script_path, 'w') as f:
            if sys.platform == 'win32':
                f.write("@echo off\n")
                f.write(f"call conda activate {args.conda_env}\n")
                f.write(f"python -m NN.main --mode {args.mode} --symbol {args.symbol}")

                if args.timeframes:
                    f.write(f" --timeframes {' '.join(args.timeframes)}")

                if args.window_size:
                    f.write(f" --window-size {args.window_size}")

                if args.output_size:
                    f.write(f" --output-size {args.output_size}")

                if args.batch_size:
                    f.write(f" --batch-size {args.batch_size}")

                if args.epochs:
                    f.write(f" --epochs {args.epochs}")

                if args.model_type:
                    f.write(f" --model-type {args.model_type}")
            else:
                f.write("#!/bin/bash\n")
                f.write(f"source activate {args.conda_env}\n")
                f.write(f"python -m NN.main --mode {args.mode} --symbol {args.symbol}")

                if args.timeframes:
                    f.write(f" --timeframes {' '.join(args.timeframes)}")

                if args.window_size:
                    f.write(f" --window-size {args.window_size}")

                if args.output_size:
                    f.write(f" --output-size {args.output_size}")

                if args.batch_size:
                    f.write(f" --batch-size {args.batch_size}")

                if args.epochs:
                    f.write(f" --epochs {args.epochs}")

                if args.model_type:
                    f.write(f" --model-type {args.model_type}")

        # Make script executable on Unix
        if sys.platform != 'win32':
            os.chmod(script_path, 0o755)

        # Run the script
        logger.info(f"Created script: {script_path}")
        logger.info("Run this script to execute the neural network with the conda environment")

        if sys.platform == 'win32':
            print("\nTo run the neural network, execute the following command:")
            print(f"  {script_path}")
        else:
            print("\nTo run the neural network, execute the following command:")
            print(f"  ./{script_path}")
    else:
        # Run directly without conda
        # First detect available framework
        framework, version = detect_framework()

        if framework is None:
            logger.error("Cannot run Neural Network - no deep learning framework available")
            return

        # Check dependencies
        if not check_dependencies():
            logger.error("Missing required dependencies - please install them first")
            return

        # Create command
        cmd = create_run_command(args, framework)

        # Run command
        logger.info(f"Running command: {' '.join(cmd)}")
        try:
            subprocess.run(cmd, check=True)
        except subprocess.CalledProcessError as e:
            logger.error(f"Error running neural network: {str(e)}")
        except Exception as e:
            logger.error(f"Error: {str(e)}")

if __name__ == "__main__":
    main()
3 run_nn_in_conda.bat Normal file
@@ -0,0 +1,3 @@
@echo off
call conda activate gpt-gpu
python -m NN.main --mode train --symbol BTC/USDT --timeframes 1h 4h --window-size 20 --output-size 3 --batch-size 32 --epochs 100 --model-type cnn --framework pytorch
50 run_pytorch_nn.bat Normal file
@@ -0,0 +1,50 @@
@echo off
echo ============================================================
echo Neural Network Trading System - PyTorch Implementation
echo ============================================================

call conda activate gpt-gpu

REM Parse command-line arguments
set MODE=train
set MODEL_TYPE=cnn
set SYMBOL=BTC/USDT
set EPOCHS=100

:parse
if "%~1"=="" goto endparse
if /i "%~1"=="--mode" (
    set MODE=%~2
    shift
    shift
    goto parse
)
if /i "%~1"=="--model" (
    set MODEL_TYPE=%~2
    shift
    shift
    goto parse
)
if /i "%~1"=="--symbol" (
    set SYMBOL=%~2
    shift
    shift
    goto parse
)
if /i "%~1"=="--epochs" (
    set EPOCHS=%~2
    shift
    shift
    goto parse
)
shift
goto parse
:endparse

echo Running Neural Network in %MODE% mode with %MODEL_TYPE% model for %SYMBOL% for %EPOCHS% epochs

python -m NN.main --mode %MODE% --symbol %SYMBOL% --timeframes 1h 4h --window-size 20 --output-size 3 --batch-size 32 --epochs %EPOCHS% --model-type %MODEL_TYPE% --framework pytorch

echo ============================================================
echo Run completed.
echo ============================================================