new nn wip

2025-03-25 13:38:25 +02:00
parent 50eb50696b
commit 0042581275
18 changed files with 3358 additions and 294 deletions
--- a/NN/models/cnn_model.py
+++ b/NN/models/cnn_model.py
@@ -0,0 +1,560 @@
+"""
+Convolutional Neural Network for timeseries analysis
+
+This module implements a deep CNN model for cryptocurrency price analysis.
+The model uses multiple parallel convolutional pathways and LSTM layers
+to detect patterns at different time scales.
+"""
+
+import os
+import logging
+import numpy as np
+import matplotlib.pyplot as plt
+import tensorflow as tf
+from tensorflow.keras.models import Model, load_model
+from tensorflow.keras.layers import (
+    Input, Conv1D, MaxPooling1D, Dense, Dropout, BatchNormalization,
+    LSTM, Bidirectional, Flatten, Concatenate, GlobalAveragePooling1D,
+    LeakyReLU, Attention
+)
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
+from tensorflow.keras.metrics import AUC
+from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
+import datetime
+import json
+
+logger = logging.getLogger(__name__)
+
+class CNNModel:
+    """
+    Convolutional Neural Network for time series analysis.
+    
+    This model uses a multi-pathway architecture with different filter sizes
+    to detect patterns at different time scales, combined with LSTM layers
+    for temporal dependencies.
+    """
+    
+    def __init__(self, input_shape=(20, 5), output_size=1, model_dir="NN/models/saved"):
+        """
+        Set up an untrained model wrapper and make sure its output directory exists.
+
+        Args:
+            input_shape (tuple): Shape of input data (sequence_length, features)
+            output_size (int): Number of output units (1 for binary, 3 for buy/hold/sell)
+            model_dir (str): Directory to save trained models
+        """
+        # The network and its training history stay unset until
+        # build_model()/train() populate them.
+        self.model, self.history = None, None
+        self.input_shape, self.output_size = input_shape, output_size
+        self.model_dir = model_dir
+
+        # exist_ok=True turns this into a no-op when the directory is already there
+        os.makedirs(model_dir, exist_ok=True)
+
+        logger.info(f"Initialized CNN model with input shape {input_shape} and output size {output_size}")
+    
+    def build_model(self, filters=(32, 64, 128), kernel_sizes=(3, 5, 7),
+                   dropout_rate=0.3, learning_rate=0.001):
+        """
+        Build and compile the CNN model architecture.
+
+        Each (filters[i], kernel_sizes[i]) pair becomes one parallel Conv1D
+        pathway over the input. The pathways are concatenated, passed through
+        one more Conv1D block and two stacked bidirectional LSTMs, averaged
+        over time and fed to a dense head. The output layer and loss depend on
+        self.output_size: 1 -> sigmoid + binary cross-entropy, 3 -> softmax +
+        categorical cross-entropy, anything else -> linear + MSE.
+
+        Args:
+            filters (tuple): Number of filters for each convolutional pathway
+            kernel_sizes (tuple): Kernel sizes for each convolutional pathway
+            dropout_rate (float): Dropout rate for regularization
+            learning_rate (float): Learning rate for Adam optimizer
+
+        Returns:
+            The compiled model
+
+        Raises:
+            ValueError: If filters is empty or its length differs from kernel_sizes
+        """
+        filters = tuple(filters)
+        kernel_sizes = tuple(kernel_sizes)
+
+        # zip() would silently drop the unmatched pathways, so reject mismatched
+        # configurations up front instead of building a smaller network.
+        if not filters:
+            raise ValueError("filters must contain at least one entry")
+        if len(filters) != len(kernel_sizes):
+            raise ValueError(
+                f"filters and kernel_sizes must have the same length, "
+                f"got {len(filters)} and {len(kernel_sizes)}"
+            )
+
+        # Input layer
+        inputs = Input(shape=self.input_shape)
+
+        # Multiple parallel convolutional pathways with different kernel sizes
+        # to capture patterns at different time scales
+        conv_layers = []
+
+        for i, (filter_size, kernel_size) in enumerate(zip(filters, kernel_sizes)):
+            conv_path = Conv1D(
+                filters=filter_size,
+                kernel_size=kernel_size,
+                padding='same',
+                name=f'conv1d_{i+1}'
+            )(inputs)
+            conv_path = BatchNormalization()(conv_path)
+            conv_path = LeakyReLU(alpha=0.1)(conv_path)
+            conv_path = MaxPooling1D(pool_size=2, padding='same')(conv_path)
+            conv_path = Dropout(dropout_rate)(conv_path)
+            conv_layers.append(conv_path)
+
+        # Merge convolutional pathways (Concatenate needs at least two inputs)
+        if len(conv_layers) > 1:
+            merged = Concatenate()(conv_layers)
+        else:
+            merged = conv_layers[0]
+
+        # Add another Conv1D layer after merging
+        x = Conv1D(filters=filters[-1], kernel_size=3, padding='same')(merged)
+        x = BatchNormalization()(x)
+        x = LeakyReLU(alpha=0.1)(x)
+        x = MaxPooling1D(pool_size=2, padding='same')(x)
+        x = Dropout(dropout_rate)(x)
+
+        # Bidirectional LSTM for temporal dependencies
+        x = Bidirectional(LSTM(128, return_sequences=True))(x)
+        x = Dropout(dropout_rate)(x)
+
+        # Second bidirectional LSTM; it keeps the time axis so the pooling
+        # layer below can average over all time steps.
+        x = Bidirectional(LSTM(64, return_sequences=True))(x)
+
+        # Global average pooling to reduce parameters
+        x = GlobalAveragePooling1D()(x)
+        x = Dropout(dropout_rate)(x)
+
+        # Dense layers for final classification/regression
+        x = Dense(64, activation='relu')(x)
+        x = BatchNormalization()(x)
+        x = Dropout(dropout_rate)(x)
+
+        # Output layer
+        if self.output_size == 1:
+            # Binary classification (up/down)
+            outputs = Dense(1, activation='sigmoid', name='output')(x)
+            loss = 'binary_crossentropy'
+            metrics = ['accuracy', AUC()]
+        elif self.output_size == 3:
+            # Multi-class classification (buy/hold/sell)
+            outputs = Dense(3, activation='softmax', name='output')(x)
+            loss = 'categorical_crossentropy'
+            metrics = ['accuracy']
+        else:
+            # Regression
+            outputs = Dense(self.output_size, activation='linear', name='output')(x)
+            loss = 'mse'
+            metrics = ['mae']
+
+        # Create and compile model
+        self.model = Model(inputs=inputs, outputs=outputs)
+
+        # Compile with Adam optimizer
+        self.model.compile(
+            optimizer=Adam(learning_rate=learning_rate),
+            loss=loss,
+            metrics=metrics
+        )
+
+        # Route the model summary through the module logger instead of stdout
+        self.model.summary(print_fn=lambda x: logger.info(x))
+
+        return self.model
+    
+    def train(self, X_train, y_train, batch_size=32, epochs=100, validation_split=0.2,
+             callbacks=None, class_weights=None):
+        """
+        Train the CNN model on the provided data.
+
+        Builds the model with default settings if it has not been built yet.
+        After fitting, the final model and the per-epoch history are written
+        to self.model_dir under timestamped file names.
+
+        Args:
+            X_train (numpy.ndarray): Training features
+            y_train (numpy.ndarray): Training targets
+            batch_size (int): Batch size
+            epochs (int): Number of epochs
+            validation_split (float): Fraction of data to use for validation
+            callbacks (list): List of Keras callbacks; when None, early stopping,
+                learning-rate reduction and best-model checkpointing are used
+            class_weights (dict): Class weights for imbalanced datasets
+
+        Returns:
+            History object containing training metrics
+        """
+        if self.model is None:
+            self.build_model()
+
+        # Default callbacks if none provided
+        if callbacks is None:
+            # Keras only reports val_loss when a validation split exists. Fall
+            # back to the training loss otherwise, so the callbacks below still
+            # have a metric to act on instead of silently never triggering.
+            monitor = 'val_loss' if validation_split else 'loss'
+
+            # Create a timestamp for model checkpoints
+            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+
+            callbacks = [
+                EarlyStopping(
+                    monitor=monitor,
+                    patience=10,
+                    restore_best_weights=True
+                ),
+                ReduceLROnPlateau(
+                    monitor=monitor,
+                    factor=0.5,
+                    patience=5,
+                    min_lr=1e-6
+                ),
+                ModelCheckpoint(
+                    filepath=os.path.join(self.model_dir, f"cnn_model_{timestamp}.h5"),
+                    monitor=monitor,
+                    save_best_only=True
+                )
+            ]
+
+        # categorical_crossentropy needs one-hot targets; integer labels are
+        # converted here, already-encoded 2-D targets are passed through.
+        if self.output_size == 3 and len(y_train.shape) == 1:
+            y_train = tf.keras.utils.to_categorical(y_train, num_classes=3)
+
+        # Train the model
+        logger.info(f"Training CNN model with {len(X_train)} samples, batch size {batch_size}, epochs {epochs}")
+        self.history = self.model.fit(
+            X_train, y_train,
+            batch_size=batch_size,
+            epochs=epochs,
+            validation_split=validation_split,
+            callbacks=callbacks,
+            class_weight=class_weights,
+            verbose=2
+        )
+
+        # Save the trained model (timestamp taken at the end of training)
+        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+        model_path = os.path.join(self.model_dir, f"cnn_model_final_{timestamp}.h5")
+        self.model.save(model_path)
+        logger.info(f"Model saved to {model_path}")
+
+        # Convert numpy values to Python native types for JSON serialization.
+        # This happens before the file is opened so that a conversion error
+        # cannot leave an empty history file behind.
+        history_dict = {
+            key: [float(value) for value in values]
+            for key, values in self.history.history.items()
+        }
+
+        # Save training history
+        history_path = os.path.join(self.model_dir, f"cnn_model_history_{timestamp}.json")
+        with open(history_path, 'w', encoding='utf-8') as f:
+            json.dump(history_dict, f, indent=2)
+
+        return self.history
+    
+    def evaluate(self, X_test, y_test, plot_results=False):
+        """
+        Evaluate the model on test data.
+
+        The metrics returned by the Keras model's evaluate() are always
+        collected. For binary (output_size == 1) and 3-class (output_size == 3)
+        models a scikit-learn classification report and confusion matrix are
+        logged as well; for binary models the ROC AUC is stored under 'auc'.
+
+        Args:
+            X_test (numpy.ndarray): Test features
+            y_test (numpy.ndarray): Test targets. For the 3-class model either
+                integer labels (1-D) or one-hot rows (2-D) are accepted.
+            plot_results (bool): Whether to plot evaluation results
+
+        Returns:
+            dict: Evaluation metrics
+
+        Raises:
+            ValueError: If no model has been built or loaded
+        """
+        if self.model is None:
+            raise ValueError("Model has not been built or trained yet")
+
+        y_test = np.asarray(y_test)
+
+        # Keras needs one-hot targets for categorical_crossentropy, while the
+        # scikit-learn reports need integer class labels. Derive whichever
+        # encoding is missing, so both 1-D labels and one-hot input work.
+        y_labels = y_test
+        if self.output_size == 3:
+            if y_test.ndim == 1:
+                y_test = tf.keras.utils.to_categorical(y_test, num_classes=3)
+            else:
+                y_labels = np.argmax(y_test, axis=1)
+        elif self.output_size == 1:
+            # Column-vector targets (n, 1) are flattened for scikit-learn
+            y_labels = y_test.reshape(-1)
+
+        # Evaluate model
+        logger.info(f"Evaluating CNN model on {len(X_test)} samples")
+        eval_results = self.model.evaluate(X_test, y_test, verbose=0)
+
+        metrics = {}
+        for metric, value in zip(self.model.metrics_names, eval_results):
+            metrics[metric] = value
+            logger.info(f"{metric}: {value:.4f}")
+
+        # Get predictions
+        y_pred_prob = self.model.predict(X_test)
+
+        # Different processing based on output type
+        if self.output_size == 1:
+            # Binary classification: one probability per sample
+            y_score = y_pred_prob.reshape(-1)
+            y_pred = (y_score > 0.5).astype(int)
+
+            # Classification report
+            report = classification_report(y_labels, y_pred)
+            logger.info(f"Classification Report:\n{report}")
+
+            # Confusion matrix
+            cm = confusion_matrix(y_labels, y_pred)
+            logger.info(f"Confusion Matrix:\n{cm}")
+
+            # ROC curve and AUC (replaces any 'auc' value reported by Keras)
+            fpr, tpr, _ = roc_curve(y_labels, y_score)
+            roc_auc = auc(fpr, tpr)
+            metrics['auc'] = roc_auc
+
+            if plot_results:
+                self._plot_binary_results(y_labels, y_pred, y_score, fpr, tpr, roc_auc)
+
+        elif self.output_size == 3:
+            # Multi-class classification
+            y_pred = np.argmax(y_pred_prob, axis=1)
+
+            # Classification report
+            report = classification_report(y_labels, y_pred)
+            logger.info(f"Classification Report:\n{report}")
+
+            # Confusion matrix
+            cm = confusion_matrix(y_labels, y_pred)
+            logger.info(f"Confusion Matrix:\n{cm}")
+
+            if plot_results:
+                self._plot_multiclass_results(y_labels, y_pred, y_pred_prob)
+
+        return metrics
+    
+    def predict(self, X):
+        """
+        Run the model on new data and return labels together with raw outputs.
+
+        Args:
+            X (numpy.ndarray): Input features; a 2-D array is treated as a
+                single sample and gets a leading batch axis
+
+        Returns:
+            tuple: (y_pred, y_proba) where:
+                y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
+                y_proba is the class probability
+                For any other output size both elements are the raw model output.
+        """
+        if self.model is None:
+            raise ValueError("Model has not been built or trained yet")
+
+        # A lone (sequence_length, features) sample needs a batch dimension
+        batch = np.expand_dims(X, axis=0) if len(X.shape) == 2 else X
+        raw = self.model.predict(batch)
+
+        if self.output_size == 3:
+            # Multi-class: the most probable class wins
+            return np.argmax(raw, axis=1), raw
+
+        if self.output_size != 1:
+            # Regression: there is no separate label, return the output twice
+            return raw, raw
+
+        # Binary: threshold the sigmoid output at 0.5
+        labels = (raw > 0.5).astype(int).flatten()
+        return labels, raw.flatten()
+    
+    def save(self, filepath=None):
+        """
+        Write the current model to disk.
+
+        Args:
+            filepath (str): Destination path; when omitted, a timestamped
+                .h5 file inside self.model_dir is used
+
+        Returns:
+            str: Path where the model was saved
+        """
+        network = self.model
+        if network is None:
+            raise ValueError("Model has not been built yet")
+
+        if filepath is None:
+            # Fall back to a timestamped name in the model directory
+            now = datetime.datetime.now()
+            timestamp = now.strftime("%Y%m%d_%H%M%S")
+            filepath = os.path.join(self.model_dir, f"cnn_model_{timestamp}.h5")
+
+        network.save(filepath)
+        logger.info(f"Model saved to {filepath}")
+        return filepath
+    
+    def load(self, filepath):
+        """
+        Load a saved model from disk.
+
+        The wrapper's input_shape and output_size are refreshed from the
+        loaded network, because predict() and evaluate() branch on
+        output_size and would otherwise act on the values given to __init__.
+
+        Args:
+            filepath (str): Path to the saved model
+
+        Returns:
+            The loaded model
+        """
+        self.model = load_model(filepath)
+
+        # Only single-input/single-output models expose plain tuples here;
+        # anything else is left as configured at construction time.
+        input_shape = getattr(self.model, "input_shape", None)
+        if isinstance(input_shape, tuple) and len(input_shape) > 1:
+            # Drop the leading batch dimension
+            self.input_shape = tuple(input_shape[1:])
+
+        output_shape = getattr(self.model, "output_shape", None)
+        if isinstance(output_shape, tuple) and output_shape and output_shape[-1] is not None:
+            self.output_size = int(output_shape[-1])
+
+        logger.info(f"Model loaded from {filepath}")
+        return self.model
+    
+    def extract_hidden_features(self, X):
+        """
+        Return the activations of the model's second-to-last layer for X.
+
+        Intended for transfer learning: the layer feeding the output layer is
+        exposed through a temporary sub-model that shares the trained weights.
+
+        Args:
+            X (numpy.ndarray): Input data
+
+        Returns:
+            numpy.ndarray: Extracted features
+        """
+        if self.model is None:
+            raise ValueError("Model has not been built or trained yet")
+
+        # layers[-1] is the output layer, so layers[-2] is the one feeding it
+        penultimate_name = self.model.layers[-2].name
+        truncated = Model(
+            inputs=self.model.input,
+            outputs=self.model.get_layer(penultimate_name).output
+        )
+
+        return truncated.predict(X)
+    
+    def _plot_binary_results(self, y_true, y_pred, y_proba, fpr, tpr, roc_auc):
+        """
+        Plot evaluation results for binary classification.
+        
+        Draws three panels side by side (confusion matrix, histogram of the
+        predicted probabilities per true class, ROC curve), saves the figure
+        as a timestamped PNG in self.model_dir and closes it.
+        
+        Args:
+            y_true (numpy.ndarray): True labels
+            y_pred (numpy.ndarray): Predicted labels
+            y_proba (numpy.ndarray): Prediction probabilities
+            fpr (numpy.ndarray): False positive rates for ROC curve
+            tpr (numpy.ndarray): True positive rates for ROC curve
+            roc_auc (float): Area under ROC curve
+        """
+        plt.figure(figsize=(15, 5))
+        
+        # Confusion Matrix
+        plt.subplot(1, 3, 1)
+        cm = confusion_matrix(y_true, y_pred)
+        plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
+        plt.title('Confusion Matrix')
+        plt.colorbar()
+        # Tick labels are hard-coded for the two classes 0 and 1
+        tick_marks = [0, 1]
+        plt.xticks(tick_marks, ['0', '1'])
+        plt.yticks(tick_marks, ['0', '1'])
+        plt.xlabel('Predicted Label')
+        plt.ylabel('True Label')
+        
+        # Add text annotations to confusion matrix
+        # Cells above half of the largest count get white text for contrast
+        thresh = cm.max() / 2.
+        for i in range(cm.shape[0]):
+            for j in range(cm.shape[1]):
+                plt.text(j, i, format(cm[i, j], 'd'),
+                        horizontalalignment="center",
+                        color="white" if cm[i, j] > thresh else "black")
+        
+        # Histogram of prediction probabilities
+        # Boolean masks on y_true split the probabilities by true class
+        plt.subplot(1, 3, 2)
+        plt.hist(y_proba[y_true == 0], alpha=0.5, label='Class 0')
+        plt.hist(y_proba[y_true == 1], alpha=0.5, label='Class 1')
+        plt.title('Prediction Probabilities')
+        plt.xlabel('Probability of Class 1')
+        plt.ylabel('Count')
+        plt.legend()
+        
+        # ROC Curve
+        plt.subplot(1, 3, 3)
+        plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc:.3f})')
+        # Dashed diagonal from (0, 0) to (1, 1) as a visual reference line
+        plt.plot([0, 1], [0, 1], 'k--')
+        plt.xlim([0.0, 1.0])
+        plt.ylim([0.0, 1.05])
+        plt.xlabel('False Positive Rate')
+        plt.ylabel('True Positive Rate')
+        plt.title('Receiver Operating Characteristic')
+        plt.legend(loc="lower right")
+        
+        plt.tight_layout()
+        
+        # Save figure
+        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+        fig_path = os.path.join(self.model_dir, f"cnn_evaluation_{timestamp}.png")
+        plt.savefig(fig_path)
+        # Close the current figure once it has been written to disk
+        plt.close()
+        
+        logger.info(f"Evaluation plots saved to {fig_path}")
+    
+    def _plot_multiclass_results(self, y_true, y_pred, y_proba):
+        """
+        Plot evaluation results for multi-class classification.
+        
+        Draws a confusion matrix and, per class, a histogram of the probability
+        the model assigned to that class for the samples that truly belong to
+        it. The figure is saved as a timestamped PNG in self.model_dir and
+        then closed.
+        
+        Args:
+            y_true (numpy.ndarray): True labels
+            y_pred (numpy.ndarray): Predicted labels
+            y_proba (numpy.ndarray): Prediction probabilities
+        """
+        plt.figure(figsize=(12, 5))
+        
+        # Confusion Matrix
+        plt.subplot(1, 2, 1)
+        cm = confusion_matrix(y_true, y_pred)
+        plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
+        plt.title('Confusion Matrix')
+        plt.colorbar()
+        classes = ['BUY', 'HOLD', 'SELL']  # Assumes classes are 0, 1, 2
+        tick_marks = np.arange(len(classes))
+        plt.xticks(tick_marks, classes)
+        plt.yticks(tick_marks, classes)
+        plt.xlabel('Predicted Label')
+        plt.ylabel('True Label')
+        
+        # Add text annotations to confusion matrix
+        # Cells above half of the largest count get white text for contrast
+        thresh = cm.max() / 2.
+        for i in range(cm.shape[0]):
+            for j in range(cm.shape[1]):
+                plt.text(j, i, format(cm[i, j], 'd'),
+                        horizontalalignment="center",
+                        color="white" if cm[i, j] > thresh else "black")
+        
+        # Class probability distributions
+        plt.subplot(1, 2, 2)
+        for i, cls in enumerate(classes):
+            # Rows where the true label is i, column i of the probabilities
+            plt.hist(y_proba[y_true == i, i], alpha=0.5, label=f'Class {cls}')
+        plt.title('Class Probability Distributions')
+        plt.xlabel('Probability')
+        plt.ylabel('Count')
+        plt.legend()
+        
+        plt.tight_layout()
+        
+        # Save figure
+        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+        fig_path = os.path.join(self.model_dir, f"cnn_multiclass_evaluation_{timestamp}.png")
+        plt.savefig(fig_path)
+        # Close the current figure once it has been written to disk
+        plt.close()
+        
+        logger.info(f"Multiclass evaluation plots saved to {fig_path}")
+    
+    def plot_training_history(self):
+        """
+        Plot training history (loss and metrics).
+        
+        The left panel shows training (and, if recorded, validation) loss. The
+        right panel shows accuracy when the history contains it, otherwise MAE
+        when that is present. The figure is saved as a timestamped PNG in
+        self.model_dir.
+        
+        Returns:
+            str: Path to the saved plot
+        
+        Raises:
+            ValueError: If self.history is None
+        """
+        if self.history is None:
+            raise ValueError("Model has not been trained yet")
+        
+        plt.figure(figsize=(12, 5))
+        
+        # Plot loss
+        plt.subplot(1, 2, 1)
+        plt.plot(self.history.history['loss'], label='Training Loss')
+        # The validation curve is only drawn when the history recorded one
+        if 'val_loss' in self.history.history:
+            plt.plot(self.history.history['val_loss'], label='Validation Loss')
+        plt.title('Model Loss')
+        plt.xlabel('Epoch')
+        plt.ylabel('Loss')
+        plt.legend()
+        
+        # Plot accuracy
+        plt.subplot(1, 2, 2)
+        
+        # Accuracy takes precedence; MAE is the fallback when 'accuracy' is absent
+        if 'accuracy' in self.history.history:
+            plt.plot(self.history.history['accuracy'], label='Training Accuracy')
+            if 'val_accuracy' in self.history.history:
+                plt.plot(self.history.history['val_accuracy'], label='Validation Accuracy')
+            plt.title('Model Accuracy')
+            plt.ylabel('Accuracy')
+        elif 'mae' in self.history.history:
+            plt.plot(self.history.history['mae'], label='Training MAE')
+            if 'val_mae' in self.history.history:
+                plt.plot(self.history.history['val_mae'], label='Validation MAE')
+            plt.title('Model MAE')
+            plt.ylabel('MAE')
+        
+        plt.xlabel('Epoch')
+        plt.legend()
+        
+        plt.tight_layout()
+        
+        # Save figure
+        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+        fig_path = os.path.join(self.model_dir, f"cnn_training_history_{timestamp}.png")
+        plt.savefig(fig_path)
+        # Close the current figure once it has been written to disk
+        plt.close()
+        
+        logger.info(f"Training history plot saved to {fig_path}")
+        return fig_path 
--- a/NN/models/cnn_model_pytorch.py
+++ b/NN/models/cnn_model_pytorch.py
@@ -0,0 +1,546 @@
+#!/usr/bin/env python3
+"""
+CNN Model - PyTorch Implementation
+
+This module implements a CNN model using PyTorch for time series analysis.
+The model consists of multiple convolutional pathways and LSTM layers.
+"""
+
+import os
+import logging
+import numpy as np
+import matplotlib.pyplot as plt
+from datetime import datetime
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, TensorDataset
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+
+# Configure logging
+logger = logging.getLogger(__name__)
+
+class CNNPyTorch(nn.Module):
+    """
+    PyTorch CNN model for time series analysis.
+
+    Three parallel Conv1d pathways (kernel sizes 3, 5 and 7) read the input
+    window, their feature maps are concatenated and passed through a
+    bidirectional LSTM, and the flattened LSTM output feeds a dense head.
+    The forward pass returns activated outputs (sigmoid or softmax), not logits.
+    """
+
+    def __init__(self, input_shape, output_size=3):
+        """
+        Initialize the CNN model.
+
+        Args:
+            input_shape (tuple): Shape of input data (window_size, features)
+            output_size (int): Number of output units (1 -> sigmoid output,
+                more than 1 -> softmax over the outputs)
+        """
+        super(CNNPyTorch, self).__init__()
+
+        window_size, num_features = input_shape
+
+        # Architecture parameters
+        filters = [32, 64, 128]
+        kernel_sizes = [3, 5, 7]
+        lstm_units = 100
+        dense_units = 64
+        dropout_rate = 0.3
+        pool_kernel, pool_stride, pool_padding = 2, 1, 1
+
+        # Create parallel convolutional pathways
+        self.conv_paths = nn.ModuleList()
+
+        for f, k in zip(filters, kernel_sizes):
+            path = nn.Sequential(
+                nn.Conv1d(num_features, f, kernel_size=k, padding='same'),
+                nn.ReLU(),
+                nn.BatchNorm1d(f),
+                nn.MaxPool1d(kernel_size=pool_kernel, stride=pool_stride, padding=pool_padding),
+                nn.Dropout(dropout_rate)
+            )
+            self.conv_paths.append(path)
+
+        # The 'same'-padded convolutions keep the window length, but the
+        # pooling layer changes it: L_out = (L + 2*padding - kernel) // stride + 1.
+        # With kernel=2, stride=1, padding=1 that is window_size + 1, so the
+        # dense layer must be sized from the pooled length, not the raw window.
+        pooled_len = (window_size + 2 * pool_padding - pool_kernel) // pool_stride + 1
+
+        # LSTM layer
+        self.lstm = nn.LSTM(
+            input_size=sum(filters),
+            hidden_size=lstm_units,
+            batch_first=True,
+            bidirectional=True
+        )
+
+        # Dense layers (the bidirectional LSTM emits 2 * lstm_units per step)
+        self.flatten = nn.Flatten()
+        self.dense1 = nn.Sequential(
+            nn.Linear(lstm_units * 2 * pooled_len, dense_units),
+            nn.ReLU(),
+            nn.BatchNorm1d(dense_units),
+            nn.Dropout(dropout_rate)
+        )
+
+        # Output layer
+        self.output = nn.Linear(dense_units, output_size)
+
+        # Activation based on output size
+        if output_size == 1:
+            self.activation = nn.Sigmoid()  # Single output squashed into (0, 1)
+        elif output_size > 1:
+            self.activation = nn.Softmax(dim=1)  # Multi-class probabilities
+        else:
+            self.activation = nn.Identity()  # No activation
+
+    def forward(self, x):
+        """
+        Forward pass through the network.
+
+        Args:
+            x: Input tensor of shape [batch_size, window_size, features]
+
+        Returns:
+            Output tensor of shape [batch_size, output_size]
+
+        Raises:
+            ValueError: If x is not a 3-D tensor
+        """
+        if x.dim() != 3:
+            raise ValueError(
+                f"expected input of shape [batch, window, features], got {tuple(x.shape)}"
+            )
+
+        # Transpose for conv1d: [batch, features, window]
+        x_t = x.transpose(1, 2)
+
+        # Process through parallel conv paths and stack them along channels
+        conv_concat = torch.cat([path(x_t) for path in self.conv_paths], dim=1)
+
+        # Transpose back for LSTM: [batch, steps, channels]
+        conv_concat = conv_concat.transpose(1, 2)
+
+        # LSTM processing (the final hidden/cell states are not needed)
+        lstm_out, _ = self.lstm(conv_concat)
+
+        # Flatten all time steps into one feature vector per sample
+        flattened = self.flatten(lstm_out)
+
+        # Dense processing
+        dense_out = self.dense1(flattened)
+
+        # Output
+        output = self.output(dense_out)
+
+        # Apply activation
+        return self.activation(output)
+
+
+class CNNModelPyTorch:
+    """
+    CNN model wrapper class for time series analysis using PyTorch.
+    
+    This class provides methods for building, training, evaluating, and making
+    predictions with the CNN model.
+    """
+    
+    def __init__(self, window_size, num_features, output_size=3, timeframes=None):
+        """
+        Store the configuration, pick a device and build the network.
+
+        Args:
+            window_size (int): Size of the input window
+            num_features (int): Number of features in the input data
+            output_size (int): Size of the output layer
+            timeframes (list): List of timeframes used (for logging)
+        """
+        self.window_size, self.num_features = window_size, num_features
+        self.output_size = output_size
+        self.timeframes = timeframes if timeframes else []
+
+        # Prefer the first CUDA device when one is available
+        device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
+        self.device = torch.device(device_name)
+        logger.info(f"Using device: {self.device}")
+
+        # build_model() replaces this placeholder with the real network
+        self.model = None
+        self.build_model()
+
+        # One list of per-epoch values for every tracked quantity
+        self.history = {
+            key: [] for key in ('loss', 'val_loss', 'accuracy', 'val_accuracy')
+        }
+    
+    def build_model(self):
+        """
+        Build the CNN model, its Adam optimizer and its loss function.
+
+        The loss is chosen from self.output_size: binary cross-entropy for a
+        single output, cross-entropy over class probabilities for more than
+        one output, and mean squared error otherwise.
+        """
+        logger.info(f"Building PyTorch CNN model with window_size={self.window_size}, "
+                   f"num_features={self.num_features}, output_size={self.output_size}")
+
+        self.model = CNNPyTorch(
+            input_shape=(self.window_size, self.num_features),
+            output_size=self.output_size
+        ).to(self.device)
+
+        # Initialize optimizer
+        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
+
+        # Initialize loss function based on output size
+        if self.output_size == 1:
+            self.criterion = nn.BCELoss()  # Binary classification
+        elif self.output_size > 1:
+            # CNNPyTorch.forward already applies Softmax. nn.CrossEntropyLoss
+            # expects raw logits and would apply a second (log-)softmax, which
+            # flattens the gradients; use NLL on the log of the probabilities.
+            self.criterion = self._probability_cross_entropy
+        else:
+            self.criterion = nn.MSELoss()  # Regression
+
+        logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")
+
+    @staticmethod
+    def _probability_cross_entropy(probabilities, targets):
+        """Return the mean cross-entropy for inputs that are already probabilities."""
+        # Clamp before the log so an exact 0 probability cannot produce -inf
+        log_probabilities = torch.log(probabilities.clamp_min(1e-12))
+        return nn.functional.nll_loss(log_probabilities, targets)
+    
+    def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
+        """
+        Train the CNN model.
+
+        Per-epoch loss and accuracy are appended to self.history; validation
+        values are only recorded when both X_val and y_val are given. For a
+        single-output model, accuracy is computed by thresholding outputs and
+        targets at 0.5.
+
+        Args:
+            X_train: Training input data
+            y_train: Training target data
+            X_val: Validation input data
+            y_val: Validation target data
+            batch_size: Batch size for training
+            epochs: Number of training epochs
+
+        Returns:
+            Training history
+        """
+        logger.info(f"Training PyTorch CNN model with {len(X_train)} samples, "
+                   f"batch_size={batch_size}, epochs={epochs}")
+
+        # BCELoss needs float targets; the class-index loss needs integer targets
+        target_dtype = torch.float32 if self.output_size == 1 else torch.long
+
+        # Convert numpy arrays to PyTorch tensors
+        X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)
+        y_train_tensor = torch.tensor(y_train, dtype=target_dtype).to(self.device)
+        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
+
+        # BatchNorm1d cannot compute batch statistics from a single sample in
+        # training mode, so a trailing batch of exactly one sample is dropped
+        # (shuffling puts a different sample there each epoch).
+        num_samples = len(train_dataset)
+        drop_last = num_samples > batch_size and num_samples % batch_size == 1
+        train_loader = DataLoader(
+            train_dataset, batch_size=batch_size, shuffle=True, drop_last=drop_last
+        )
+
+        # Create DataLoader for validation data if provided
+        val_loader = None
+        if X_val is not None and y_val is not None:
+            X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
+            y_val_tensor = torch.tensor(y_val, dtype=target_dtype).to(self.device)
+            val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
+            val_loader = DataLoader(val_dataset, batch_size=batch_size)
+
+        # Training loop
+        for epoch in range(epochs):
+            # _validate() switches to eval mode, so re-enable training mode
+            self.model.train()
+            running_loss = 0.0
+            correct = 0
+            total = 0
+
+            for inputs, targets in train_loader:
+                # Zero the parameter gradients
+                self.optimizer.zero_grad()
+
+                # Forward pass
+                outputs = self.model(inputs)
+
+                if self.output_size == 1:
+                    # Accept targets shaped (n,) as well as (n, 1)
+                    targets = targets.reshape(outputs.shape)
+                    hits = (outputs > 0.5) == (targets > 0.5)
+                else:
+                    hits = outputs.argmax(dim=1) == targets
+
+                # Calculate loss, then backward pass and optimize
+                loss = self.criterion(outputs, targets)
+                loss.backward()
+                self.optimizer.step()
+
+                # Statistics
+                running_loss += loss.item()
+                total += targets.size(0)
+                correct += hits.sum().item()
+
+            epoch_loss = running_loss / len(train_loader)
+            epoch_acc = correct / total if total > 0 else 0
+
+            self.history['loss'].append(epoch_loss)
+            self.history['accuracy'].append(epoch_acc)
+
+            # Validation phase
+            if val_loader is not None:
+                val_loss, val_acc = self._validate(val_loader)
+
+                logger.info(f"Epoch {epoch+1}/{epochs} - "
+                           f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
+                           f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")
+
+                self.history['val_loss'].append(val_loss)
+                self.history['val_accuracy'].append(val_acc)
+            else:
+                logger.info(f"Epoch {epoch+1}/{epochs} - "
+                           f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f}")
+
+        logger.info("Training completed")
+        return self.history
+    
+    def _validate(self, val_loader):
+        """Validate the model using the validation set"""
+        self.model.eval()
+        val_loss = 0.0
+        correct = 0
+        total = 0
+        
+        with torch.no_grad():
+            for inputs, targets in val_loader:
+                # Forward pass
+                outputs = self.model(inputs)
+                
+                # Calculate loss
+                if self.output_size == 1:
+                    loss = self.criterion(outputs, targets.unsqueeze(1))
+                else:
+                    loss = self.criterion(outputs, targets)
+                
+                val_loss += loss.item()
+                
+                # Calculate accuracy
+                if self.output_size > 1:
+                    _, predicted = torch.max(outputs, 1)
+                    total += targets.size(0)
+                    correct += (predicted == targets).sum().item()
+        
+        return val_loss / len(val_loader), correct / total if total > 0 else 0
+    
+    def evaluate(self, X_test, y_test):
+        """
+        Evaluate the model on test data.
+        
+        Args:
+            X_test: Test input data
+            y_test: Test target data
+            
+        Returns:
+            dict: Evaluation metrics
+        """
+        logger.info(f"Evaluating model on {len(X_test)} samples")
+        
+        # Convert to PyTorch tensors
+        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(self.device)
+        
+        # Get predictions
+        self.model.eval()
+        with torch.no_grad():
+            y_pred = self.model(X_test_tensor)
+            
+            if self.output_size > 1:
+                _, y_pred_class = torch.max(y_pred, 1)
+                y_pred_class = y_pred_class.cpu().numpy()
+            else:
+                y_pred_class = (y_pred.cpu().numpy() > 0.5).astype(int).flatten()
+        
+        # Calculate metrics
+        if self.output_size > 1:
+            accuracy = accuracy_score(y_test, y_pred_class)
+            precision = precision_score(y_test, y_pred_class, average='weighted')
+            recall = recall_score(y_test, y_pred_class, average='weighted')
+            f1 = f1_score(y_test, y_pred_class, average='weighted')
+            
+            metrics = {
+                'accuracy': accuracy,
+                'precision': precision,
+                'recall': recall,
+                'f1_score': f1
+            }
+        else:
+            accuracy = accuracy_score(y_test, y_pred_class)
+            precision = precision_score(y_test, y_pred_class)
+            recall = recall_score(y_test, y_pred_class)
+            f1 = f1_score(y_test, y_pred_class)
+            
+            metrics = {
+                'accuracy': accuracy,
+                'precision': precision,
+                'recall': recall,
+                'f1_score': f1
+            }
+        
+        logger.info(f"Evaluation metrics: {metrics}")
+        return metrics
+    
+    def predict(self, X):
+        """
+        Make predictions with the model.
+        
+        Args:
+            X: Input data
+            
+        Returns:
+            Predictions
+        """
+        # Convert to PyTorch tensor
+        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
+        
+        # Get predictions
+        self.model.eval()
+        with torch.no_grad():
+            predictions = self.model(X_tensor)
+            
+            if self.output_size > 1:
+                # Multi-class classification
+                probs = predictions.cpu().numpy()
+                _, class_preds = torch.max(predictions, 1)
+                class_preds = class_preds.cpu().numpy()
+                return class_preds, probs
+            else:
+                # Binary classification or regression
+                preds = predictions.cpu().numpy()
+                if self.output_size == 1:
+                    # Binary classification
+                    class_preds = (preds > 0.5).astype(int)
+                    return class_preds.flatten(), preds.flatten()
+                else:
+                    # Regression
+                    return preds.flatten(), None
+    
+    def save(self, filepath):
+        """
+        Save the model to a file.
+        
+        Args:
+            filepath: Path to save the model
+        """
+        # Create directory if it doesn't exist
+        os.makedirs(os.path.dirname(filepath), exist_ok=True)
+        
+        # Save the model state
+        model_state = {
+            'model_state_dict': self.model.state_dict(),
+            'optimizer_state_dict': self.optimizer.state_dict(),
+            'history': self.history,
+            'window_size': self.window_size,
+            'num_features': self.num_features,
+            'output_size': self.output_size,
+            'timeframes': self.timeframes
+        }
+        
+        torch.save(model_state, f"{filepath}.pt")
+        logger.info(f"Model saved to {filepath}.pt")
+    
+    def load(self, filepath):
+        """
+        Load the model from a file.
+        
+        Args:
+            filepath: Path to load the model from
+        """
+        # Check if file exists
+        if not os.path.exists(f"{filepath}.pt"):
+            logger.error(f"Model file {filepath}.pt not found")
+            return False
+        
+        # Load the model state
+        model_state = torch.load(f"{filepath}.pt", map_location=self.device)
+        
+        # Update model parameters
+        self.window_size = model_state['window_size']
+        self.num_features = model_state['num_features']
+        self.output_size = model_state['output_size']
+        self.timeframes = model_state['timeframes']
+        
+        # Rebuild the model
+        self.build_model()
+        
+        # Load the model state
+        self.model.load_state_dict(model_state['model_state_dict'])
+        self.optimizer.load_state_dict(model_state['optimizer_state_dict'])
+        self.history = model_state['history']
+        
+        logger.info(f"Model loaded from {filepath}.pt")
+        return True
+    
+    def plot_training_history(self):
+        """Plot the training history"""
+        if not self.history['loss']:
+            logger.warning("No training history to plot")
+            return
+        
+        plt.figure(figsize=(12, 4))
+        
+        # Plot loss
+        plt.subplot(1, 2, 1)
+        plt.plot(self.history['loss'], label='Training Loss')
+        if 'val_loss' in self.history and self.history['val_loss']:
+            plt.plot(self.history['val_loss'], label='Validation Loss')
+        plt.title('Model Loss')
+        plt.ylabel('Loss')
+        plt.xlabel('Epoch')
+        plt.legend()
+        
+        # Plot accuracy
+        plt.subplot(1, 2, 2)
+        plt.plot(self.history['accuracy'], label='Training Accuracy')
+        if 'val_accuracy' in self.history and self.history['val_accuracy']:
+            plt.plot(self.history['val_accuracy'], label='Validation Accuracy')
+        plt.title('Model Accuracy')
+        plt.ylabel('Accuracy')
+        plt.xlabel('Epoch')
+        plt.legend()
+        
+        # Save the plot
+        os.makedirs('plots', exist_ok=True)
+        plt.savefig(os.path.join('plots', f"cnn_history_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"))
+        plt.close()
+        
+        logger.info("Training history plots saved to plots directory")
+    
+    def extract_hidden_features(self, X):
+        """
+        Extract hidden features from the model.
+        
+        Args:
+            X: Input data
+            
+        Returns:
+            Hidden features
+        """
+        # Convert to PyTorch tensor
+        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
+        
+        # Forward pass through the model up to the last hidden layer
+        self.model.eval()
+        with torch.no_grad():
+            # Get features before the output layer
+            x_t = X_tensor.transpose(1, 2)
+            
+            # Process through parallel conv paths
+            conv_outputs = []
+            for path in self.model.conv_paths:
+                conv_outputs.append(path(x_t))
+            
+            # Concatenate conv outputs
+            conv_concat = torch.cat(conv_outputs, dim=1)
+            
+            # Transpose back for LSTM
+            conv_concat = conv_concat.transpose(1, 2)
+            
+            # LSTM processing
+            lstm_out, _ = self.model.lstm(conv_concat)
+            
+            # Flatten
+            flattened = self.model.flatten(lstm_out)
+            
+            # Dense processing
+            hidden_features = self.model.dense1(flattened)
+        
+        return hidden_features.cpu().numpy() 
--- a/NN/models/transformer_model.py
+++ b/NN/models/transformer_model.py
--- a/NN/models/transformer_model_pytorch.py
+++ b/NN/models/transformer_model_pytorch.py
@@ -0,0 +1,653 @@
+#!/usr/bin/env python3
+"""
+Transformer Model - PyTorch Implementation
+
+This module implements a Transformer model using PyTorch for time series analysis.
+The model consists of a Transformer encoder and a Mixture of Experts model.
+"""
+
+import os
+import logging
+import numpy as np
+import matplotlib.pyplot as plt
+from datetime import datetime
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, TensorDataset
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+
+# Configure logging
+logger = logging.getLogger(__name__)
+
+class TransformerBlock(nn.Module):
+    """Transformer Block with self-attention mechanism"""
+    
+    def __init__(self, input_dim, num_heads=4, ff_dim=64, dropout=0.1):
+        super(TransformerBlock, self).__init__()
+        
+        self.attention = nn.MultiheadAttention(
+            embed_dim=input_dim,
+            num_heads=num_heads,
+            dropout=dropout,
+            batch_first=True
+        )
+        
+        self.feed_forward = nn.Sequential(
+            nn.Linear(input_dim, ff_dim),
+            nn.ReLU(),
+            nn.Linear(ff_dim, input_dim)
+        )
+        
+        self.layernorm1 = nn.LayerNorm(input_dim)
+        self.layernorm2 = nn.LayerNorm(input_dim)
+        self.dropout1 = nn.Dropout(dropout)
+        self.dropout2 = nn.Dropout(dropout)
+    
+    def forward(self, x):
+        # Self-attention
+        attn_output, _ = self.attention(x, x, x)
+        x = x + self.dropout1(attn_output)
+        x = self.layernorm1(x)
+        
+        # Feed forward
+        ff_output = self.feed_forward(x)
+        x = x + self.dropout2(ff_output)
+        x = self.layernorm2(x)
+        
+        return x
+
+class TransformerModelPyTorch(nn.Module):
+    """PyTorch Transformer model for time series analysis"""
+    
+    def __init__(self, input_shape, output_size=3, num_heads=4, ff_dim=64, num_transformer_blocks=2):
+        """
+        Initialize the Transformer model.
+        
+        Args:
+            input_shape (tuple): Shape of input data (window_size, features)
+            output_size (int): Size of output (1 for regression, 3 for classification)
+            num_heads (int): Number of attention heads
+            ff_dim (int): Feed forward dimension
+            num_transformer_blocks (int): Number of transformer blocks
+        """
+        super(TransformerModelPyTorch, self).__init__()
+        
+        window_size, num_features = input_shape
+        
+        # Positional encoding
+        self.pos_encoding = nn.Parameter(
+            torch.zeros(1, window_size, num_features),
+            requires_grad=True
+        )
+        
+        # Transformer blocks
+        self.transformer_blocks = nn.ModuleList([
+            TransformerBlock(
+                input_dim=num_features,
+                num_heads=num_heads,
+                ff_dim=ff_dim
+            ) for _ in range(num_transformer_blocks)
+        ])
+        
+        # Global average pooling
+        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)
+        
+        # Dense layers
+        self.dense = nn.Sequential(
+            nn.Linear(num_features, 64),
+            nn.ReLU(),
+            nn.BatchNorm1d(64),
+            nn.Dropout(0.3),
+            nn.Linear(64, output_size)
+        )
+        
+        # Activation based on output size
+        if output_size == 1:
+            self.activation = nn.Sigmoid()  # Binary classification or regression
+        elif output_size > 1:
+            self.activation = nn.Softmax(dim=1)  # Multi-class classification
+        else:
+            self.activation = nn.Identity()  # No activation
+    
+    def forward(self, x):
+        """
+        Forward pass through the network.
+        
+        Args:
+            x: Input tensor of shape [batch_size, window_size, features]
+            
+        Returns:
+            Output tensor of shape [batch_size, output_size]
+        """
+        # Add positional encoding
+        x = x + self.pos_encoding
+        
+        # Apply transformer blocks
+        for transformer_block in self.transformer_blocks:
+            x = transformer_block(x)
+        
+        # Global average pooling
+        x = x.transpose(1, 2)  # [batch, features, window]
+        x = self.global_avg_pool(x)  # [batch, features, 1]
+        x = x.squeeze(-1)  # [batch, features]
+        
+        # Dense layers
+        x = self.dense(x)
+        
+        # Apply activation
+        return self.activation(x)
+
+
+class TransformerModelPyTorchWrapper:
+    """
+    Transformer model wrapper class for time series analysis using PyTorch.
+    
+    This class provides methods for building, training, evaluating, and making
+    predictions with the Transformer model.
+    """
+    
+    def __init__(self, window_size, num_features, output_size=3, timeframes=None):
+        """
+        Initialize the Transformer model.
+        
+        Args:
+            window_size (int): Size of the input window
+            num_features (int): Number of features in the input data
+            output_size (int): Size of the output (1 for regression, 3 for classification)
+            timeframes (list): List of timeframes used (for logging)
+        """
+        self.window_size = window_size
+        self.num_features = num_features
+        self.output_size = output_size
+        self.timeframes = timeframes or []
+        
+        # Determine device (GPU or CPU)
+        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        logger.info(f"Using device: {self.device}")
+        
+        # Initialize model
+        self.model = None
+        self.build_model()
+        
+        # Initialize training history
+        self.history = {
+            'loss': [],
+            'val_loss': [],
+            'accuracy': [],
+            'val_accuracy': []
+        }
+    
+    def build_model(self):
+        """Build the Transformer model architecture"""
+        logger.info(f"Building PyTorch Transformer model with window_size={self.window_size}, "
+                   f"num_features={self.num_features}, output_size={self.output_size}")
+        
+        self.model = TransformerModelPyTorch(
+            input_shape=(self.window_size, self.num_features),
+            output_size=self.output_size
+        ).to(self.device)
+        
+        # Initialize optimizer
+        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
+        
+        # Initialize loss function based on output size
+        if self.output_size == 1:
+            self.criterion = nn.BCELoss()  # Binary classification
+        elif self.output_size > 1:
+            self.criterion = nn.CrossEntropyLoss()  # Multi-class classification
+        else:
+            self.criterion = nn.MSELoss()  # Regression
+        
+        logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")
+    
+    def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
+        """
+        Train the Transformer model.
+        
+        Args:
+            X_train: Training input data
+            y_train: Training target data
+            X_val: Validation input data
+            y_val: Validation target data
+            batch_size: Batch size for training
+            epochs: Number of training epochs
+            
+        Returns:
+            Training history
+        """
+        logger.info(f"Training PyTorch Transformer model with {len(X_train)} samples, "
+                   f"batch_size={batch_size}, epochs={epochs}")
+        
+        # Convert numpy arrays to PyTorch tensors
+        X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)
+        
+        # Handle different output sizes for y_train
+        if self.output_size == 1:
+            y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(self.device)
+        else:
+            y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)
+        
+        # Create DataLoader for training data
+        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
+        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
+        
+        # Create DataLoader for validation data if provided
+        if X_val is not None and y_val is not None:
+            X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
+            if self.output_size == 1:
+                y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
+            else:
+                y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)
+                
+            val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
+            val_loader = DataLoader(val_dataset, batch_size=batch_size)
+        else:
+            val_loader = None
+        
+        # Training loop
+        for epoch in range(epochs):
+            # Training phase
+            self.model.train()
+            running_loss = 0.0
+            correct = 0
+            total = 0
+            
+            for inputs, targets in train_loader:
+                # Zero the parameter gradients
+                self.optimizer.zero_grad()
+                
+                # Forward pass
+                outputs = self.model(inputs)
+                
+                # Calculate loss
+                if self.output_size == 1:
+                    loss = self.criterion(outputs, targets.unsqueeze(1))
+                else:
+                    loss = self.criterion(outputs, targets)
+                
+                # Backward pass and optimize
+                loss.backward()
+                self.optimizer.step()
+                
+                # Statistics
+                running_loss += loss.item()
+                if self.output_size > 1:
+                    _, predicted = torch.max(outputs, 1)
+                    total += targets.size(0)
+                    correct += (predicted == targets).sum().item()
+            
+            epoch_loss = running_loss / len(train_loader)
+            epoch_acc = correct / total if total > 0 else 0
+            
+            # Validation phase
+            if val_loader is not None:
+                val_loss, val_acc = self._validate(val_loader)
+                
+                logger.info(f"Epoch {epoch+1}/{epochs} - "
+                           f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
+                           f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")
+                
+                # Update history
+                self.history['loss'].append(epoch_loss)
+                self.history['accuracy'].append(epoch_acc)
+                self.history['val_loss'].append(val_loss)
+                self.history['val_accuracy'].append(val_acc)
+            else:
+                logger.info(f"Epoch {epoch+1}/{epochs} - "
+                           f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f}")
+                
+                # Update history without validation
+                self.history['loss'].append(epoch_loss)
+                self.history['accuracy'].append(epoch_acc)
+        
+        logger.info("Training completed")
+        return self.history
+    
+    def _validate(self, val_loader):
+        """Validate the model using the validation set"""
+        self.model.eval()
+        val_loss = 0.0
+        correct = 0
+        total = 0
+        
+        with torch.no_grad():
+            for inputs, targets in val_loader:
+                # Forward pass
+                outputs = self.model(inputs)
+                
+                # Calculate loss
+                if self.output_size == 1:
+                    loss = self.criterion(outputs, targets.unsqueeze(1))
+                else:
+                    loss = self.criterion(outputs, targets)
+                
+                val_loss += loss.item()
+                
+                # Calculate accuracy
+                if self.output_size > 1:
+                    _, predicted = torch.max(outputs, 1)
+                    total += targets.size(0)
+                    correct += (predicted == targets).sum().item()
+        
+        return val_loss / len(val_loader), correct / total if total > 0 else 0
+    
+    def evaluate(self, X_test, y_test):
+        """
+        Evaluate the model on test data.
+        
+        Args:
+            X_test: Test input data
+            y_test: Test target data
+            
+        Returns:
+            dict: Evaluation metrics
+        """
+        logger.info(f"Evaluating model on {len(X_test)} samples")
+        
+        # Convert to PyTorch tensors
+        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(self.device)
+        
+        # Get predictions
+        self.model.eval()
+        with torch.no_grad():
+            y_pred = self.model(X_test_tensor)
+            
+            if self.output_size > 1:
+                _, y_pred_class = torch.max(y_pred, 1)
+                y_pred_class = y_pred_class.cpu().numpy()
+            else:
+                y_pred_class = (y_pred.cpu().numpy() > 0.5).astype(int).flatten()
+        
+        # Calculate metrics
+        if self.output_size > 1:
+            accuracy = accuracy_score(y_test, y_pred_class)
+            precision = precision_score(y_test, y_pred_class, average='weighted')
+            recall = recall_score(y_test, y_pred_class, average='weighted')
+            f1 = f1_score(y_test, y_pred_class, average='weighted')
+            
+            metrics = {
+                'accuracy': accuracy,
+                'precision': precision,
+                'recall': recall,
+                'f1_score': f1
+            }
+        else:
+            accuracy = accuracy_score(y_test, y_pred_class)
+            precision = precision_score(y_test, y_pred_class)
+            recall = recall_score(y_test, y_pred_class)
+            f1 = f1_score(y_test, y_pred_class)
+            
+            metrics = {
+                'accuracy': accuracy,
+                'precision': precision,
+                'recall': recall,
+                'f1_score': f1
+            }
+        
+        logger.info(f"Evaluation metrics: {metrics}")
+        return metrics
+    
+    def predict(self, X):
+        """
+        Make predictions with the model.
+        
+        Args:
+            X: Input data
+            
+        Returns:
+            Predictions
+        """
+        # Convert to PyTorch tensor
+        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
+        
+        # Get predictions
+        self.model.eval()
+        with torch.no_grad():
+            predictions = self.model(X_tensor)
+            
+            if self.output_size > 1:
+                # Multi-class classification
+                probs = predictions.cpu().numpy()
+                _, class_preds = torch.max(predictions, 1)
+                class_preds = class_preds.cpu().numpy()
+                return class_preds, probs
+            else:
+                # Binary classification or regression
+                preds = predictions.cpu().numpy()
+                if self.output_size == 1:
+                    # Binary classification
+                    class_preds = (preds > 0.5).astype(int)
+                    return class_preds.flatten(), preds.flatten()
+                else:
+                    # Regression
+                    return preds.flatten(), None
+    
+    def save(self, filepath):
+        """
+        Save the model to a file.
+        
+        Args:
+            filepath: Path to save the model
+        """
+        # Create directory if it doesn't exist
+        os.makedirs(os.path.dirname(filepath), exist_ok=True)
+        
+        # Save the model state
+        model_state = {
+            'model_state_dict': self.model.state_dict(),
+            'optimizer_state_dict': self.optimizer.state_dict(),
+            'history': self.history,
+            'window_size': self.window_size,
+            'num_features': self.num_features,
+            'output_size': self.output_size,
+            'timeframes': self.timeframes
+        }
+        
+        torch.save(model_state, f"{filepath}.pt")
+        logger.info(f"Model saved to {filepath}.pt")
+    
+    def load(self, filepath):
+        """
+        Load the model from a file.
+        
+        Args:
+            filepath: Path to load the model from
+        """
+        # Check if file exists
+        if not os.path.exists(f"{filepath}.pt"):
+            logger.error(f"Model file {filepath}.pt not found")
+            return False
+        
+        # Load the model state
+        model_state = torch.load(f"{filepath}.pt", map_location=self.device)
+        
+        # Update model parameters
+        self.window_size = model_state['window_size']
+        self.num_features = model_state['num_features']
+        self.output_size = model_state['output_size']
+        self.timeframes = model_state['timeframes']
+        
+        # Rebuild the model
+        self.build_model()
+        
+        # Load the model state
+        self.model.load_state_dict(model_state['model_state_dict'])
+        self.optimizer.load_state_dict(model_state['optimizer_state_dict'])
+        self.history = model_state['history']
+        
+        logger.info(f"Model loaded from {filepath}.pt")
+        return True
+
+class MixtureOfExpertsModelPyTorch:
+    """
+    Mixture of Experts model implementation using PyTorch.
+    
+    This model combines predictions from multiple models (experts) using a 
+    learned weighting scheme.
+    """
+    
+    def __init__(self, output_size=3, timeframes=None):
+        """
+        Initialize the Mixture of Experts model.
+        
+        Args:
+            output_size (int): Size of the output (1 for regression, 3 for classification)
+            timeframes (list): List of timeframes used (for logging)
+        """
+        self.output_size = output_size
+        self.timeframes = timeframes or []
+        self.experts = {}
+        self.expert_weights = {}
+        
+        # Determine device (GPU or CPU)
+        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        logger.info(f"Using device: {self.device}")
+        
+        # Initialize model and training history
+        self.model = None
+        self.history = {
+            'loss': [],
+            'val_loss': [],
+            'accuracy': [],
+            'val_accuracy': []
+        }
+    
+    def add_expert(self, name, model):
+        """
+        Add an expert model.
+        
+        Args:
+            name (str): Name of the expert
+            model: Expert model
+        """
+        self.experts[name] = model
+        logger.info(f"Added expert: {name}")
+    
+    def predict(self, X):
+        """
+        Make predictions using all experts and combine them.
+        
+        Args:
+            X: Input data
+            
+        Returns:
+            Combined predictions
+        """
+        if not self.experts:
+            logger.error("No experts added to the MoE model")
+            return None
+        
+        # Get predictions from each expert
+        expert_predictions = {}
+        for name, expert in self.experts.items():
+            pred, _ = expert.predict(X)
+            expert_predictions[name] = pred
+        
+        # Combine predictions based on weights
+        final_pred = None
+        for name, pred in expert_predictions.items():
+            weight = self.expert_weights.get(name, 1.0 / len(self.experts))
+            if final_pred is None:
+                final_pred = weight * pred
+            else:
+                final_pred += weight * pred
+        
+        # For classification, convert to class indices
+        if self.output_size > 1:
+            # Get class with highest probability
+            class_pred = np.argmax(final_pred, axis=1)
+            return class_pred, final_pred
+        else:
+            # Binary classification
+            class_pred = (final_pred > 0.5).astype(int)
+            return class_pred, final_pred
+    
+    def evaluate(self, X_test, y_test):
+        """
+        Evaluate the model on test data.
+        
+        Args:
+            X_test: Test input data
+            y_test: Test target data
+            
+        Returns:
+            dict: Evaluation metrics
+        """
+        logger.info(f"Evaluating MoE model on {len(X_test)} samples")
+        
+        # Get predictions
+        y_pred_class, _ = self.predict(X_test)
+        
+        # Calculate metrics
+        if self.output_size > 1:
+            accuracy = accuracy_score(y_test, y_pred_class)
+            precision = precision_score(y_test, y_pred_class, average='weighted')
+            recall = recall_score(y_test, y_pred_class, average='weighted')
+            f1 = f1_score(y_test, y_pred_class, average='weighted')
+            
+            metrics = {
+                'accuracy': accuracy,
+                'precision': precision,
+                'recall': recall,
+                'f1_score': f1
+            }
+        else:
+            accuracy = accuracy_score(y_test, y_pred_class)
+            precision = precision_score(y_test, y_pred_class)
+            recall = recall_score(y_test, y_pred_class)
+            f1 = f1_score(y_test, y_pred_class)
+            
+            metrics = {
+                'accuracy': accuracy,
+                'precision': precision,
+                'recall': recall,
+                'f1_score': f1
+            }
+        
+        logger.info(f"MoE evaluation metrics: {metrics}")
+        return metrics
+    
+    def save(self, filepath):
+        """
+        Save the model weights to a file.
+        
+        Args:
+            filepath: Path to save the model
+        """
+        # Create directory if it doesn't exist
+        os.makedirs(os.path.dirname(filepath), exist_ok=True)
+        
+        # Save the model state
+        model_state = {
+            'expert_weights': self.expert_weights,
+            'output_size': self.output_size,
+            'timeframes': self.timeframes
+        }
+        
+        torch.save(model_state, f"{filepath}_moe.pt")
+        logger.info(f"MoE model saved to {filepath}_moe.pt")
+    
+    def load(self, filepath):
+        """
+        Load the model from a file.
+        
+        Args:
+            filepath: Path to load the model from
+        """
+        # Check if file exists
+        if not os.path.exists(f"{filepath}_moe.pt"):
+            logger.error(f"MoE model file {filepath}_moe.pt not found")
+            return False
+        
+        # Load the model state
+        model_state = torch.load(f"{filepath}_moe.pt", map_location=self.device)
+        
+        # Update model parameters
+        self.expert_weights = model_state['expert_weights']
+        self.output_size = model_state['output_size']
+        self.timeframes = model_state['timeframes']
+        
+        logger.info(f"MoE model loaded from {filepath}_moe.pt")
+        return True