#!/usr/bin/env python3
"""
Transformer Model - PyTorch Implementation

This module implements a Transformer model using PyTorch for time series
analysis. The model consists of a Transformer encoder and a Mixture of
Experts model.
"""

import os
import logging

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Configure logging
logger = logging.getLogger(__name__)


class TransformerBlock(nn.Module):
    """Transformer block with a self-attention mechanism."""

    def __init__(self, input_dim, num_heads=4, ff_dim=64, dropout=0.1):
        super(TransformerBlock, self).__init__()
        self.attention = nn.MultiheadAttention(
            embed_dim=input_dim,
            num_heads=num_heads,
            dropout=dropout,
            batch_first=True
        )
        self.feed_forward = nn.Sequential(
            nn.Linear(input_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, input_dim)
        )
        self.layernorm1 = nn.LayerNorm(input_dim)
        self.layernorm2 = nn.LayerNorm(input_dim)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, x):
        # Self-attention with a residual connection
        attn_output, _ = self.attention(x, x, x)
        x = x + self.dropout1(attn_output)
        x = self.layernorm1(x)

        # Feed forward with a residual connection
        ff_output = self.feed_forward(x)
        x = x + self.dropout2(ff_output)
        x = self.layernorm2(x)
        return x


class TransformerModelPyTorch(nn.Module):
    """PyTorch Transformer model for time series analysis."""

    def __init__(self, input_shape, output_size=3, num_heads=4, ff_dim=64,
                 num_transformer_blocks=2):
        """
        Initialize the Transformer model.

        Args:
            input_shape (tuple): Shape of input data (window_size, features)
            output_size (int): Size of the output (1 for binary
                classification, >1 for multi-class classification)
            num_heads (int): Number of attention heads
            ff_dim (int): Feed forward dimension
            num_transformer_blocks (int): Number of transformer blocks
        """
        super(TransformerModelPyTorch, self).__init__()
        window_size, num_features = input_shape

        # Learned positional encoding
        self.pos_encoding = nn.Parameter(
            torch.zeros(1, window_size, num_features),
            requires_grad=True
        )

        # Transformer blocks
        self.transformer_blocks = nn.ModuleList([
            TransformerBlock(
                input_dim=num_features,
                num_heads=num_heads,
                ff_dim=ff_dim
            ) for _ in range(num_transformer_blocks)
        ])

        # Global average pooling over the time dimension
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)

        # Dense head
        self.dense = nn.Sequential(
            nn.Linear(num_features, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Dropout(0.3),
            nn.Linear(64, output_size)
        )

        # Output activation. For multi-class outputs the model returns raw
        # logits rather than softmax probabilities, because
        # nn.CrossEntropyLoss applies log-softmax internally; probabilities
        # are computed at prediction time instead.
        if output_size == 1:
            self.activation = nn.Sigmoid()   # Binary classification
        else:
            self.activation = nn.Identity()  # Multi-class logits / regression

    def forward(self, x):
        """
        Forward pass through the network.

        Args:
            x: Input tensor of shape [batch_size, window_size, features]

        Returns:
            Output tensor of shape [batch_size, output_size]
        """
        # Add positional encoding
        x = x + self.pos_encoding

        # Apply transformer blocks
        for transformer_block in self.transformer_blocks:
            x = transformer_block(x)

        # Global average pooling
        x = x.transpose(1, 2)        # [batch, features, window]
        x = self.global_avg_pool(x)  # [batch, features, 1]
        x = x.squeeze(-1)            # [batch, features]

        # Dense head and output activation
        x = self.dense(x)
        return self.activation(x)
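
# Illustrative sketch, not part of the original module: a quick shape check
# for the encoder above. The dimensions (a batch of 8 windows of length 32
# with 16 features) are arbitrary assumptions for the example.
def _demo_encoder_shapes():
    """Run a dummy batch through TransformerModelPyTorch and print shapes."""
    model = TransformerModelPyTorch(input_shape=(32, 16), output_size=3)
    model.eval()  # use BatchNorm running stats for the dummy batch
    x = torch.randn(8, 32, 16)  # [batch, window, features]
    with torch.no_grad():
        out = model(x)          # [batch, output_size] logits
    print(f"input {tuple(x.shape)} -> output {tuple(out.shape)}")
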
class TransformerModelPyTorchWrapper:
    """
    Transformer model wrapper class for time series analysis using PyTorch.

    This class provides methods for building, training, evaluating, and
    making predictions with the Transformer model.
    """

    def __init__(self, window_size, num_features, output_size=3, timeframes=None):
        """
        Initialize the Transformer model.

        Args:
            window_size (int): Size of the input window
            num_features (int): Number of features in the input data
            output_size (int): Size of the output (1 for binary
                classification, >1 for multi-class classification)
            timeframes (list): List of timeframes used (for logging)
        """
        self.window_size = window_size
        self.num_features = num_features
        self.output_size = output_size
        self.timeframes = timeframes or []

        # Determine device (GPU or CPU)
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {self.device}")

        # Initialize model
        self.model = None
        self.build_model()

        # Initialize training history
        self.history = {
            'loss': [],
            'val_loss': [],
            'accuracy': [],
            'val_accuracy': []
        }

    def build_model(self):
        """Build the Transformer model architecture."""
        logger.info(f"Building PyTorch Transformer model with window_size={self.window_size}, "
                    f"num_features={self.num_features}, output_size={self.output_size}")

        self.model = TransformerModelPyTorch(
            input_shape=(self.window_size, self.num_features),
            output_size=self.output_size
        ).to(self.device)

        # Initialize optimizer
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)

        # Initialize loss function based on output size. The model applies
        # a sigmoid when output_size == 1, so BCELoss sees probabilities;
        # for multi-class outputs the model returns logits, which is what
        # CrossEntropyLoss expects.
        if self.output_size == 1:
            self.criterion = nn.BCELoss()           # Binary classification
        elif self.output_size > 1:
            self.criterion = nn.CrossEntropyLoss()  # Multi-class classification
        else:
            self.criterion = nn.MSELoss()           # Regression

        logger.info(f"Model built successfully with "
                    f"{sum(p.numel() for p in self.model.parameters())} parameters")
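
    # Illustrative sketch (assumed dimensions, not part of the original
    # module): constructing the wrapper builds the model, selects the
    # device, and pairs the output size with a matching loss:
    #
    #     wrapper = TransformerModelPyTorchWrapper(window_size=32,
    #                                              num_features=16,
    #                                              output_size=3)
    #     # output_size=3 -> CrossEntropyLoss over raw logits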
    def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
        """
        Train the Transformer model.

        Args:
            X_train: Training input data
            y_train: Training target data
            X_val: Validation input data
            y_val: Validation target data
            batch_size: Batch size for training
            epochs: Number of training epochs

        Returns:
            Training history
        """
        logger.info(f"Training PyTorch Transformer model with {len(X_train)} samples, "
                    f"batch_size={batch_size}, epochs={epochs}")

        # Convert numpy arrays to PyTorch tensors
        X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)

        # Handle different output sizes for y_train
        if self.output_size == 1:
            y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(self.device)
        else:
            y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)

        # Create DataLoader for training data
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Create DataLoader for validation data if provided
        if X_val is not None and y_val is not None:
            X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
            if self.output_size == 1:
                y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
            else:
                y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)
            val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
            val_loader = DataLoader(val_dataset, batch_size=batch_size)
        else:
            val_loader = None

        # Training loop
        for epoch in range(epochs):
            # Training phase
            self.model.train()
            running_loss = 0.0
            correct = 0
            total = 0

            for inputs, targets in train_loader:
                # Zero the parameter gradients
                self.optimizer.zero_grad()

                # Forward pass
                outputs = self.model(inputs)

                # Calculate loss
                if self.output_size == 1:
                    loss = self.criterion(outputs, targets.unsqueeze(1))
                else:
                    loss = self.criterion(outputs, targets)

                # Backward pass and optimize
                loss.backward()
                self.optimizer.step()

                # Statistics
                running_loss += loss.item()
                if self.output_size > 1:
                    _, predicted = torch.max(outputs, 1)
                    total += targets.size(0)
                    correct += (predicted == targets).sum().item()
                elif self.output_size == 1:
                    predicted = (outputs.squeeze(1) > 0.5).float()
                    total += targets.size(0)
                    correct += (predicted == targets).sum().item()

            epoch_loss = running_loss / len(train_loader)
            epoch_acc = correct / total if total > 0 else 0

            # Validation phase
            if val_loader is not None:
                val_loss, val_acc = self._validate(val_loader)
                logger.info(f"Epoch {epoch+1}/{epochs} - "
                            f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
                            f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")

                # Update history
                self.history['loss'].append(epoch_loss)
                self.history['accuracy'].append(epoch_acc)
                self.history['val_loss'].append(val_loss)
                self.history['val_accuracy'].append(val_acc)
            else:
                logger.info(f"Epoch {epoch+1}/{epochs} - "
                            f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f}")

                # Update history without validation
                self.history['loss'].append(epoch_loss)
                self.history['accuracy'].append(epoch_acc)

        logger.info("Training completed")
        return self.history

    def _validate(self, val_loader):
        """Validate the model using the validation set."""
        self.model.eval()
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, targets in val_loader:
                # Forward pass
                outputs = self.model(inputs)

                # Calculate loss
                if self.output_size == 1:
                    loss = self.criterion(outputs, targets.unsqueeze(1))
                else:
                    loss = self.criterion(outputs, targets)
                val_loss += loss.item()

                # Calculate accuracy
                if self.output_size > 1:
                    _, predicted = torch.max(outputs, 1)
                    total += targets.size(0)
                    correct += (predicted == targets).sum().item()
                elif self.output_size == 1:
                    predicted = (outputs.squeeze(1) > 0.5).float()
                    total += targets.size(0)
                    correct += (predicted == targets).sum().item()

        return val_loss / len(val_loader), correct / total if total > 0 else 0
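
    # Illustrative training sketch (synthetic shapes assumed, not part of
    # the original module), given a wrapper built as above:
    #
    #     X_train = np.random.randn(512, 32, 16).astype(np.float32)
    #     y_train = np.random.randint(0, 3, size=512)
    #     history = wrapper.train(X_train, y_train, batch_size=32, epochs=5)
    #     # history['loss'] and history['accuracy'] hold one entry per epoch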
    def evaluate(self, X_test, y_test):
        """
        Evaluate the model on test data.

        Args:
            X_test: Test input data
            y_test: Test target data

        Returns:
            dict: Evaluation metrics
        """
        logger.info(f"Evaluating model on {len(X_test)} samples")

        # Convert to PyTorch tensors
        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(self.device)

        # Get predictions
        self.model.eval()
        with torch.no_grad():
            y_pred = self.model(X_test_tensor)
            if self.output_size > 1:
                _, y_pred_class = torch.max(y_pred, 1)
                y_pred_class = y_pred_class.cpu().numpy()
            else:
                y_pred_class = (y_pred.cpu().numpy() > 0.5).astype(int).flatten()

        # Calculate metrics (weighted averaging for multi-class, the
        # default binary averaging otherwise)
        average = 'weighted' if self.output_size > 1 else 'binary'
        metrics = {
            'accuracy': accuracy_score(y_test, y_pred_class),
            'precision': precision_score(y_test, y_pred_class, average=average),
            'recall': recall_score(y_test, y_pred_class, average=average),
            'f1_score': f1_score(y_test, y_pred_class, average=average)
        }

        logger.info(f"Evaluation metrics: {metrics}")
        return metrics

    def predict(self, X):
        """
        Make predictions with the model.

        Args:
            X: Input data

        Returns:
            Tuple of (class predictions, probabilities); for regression the
            second element is None.
        """
        # Convert to PyTorch tensor
        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)

        # Get predictions
        self.model.eval()
        with torch.no_grad():
            predictions = self.model(X_tensor)

            if self.output_size > 1:
                # Multi-class classification: the model returns logits, so
                # apply softmax to obtain probabilities
                probs = torch.softmax(predictions, dim=1)
                class_preds = torch.argmax(probs, dim=1).cpu().numpy()
                return class_preds, probs.cpu().numpy()

            preds = predictions.cpu().numpy()
            if self.output_size == 1:
                # Binary classification
                class_preds = (preds > 0.5).astype(int)
                return class_preds.flatten(), preds.flatten()

            # Regression
            return preds.flatten(), None

    def save(self, filepath):
        """
        Save the model to a file.

        Args:
            filepath: Path to save the model
        """
        # Create the directory if it doesn't exist
        directory = os.path.dirname(filepath)
        if directory:
            os.makedirs(directory, exist_ok=True)

        # Save the model state
        model_state = {
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'history': self.history,
            'window_size': self.window_size,
            'num_features': self.num_features,
            'output_size': self.output_size,
            'timeframes': self.timeframes
        }
        torch.save(model_state, f"{filepath}.pt")
        logger.info(f"Model saved to {filepath}.pt")

    def load(self, filepath):
        """
        Load the model from a file.

        Args:
            filepath: Path to load the model from

        Returns:
            bool: True if the model was loaded successfully
        """
        # Check if the file exists
        if not os.path.exists(f"{filepath}.pt"):
            logger.error(f"Model file {filepath}.pt not found")
            return False

        # Load the model state
        model_state = torch.load(f"{filepath}.pt", map_location=self.device)

        # Update model parameters
        self.window_size = model_state['window_size']
        self.num_features = model_state['num_features']
        self.output_size = model_state['output_size']
        self.timeframes = model_state['timeframes']

        # Rebuild the model, then restore its weights, optimizer state,
        # and training history
        self.build_model()
        self.model.load_state_dict(model_state['model_state_dict'])
        self.optimizer.load_state_dict(model_state['optimizer_state_dict'])
        self.history = model_state['history']

        logger.info(f"Model loaded from {filepath}.pt")
        return True
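
# Illustrative sketch, not part of the original module: end-to-end use of
# the wrapper on synthetic data. All shapes and the temp path are
# assumptions for the example.
def _demo_wrapper_roundtrip(tmp_path="/tmp/transformer_demo"):
    X = np.random.randn(256, 32, 16).astype(np.float32)
    y = np.random.randint(0, 3, size=256)

    wrapper = TransformerModelPyTorchWrapper(window_size=32, num_features=16, output_size=3)
    wrapper.train(X, y, batch_size=32, epochs=2)

    metrics = wrapper.evaluate(X, y)        # dict of accuracy/precision/recall/f1
    class_preds, probs = wrapper.predict(X[:8])

    wrapper.save(tmp_path)                  # writes /tmp/transformer_demo.pt
    assert wrapper.load(tmp_path)
    return metrics, class_preds, probs
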
class MixtureOfExpertsModelPyTorch:
    """
    Mixture of Experts model implementation using PyTorch.

    This model combines predictions from multiple models (experts) as a
    weighted average; weights are uniform unless set in expert_weights.
    """

    def __init__(self, output_size=3, timeframes=None):
        """
        Initialize the Mixture of Experts model.

        Args:
            output_size (int): Size of the output (1 for binary
                classification, >1 for multi-class classification)
            timeframes (list): List of timeframes used (for logging)
        """
        self.output_size = output_size
        self.timeframes = timeframes or []
        self.experts = {}
        self.expert_weights = {}

        # Determine device (GPU or CPU)
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {self.device}")

        # Initialize model and training history
        self.model = None
        self.history = {
            'loss': [],
            'val_loss': [],
            'accuracy': [],
            'val_accuracy': []
        }

    def add_expert(self, name, model):
        """
        Add an expert model.

        Args:
            name (str): Name of the expert
            model: Expert model
        """
        self.experts[name] = model
        logger.info(f"Added expert: {name}")

    def predict(self, X):
        """
        Make predictions using all experts and combine them.

        Args:
            X: Input data

        Returns:
            Tuple of (class predictions, combined probabilities)
        """
        if not self.experts:
            logger.error("No experts added to the MoE model")
            return None

        # Get probability predictions from each expert
        expert_probs = {}
        for name, expert in self.experts.items():
            _, probs = expert.predict(X)
            expert_probs[name] = probs

        # Combine the experts' probabilities as a weighted average
        # (uniform weights by default)
        final_pred = None
        for name, probs in expert_probs.items():
            weight = self.expert_weights.get(name, 1.0 / len(self.experts))
            if final_pred is None:
                final_pred = weight * probs
            else:
                final_pred = final_pred + weight * probs

        # For classification, convert to class indices
        if self.output_size > 1:
            # Class with the highest combined probability
            class_pred = np.argmax(final_pred, axis=1)
            return class_pred, final_pred
        else:
            # Binary classification
            class_pred = (final_pred > 0.5).astype(int)
            return class_pred, final_pred

    def evaluate(self, X_test, y_test):
        """
        Evaluate the model on test data.

        Args:
            X_test: Test input data
            y_test: Test target data

        Returns:
            dict: Evaluation metrics
        """
        logger.info(f"Evaluating MoE model on {len(X_test)} samples")

        # Get predictions
        y_pred_class, _ = self.predict(X_test)

        # Calculate metrics (weighted averaging for multi-class, the
        # default binary averaging otherwise)
        average = 'weighted' if self.output_size > 1 else 'binary'
        metrics = {
            'accuracy': accuracy_score(y_test, y_pred_class),
            'precision': precision_score(y_test, y_pred_class, average=average),
            'recall': recall_score(y_test, y_pred_class, average=average),
            'f1_score': f1_score(y_test, y_pred_class, average=average)
        }

        logger.info(f"MoE evaluation metrics: {metrics}")
        return metrics

    def save(self, filepath):
        """
        Save the model weights to a file.

        Args:
            filepath: Path to save the model
        """
        # Create the directory if it doesn't exist
        directory = os.path.dirname(filepath)
        if directory:
            os.makedirs(directory, exist_ok=True)

        # Save the model state
        model_state = {
            'expert_weights': self.expert_weights,
            'output_size': self.output_size,
            'timeframes': self.timeframes
        }
        torch.save(model_state, f"{filepath}_moe.pt")
        logger.info(f"MoE model saved to {filepath}_moe.pt")
    def load(self, filepath):
        """
        Load the model from a file.

        Args:
            filepath: Path to load the model from

        Returns:
            bool: True if the model was loaded successfully
        """
        # Check if the file exists
        if not os.path.exists(f"{filepath}_moe.pt"):
            logger.error(f"MoE model file {filepath}_moe.pt not found")
            return False

        # Load the model state
        model_state = torch.load(f"{filepath}_moe.pt", map_location=self.device)

        # Update model parameters
        self.expert_weights = model_state['expert_weights']
        self.output_size = model_state['output_size']
        self.timeframes = model_state['timeframes']

        logger.info(f"MoE model loaded from {filepath}_moe.pt")
        return True
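
# Illustrative sketch, not part of the original module: combining two
# independently trained wrappers through the MoE model. Shapes, expert
# names, and weights are assumptions for the example.
def _demo_moe():
    X = np.random.randn(128, 32, 16).astype(np.float32)
    y = np.random.randint(0, 3, size=128)

    moe = MixtureOfExpertsModelPyTorch(output_size=3)
    for name in ("expert_a", "expert_b"):
        expert = TransformerModelPyTorchWrapper(window_size=32, num_features=16, output_size=3)
        expert.train(X, y, batch_size=32, epochs=1)
        moe.add_expert(name, expert)

    # Optional: override the default uniform weighting
    moe.expert_weights = {"expert_a": 0.7, "expert_b": 0.3}
    return moe.evaluate(X, y)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    _demo_encoder_shapes()
    _demo_wrapper_roundtrip()
    _demo_moe()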