gogo2/NN/models/transformer_model_pytorch.py
Dobromir Popov 0042581275 new nn wip
2025-03-25 13:38:25 +02:00

653 lines
22 KiB
Python

#!/usr/bin/env python3
"""
Transformer Model - PyTorch Implementation
This module implements a Transformer model using PyTorch for time series analysis.
The model consists of a Transformer encoder and a Mixture of Experts model.
"""
import os
import logging
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Configure logging
logger = logging.getLogger(__name__)
class TransformerBlock(nn.Module):
"""Transformer Block with self-attention mechanism"""
def __init__(self, input_dim, num_heads=4, ff_dim=64, dropout=0.1):
super(TransformerBlock, self).__init__()
self.attention = nn.MultiheadAttention(
embed_dim=input_dim,
num_heads=num_heads,
dropout=dropout,
batch_first=True
)
self.feed_forward = nn.Sequential(
nn.Linear(input_dim, ff_dim),
nn.ReLU(),
nn.Linear(ff_dim, input_dim)
)
self.layernorm1 = nn.LayerNorm(input_dim)
self.layernorm2 = nn.LayerNorm(input_dim)
self.dropout1 = nn.Dropout(dropout)
self.dropout2 = nn.Dropout(dropout)
def forward(self, x):
# Self-attention
attn_output, _ = self.attention(x, x, x)
x = x + self.dropout1(attn_output)
x = self.layernorm1(x)
# Feed forward
ff_output = self.feed_forward(x)
x = x + self.dropout2(ff_output)
x = self.layernorm2(x)
return x
class TransformerModelPyTorch(nn.Module):
"""PyTorch Transformer model for time series analysis"""
def __init__(self, input_shape, output_size=3, num_heads=4, ff_dim=64, num_transformer_blocks=2):
"""
Initialize the Transformer model.
Args:
input_shape (tuple): Shape of input data (window_size, features)
output_size (int): Size of output (1 for regression, 3 for classification)
num_heads (int): Number of attention heads
ff_dim (int): Feed forward dimension
num_transformer_blocks (int): Number of transformer blocks
"""
super(TransformerModelPyTorch, self).__init__()
window_size, num_features = input_shape
# Positional encoding
self.pos_encoding = nn.Parameter(
torch.zeros(1, window_size, num_features),
requires_grad=True
)
# Transformer blocks
self.transformer_blocks = nn.ModuleList([
TransformerBlock(
input_dim=num_features,
num_heads=num_heads,
ff_dim=ff_dim
) for _ in range(num_transformer_blocks)
])
# Global average pooling
self.global_avg_pool = nn.AdaptiveAvgPool1d(1)
# Dense layers
self.dense = nn.Sequential(
nn.Linear(num_features, 64),
nn.ReLU(),
nn.BatchNorm1d(64),
nn.Dropout(0.3),
nn.Linear(64, output_size)
)
# Activation based on output size
if output_size == 1:
self.activation = nn.Sigmoid() # Binary classification or regression
elif output_size > 1:
self.activation = nn.Softmax(dim=1) # Multi-class classification
else:
self.activation = nn.Identity() # No activation
def forward(self, x):
"""
Forward pass through the network.
Args:
x: Input tensor of shape [batch_size, window_size, features]
Returns:
Output tensor of shape [batch_size, output_size]
"""
# Add positional encoding
x = x + self.pos_encoding
# Apply transformer blocks
for transformer_block in self.transformer_blocks:
x = transformer_block(x)
# Global average pooling
x = x.transpose(1, 2) # [batch, features, window]
x = self.global_avg_pool(x) # [batch, features, 1]
x = x.squeeze(-1) # [batch, features]
# Dense layers
x = self.dense(x)
# Apply activation
return self.activation(x)
class TransformerModelPyTorchWrapper:
"""
Transformer model wrapper class for time series analysis using PyTorch.
This class provides methods for building, training, evaluating, and making
predictions with the Transformer model.
"""
def __init__(self, window_size, num_features, output_size=3, timeframes=None):
"""
Initialize the Transformer model.
Args:
window_size (int): Size of the input window
num_features (int): Number of features in the input data
output_size (int): Size of the output (1 for regression, 3 for classification)
timeframes (list): List of timeframes used (for logging)
"""
self.window_size = window_size
self.num_features = num_features
self.output_size = output_size
self.timeframes = timeframes or []
# Determine device (GPU or CPU)
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device: {self.device}")
# Initialize model
self.model = None
self.build_model()
# Initialize training history
self.history = {
'loss': [],
'val_loss': [],
'accuracy': [],
'val_accuracy': []
}
def build_model(self):
"""Build the Transformer model architecture"""
logger.info(f"Building PyTorch Transformer model with window_size={self.window_size}, "
f"num_features={self.num_features}, output_size={self.output_size}")
self.model = TransformerModelPyTorch(
input_shape=(self.window_size, self.num_features),
output_size=self.output_size
).to(self.device)
# Initialize optimizer
self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
# Initialize loss function based on output size
if self.output_size == 1:
self.criterion = nn.BCELoss() # Binary classification
elif self.output_size > 1:
self.criterion = nn.CrossEntropyLoss() # Multi-class classification
else:
self.criterion = nn.MSELoss() # Regression
logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")
def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
"""
Train the Transformer model.
Args:
X_train: Training input data
y_train: Training target data
X_val: Validation input data
y_val: Validation target data
batch_size: Batch size for training
epochs: Number of training epochs
Returns:
Training history
"""
logger.info(f"Training PyTorch Transformer model with {len(X_train)} samples, "
f"batch_size={batch_size}, epochs={epochs}")
# Convert numpy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)
# Handle different output sizes for y_train
if self.output_size == 1:
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(self.device)
else:
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)
# Create DataLoader for training data
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Create DataLoader for validation data if provided
if X_val is not None and y_val is not None:
X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
if self.output_size == 1:
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
else:
y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
else:
val_loader = None
# Training loop
for epoch in range(epochs):
# Training phase
self.model.train()
running_loss = 0.0
correct = 0
total = 0
for inputs, targets in train_loader:
# Zero the parameter gradients
self.optimizer.zero_grad()
# Forward pass
outputs = self.model(inputs)
# Calculate loss
if self.output_size == 1:
loss = self.criterion(outputs, targets.unsqueeze(1))
else:
loss = self.criterion(outputs, targets)
# Backward pass and optimize
loss.backward()
self.optimizer.step()
# Statistics
running_loss += loss.item()
if self.output_size > 1:
_, predicted = torch.max(outputs, 1)
total += targets.size(0)
correct += (predicted == targets).sum().item()
epoch_loss = running_loss / len(train_loader)
epoch_acc = correct / total if total > 0 else 0
# Validation phase
if val_loader is not None:
val_loss, val_acc = self._validate(val_loader)
logger.info(f"Epoch {epoch+1}/{epochs} - "
f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")
# Update history
self.history['loss'].append(epoch_loss)
self.history['accuracy'].append(epoch_acc)
self.history['val_loss'].append(val_loss)
self.history['val_accuracy'].append(val_acc)
else:
logger.info(f"Epoch {epoch+1}/{epochs} - "
f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f}")
# Update history without validation
self.history['loss'].append(epoch_loss)
self.history['accuracy'].append(epoch_acc)
logger.info("Training completed")
return self.history
def _validate(self, val_loader):
"""Validate the model using the validation set"""
self.model.eval()
val_loss = 0.0
correct = 0
total = 0
with torch.no_grad():
for inputs, targets in val_loader:
# Forward pass
outputs = self.model(inputs)
# Calculate loss
if self.output_size == 1:
loss = self.criterion(outputs, targets.unsqueeze(1))
else:
loss = self.criterion(outputs, targets)
val_loss += loss.item()
# Calculate accuracy
if self.output_size > 1:
_, predicted = torch.max(outputs, 1)
total += targets.size(0)
correct += (predicted == targets).sum().item()
return val_loss / len(val_loader), correct / total if total > 0 else 0
def evaluate(self, X_test, y_test):
"""
Evaluate the model on test data.
Args:
X_test: Test input data
y_test: Test target data
Returns:
dict: Evaluation metrics
"""
logger.info(f"Evaluating model on {len(X_test)} samples")
# Convert to PyTorch tensors
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(self.device)
# Get predictions
self.model.eval()
with torch.no_grad():
y_pred = self.model(X_test_tensor)
if self.output_size > 1:
_, y_pred_class = torch.max(y_pred, 1)
y_pred_class = y_pred_class.cpu().numpy()
else:
y_pred_class = (y_pred.cpu().numpy() > 0.5).astype(int).flatten()
# Calculate metrics
if self.output_size > 1:
accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class, average='weighted')
recall = recall_score(y_test, y_pred_class, average='weighted')
f1 = f1_score(y_test, y_pred_class, average='weighted')
metrics = {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1_score': f1
}
else:
accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class)
recall = recall_score(y_test, y_pred_class)
f1 = f1_score(y_test, y_pred_class)
metrics = {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1_score': f1
}
logger.info(f"Evaluation metrics: {metrics}")
return metrics
def predict(self, X):
"""
Make predictions with the model.
Args:
X: Input data
Returns:
Predictions
"""
# Convert to PyTorch tensor
X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
# Get predictions
self.model.eval()
with torch.no_grad():
predictions = self.model(X_tensor)
if self.output_size > 1:
# Multi-class classification
probs = predictions.cpu().numpy()
_, class_preds = torch.max(predictions, 1)
class_preds = class_preds.cpu().numpy()
return class_preds, probs
else:
# Binary classification or regression
preds = predictions.cpu().numpy()
if self.output_size == 1:
# Binary classification
class_preds = (preds > 0.5).astype(int)
return class_preds.flatten(), preds.flatten()
else:
# Regression
return preds.flatten(), None
def save(self, filepath):
"""
Save the model to a file.
Args:
filepath: Path to save the model
"""
# Create directory if it doesn't exist
os.makedirs(os.path.dirname(filepath), exist_ok=True)
# Save the model state
model_state = {
'model_state_dict': self.model.state_dict(),
'optimizer_state_dict': self.optimizer.state_dict(),
'history': self.history,
'window_size': self.window_size,
'num_features': self.num_features,
'output_size': self.output_size,
'timeframes': self.timeframes
}
torch.save(model_state, f"{filepath}.pt")
logger.info(f"Model saved to {filepath}.pt")
def load(self, filepath):
"""
Load the model from a file.
Args:
filepath: Path to load the model from
"""
# Check if file exists
if not os.path.exists(f"{filepath}.pt"):
logger.error(f"Model file {filepath}.pt not found")
return False
# Load the model state
model_state = torch.load(f"{filepath}.pt", map_location=self.device)
# Update model parameters
self.window_size = model_state['window_size']
self.num_features = model_state['num_features']
self.output_size = model_state['output_size']
self.timeframes = model_state['timeframes']
# Rebuild the model
self.build_model()
# Load the model state
self.model.load_state_dict(model_state['model_state_dict'])
self.optimizer.load_state_dict(model_state['optimizer_state_dict'])
self.history = model_state['history']
logger.info(f"Model loaded from {filepath}.pt")
return True
class MixtureOfExpertsModelPyTorch:
"""
Mixture of Experts model implementation using PyTorch.
This model combines predictions from multiple models (experts) using a
learned weighting scheme.
"""
def __init__(self, output_size=3, timeframes=None):
"""
Initialize the Mixture of Experts model.
Args:
output_size (int): Size of the output (1 for regression, 3 for classification)
timeframes (list): List of timeframes used (for logging)
"""
self.output_size = output_size
self.timeframes = timeframes or []
self.experts = {}
self.expert_weights = {}
# Determine device (GPU or CPU)
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device: {self.device}")
# Initialize model and training history
self.model = None
self.history = {
'loss': [],
'val_loss': [],
'accuracy': [],
'val_accuracy': []
}
def add_expert(self, name, model):
"""
Add an expert model.
Args:
name (str): Name of the expert
model: Expert model
"""
self.experts[name] = model
logger.info(f"Added expert: {name}")
def predict(self, X):
"""
Make predictions using all experts and combine them.
Args:
X: Input data
Returns:
Combined predictions
"""
if not self.experts:
logger.error("No experts added to the MoE model")
return None
# Get predictions from each expert
expert_predictions = {}
for name, expert in self.experts.items():
pred, _ = expert.predict(X)
expert_predictions[name] = pred
# Combine predictions based on weights
final_pred = None
for name, pred in expert_predictions.items():
weight = self.expert_weights.get(name, 1.0 / len(self.experts))
if final_pred is None:
final_pred = weight * pred
else:
final_pred += weight * pred
# For classification, convert to class indices
if self.output_size > 1:
# Get class with highest probability
class_pred = np.argmax(final_pred, axis=1)
return class_pred, final_pred
else:
# Binary classification
class_pred = (final_pred > 0.5).astype(int)
return class_pred, final_pred
def evaluate(self, X_test, y_test):
"""
Evaluate the model on test data.
Args:
X_test: Test input data
y_test: Test target data
Returns:
dict: Evaluation metrics
"""
logger.info(f"Evaluating MoE model on {len(X_test)} samples")
# Get predictions
y_pred_class, _ = self.predict(X_test)
# Calculate metrics
if self.output_size > 1:
accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class, average='weighted')
recall = recall_score(y_test, y_pred_class, average='weighted')
f1 = f1_score(y_test, y_pred_class, average='weighted')
metrics = {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1_score': f1
}
else:
accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class)
recall = recall_score(y_test, y_pred_class)
f1 = f1_score(y_test, y_pred_class)
metrics = {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1_score': f1
}
logger.info(f"MoE evaluation metrics: {metrics}")
return metrics
def save(self, filepath):
"""
Save the model weights to a file.
Args:
filepath: Path to save the model
"""
# Create directory if it doesn't exist
os.makedirs(os.path.dirname(filepath), exist_ok=True)
# Save the model state
model_state = {
'expert_weights': self.expert_weights,
'output_size': self.output_size,
'timeframes': self.timeframes
}
torch.save(model_state, f"{filepath}_moe.pt")
logger.info(f"MoE model saved to {filepath}_moe.pt")
def load(self, filepath):
"""
Load the model from a file.
Args:
filepath: Path to load the model from
"""
# Check if file exists
if not os.path.exists(f"{filepath}_moe.pt"):
logger.error(f"MoE model file {filepath}_moe.pt not found")
return False
# Load the model state
model_state = torch.load(f"{filepath}_moe.pt", map_location=self.device)
# Update model parameters
self.expert_weights = model_state['expert_weights']
self.output_size = model_state['output_size']
self.timeframes = model_state['timeframes']
logger.info(f"MoE model loaded from {filepath}_moe.pt")
return True