#!/usr/bin/env python3
|
|
"""
Transformer Model - PyTorch Implementation

This module implements a Transformer-based model in PyTorch for time series
analysis. It provides a Transformer encoder network, a wrapper class for
training, evaluation and persistence, and a Mixture of Experts model that
combines the predictions of several expert models.
"""
|
|
|
|
import os
|
|
import logging
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
from datetime import datetime
|
|
|
|
import torch
|
|
import torch.nn as nn
|
|
import torch.optim as optim
|
|
from torch.utils.data import DataLoader, TensorDataset
|
|
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
|
|
|
# Configure logging
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class TransformerBlock(nn.Module):
    """Transformer encoder block: self-attention plus a feed-forward network.

    Each sub-layer is wrapped in dropout, a residual connection and a layer
    normalisation applied after the residual sum.

    Args:
        input_dim (int): Size of the last (feature) dimension of the input;
            also used as the attention embedding dimension.
        num_heads (int): Requested number of attention heads. If it does not
            evenly divide ``input_dim`` the largest smaller divisor is used
            instead and a warning is emitted.
        ff_dim (int): Hidden width of the feed-forward network.
        dropout (float): Dropout probability used inside attention and on
            both sub-layer outputs.
    """

    def __init__(self, input_dim, num_heads=4, ff_dim=64, dropout=0.1):
        super().__init__()

        # nn.MultiheadAttention rejects head counts that do not divide the
        # embedding dimension. Because the embedding dimension here is the raw
        # feature count, fall back to a valid head count instead of crashing.
        usable_heads = self._largest_valid_head_count(input_dim, num_heads)
        if usable_heads != num_heads:
            import warnings

            warnings.warn(
                f"num_heads={num_heads} does not divide input_dim={input_dim}; "
                f"using num_heads={usable_heads} instead",
                stacklevel=2,
            )

        self.attention = nn.MultiheadAttention(
            embed_dim=input_dim,
            num_heads=usable_heads,
            dropout=dropout,
            batch_first=True,
        )

        self.feed_forward = nn.Sequential(
            nn.Linear(input_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, input_dim),
        )

        self.layernorm1 = nn.LayerNorm(input_dim)
        self.layernorm2 = nn.LayerNorm(input_dim)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

    @staticmethod
    def _largest_valid_head_count(input_dim, num_heads):
        """Return the largest head count <= ``num_heads`` that divides ``input_dim``.

        When no candidate exists (non-positive arguments) the requested value
        is returned unchanged so that PyTorch reports the error itself.
        """
        for heads in range(min(num_heads, input_dim), 0, -1):
            if input_dim % heads == 0:
                return heads
        return num_heads

    def forward(self, x):
        """Apply the block to ``x`` and return a tensor of the same shape.

        Args:
            x: Input tensor; the attention layer is built with
                ``batch_first=True`` and ``embed_dim=input_dim``, so the last
                dimension must equal ``input_dim``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        # Self-attention sub-layer (query, key and value are all ``x``).
        attn_output, _ = self.attention(x, x, x)
        x = x + self.dropout1(attn_output)
        x = self.layernorm1(x)

        # Position-wise feed-forward sub-layer.
        ff_output = self.feed_forward(x)
        x = x + self.dropout2(ff_output)
        x = self.layernorm2(x)

        return x
|
|
|
|
class TransformerModelPyTorch(nn.Module):
    """Transformer network for windows of time series data.

    The input is offset by a learnable positional tensor, passed through a
    stack of ``TransformerBlock`` layers, averaged over the window dimension
    and mapped to ``output_size`` values by a small dense head.
    """

    def __init__(self, input_shape, output_size=3, num_heads=4, ff_dim=64, num_transformer_blocks=2):
        """
        Build the network layers.

        Args:
            input_shape (tuple): ``(window_size, features)`` of one sample.
            output_size (int): Number of output units. A sigmoid is applied
                when it is 1, a softmax over dim 1 when it is greater than 1,
                and no activation otherwise.
            num_heads (int): Attention heads per transformer block.
            ff_dim (int): Feed-forward width inside each transformer block.
            num_transformer_blocks (int): Number of stacked transformer blocks.
        """
        super().__init__()

        seq_len, feature_dim = input_shape

        # Learnable positional offsets, initialised to zero and broadcast
        # over the batch dimension in ``forward``.
        self.pos_encoding = nn.Parameter(
            torch.zeros(1, seq_len, feature_dim),
            requires_grad=True,
        )

        # Encoder stack.
        encoder_layers = []
        for _ in range(num_transformer_blocks):
            encoder_layers.append(
                TransformerBlock(
                    input_dim=feature_dim,
                    num_heads=num_heads,
                    ff_dim=ff_dim,
                )
            )
        self.transformer_blocks = nn.ModuleList(encoder_layers)

        # Collapses the window dimension to a single averaged step.
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)

        # Dense head producing ``output_size`` values per sample.
        self.dense = nn.Sequential(
            nn.Linear(feature_dim, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Dropout(0.3),
            nn.Linear(64, output_size),
        )

        # Output activation depends on the number of output units.
        if output_size > 1:
            self.activation = nn.Softmax(dim=1)
        elif output_size == 1:
            self.activation = nn.Sigmoid()
        else:
            self.activation = nn.Identity()

    def forward(self, x):
        """
        Run a batch through the network.

        Args:
            x: Tensor of shape [batch_size, window_size, features].

        Returns:
            Tensor of shape [batch_size, output_size] after the output
            activation.
        """
        hidden = x + self.pos_encoding

        for block in self.transformer_blocks:
            hidden = block(hidden)

        # [batch, window, features] -> [batch, features, window] -> [batch, features]
        pooled = self.global_avg_pool(hidden.transpose(1, 2)).squeeze(-1)

        return self.activation(self.dense(pooled))
|
|
|
|
|
|
class _ProbabilityNLLLoss(nn.Module):
    """Negative log-likelihood loss for inputs that are already probabilities.

    ``TransformerModelPyTorch`` applies a softmax when ``output_size > 1``.
    ``nn.CrossEntropyLoss`` expects raw logits and would apply a second
    (log-)softmax, so the multi-class loss is computed on ``log(p)`` instead.
    """

    def __init__(self, eps=1e-8):
        super().__init__()
        # Lower bound applied before the log to avoid log(0) = -inf.
        self.eps = eps

    def forward(self, probabilities, targets):
        log_probs = torch.log(probabilities.clamp_min(self.eps))
        return nn.functional.nll_loss(log_probs, targets)


class TransformerModelPyTorchWrapper:
    """
    Transformer model wrapper class for time series analysis using PyTorch.

    This class provides methods for building, training, evaluating, making
    predictions with, saving and loading the Transformer model.
    """

    # Number of samples per forward pass during inference; bounds peak memory
    # instead of pushing the whole data set through the network at once.
    _INFERENCE_BATCH_SIZE = 256

    def __init__(self, window_size, num_features, output_size=3, timeframes=None):
        """
        Initialize the wrapper and build the underlying model.

        Args:
            window_size (int): Size of the input window
            num_features (int): Number of features in the input data
            output_size (int): Number of model outputs (1 for binary
                classification, more than 1 for multi-class classification)
            timeframes (list): List of timeframes used (stored and saved with
                the model)
        """
        self.window_size = window_size
        self.num_features = num_features
        self.output_size = output_size
        self.timeframes = timeframes or []

        # Determine device (GPU or CPU)
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {self.device}")

        # Initialize model
        self.model = None
        self.build_model()

        # Initialize training history
        self.history = {
            'loss': [],
            'val_loss': [],
            'accuracy': [],
            'val_accuracy': []
        }

    def build_model(self):
        """Build the network, its Adam optimizer and the matching loss function."""
        logger.info(f"Building PyTorch Transformer model with window_size={self.window_size}, "
                    f"num_features={self.num_features}, output_size={self.output_size}")

        self.model = TransformerModelPyTorch(
            input_shape=(self.window_size, self.num_features),
            output_size=self.output_size
        ).to(self.device)

        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)

        if self.output_size == 1:
            # Sigmoid output -> binary cross-entropy on probabilities.
            self.criterion = nn.BCELoss()
        elif self.output_size > 1:
            # Softmax output -> NLL on log-probabilities. CrossEntropyLoss
            # would apply a second softmax on top of the model's own.
            self.criterion = _ProbabilityNLLLoss()
        else:
            self.criterion = nn.MSELoss()

        logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")

    def _make_loader(self, X, y, batch_size, training):
        """Wrap arrays in a DataLoader on ``self.device``.

        Targets are float32 when ``output_size == 1`` and int64 otherwise.
        Training loaders are shuffled.
        """
        target_dtype = torch.float32 if self.output_size == 1 else torch.long
        features = torch.tensor(X, dtype=torch.float32).to(self.device)
        targets = torch.tensor(y, dtype=target_dtype).to(self.device)
        dataset = TensorDataset(features, targets)

        # The network contains a BatchNorm1d layer, which raises in training
        # mode when a batch holds a single sample. Drop a trailing batch of
        # size one rather than abort the epoch.
        drop_last = (
            training
            and batch_size > 1
            and len(dataset) > batch_size
            and len(dataset) % batch_size == 1
        )
        if drop_last:
            logger.info("Dropping trailing single-sample training batch")

        return DataLoader(dataset, batch_size=batch_size, shuffle=training, drop_last=drop_last)

    def _compute_loss(self, outputs, targets):
        """Apply ``self.criterion``, giving binary targets a column shape."""
        if self.output_size == 1:
            # Model output is [batch, 1]; accept targets shaped [batch] or [batch, 1].
            targets = targets.reshape(-1, 1)
        return self.criterion(outputs, targets)

    def _count_correct(self, outputs, targets):
        """Return the number of correct class predictions in a batch."""
        if self.output_size > 1:
            predicted = outputs.argmax(dim=1)
        else:
            # Binary case: threshold the sigmoid output at 0.5.
            predicted = (outputs.reshape(-1) > 0.5).to(targets.dtype)
        return (predicted == targets.reshape(-1)).sum().item()

    def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
        """
        Train the Transformer model.

        Args:
            X_train: Training input data
            y_train: Training target data
            X_val: Validation input data (optional)
            y_val: Validation target data (optional)
            batch_size: Batch size for training
            epochs: Number of training epochs

        Returns:
            dict: Training history with 'loss' and 'accuracy' per epoch, plus
            'val_loss' and 'val_accuracy' when validation data is given.
        """
        logger.info(f"Training PyTorch Transformer model with {len(X_train)} samples, "
                    f"batch_size={batch_size}, epochs={epochs}")

        train_loader = self._make_loader(X_train, y_train, batch_size, training=True)

        # An empty validation set is treated the same as no validation set.
        val_loader = None
        if X_val is not None and y_val is not None and len(X_val) > 0:
            val_loader = self._make_loader(X_val, y_val, batch_size, training=False)

        for epoch in range(epochs):
            # Training phase
            self.model.train()
            running_loss = 0.0
            correct = 0
            total = 0

            for inputs, targets in train_loader:
                self.optimizer.zero_grad()

                outputs = self.model(inputs)
                loss = self._compute_loss(outputs, targets)

                loss.backward()
                self.optimizer.step()

                running_loss += loss.item()
                if self.output_size >= 1:
                    total += targets.size(0)
                    correct += self._count_correct(outputs, targets)

            epoch_loss = running_loss / len(train_loader)
            epoch_acc = correct / total if total > 0 else 0

            if val_loader is not None:
                val_loss, val_acc = self._validate(val_loader)

                logger.info(f"Epoch {epoch+1}/{epochs} - "
                            f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
                            f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")

                self.history['loss'].append(epoch_loss)
                self.history['accuracy'].append(epoch_acc)
                self.history['val_loss'].append(val_loss)
                self.history['val_accuracy'].append(val_acc)
            else:
                logger.info(f"Epoch {epoch+1}/{epochs} - "
                            f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f}")

                self.history['loss'].append(epoch_loss)
                self.history['accuracy'].append(epoch_acc)

        logger.info("Training completed")
        return self.history

    def _validate(self, val_loader):
        """Return ``(mean batch loss, accuracy)`` over the validation loader."""
        self.model.eval()
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = self.model(inputs)
                val_loss += self._compute_loss(outputs, targets).item()

                if self.output_size >= 1:
                    total += targets.size(0)
                    correct += self._count_correct(outputs, targets)

        num_batches = len(val_loader)
        mean_loss = val_loss / num_batches if num_batches else 0.0
        accuracy = correct / total if total > 0 else 0
        return mean_loss, accuracy

    def _predict_proba(self, X):
        """Run the model in eval mode over ``X`` and return a NumPy array.

        The data is processed in chunks of ``_INFERENCE_BATCH_SIZE`` so that
        large inputs do not have to fit on the device in one piece.
        """
        inputs = torch.tensor(X, dtype=torch.float32)

        self.model.eval()
        chunks = []
        with torch.no_grad():
            for start in range(0, inputs.shape[0], self._INFERENCE_BATCH_SIZE):
                batch = inputs[start:start + self._INFERENCE_BATCH_SIZE].to(self.device)
                chunks.append(self.model(batch).cpu())

        if not chunks:
            # No samples: return an empty array with the usual column count.
            return np.empty((0, max(self.output_size, 0)), dtype=np.float32)
        return torch.cat(chunks).numpy()

    def evaluate(self, X_test, y_test):
        """
        Evaluate the model on test data.

        Args:
            X_test: Test input data
            y_test: Test target data

        Returns:
            dict: 'accuracy', 'precision', 'recall' and 'f1_score'. Precision,
            recall and F1 are weighted averages when ``output_size > 1`` and
            binary scores otherwise.
        """
        logger.info(f"Evaluating model on {len(X_test)} samples")

        y_pred = self._predict_proba(X_test)

        if self.output_size > 1:
            y_pred_class = y_pred.argmax(axis=1)
            average = 'weighted'
        else:
            y_pred_class = (y_pred > 0.5).astype(int).flatten()
            average = 'binary'  # scikit-learn's default averaging mode

        metrics = {
            'accuracy': accuracy_score(y_test, y_pred_class),
            'precision': precision_score(y_test, y_pred_class, average=average),
            'recall': recall_score(y_test, y_pred_class, average=average),
            'f1_score': f1_score(y_test, y_pred_class, average=average)
        }

        logger.info(f"Evaluation metrics: {metrics}")
        return metrics

    def predict(self, X):
        """
        Make predictions with the model.

        Args:
            X: Input data

        Returns:
            tuple: ``(class_predictions, probabilities)``. For
            ``output_size > 1`` the probabilities have one column per class;
            for ``output_size == 1`` both arrays are flattened and classes are
            obtained by thresholding at 0.5. For any other ``output_size`` the
            flattened raw outputs and ``None`` are returned.
        """
        preds = self._predict_proba(X)

        if self.output_size > 1:
            return preds.argmax(axis=1), preds

        if self.output_size == 1:
            class_preds = (preds > 0.5).astype(int)
            return class_preds.flatten(), preds.flatten()

        return preds.flatten(), None

    def save(self, filepath):
        """
        Save the model to ``<filepath>.pt``.

        Stores the model and optimizer state, the training history and the
        constructor arguments needed to rebuild the model.

        Args:
            filepath: Path (without the ``.pt`` suffix) to save the model to
        """
        # os.makedirs('') raises, so only create a directory when the path
        # actually has a directory component.
        directory = os.path.dirname(filepath)
        if directory:
            os.makedirs(directory, exist_ok=True)

        model_state = {
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'history': self.history,
            'window_size': self.window_size,
            'num_features': self.num_features,
            'output_size': self.output_size,
            'timeframes': self.timeframes
        }

        torch.save(model_state, f"{filepath}.pt")
        logger.info(f"Model saved to {filepath}.pt")

    def load(self, filepath):
        """
        Load the model from ``<filepath>.pt``.

        The model is rebuilt from the stored window size, feature count and
        output size before the saved weights and optimizer state are restored.

        Args:
            filepath: Path (without the ``.pt`` suffix) to load the model from

        Returns:
            bool: True on success, False when the file does not exist.
        """
        if not os.path.exists(f"{filepath}.pt"):
            logger.error(f"Model file {filepath}.pt not found")
            return False

        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from trusted sources.
        model_state = torch.load(f"{filepath}.pt", map_location=self.device)

        self.window_size = model_state['window_size']
        self.num_features = model_state['num_features']
        self.output_size = model_state['output_size']
        self.timeframes = model_state['timeframes']

        # Rebuild the architecture for the stored configuration.
        self.build_model()

        self.model.load_state_dict(model_state['model_state_dict'])
        self.optimizer.load_state_dict(model_state['optimizer_state_dict'])
        self.history = model_state['history']

        logger.info(f"Model loaded from {filepath}.pt")
        return True
|
|
|
|
class MixtureOfExpertsModelPyTorch:
    """
    Mixture of Experts model implementation using PyTorch.

    This model combines predictions from multiple models (experts) as a
    weighted average. Weights are read from ``expert_weights``; experts
    without an entry get an equal share.
    """

    def __init__(self, output_size=3, timeframes=None):
        """
        Initialize the Mixture of Experts model.

        Args:
            output_size (int): Number of outputs (1 for binary classification,
                more than 1 for multi-class classification)
            timeframes (list): List of timeframes used (stored and saved with
                the model)
        """
        self.output_size = output_size
        self.timeframes = timeframes or []
        self.experts = {}
        self.expert_weights = {}

        # Determine device (GPU or CPU)
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {self.device}")

        # Initialize model and training history
        self.model = None
        self.history = {
            'loss': [],
            'val_loss': [],
            'accuracy': [],
            'val_accuracy': []
        }

    def add_expert(self, name, model):
        """
        Add an expert model.

        Args:
            name (str): Name of the expert
            model: Expert model; must expose ``predict(X)`` returning a
                ``(class_predictions, probabilities)`` pair
        """
        self.experts[name] = model
        logger.info(f"Added expert: {name}")

    def predict(self, X):
        """
        Make predictions using all experts and combine them.

        Each expert's probabilities (second element of its ``predict``
        result) are averaged using the normalised expert weights. If an
        expert returns ``None`` as probabilities, its class predictions are
        used instead (one-hot encoded in the multi-class case).

        Args:
            X: Input data

        Returns:
            tuple: ``(class_predictions, combined_scores)``, or ``None`` when
            no experts have been added.
        """
        if not self.experts:
            logger.error("No experts added to the MoE model")
            return None

        multi_class = self.output_size > 1
        default_weight = 1.0 / len(self.experts)

        final_pred = None
        total_weight = 0.0
        for name, expert in self.experts.items():
            labels, scores = expert.predict(X)

            if scores is not None:
                # Blend probabilities, not hard labels: averaging class
                # indices is meaningless and breaks argmax over axis 1.
                contribution = np.asarray(scores, dtype=float)
            else:
                contribution = np.asarray(labels)
                if multi_class and contribution.ndim == 1:
                    # Hard labels only: turn them into one-hot votes.
                    contribution = np.eye(self.output_size)[contribution.astype(int)]
                contribution = contribution.astype(float)

            weight = self.expert_weights.get(name, default_weight)
            total_weight += weight
            if final_pred is None:
                final_pred = weight * contribution
            else:
                final_pred = final_pred + weight * contribution

        # Normalise so the combination stays a weighted average even when the
        # configured weights do not sum to one.
        if total_weight > 0:
            final_pred = final_pred / total_weight

        if multi_class:
            # Class with the highest combined probability.
            class_pred = np.argmax(final_pred, axis=1)
        else:
            # Binary classification: threshold the combined probability.
            class_pred = (final_pred > 0.5).astype(int)
        return class_pred, final_pred

    def evaluate(self, X_test, y_test):
        """
        Evaluate the model on test data.

        Args:
            X_test: Test input data
            y_test: Test target data

        Returns:
            dict: 'accuracy', 'precision', 'recall' and 'f1_score'. Precision,
            recall and F1 are weighted averages when ``output_size > 1`` and
            binary scores otherwise.

        Raises:
            ValueError: If no experts have been added.
        """
        logger.info(f"Evaluating MoE model on {len(X_test)} samples")

        prediction = self.predict(X_test)
        if prediction is None:
            raise ValueError("Cannot evaluate a MoE model without experts")
        y_pred_class, _ = prediction

        # 'binary' is scikit-learn's default averaging mode.
        average = 'weighted' if self.output_size > 1 else 'binary'
        metrics = {
            'accuracy': accuracy_score(y_test, y_pred_class),
            'precision': precision_score(y_test, y_pred_class, average=average),
            'recall': recall_score(y_test, y_pred_class, average=average),
            'f1_score': f1_score(y_test, y_pred_class, average=average)
        }

        logger.info(f"MoE evaluation metrics: {metrics}")
        return metrics

    def save(self, filepath):
        """
        Save the expert weights and settings to ``<filepath>_moe.pt``.

        The expert models themselves are not stored.

        Args:
            filepath: Path (without the ``_moe.pt`` suffix) to save to
        """
        # os.makedirs('') raises, so only create a directory when the path
        # actually has a directory component.
        directory = os.path.dirname(filepath)
        if directory:
            os.makedirs(directory, exist_ok=True)

        model_state = {
            'expert_weights': self.expert_weights,
            'output_size': self.output_size,
            'timeframes': self.timeframes
        }

        torch.save(model_state, f"{filepath}_moe.pt")
        logger.info(f"MoE model saved to {filepath}_moe.pt")

    def load(self, filepath):
        """
        Load the expert weights and settings from ``<filepath>_moe.pt``.

        Args:
            filepath: Path (without the ``_moe.pt`` suffix) to load from

        Returns:
            bool: True on success, False when the file does not exist.
        """
        if not os.path.exists(f"{filepath}_moe.pt"):
            logger.error(f"MoE model file {filepath}_moe.pt not found")
            return False

        # NOTE(review): torch.load unpickles the file; only load files from
        # trusted sources.
        model_state = torch.load(f"{filepath}_moe.pt", map_location=self.device)

        self.expert_weights = model_state['expert_weights']
        self.output_size = model_state['output_size']
        self.timeframes = model_state['timeframes']

        logger.info(f"MoE model loaded from {filepath}_moe.pt")
        return True