new nn wip
This commit is contained in:
560
NN/models/cnn_model.py
Normal file
560
NN/models/cnn_model.py
Normal file
@@ -0,0 +1,560 @@
|
||||
"""
|
||||
Convolutional Neural Network for timeseries analysis
|
||||
|
||||
This module implements a deep CNN model for cryptocurrency price analysis.
|
||||
The model uses multiple parallel convolutional pathways and LSTM layers
|
||||
to detect patterns at different time scales.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras.models import Model, load_model
|
||||
from tensorflow.keras.layers import (
|
||||
Input, Conv1D, MaxPooling1D, Dense, Dropout, BatchNormalization,
|
||||
LSTM, Bidirectional, Flatten, Concatenate, GlobalAveragePooling1D,
|
||||
LeakyReLU, Attention
|
||||
)
|
||||
from tensorflow.keras.optimizers import Adam
|
||||
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
|
||||
from tensorflow.keras.metrics import AUC
|
||||
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
|
||||
import datetime
|
||||
import json
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class CNNModel:
    """
    Convolutional Neural Network for time series analysis.

    This model uses a multi-pathway architecture with different filter sizes
    to detect patterns at different time scales, combined with LSTM layers
    for temporal dependencies.
    """

    def __init__(self, input_shape=(20, 5), output_size=1, model_dir="NN/models/saved"):
        """
        Initialize the CNN model.

        Args:
            input_shape (tuple): Shape of input data (sequence_length, features)
            output_size (int): 1 builds a sigmoid (binary) head, 3 builds a
                softmax (buy/hold/sell) head, any other value builds a linear
                regression head
            model_dir (str): Directory to save trained models
        """
        self.input_shape = input_shape
        self.output_size = output_size
        self.model_dir = model_dir
        self.model = None
        self.history = None

        # Create model directory if it doesn't exist
        os.makedirs(self.model_dir, exist_ok=True)

        logger.info(f"Initialized CNN model with input shape {input_shape} and output size {output_size}")

    def build_model(self, filters=(32, 64, 128), kernel_sizes=(3, 5, 7),
                    dropout_rate=0.3, learning_rate=0.001):
        """
        Build the CNN model architecture.

        Args:
            filters (tuple): Number of filters for each convolutional pathway
            kernel_sizes (tuple): Kernel sizes for each convolutional pathway
            dropout_rate (float): Dropout rate for regularization
            learning_rate (float): Learning rate for Adam optimizer

        Returns:
            The compiled model
        """
        # Input layer
        inputs = Input(shape=self.input_shape)

        # Multiple parallel convolutional pathways with different kernel sizes
        # to capture patterns at different time scales. Pathways are paired
        # positionally; zip() stops at the shorter of the two tuples.
        conv_layers = []

        for i, (filter_size, kernel_size) in enumerate(zip(filters, kernel_sizes)):
            conv_path = Conv1D(
                filters=filter_size,
                kernel_size=kernel_size,
                padding='same',
                name=f'conv1d_{i+1}'
            )(inputs)
            conv_path = BatchNormalization()(conv_path)
            conv_path = LeakyReLU(alpha=0.1)(conv_path)
            conv_path = MaxPooling1D(pool_size=2, padding='same')(conv_path)
            conv_path = Dropout(dropout_rate)(conv_path)
            conv_layers.append(conv_path)

        # Merge convolutional pathways
        if len(conv_layers) > 1:
            merged = Concatenate()(conv_layers)
        else:
            merged = conv_layers[0]

        # Add another Conv1D layer after merging
        x = Conv1D(filters=filters[-1], kernel_size=3, padding='same')(merged)
        x = BatchNormalization()(x)
        x = LeakyReLU(alpha=0.1)(x)
        x = MaxPooling1D(pool_size=2, padding='same')(x)
        x = Dropout(dropout_rate)(x)

        # Bidirectional LSTM for temporal dependencies
        x = Bidirectional(LSTM(128, return_sequences=True))(x)
        x = Dropout(dropout_rate)(x)

        # Second bidirectional LSTM (no attention layer is applied here; the
        # sequence is summarised by the average pooling that follows)
        x = Bidirectional(LSTM(64, return_sequences=True))(x)

        # Global average pooling to reduce parameters
        x = GlobalAveragePooling1D()(x)
        x = Dropout(dropout_rate)(x)

        # Dense layers for final classification/regression
        x = Dense(64, activation='relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(dropout_rate)(x)

        # Output layer
        if self.output_size == 1:
            # Binary classification (up/down)
            outputs = Dense(1, activation='sigmoid', name='output')(x)
            loss = 'binary_crossentropy'
            metrics = ['accuracy', AUC()]
        elif self.output_size == 3:
            # Multi-class classification (buy/hold/sell)
            outputs = Dense(3, activation='softmax', name='output')(x)
            loss = 'categorical_crossentropy'
            metrics = ['accuracy']
        else:
            # Regression
            outputs = Dense(self.output_size, activation='linear', name='output')(x)
            loss = 'mse'
            metrics = ['mae']

        # Create and compile model
        self.model = Model(inputs=inputs, outputs=outputs)

        # Compile with Adam optimizer
        self.model.compile(
            optimizer=Adam(learning_rate=learning_rate),
            loss=loss,
            metrics=metrics
        )

        # Log model summary
        self.model.summary(print_fn=lambda x: logger.info(x))

        return self.model

    def train(self, X_train, y_train, batch_size=32, epochs=100, validation_split=0.2,
              callbacks=None, class_weights=None):
        """
        Train the CNN model on the provided data.

        Builds the model with default hyper-parameters if it has not been
        built yet. After fitting, the final model and the training history
        are written to ``model_dir``.

        Args:
            X_train (numpy.ndarray): Training features
            y_train (numpy.ndarray): Training targets
            batch_size (int): Batch size
            epochs (int): Number of epochs
            validation_split (float): Fraction of data to use for validation
            callbacks (list): List of Keras callbacks; when None, early
                stopping, LR reduction and best-model checkpointing are used
            class_weights (dict): Class weights for imbalanced datasets

        Returns:
            History object containing training metrics
        """
        if self.model is None:
            self.build_model()

        # Default callbacks if none provided
        if callbacks is None:
            # Create a timestamp for model checkpoints
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

            callbacks = [
                EarlyStopping(
                    monitor='val_loss',
                    patience=10,
                    restore_best_weights=True
                ),
                ReduceLROnPlateau(
                    monitor='val_loss',
                    factor=0.5,
                    patience=5,
                    min_lr=1e-6
                ),
                ModelCheckpoint(
                    filepath=os.path.join(self.model_dir, f"cnn_model_{timestamp}.h5"),
                    monitor='val_loss',
                    save_best_only=True
                )
            ]

        # Check if y_train needs to be one-hot encoded for multi-class
        if self.output_size == 3 and len(y_train.shape) == 1:
            y_train = tf.keras.utils.to_categorical(y_train, num_classes=3)

        # Train the model
        logger.info(f"Training CNN model with {len(X_train)} samples, batch size {batch_size}, epochs {epochs}")
        self.history = self.model.fit(
            X_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_split=validation_split,
            callbacks=callbacks,
            class_weight=class_weights,
            verbose=2
        )

        # Save the trained model
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        model_path = os.path.join(self.model_dir, f"cnn_model_final_{timestamp}.h5")
        self.model.save(model_path)
        logger.info(f"Model saved to {model_path}")

        # Save training history
        history_path = os.path.join(self.model_dir, f"cnn_model_history_{timestamp}.json")
        with open(history_path, 'w') as f:
            # Convert numpy values to Python native types for JSON serialization
            history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
            json.dump(history_dict, f, indent=2)

        return self.history

    def evaluate(self, X_test, y_test, plot_results=False):
        """
        Evaluate the model on test data.

        Args:
            X_test (numpy.ndarray): Test features
            y_test (numpy.ndarray): Test targets. In 3-class mode either a
                1-D vector of class indices or a one-hot matrix is accepted.
            plot_results (bool): Whether to plot evaluation results

        Returns:
            dict: Evaluation metrics

        Raises:
            ValueError: If the model has not been built or loaded yet
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Keep two views of the targets: the encoding the Keras loss expects
        # (y_eval) and plain class indices for the scikit-learn reports (y_true).
        y_true = np.asarray(y_test)
        y_eval = y_test
        if self.output_size == 3:
            if y_true.ndim == 1:
                y_eval = tf.keras.utils.to_categorical(y_true, num_classes=3)
            else:
                # Already one-hot: scikit-learn cannot mix an indicator matrix
                # with 1-D predictions, so reduce to class indices.
                y_true = np.argmax(y_true, axis=1)

        # Evaluate model
        logger.info(f"Evaluating CNN model on {len(X_test)} samples")
        eval_results = self.model.evaluate(X_test, y_eval, verbose=0)

        metrics = {}
        for metric, value in zip(self.model.metrics_names, eval_results):
            metrics[metric] = value
            logger.info(f"{metric}: {value:.4f}")

        # Get predictions
        y_pred_prob = self.model.predict(X_test)

        # Different processing based on output type
        if self.output_size == 1:
            # Binary classification: flatten so labels, predictions and
            # probabilities are all 1-D and index-aligned.
            y_true = y_true.ravel()
            y_prob = np.asarray(y_pred_prob).ravel()
            y_pred = (y_prob > 0.5).astype(int)

            # Classification report
            report = classification_report(y_true, y_pred)
            logger.info(f"Classification Report:\n{report}")

            # Confusion matrix
            cm = confusion_matrix(y_true, y_pred)
            logger.info(f"Confusion Matrix:\n{cm}")

            # ROC curve and AUC
            fpr, tpr, _ = roc_curve(y_true, y_prob)
            roc_auc = auc(fpr, tpr)
            metrics['auc'] = roc_auc

            if plot_results:
                self._plot_binary_results(y_true, y_pred, y_prob, fpr, tpr, roc_auc)

        elif self.output_size == 3:
            # Multi-class classification
            y_pred = np.argmax(y_pred_prob, axis=1)

            # Classification report
            report = classification_report(y_true, y_pred)
            logger.info(f"Classification Report:\n{report}")

            # Confusion matrix
            cm = confusion_matrix(y_true, y_pred)
            logger.info(f"Confusion Matrix:\n{cm}")

            if plot_results:
                self._plot_multiclass_results(y_true, y_pred, y_pred_prob)

        return metrics

    def predict(self, X):
        """
        Make predictions on new data.

        Args:
            X (numpy.ndarray): Input features; a single 2-D sample is given a
                batch dimension automatically

        Returns:
            tuple: (y_pred, y_proba) where:
                y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
                y_proba is the class probability
                For regression heads both elements are the raw model output.

        Raises:
            ValueError: If the model has not been built or loaded yet
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Ensure X has the right shape
        if len(X.shape) == 2:
            # Single sample, add batch dimension
            X = np.expand_dims(X, axis=0)

        # Get predictions
        y_proba = self.model.predict(X)

        # Process based on output type
        if self.output_size == 1:
            # Binary classification
            y_pred = (y_proba > 0.5).astype(int).flatten()
            return y_pred, y_proba.flatten()
        elif self.output_size == 3:
            # Multi-class classification
            y_pred = np.argmax(y_proba, axis=1)
            return y_pred, y_proba
        else:
            # Regression
            return y_proba, y_proba

    def save(self, filepath=None):
        """
        Save the model to disk.

        Args:
            filepath (str): Path to save the model; defaults to a timestamped
                ``.h5`` file inside ``model_dir``

        Returns:
            str: Path where the model was saved

        Raises:
            ValueError: If the model has not been built yet
        """
        if self.model is None:
            raise ValueError("Model has not been built yet")

        if filepath is None:
            # Create a default filepath with timestamp
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            filepath = os.path.join(self.model_dir, f"cnn_model_{timestamp}.h5")

        self.model.save(filepath)
        logger.info(f"Model saved to {filepath}")
        return filepath

    def load(self, filepath):
        """
        Load a saved model from disk.

        The wrapper's ``output_size`` and ``input_shape`` are refreshed from
        the loaded network so that predict()/evaluate() choose the
        post-processing branch that matches the model actually in use.

        Args:
            filepath (str): Path to the saved model

        Returns:
            The loaded model
        """
        self.model = load_model(filepath)

        # Only single-input / single-output models report plain tuples here;
        # anything else is left untouched.
        output_shape = getattr(self.model, "output_shape", None)
        if isinstance(output_shape, tuple) and output_shape and output_shape[-1] is not None:
            self.output_size = int(output_shape[-1])
        input_shape = getattr(self.model, "input_shape", None)
        if isinstance(input_shape, tuple) and len(input_shape) > 1:
            self.input_shape = tuple(input_shape[1:])

        logger.info(f"Model loaded from {filepath}")
        return self.model

    def extract_hidden_features(self, X):
        """
        Extract features from the last hidden layer of the CNN for transfer learning.

        Args:
            X (numpy.ndarray): Input data

        Returns:
            numpy.ndarray: Extracted features

        Raises:
            ValueError: If the model has not been built or loaded yet
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Create a new model that outputs the features from the layer before the output
        feature_layer_name = self.model.layers[-2].name
        feature_extractor = Model(
            inputs=self.model.input,
            outputs=self.model.get_layer(feature_layer_name).output
        )

        # Extract features
        features = feature_extractor.predict(X)

        return features

    def _plot_binary_results(self, y_true, y_pred, y_proba, fpr, tpr, roc_auc):
        """
        Plot evaluation results for binary classification.

        Args:
            y_true (numpy.ndarray): True labels
            y_pred (numpy.ndarray): Predicted labels
            y_proba (numpy.ndarray): Prediction probabilities
            fpr (numpy.ndarray): False positive rates for ROC curve
            tpr (numpy.ndarray): True positive rates for ROC curve
            roc_auc (float): Area under ROC curve
        """
        # Work on flat arrays so the boolean masks below line up element-wise.
        y_true = np.asarray(y_true).ravel()
        y_pred = np.asarray(y_pred).ravel()
        y_proba = np.asarray(y_proba).ravel()

        plt.figure(figsize=(15, 5))

        # Confusion Matrix
        plt.subplot(1, 3, 1)
        # Pin the label set so the matrix is always 2x2 and matches the ticks,
        # even when one class is absent from this evaluation set.
        cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
        plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
        plt.title('Confusion Matrix')
        plt.colorbar()
        tick_marks = [0, 1]
        plt.xticks(tick_marks, ['0', '1'])
        plt.yticks(tick_marks, ['0', '1'])
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')

        # Add text annotations to confusion matrix
        thresh = cm.max() / 2.
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                plt.text(j, i, format(cm[i, j], 'd'),
                         horizontalalignment="center",
                         color="white" if cm[i, j] > thresh else "black")

        # Histogram of prediction probabilities
        plt.subplot(1, 3, 2)
        plt.hist(y_proba[y_true == 0], alpha=0.5, label='Class 0')
        plt.hist(y_proba[y_true == 1], alpha=0.5, label='Class 1')
        plt.title('Prediction Probabilities')
        plt.xlabel('Probability of Class 1')
        plt.ylabel('Count')
        plt.legend()

        # ROC Curve
        plt.subplot(1, 3, 3)
        plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc:.3f})')
        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic')
        plt.legend(loc="lower right")

        plt.tight_layout()

        # Save figure
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        fig_path = os.path.join(self.model_dir, f"cnn_evaluation_{timestamp}.png")
        plt.savefig(fig_path)
        plt.close()

        logger.info(f"Evaluation plots saved to {fig_path}")

    def _plot_multiclass_results(self, y_true, y_pred, y_proba):
        """
        Plot evaluation results for multi-class classification.

        Args:
            y_true (numpy.ndarray): True labels as class indices
            y_pred (numpy.ndarray): Predicted labels as class indices
            y_proba (numpy.ndarray): Prediction probabilities, one column per class
        """
        y_true = np.asarray(y_true)

        plt.figure(figsize=(12, 5))

        classes = ['BUY', 'HOLD', 'SELL']  # Assumes classes are 0, 1, 2

        # Confusion Matrix
        plt.subplot(1, 2, 1)
        # Pin the label set so the matrix is always 3x3 and matches the ticks,
        # even when a class never occurs in this evaluation set.
        cm = confusion_matrix(y_true, y_pred, labels=list(range(len(classes))))
        plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
        plt.title('Confusion Matrix')
        plt.colorbar()
        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes)
        plt.yticks(tick_marks, classes)
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')

        # Add text annotations to confusion matrix
        thresh = cm.max() / 2.
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                plt.text(j, i, format(cm[i, j], 'd'),
                         horizontalalignment="center",
                         color="white" if cm[i, j] > thresh else "black")

        # Class probability distributions
        plt.subplot(1, 2, 2)
        for i, cls in enumerate(classes):
            # Probability assigned to class i, restricted to samples whose
            # true label is i.
            plt.hist(y_proba[y_true == i, i], alpha=0.5, label=f'Class {cls}')
        plt.title('Class Probability Distributions')
        plt.xlabel('Probability')
        plt.ylabel('Count')
        plt.legend()

        plt.tight_layout()

        # Save figure
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        fig_path = os.path.join(self.model_dir, f"cnn_multiclass_evaluation_{timestamp}.png")
        plt.savefig(fig_path)
        plt.close()

        logger.info(f"Multiclass evaluation plots saved to {fig_path}")

    def plot_training_history(self):
        """
        Plot training history (loss and metrics).

        Returns:
            str: Path to the saved plot

        Raises:
            ValueError: If the model has not been trained yet
        """
        if self.history is None:
            raise ValueError("Model has not been trained yet")

        plt.figure(figsize=(12, 5))

        # Plot loss
        plt.subplot(1, 2, 1)
        plt.plot(self.history.history['loss'], label='Training Loss')
        if 'val_loss' in self.history.history:
            plt.plot(self.history.history['val_loss'], label='Validation Loss')
        plt.title('Model Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()

        # Plot accuracy (classification heads) or MAE (regression heads)
        plt.subplot(1, 2, 2)

        if 'accuracy' in self.history.history:
            plt.plot(self.history.history['accuracy'], label='Training Accuracy')
            if 'val_accuracy' in self.history.history:
                plt.plot(self.history.history['val_accuracy'], label='Validation Accuracy')
            plt.title('Model Accuracy')
            plt.ylabel('Accuracy')
        elif 'mae' in self.history.history:
            plt.plot(self.history.history['mae'], label='Training MAE')
            if 'val_mae' in self.history.history:
                plt.plot(self.history.history['val_mae'], label='Validation MAE')
            plt.title('Model MAE')
            plt.ylabel('MAE')

        plt.xlabel('Epoch')
        plt.legend()

        plt.tight_layout()

        # Save figure
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        fig_path = os.path.join(self.model_dir, f"cnn_training_history_{timestamp}.png")
        plt.savefig(fig_path)
        plt.close()

        logger.info(f"Training history plot saved to {fig_path}")
        return fig_path
|
546
NN/models/cnn_model_pytorch.py
Normal file
546
NN/models/cnn_model_pytorch.py
Normal file
@@ -0,0 +1,546 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
CNN Model - PyTorch Implementation
|
||||
|
||||
This module implements a CNN model using PyTorch for time series analysis.
|
||||
The model consists of multiple convolutional pathways and LSTM layers.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from datetime import datetime
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
from torch.utils.data import DataLoader, TensorDataset
|
||||
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
||||
|
||||
# Configure logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class CNNPyTorch(nn.Module):
    """PyTorch CNN model for time series analysis.

    Parallel Conv1d pathways with different kernel sizes are concatenated,
    passed through a bidirectional LSTM, flattened and fed to a dense head.
    The forward pass returns probabilities (sigmoid or softmax already
    applied), not raw logits.
    """

    def __init__(self, input_shape, output_size=3):
        """
        Initialize the CNN model.

        Args:
            input_shape (tuple): Shape of input data (window_size, features)
            output_size (int): Size of output (1 applies a sigmoid for binary
                classification, >1 applies a softmax over the classes)
        """
        super(CNNPyTorch, self).__init__()

        window_size, num_features = input_shape

        # Architecture parameters
        filters = [32, 64, 128]
        kernel_sizes = [3, 5, 7]
        lstm_units = 100
        dense_units = 64
        dropout_rate = 0.3
        pool_kernel, pool_stride, pool_padding = 2, 1, 1

        # Create parallel convolutional pathways
        self.conv_paths = nn.ModuleList()

        for f, k in zip(filters, kernel_sizes):
            path = nn.Sequential(
                nn.Conv1d(num_features, f, kernel_size=k, padding='same'),
                nn.ReLU(),
                nn.BatchNorm1d(f),
                nn.MaxPool1d(kernel_size=pool_kernel, stride=pool_stride, padding=pool_padding),
                nn.Dropout(dropout_rate)
            )
            self.conv_paths.append(path)

        # The 'same' convolution keeps window_size steps, but the padded
        # stride-1 pooling changes the length (window_size + 1 with the
        # settings above). The dense layer must be sized from this pooled
        # length, otherwise the flattened LSTM output does not fit it.
        pooled_len = (window_size + 2 * pool_padding - pool_kernel) // pool_stride + 1

        # LSTM layer
        self.lstm = nn.LSTM(
            input_size=sum(filters),
            hidden_size=lstm_units,
            batch_first=True,
            bidirectional=True
        )

        # Dense layers
        self.flatten = nn.Flatten()
        self.dense1 = nn.Sequential(
            nn.Linear(lstm_units * 2 * pooled_len, dense_units),
            nn.ReLU(),
            nn.BatchNorm1d(dense_units),
            nn.Dropout(dropout_rate)
        )

        # Output layer
        self.output = nn.Linear(dense_units, output_size)

        # Activation based on output size
        if output_size == 1:
            self.activation = nn.Sigmoid()  # Binary classification
        elif output_size > 1:
            self.activation = nn.Softmax(dim=1)  # Multi-class classification
        else:
            self.activation = nn.Identity()  # No activation

    def forward(self, x):
        """
        Forward pass through the network.

        Args:
            x: Input tensor of shape [batch_size, window_size, features]

        Returns:
            Output tensor of shape [batch_size, output_size]

        Raises:
            ValueError: If x is not a 3-D tensor
        """
        if x.dim() != 3:
            raise ValueError(
                f"expected input of shape [batch_size, window_size, features], got {tuple(x.shape)}"
            )

        # Transpose for conv1d: [batch, features, window]
        x_t = x.transpose(1, 2)

        # Process through parallel conv paths and concatenate along channels
        conv_concat = torch.cat([path(x_t) for path in self.conv_paths], dim=1)

        # Transpose back for LSTM: [batch, steps, channels]
        conv_concat = conv_concat.transpose(1, 2)

        # LSTM processing
        lstm_out, _ = self.lstm(conv_concat)

        # Flatten all time steps into one feature vector per sample
        flattened = self.flatten(lstm_out)

        # Dense processing
        dense_out = self.dense1(flattened)

        # Output
        output = self.output(dense_out)

        # Apply activation
        return self.activation(output)
|
||||
|
||||
|
||||
class CNNModelPyTorch:
|
||||
"""
|
||||
CNN model wrapper class for time series analysis using PyTorch.
|
||||
|
||||
This class provides methods for building, training, evaluating, and making
|
||||
predictions with the CNN model.
|
||||
"""
|
||||
|
||||
def __init__(self, window_size, num_features, output_size=3, timeframes=None):
    """
    Set up the wrapper, pick a compute device and build the network.

    Args:
        window_size (int): Number of time steps in each input window
        num_features (int): Number of features per time step
        output_size (int): Width of the model output
        timeframes (list): Timeframes used (stored for bookkeeping); a falsy
            value is replaced by an empty list
    """
    # Plain configuration attributes
    self.window_size = window_size
    self.num_features = num_features
    self.output_size = output_size
    self.timeframes = timeframes if timeframes else []

    # Prefer the first CUDA device when one is available
    if torch.cuda.is_available():
        device_name = "cuda:0"
    else:
        device_name = "cpu"
    self.device = torch.device(device_name)
    logger.info(f"Using device: {self.device}")

    # The network itself is created by build_model()
    self.model = None
    self.build_model()

    # Per-epoch training curves, filled in by train()
    self.history = {
        curve: [] for curve in ('loss', 'val_loss', 'accuracy', 'val_accuracy')
    }
|
||||
|
||||
def build_model(self):
    """Build the CNN model, its Adam optimizer and the matching loss.

    Sets ``self.model``, ``self.optimizer`` and ``self.criterion``. The
    criterion is always called as ``criterion(outputs, targets)``.
    """
    logger.info(f"Building PyTorch CNN model with window_size={self.window_size}, "
                f"num_features={self.num_features}, output_size={self.output_size}")

    self.model = CNNPyTorch(
        input_shape=(self.window_size, self.num_features),
        output_size=self.output_size
    ).to(self.device)

    # Initialize optimizer
    self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)

    # Initialize loss function based on output size
    if self.output_size == 1:
        self.criterion = nn.BCELoss()  # Binary classification on sigmoid output
    elif self.output_size > 1:
        # The network already applies a softmax, so its outputs are
        # probabilities. nn.CrossEntropyLoss expects raw logits and would
        # apply a second softmax; use negative log-likelihood on the log of
        # the probabilities instead, which is the same cross-entropy.
        nll_loss = nn.NLLLoss()

        def _probability_cross_entropy(probabilities, targets):
            # Clamp so that log(0) cannot produce -inf / NaN gradients.
            return nll_loss(torch.log(probabilities.clamp_min(1e-12)), targets)

        self.criterion = _probability_cross_entropy  # Multi-class classification
    else:
        self.criterion = nn.MSELoss()  # Regression

    logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")
|
||||
|
||||
def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
    """
    Train the CNN model.

    Args:
        X_train: Training input data
        y_train: Training target data (0/1 values when output_size is 1,
            class indices otherwise)
        X_val: Validation input data
        y_val: Validation target data
        batch_size: Batch size for training
        epochs: Number of training epochs

    Returns:
        Training history (dict of per-epoch lists; the validation lists are
        only extended when validation data is supplied)
    """
    logger.info(f"Training PyTorch CNN model with {len(X_train)} samples, "
                f"batch_size={batch_size}, epochs={epochs}")

    is_binary = self.output_size == 1
    # BCELoss needs float targets, the class-index losses need long targets.
    target_dtype = torch.float32 if is_binary else torch.long

    # Convert numpy arrays to PyTorch tensors
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)
    y_train_tensor = torch.tensor(y_train, dtype=target_dtype).to(self.device)

    # Create DataLoader for training data
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    # BatchNorm1d cannot normalise a batch holding a single sample while in
    # training mode, so skip a trailing batch of exactly one sample. Because
    # the data is reshuffled each epoch, a different sample is left out each time.
    n_train = len(train_dataset)
    drop_last = n_train > batch_size and n_train % batch_size == 1
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              drop_last=drop_last)

    # Create DataLoader for validation data if provided
    if X_val is not None and y_val is not None:
        X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
        y_val_tensor = torch.tensor(y_val, dtype=target_dtype).to(self.device)

        val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
        val_loader = DataLoader(val_dataset, batch_size=batch_size)
    else:
        val_loader = None

    # Training loop
    for epoch in range(epochs):
        # Training phase
        self.model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, targets in train_loader:
            # Zero the parameter gradients
            self.optimizer.zero_grad()

            # Forward pass
            outputs = self.model(inputs)

            # Calculate loss
            if is_binary:
                # reshape (rather than unsqueeze) accepts targets of shape
                # (batch,) as well as (batch, 1)
                loss = self.criterion(outputs, targets.reshape(-1, 1))
            else:
                loss = self.criterion(outputs, targets)

            # Backward pass and optimize
            loss.backward()
            self.optimizer.step()

            # Statistics
            running_loss += loss.item()
            if is_binary:
                # Same 0.5 threshold that predict()/evaluate() use
                predicted = (outputs > 0.5).reshape(-1)
                total += targets.size(0)
                correct += (predicted == (targets.reshape(-1) > 0.5)).sum().item()
            elif self.output_size > 1:
                _, predicted = torch.max(outputs, 1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

        epoch_loss = running_loss / len(train_loader)
        epoch_acc = correct / total if total > 0 else 0

        # Validation phase
        if val_loader is not None:
            val_loss, val_acc = self._validate(val_loader)

            logger.info(f"Epoch {epoch+1}/{epochs} - "
                        f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
                        f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")

            # Update history
            self.history['loss'].append(epoch_loss)
            self.history['accuracy'].append(epoch_acc)
            self.history['val_loss'].append(val_loss)
            self.history['val_accuracy'].append(val_acc)
        else:
            logger.info(f"Epoch {epoch+1}/{epochs} - "
                        f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f}")

            # Update history without validation
            self.history['loss'].append(epoch_loss)
            self.history['accuracy'].append(epoch_acc)

    logger.info("Training completed")
    return self.history
|
||||
|
||||
def _validate(self, val_loader):
|
||||
"""Validate the model using the validation set"""
|
||||
self.model.eval()
|
||||
val_loss = 0.0
|
||||
correct = 0
|
||||
total = 0
|
||||
|
||||
with torch.no_grad():
|
||||
for inputs, targets in val_loader:
|
||||
# Forward pass
|
||||
outputs = self.model(inputs)
|
||||
|
||||
# Calculate loss
|
||||
if self.output_size == 1:
|
||||
loss = self.criterion(outputs, targets.unsqueeze(1))
|
||||
else:
|
||||
loss = self.criterion(outputs, targets)
|
||||
|
||||
val_loss += loss.item()
|
||||
|
||||
# Calculate accuracy
|
||||
if self.output_size > 1:
|
||||
_, predicted = torch.max(outputs, 1)
|
||||
total += targets.size(0)
|
||||
correct += (predicted == targets).sum().item()
|
||||
|
||||
return val_loss / len(val_loader), correct / total if total > 0 else 0
|
||||
|
||||
def evaluate(self, X_test, y_test):
    """
    Score the model on held-out data.

    Runs the whole test set through the network in one forward pass and
    compares the predicted classes with ``y_test``.

    Args:
        X_test: Test input data
        y_test: Test target data

    Returns:
        dict: accuracy, precision, recall and f1_score (weighted averages
        when output_size > 1, binary scores otherwise)
    """
    logger.info(f"Evaluating model on {len(X_test)} samples")

    features = torch.tensor(X_test, dtype=torch.float32).to(self.device)
    multiclass = self.output_size > 1

    # Inference only: no dropout, no gradient tracking
    self.model.eval()
    with torch.no_grad():
        raw_output = self.model(features)

        if multiclass:
            # Most probable class per sample
            predicted_labels = torch.max(raw_output, 1)[1].cpu().numpy()
        else:
            # Threshold the single output at 0.5
            predicted_labels = (raw_output.cpu().numpy() > 0.5).astype(int).flatten()

    # 'binary' is scikit-learn's default averaging mode, so passing it
    # explicitly is the same as omitting the argument
    averaging = 'weighted' if multiclass else 'binary'
    metrics = {
        'accuracy': accuracy_score(y_test, predicted_labels),
        'precision': precision_score(y_test, predicted_labels, average=averaging),
        'recall': recall_score(y_test, predicted_labels, average=averaging),
        'f1_score': f1_score(y_test, predicted_labels, average=averaging)
    }

    logger.info(f"Evaluation metrics: {metrics}")
    return metrics
|
||||
|
||||
def predict(self, X):
    """
    Run the model on ``X`` and return predictions.

    Args:
        X: Input data; converted to a float32 tensor on ``self.device``

    Returns:
        tuple: depends on ``self.output_size``:
            * ``> 1``: ``(class_indices, raw_outputs)``
            * ``== 1``: ``(labels thresholded at 0.5, flattened raw outputs)``
            * otherwise: ``(flattened raw outputs, None)``
    """
    batch = torch.tensor(X, dtype=torch.float32).to(self.device)

    self.model.eval()
    with torch.no_grad():
        raw = self.model(batch)

    scores = raw.cpu().numpy()

    if self.output_size > 1:
        # Multi-class: index of the largest output in each row
        labels = torch.max(raw, 1)[1].cpu().numpy()
        return labels, scores

    if self.output_size == 1:
        # Binary: threshold the single output at 0.5
        return (scores > 0.5).astype(int).flatten(), scores.flatten()

    # Any other output size: raw values only
    return scores.flatten(), None
|
||||
|
||||
def save(self, filepath):
    """
    Save the model, optimizer state and metadata to ``{filepath}.pt``.

    Args:
        filepath: Destination path without the ``.pt`` extension. May be a
            bare file name, in which case the file is written to the
            current working directory.
    """
    # os.path.dirname() returns '' for a bare file name and os.makedirs('')
    # raises FileNotFoundError, so only create a parent directory if one
    # is actually part of the path
    parent_dir = os.path.dirname(filepath)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Everything load() needs to rebuild the network and resume training
    model_state = {
        'model_state_dict': self.model.state_dict(),
        'optimizer_state_dict': self.optimizer.state_dict(),
        'history': self.history,
        'window_size': self.window_size,
        'num_features': self.num_features,
        'output_size': self.output_size,
        'timeframes': self.timeframes
    }

    torch.save(model_state, f"{filepath}.pt")
    logger.info(f"Model saved to {filepath}.pt")
|
||||
|
||||
def load(self, filepath):
    """
    Restore the model from ``{filepath}.pt``.

    Args:
        filepath: Path of the saved model without the ``.pt`` extension

    Returns:
        bool: True when the checkpoint was loaded, False when the file
        does not exist
    """
    if not os.path.exists(f"{filepath}.pt"):
        logger.error(f"Model file {filepath}.pt not found")
        return False

    checkpoint = torch.load(f"{filepath}.pt", map_location=self.device)

    # Restore the saved hyperparameters before rebuilding
    # (presumably build_model() reads them -- it is defined outside this view)
    for attr in ('window_size', 'num_features', 'output_size', 'timeframes'):
        setattr(self, attr, checkpoint[attr])

    self.build_model()

    # Copy the saved weights, optimizer state and training curves back in
    self.model.load_state_dict(checkpoint['model_state_dict'])
    self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    self.history = checkpoint['history']

    logger.info(f"Model loaded from {filepath}.pt")
    return True
|
||||
|
||||
def plot_training_history(self):
    """Plot loss and accuracy curves and save them as a PNG under ``plots/``."""
    history = self.history
    if not history['loss']:
        logger.warning("No training history to plot")
        return

    plt.figure(figsize=(12, 4))

    # (subplot position, training key, validation key,
    #  training label, validation label, title, y-axis label)
    panels = (
        (1, 'loss', 'val_loss',
         'Training Loss', 'Validation Loss', 'Model Loss', 'Loss'),
        (2, 'accuracy', 'val_accuracy',
         'Training Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
    )
    for position, train_key, val_key, train_label, val_label, title, y_label in panels:
        plt.subplot(1, 2, position)
        plt.plot(history[train_key], label=train_label)
        # The validation curve is drawn only when it was recorded
        if val_key in history and history[val_key]:
            plt.plot(history[val_key], label=val_label)
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend()

    # Timestamped file name under ./plots
    os.makedirs('plots', exist_ok=True)
    plt.savefig(os.path.join('plots', f"cnn_history_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"))
    plt.close()

    logger.info("Training history plots saved to plots directory")
|
||||
|
||||
def extract_hidden_features(self, X):
    """
    Return the activations of the model's ``dense1`` layer for ``X``.

    The input is pushed through the parallel convolution paths, the LSTM,
    the flatten layer and ``dense1``; the layers after ``dense1`` are not
    applied.

    Args:
        X: Input data; converted to a float32 tensor on ``self.device``

    Returns:
        numpy.ndarray: Output of ``dense1``
    """
    net = self.model
    inputs = torch.tensor(X, dtype=torch.float32).to(self.device)

    net.eval()
    with torch.no_grad():
        # Swap the last two axes before the convolution paths
        swapped = inputs.transpose(1, 2)

        # Run every parallel path and join the results along dim 1
        branch_outputs = [branch(swapped) for branch in net.conv_paths]
        merged = torch.cat(branch_outputs, dim=1)

        # Swap the axes back for the LSTM
        sequence_out, _ = net.lstm(merged.transpose(1, 2))

        hidden_features = net.dense1(net.flatten(sequence_out))

    return hidden_features.cpu().numpy()
|
File diff suppressed because it is too large
Load Diff
653
NN/models/transformer_model_pytorch.py
Normal file
653
NN/models/transformer_model_pytorch.py
Normal file
@@ -0,0 +1,653 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Transformer Model - PyTorch Implementation
|
||||
|
||||
This module implements a Transformer model using PyTorch for time series analysis.
|
||||
The model consists of a Transformer encoder and a Mixture of Experts model.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from datetime import datetime
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
from torch.utils.data import DataLoader, TensorDataset
|
||||
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
||||
|
||||
# Configure logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class TransformerBlock(nn.Module):
    """Transformer block: self-attention followed by a feed-forward network.

    Each sub-layer's output goes through dropout, is added back to its
    input, and the sum is layer-normalized.
    """

    def __init__(self, input_dim, num_heads=4, ff_dim=64, dropout=0.1):
        """
        Args:
            input_dim (int): Size of the last input dimension (embedding size)
            num_heads (int): Requested number of attention heads. When it
                does not divide ``input_dim`` the largest smaller divisor
                is used instead and a warning is logged.
            ff_dim (int): Hidden width of the feed-forward network
            dropout (float): Dropout probability for attention and both
                residual branches
        """
        super().__init__()

        # nn.MultiheadAttention requires embed_dim % num_heads == 0, which the
        # default of 4 heads violates for e.g. 5 input features. Fall back to
        # the largest head count that divides input_dim (1 always does).
        # Non-positive values are passed through so PyTorch reports them.
        if input_dim > 0 and num_heads > 0 and input_dim % num_heads != 0:
            usable_heads = max(
                heads for heads in range(1, min(num_heads, input_dim) + 1)
                if input_dim % heads == 0
            )
            logging.getLogger(__name__).warning(
                "num_heads=%d does not divide input_dim=%d; using %d attention heads instead",
                num_heads, input_dim, usable_heads
            )
            num_heads = usable_heads

        self.attention = nn.MultiheadAttention(
            embed_dim=input_dim,
            num_heads=num_heads,
            dropout=dropout,
            batch_first=True
        )

        self.feed_forward = nn.Sequential(
            nn.Linear(input_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, input_dim)
        )

        self.layernorm1 = nn.LayerNorm(input_dim)
        self.layernorm2 = nn.LayerNorm(input_dim)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, x):
        """
        Apply the block.

        Args:
            x: Tensor whose last dimension is ``input_dim``; the attention
                layer is built with ``batch_first=True``

        Returns:
            Tensor of the same shape as ``x``
        """
        # Self-attention sub-layer (query = key = value = x)
        attn_output, _ = self.attention(x, x, x)
        x = x + self.dropout1(attn_output)
        x = self.layernorm1(x)

        # Feed-forward sub-layer
        ff_output = self.feed_forward(x)
        x = x + self.dropout2(ff_output)
        x = self.layernorm2(x)

        return x
|
||||
|
||||
class TransformerModelPyTorch(nn.Module):
    """Transformer network for windowed time series input."""

    def __init__(self, input_shape, output_size=3, num_heads=4, ff_dim=64, num_transformer_blocks=2):
        """
        Build the network.

        Args:
            input_shape (tuple): ``(window_size, features)`` of one sample
            output_size (int): Width of the final layer. ``1`` ends in a
                sigmoid, values above ``1`` end in a softmax over the
                outputs, anything else has no final activation.
            num_heads (int): Attention heads per transformer block
            ff_dim (int): Feed-forward width inside each transformer block
            num_transformer_blocks (int): Number of stacked transformer blocks
        """
        super().__init__()

        seq_len, feature_dim = input_shape

        # Learned positional encoding, initialised to zeros
        self.pos_encoding = nn.Parameter(
            torch.zeros(1, seq_len, feature_dim),
            requires_grad=True
        )

        # Stack of identical transformer blocks
        blocks = []
        for _ in range(num_transformer_blocks):
            blocks.append(
                TransformerBlock(input_dim=feature_dim, num_heads=num_heads, ff_dim=ff_dim)
            )
        self.transformer_blocks = nn.ModuleList(blocks)

        # Averages over the window axis (applied after a transpose in forward)
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)

        # Classification / regression head
        self.dense = nn.Sequential(
            nn.Linear(feature_dim, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Dropout(0.3),
            nn.Linear(64, output_size)
        )

        # Final activation depends on the output width
        if output_size > 1:
            self.activation = nn.Softmax(dim=1)
        elif output_size == 1:
            self.activation = nn.Sigmoid()
        else:
            self.activation = nn.Identity()

    def forward(self, x):
        """
        Run the network.

        Args:
            x: Input tensor of shape [batch_size, window_size, features]

        Returns:
            Tensor of shape [batch_size, output_size]
        """
        hidden = x + self.pos_encoding

        for block in self.transformer_blocks:
            hidden = block(hidden)

        # [batch, window, features] -> [batch, features, window] -> [batch, features]
        pooled = self.global_avg_pool(hidden.transpose(1, 2)).squeeze(-1)

        return self.activation(self.dense(pooled))
|
||||
|
||||
|
||||
class TransformerModelPyTorchWrapper:
|
||||
"""
|
||||
Transformer model wrapper class for time series analysis using PyTorch.
|
||||
|
||||
This class provides methods for building, training, evaluating, and making
|
||||
predictions with the Transformer model.
|
||||
"""
|
||||
|
||||
def __init__(self, window_size, num_features, output_size=3, timeframes=None):
    """
    Set up the wrapper: store the configuration, pick a device and build
    the network.

    Args:
        window_size (int): Size of the input window
        num_features (int): Number of features in the input data
        output_size (int): Width of the model output
        timeframes (list): Timeframes used; stored and saved with the
            model (defaults to an empty list)
    """
    self.window_size = window_size
    self.num_features = num_features
    self.output_size = output_size
    self.timeframes = timeframes or []

    # Use the first CUDA device when available, otherwise the CPU
    device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
    self.device = torch.device(device_name)
    logger.info(f"Using device: {self.device}")

    # build_model() replaces the None placeholder with the real network
    self.model = None
    self.build_model()

    # Per-epoch training curves
    self.history = {
        key: [] for key in ('loss', 'val_loss', 'accuracy', 'val_accuracy')
    }
|
||||
|
||||
def build_model(self):
    """
    Build the Transformer network, its optimizer and its loss function.

    Sets ``self.model`` (moved to ``self.device``), ``self.optimizer``
    (Adam, lr=0.001) and ``self.criterion``. The criterion is always
    called as ``criterion(outputs, targets)``.
    """
    logger.info(f"Building PyTorch Transformer model with window_size={self.window_size}, "
               f"num_features={self.num_features}, output_size={self.output_size}")

    self.model = TransformerModelPyTorch(
        input_shape=(self.window_size, self.num_features),
        output_size=self.output_size
    ).to(self.device)

    # Initialize optimizer
    self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)

    # Initialize loss function based on output size
    if self.output_size == 1:
        # The network ends in a sigmoid, so BCELoss on probabilities is correct
        self.criterion = nn.BCELoss()
    elif self.output_size > 1:
        # The network ends in Softmax and therefore emits probabilities.
        # nn.CrossEntropyLoss expects unnormalized logits and would apply
        # log-softmax a second time, flattening the gradients. Compute the
        # negative log-likelihood of the probabilities directly instead.
        def probability_cross_entropy(probs, targets):
            # Clamp to avoid log(0) when a probability underflows
            return nn.functional.nll_loss(torch.log(probs.clamp_min(1e-12)), targets)

        self.criterion = probability_cross_entropy
    else:
        self.criterion = nn.MSELoss()  # No final activation: regression

    logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")
|
||||
|
||||
def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
    """
    Train the Transformer model.

    Args:
        X_train: Training input data
        y_train: Training target data (converted to float32 when
            ``output_size == 1``, otherwise to int64)
        X_val: Validation input data (optional)
        y_val: Validation target data (optional)
        batch_size: Batch size for training
        epochs: Number of training epochs

    Returns:
        dict: ``self.history``. ``loss`` and ``accuracy`` get one entry per
        epoch; ``val_loss`` and ``val_accuracy`` only when validation data
        is given.
    """
    logger.info(f"Training PyTorch Transformer model with {len(X_train)} samples, "
               f"batch_size={batch_size}, epochs={epochs}")

    is_binary = self.output_size == 1
    # BCELoss needs float targets; the other losses are fed int64 targets
    target_dtype = torch.float32 if is_binary else torch.long

    def to_dataset(features, labels):
        # Move both arrays to the training device as tensors
        return TensorDataset(
            torch.tensor(features, dtype=torch.float32).to(self.device),
            torch.tensor(labels, dtype=target_dtype).to(self.device),
        )

    train_dataset = to_dataset(X_train, y_train)

    # The network contains BatchNorm1d, which raises in training mode on a
    # batch with a single sample. Drop the trailing batch only when it
    # would hold exactly one sample.
    num_train = len(train_dataset)
    drop_last = num_train > batch_size and num_train % batch_size == 1
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, drop_last=drop_last
    )

    # Validation is optional and needs both inputs and targets
    val_loader = None
    if X_val is not None and y_val is not None:
        val_loader = DataLoader(to_dataset(X_val, y_val), batch_size=batch_size)

    for epoch in range(epochs):
        self.model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, targets in train_loader:
            self.optimizer.zero_grad()

            outputs = self.model(inputs)

            if is_binary:
                # Match the [batch, 1] shape of the sigmoid output
                targets = targets.unsqueeze(1)
            loss = self.criterion(outputs, targets)

            loss.backward()
            self.optimizer.step()

            running_loss += loss.item()

            # Accuracy: thresholded sigmoid for the binary case (previously
            # never counted, so it was always reported as 0), arg-max for
            # multi-class; not defined for other output sizes
            if is_binary:
                predicted = (outputs > 0.5).to(targets.dtype)
            elif self.output_size > 1:
                _, predicted = torch.max(outputs, 1)
            else:
                predicted = None

            if predicted is not None:
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

        epoch_loss = running_loss / len(train_loader)
        epoch_acc = correct / total if total > 0 else 0

        self.history['loss'].append(epoch_loss)
        self.history['accuracy'].append(epoch_acc)

        if val_loader is not None:
            val_loss, val_acc = self._validate(val_loader)

            logger.info(f"Epoch {epoch+1}/{epochs} - "
                       f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
                       f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")

            self.history['val_loss'].append(val_loss)
            self.history['val_accuracy'].append(val_acc)
        else:
            logger.info(f"Epoch {epoch+1}/{epochs} - "
                       f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f}")

    logger.info("Training completed")
    return self.history
|
||||
|
||||
def _validate(self, val_loader):
    """
    Evaluate the model on a validation loader.

    Args:
        val_loader: Iterable of ``(inputs, targets)`` batches

    Returns:
        tuple: ``(mean batch loss, accuracy)``. Accuracy uses a 0.5
        threshold when ``output_size == 1`` and arg-max when it is greater
        than 1; otherwise it is 0. An empty loader yields ``(0.0, 0)``.
    """
    self.model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    with torch.no_grad():
        for inputs, targets in val_loader:
            outputs = self.model(inputs)

            if self.output_size == 1:
                # Match the [batch, 1] shape of the model output
                targets = targets.unsqueeze(1)
            val_loss += self.criterion(outputs, targets).item()
            num_batches += 1

            # Binary accuracy was previously never counted (always 0)
            if self.output_size == 1:
                predicted = (outputs > 0.5).to(targets.dtype)
            elif self.output_size > 1:
                _, predicted = torch.max(outputs, 1)
            else:
                continue

            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    # Guard both divisions so an empty loader no longer raises
    mean_loss = val_loss / num_batches if num_batches else 0.0
    accuracy = correct / total if total > 0 else 0
    return mean_loss, accuracy
|
||||
|
||||
def evaluate(self, X_test, y_test):
    """
    Compute classification metrics on test data.

    Args:
        X_test: Test input data
        y_test: Test target data, passed to the scikit-learn metrics

    Returns:
        dict: ``accuracy``, ``precision``, ``recall`` and ``f1_score``
    """
    logger.info(f"Evaluating model on {len(X_test)} samples")

    test_batch = torch.tensor(X_test, dtype=torch.float32).to(self.device)

    # Inference mode, no gradient bookkeeping
    self.model.eval()
    with torch.no_grad():
        raw_output = self.model(test_batch)

    is_multiclass = self.output_size > 1
    if is_multiclass:
        # Most probable class per sample
        _, best_index = torch.max(raw_output, 1)
        y_pred_class = best_index.cpu().numpy()
    else:
        # Single output: label 1 when the output exceeds 0.5
        y_pred_class = (raw_output.cpu().numpy() > 0.5).astype(int).flatten()

    # Multi-class metrics use weighted averaging; otherwise the metric
    # functions are called with their defaults
    extra = {'average': 'weighted'} if is_multiclass else {}
    accuracy = accuracy_score(y_test, y_pred_class)
    precision = precision_score(y_test, y_pred_class, **extra)
    recall = recall_score(y_test, y_pred_class, **extra)
    f1 = f1_score(y_test, y_pred_class, **extra)

    metrics = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1
    }

    logger.info(f"Evaluation metrics: {metrics}")
    return metrics
|
||||
|
||||
def predict(self, X):
    """
    Make predictions with the model.

    Args:
        X: Input data; converted to a float32 tensor on ``self.device``

    Returns:
        tuple: ``(class_indices, model_outputs)`` when ``output_size > 1``;
        ``(labels, flattened outputs)`` with a 0.5 threshold when
        ``output_size == 1``; ``(flattened outputs, None)`` otherwise
    """
    model_input = torch.tensor(X, dtype=torch.float32).to(self.device)

    self.model.eval()
    with torch.no_grad():
        model_output = self.model(model_input)

    output_array = model_output.cpu().numpy()

    if self.output_size > 1:
        # Multi-class classification: pick the largest output per row
        _, winners = torch.max(model_output, 1)
        return winners.cpu().numpy(), output_array

    if self.output_size == 1:
        # Binary classification: threshold the sigmoid output
        binary_labels = (output_array > 0.5).astype(int)
        return binary_labels.flatten(), output_array.flatten()

    # Regression: raw values only
    return output_array.flatten(), None
|
||||
|
||||
def save(self, filepath):
    """
    Save the model to ``{filepath}.pt``.

    The checkpoint holds the model and optimizer state dicts, the training
    history and the hyperparameters that load() restores.

    Args:
        filepath: Path to save the model, without the ``.pt`` extension.
            A bare file name saves into the current working directory.
    """
    # os.makedirs('') raises FileNotFoundError, and os.path.dirname()
    # returns '' for a bare file name, so only create the directory when
    # the path has a directory component
    target_dir = os.path.dirname(filepath)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Save the model state
    model_state = {
        'model_state_dict': self.model.state_dict(),
        'optimizer_state_dict': self.optimizer.state_dict(),
        'history': self.history,
        'window_size': self.window_size,
        'num_features': self.num_features,
        'output_size': self.output_size,
        'timeframes': self.timeframes
    }

    torch.save(model_state, f"{filepath}.pt")
    logger.info(f"Model saved to {filepath}.pt")
|
||||
|
||||
def load(self, filepath):
    """
    Load the model from ``{filepath}.pt``.

    Args:
        filepath: Path to load the model from, without the ``.pt`` extension

    Returns:
        bool: False when the file is missing, True after a successful load
    """
    if not os.path.exists(f"{filepath}.pt"):
        logger.error(f"Model file {filepath}.pt not found")
        return False

    saved = torch.load(f"{filepath}.pt", map_location=self.device)

    # build_model() reads these attributes, so restore them first
    for field in ('window_size', 'num_features', 'output_size', 'timeframes'):
        setattr(self, field, saved[field])

    # Recreate network and optimizer, then overwrite their fresh state
    self.build_model()
    self.model.load_state_dict(saved['model_state_dict'])
    self.optimizer.load_state_dict(saved['optimizer_state_dict'])
    self.history = saved['history']

    logger.info(f"Model loaded from {filepath}.pt")
    return True
|
||||
|
||||
class MixtureOfExpertsModelPyTorch:
|
||||
"""
|
||||
Mixture of Experts model implementation using PyTorch.
|
||||
|
||||
This model combines predictions from multiple models (experts) using a
|
||||
learned weighting scheme.
|
||||
"""
|
||||
|
||||
def __init__(self, output_size=3, timeframes=None):
    """
    Create an empty Mixture of Experts model.

    Args:
        output_size (int): Width of the combined output
        timeframes (list): Timeframes used; stored and saved with the
            model (defaults to an empty list)
    """
    self.output_size = output_size
    self.timeframes = timeframes or []

    # Experts and their optional blending weights, both keyed by name
    self.experts = {}
    self.expert_weights = {}

    # Use the first CUDA device when available, otherwise the CPU
    has_gpu = torch.cuda.is_available()
    self.device = torch.device("cuda:0" if has_gpu else "cpu")
    logger.info(f"Using device: {self.device}")

    # Initialize model and training history
    self.model = None
    self.history = dict(loss=[], val_loss=[], accuracy=[], val_accuracy=[])
|
||||
|
||||
def add_expert(self, name, model):
    """
    Register an expert under ``name``, replacing any expert already
    stored under that name.

    Args:
        name (str): Name of the expert
        model: Expert model
    """
    self.experts.update({name: model})
    logger.info(f"Added expert: {name}")
|
||||
|
||||
def predict(self, X):
    """
    Make predictions using all experts and combine them.

    Each expert's ``predict(X)`` must return a ``(labels, scores)`` pair,
    as the wrapper classes in this file do. The experts' scores are
    blended with ``self.expert_weights``; experts without an entry get the
    uniform weight ``1 / number_of_experts``.

    Args:
        X: Input data, passed unchanged to every expert

    Returns:
        tuple: ``(class_pred, final_pred)`` where ``final_pred`` is the
        weighted sum of the experts' scores and ``class_pred`` is its
        arg-max over axis 1 (``output_size > 1``) or its 0.5 threshold.
        Returns None when no experts have been added.
    """
    if not self.experts:
        logger.error("No experts added to the MoE model")
        return None

    default_weight = 1.0 / len(self.experts)

    final_pred = None
    for name, expert in self.experts.items():
        labels, scores = expert.predict(X)

        # Blend the scores (probabilities), not the class labels. A weighted
        # sum of label indices is a 1-D array, so the arg-max over axis 1
        # below would fail. Experts that return None as scores contribute
        # their first return value instead.
        values = labels if scores is None else scores

        contribution = self.expert_weights.get(name, default_weight) * np.asarray(values, dtype=float)
        final_pred = contribution if final_pred is None else final_pred + contribution

    if self.output_size > 1:
        # Get class with highest combined probability
        class_pred = np.argmax(final_pred, axis=1)
    else:
        # Binary classification
        class_pred = (final_pred > 0.5).astype(int)

    return class_pred, final_pred
|
||||
|
||||
def evaluate(self, X_test, y_test):
    """
    Compute classification metrics for the combined expert predictions.

    Args:
        X_test: Test input data
        y_test: Test target data, passed to the scikit-learn metrics

    Returns:
        dict: ``accuracy``, ``precision``, ``recall`` and ``f1_score``
    """
    logger.info(f"Evaluating MoE model on {len(X_test)} samples")

    # Only the class labels are needed here; the blended scores are ignored
    y_pred_class, _ = self.predict(X_test)

    # Multi-class metrics use weighted averaging; otherwise the metric
    # functions are called with their defaults
    averaging = {'average': 'weighted'} if self.output_size > 1 else {}

    metrics = {
        'accuracy': accuracy_score(y_test, y_pred_class),
        'precision': precision_score(y_test, y_pred_class, **averaging),
        'recall': recall_score(y_test, y_pred_class, **averaging),
        'f1_score': f1_score(y_test, y_pred_class, **averaging),
    }

    logger.info(f"MoE evaluation metrics: {metrics}")
    return metrics
|
||||
|
||||
def save(self, filepath):
    """
    Save the mixture configuration to ``{filepath}_moe.pt``.

    Only the expert weights, output size and timeframes are written; the
    expert models themselves are not part of this file.

    Args:
        filepath: Path prefix to save to. A bare file name saves into the
            current working directory.
    """
    # os.makedirs('') raises FileNotFoundError, and os.path.dirname()
    # returns '' for a bare file name, so only create the directory when
    # the path has a directory component
    target_dir = os.path.dirname(filepath)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Save the model state
    model_state = {
        'expert_weights': self.expert_weights,
        'output_size': self.output_size,
        'timeframes': self.timeframes
    }

    torch.save(model_state, f"{filepath}_moe.pt")
    logger.info(f"MoE model saved to {filepath}_moe.pt")
|
||||
|
||||
def load(self, filepath):
    """
    Restore the mixture configuration from ``{filepath}_moe.pt``.

    Args:
        filepath: Path prefix the configuration was saved under

    Returns:
        bool: False when the file is missing, True after a successful load
    """
    if not os.path.exists(f"{filepath}_moe.pt"):
        logger.error(f"MoE model file {filepath}_moe.pt not found")
        return False

    saved = torch.load(f"{filepath}_moe.pt", map_location=self.device)

    # Copy the stored settings back onto the instance; the experts
    # themselves are not part of the file
    for field in ('expert_weights', 'output_size', 'timeframes'):
        setattr(self, field, saved[field])

    logger.info(f"MoE model loaded from {filepath}_moe.pt")
    return True
|
Reference in New Issue
Block a user