BIG CLEANUP
@@ -1,27 +0,0 @@
"""
Neural Network Models
=====================

This package contains the neural network models used in the trading system:

- CNN Model: Deep convolutional neural network for feature extraction
- DQN Agent: Deep Q-Network for reinforcement learning
- COB RL Model: Specialized RL model for order book data
- Advanced Transformer: High-performance transformer for trading

PyTorch implementation only.
"""

# Import core models
from NN.models.dqn_agent import DQNAgent
from NN.models.cob_rl_model import COBRLModelInterface
from NN.models.advanced_transformer_trading import AdvancedTradingTransformer, TradingTransformerConfig
from NN.models.standardized_cnn import StandardizedCNN  # Use the unified CNN model

# Import model interfaces
from NN.models.model_interfaces import ModelInterface, CNNModelInterface, RLAgentInterface, ExtremaTrainerInterface

# Export the unified StandardizedCNN as CNNModel for compatibility
CNNModel = StandardizedCNN

__all__ = ['CNNModel', 'StandardizedCNN', 'DQNAgent', 'COBRLModelInterface', 'AdvancedTradingTransformer', 'TradingTransformerConfig',
           'ModelInterface', 'CNNModelInterface', 'RLAgentInterface', 'ExtremaTrainerInterface']
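
A minimal usage sketch of the compatibility alias above (illustrative, not part of the commit): the legacy name CNNModel now resolves to StandardizedCNN, so existing call sites keep working unchanged.

from NN.models import CNNModel, StandardizedCNN

model = CNNModel()                  # identical to StandardizedCNN()
assert CNNModel is StandardizedCNN  # same class object, just re-exported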

@@ -1,201 +0,0 @@
# """
# Legacy CNN Model Compatibility Layer

# This module provides compatibility redirects to the unified StandardizedCNN model.
# All legacy models (EnhancedCNNModel, CNNModelTrainer, CNNModel) have been retired
# in favor of the StandardizedCNN architecture.
# """

# import logging
# import warnings
# from typing import Tuple, Dict, Any, Optional
# import torch
# import numpy as np

# # Import the standardized CNN model
# from .standardized_cnn import StandardizedCNN

# logger = logging.getLogger(__name__)

# # Compatibility aliases and wrappers
# class EnhancedCNNModel:
#     """Legacy compatibility wrapper - redirects to StandardizedCNN"""

#     def __init__(self, *args, **kwargs):
#         warnings.warn(
#             "EnhancedCNNModel is deprecated. Use StandardizedCNN instead.",
#             DeprecationWarning,
#             stacklevel=2
#         )
#         # Create StandardizedCNN with default parameters
#         self.standardized_cnn = StandardizedCNN()
#         logger.warning("EnhancedCNNModel compatibility wrapper created - please migrate to StandardizedCNN")

#     def __getattr__(self, name):
#         """Delegate all method calls to StandardizedCNN"""
#         return getattr(self.standardized_cnn, name)


# class CNNModelTrainer:
#     """Legacy compatibility wrapper for CNN training"""

#     def __init__(self, model=None, *args, **kwargs):
#         warnings.warn(
#             "CNNModelTrainer is deprecated. Use StandardizedCNN.train_step() instead.",
#             DeprecationWarning,
#             stacklevel=2
#         )
#         if isinstance(model, EnhancedCNNModel):
#             self.model = model.standardized_cnn
#         else:
#             self.model = StandardizedCNN()
#         logger.warning("CNNModelTrainer compatibility wrapper created - please use StandardizedCNN.train_step()")

#     def train_step(self, x, y, *args, **kwargs):
#         """Legacy train step wrapper"""
#         try:
#             # Convert to BaseDataInput format if needed
#             if hasattr(x, 'get_feature_vector'):
#                 # Already BaseDataInput
#                 base_input = x
#             else:
#                 # Create mock BaseDataInput for legacy compatibility
#                 from core.data_models import BaseDataInput
#                 base_input = BaseDataInput()
#                 # Set mock feature vector
#                 if isinstance(x, torch.Tensor):
#                     feature_vector = x.flatten().cpu().numpy()
#                 else:
#                     feature_vector = np.array(x).flatten()

#                 # Pad or truncate to expected size
#                 expected_size = self.model.expected_feature_dim
#                 if len(feature_vector) < expected_size:
#                     padding = np.zeros(expected_size - len(feature_vector))
#                     feature_vector = np.concatenate([feature_vector, padding])
#                 else:
#                     feature_vector = feature_vector[:expected_size]

#                 base_input._feature_vector = feature_vector

#             # Convert target to string format
#             if isinstance(y, torch.Tensor):
#                 y_val = y.item() if y.numel() == 1 else y.argmax().item()
#             else:
#                 y_val = int(y) if np.isscalar(y) else int(np.argmax(y))

#             target_map = {0: 'BUY', 1: 'SELL', 2: 'HOLD'}
#             target = target_map.get(y_val, 'HOLD')

#             # Use StandardizedCNN training
#             optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)
#             loss = self.model.train_step([base_input], [target], optimizer)

#             return {'total_loss': loss, 'main_loss': loss, 'accuracy': 0.5}

#         except Exception as e:
#             logger.error(f"Legacy train_step error: {e}")
#             return {'total_loss': 0.0, 'main_loss': 0.0, 'accuracy': 0.5}


# # class CNNModel:
# #     """Legacy compatibility wrapper for CNN model interface"""

# #     def __init__(self, input_shape=(900, 50), output_size=3, model_path=None):
# #         warnings.warn(
# #             "CNNModel is deprecated. Use StandardizedCNN directly.",
# #             DeprecationWarning,
# #             stacklevel=2
# #         )
# #         self.input_shape = input_shape
# #         self.output_size = output_size
# #         self.standardized_cnn = StandardizedCNN()
# #         self.trainer = CNNModelTrainer(self.standardized_cnn)
# #         logger.warning("CNNModel compatibility wrapper created - please migrate to StandardizedCNN")

# #     def build_model(self, **kwargs):
# #         """Legacy build method - no-op for StandardizedCNN"""
# #         return self

# #     def predict(self, X):
# #         """Legacy predict method"""
# #         try:
# #             # Convert input to BaseDataInput
# #             from core.data_models import BaseDataInput
# #             base_input = BaseDataInput()

# #             if isinstance(X, np.ndarray):
# #                 feature_vector = X.flatten()
# #             else:
# #                 feature_vector = np.array(X).flatten()

# #             # Pad or truncate to expected size
# #             expected_size = self.standardized_cnn.expected_feature_dim
# #             if len(feature_vector) < expected_size:
# #                 padding = np.zeros(expected_size - len(feature_vector))
# #                 feature_vector = np.concatenate([feature_vector, padding])
# #             else:
# #                 feature_vector = feature_vector[:expected_size]

# #             base_input._feature_vector = feature_vector

# #             # Get prediction from StandardizedCNN
# #             result = self.standardized_cnn.predict_from_base_input(base_input)

# #             # Convert to legacy format
# #             action_map = {'BUY': 0, 'SELL': 1, 'HOLD': 2}
# #             pred_class = np.array([action_map.get(result.predictions['action'], 2)])
# #             pred_proba = np.array([result.predictions['action_probabilities']])

# #             return pred_class, pred_proba

# #         except Exception as e:
# #             logger.error(f"Legacy predict error: {e}")
# #             # Return safe defaults
# #             pred_class = np.array([2])  # HOLD
# #             pred_proba = np.array([[0.33, 0.33, 0.34]])
# #             return pred_class, pred_proba

# #     def fit(self, X, y, **kwargs):
# #         """Legacy fit method"""
# #         try:
# #             return self.trainer.train_step(X, y)
# #         except Exception as e:
# #             logger.error(f"Legacy fit error: {e}")
# #             return self

# #     def save(self, filepath: str):
# #         """Legacy save method"""
# #         try:
# #             torch.save(self.standardized_cnn.state_dict(), filepath)
# #             logger.info(f"StandardizedCNN saved to {filepath}")
# #         except Exception as e:
# #             logger.error(f"Error saving model: {e}")


# def create_enhanced_cnn_model(input_size: int = 60,
#                               feature_dim: int = 50,
#                               output_size: int = 3,
#                               base_channels: int = 256,
#                               device: str = 'cuda') -> Tuple[StandardizedCNN, CNNModelTrainer]:
#     """Legacy compatibility function - returns StandardizedCNN"""
#     warnings.warn(
#         "create_enhanced_cnn_model is deprecated. Use StandardizedCNN() directly.",
#         DeprecationWarning,
#         stacklevel=2
#     )

#     model = StandardizedCNN()
#     trainer = CNNModelTrainer(model)

#     logger.warning("Legacy create_enhanced_cnn_model called - please use StandardizedCNN directly")
#     return model, trainer


# # Export compatibility symbols
# __all__ = [
#     'EnhancedCNNModel',
#     'CNNModelTrainer',
#     # 'CNNModel',
#     'create_enhanced_cnn_model'
# ]
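
The retired module above illustrates a common deprecation pattern: a thin wrapper that warns on construction and then delegates every attribute lookup via __getattr__. A self-contained sketch of the pattern (class names here are illustrative stand-ins, not part of the codebase):

import warnings


class NewModel:
    """Stand-in for the replacement implementation (e.g. StandardizedCNN)."""
    def predict(self, x):
        return x


class LegacyModel:
    """Deprecated alias: warns once, then delegates transparently."""
    def __init__(self, *args, **kwargs):
        warnings.warn(
            "LegacyModel is deprecated. Use NewModel instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        self._impl = NewModel(*args, **kwargs)

    def __getattr__(self, name):
        # Invoked only when normal attribute lookup fails, so delegation
        # is transparent to callers of the old API.
        return getattr(self._impl, name)


legacy = LegacyModel()             # emits a DeprecationWarning
assert legacy.predict(42) == 42    # forwarded to NewModel.predict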

@@ -1,821 +0,0 @@
"""
Transformer Neural Network for time series analysis

This module implements a Transformer model with attention mechanisms for cryptocurrency price analysis.
It also includes a Mixture of Experts model that combines predictions from multiple models.
"""

import os
import logging
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
    Input, Dense, Dropout, BatchNormalization,
    Concatenate, Layer, LayerNormalization, MultiHeadAttention,
    Add, GlobalAveragePooling1D, Conv1D, Reshape
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import datetime
import json

logger = logging.getLogger(__name__)


class TransformerBlock(Layer):
    """
    Transformer block implementation with multi-head attention and feed-forward networks.
    """
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        # Store hyperparameters so get_config() can serialize them
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(embed_dim),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=False):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        # Serialize the constructor arguments, not the sublayers themselves:
        # Keras cannot round-trip raw Layer objects through a config dict.
        config = super().get_config()
        config.update({
            'embed_dim': self.embed_dim,
            'num_heads': self.num_heads,
            'ff_dim': self.ff_dim,
            'rate': self.rate
        })
        return config
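
As a quick sanity check, a sketch (dimensions made up, not part of the original file) of pushing a random batch through the block; the block is shape-preserving, so the output shape should equal the input shape:

_block = TransformerBlock(embed_dim=32, num_heads=4, ff_dim=64)
_x = tf.random.normal((8, 20, 32))                     # (batch, timesteps, embed_dim)
assert _block(_x, training=False).shape == _x.shape    # attention + FFN preserve shape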


class PositionalEncoding(Layer):
    """
    Positional encoding layer to add position information to input embeddings.
    """
    def __init__(self, position, d_model):
        super(PositionalEncoding, self).__init__()
        self.position = position
        self.d_model = d_model
        self.pos_encoding = self.positional_encoding(position, d_model)

    def get_angles(self, position, i, d_model):
        angles = 1 / tf.pow(10000, (2 * (i // 2)) / tf.cast(d_model, tf.float32))
        return position * angles

    def positional_encoding(self, position, d_model):
        angle_rads = self.get_angles(
            position=tf.range(position, dtype=tf.float32)[:, tf.newaxis],
            i=tf.range(d_model, dtype=tf.float32)[tf.newaxis, :],
            d_model=d_model
        )

        # Apply sin to even indices in the array
        sines = tf.math.sin(angle_rads[:, 0::2])

        # Apply cos to odd indices in the array
        cosines = tf.math.cos(angle_rads[:, 1::2])

        pos_encoding = tf.concat([sines, cosines], axis=-1)
        pos_encoding = pos_encoding[tf.newaxis, ...]

        return tf.cast(pos_encoding, tf.float32)

    def call(self, inputs):
        return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]

    def get_config(self):
        # Only the constructor arguments are serialized; pos_encoding is
        # deterministic and is rebuilt from them in __init__.
        config = super().get_config()
        config.update({
            'position': self.position,
            'd_model': self.d_model
        })
        return config
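
The layer computes the standard sinusoidal encodings, PE(pos, 2i) = sin(pos / 10000^(2i/d_model)) and PE(pos, 2i+1) = cos(pos / 10000^(2i/d_model)), but concatenates the sine and cosine halves along the feature axis rather than interleaving them; either layout works, since the encoding only needs to be deterministic and unique per position. A NumPy sketch of the same computation, for illustration only:

def _sinusoidal_encoding_sketch(position: int, d_model: int) -> np.ndarray:
    """Illustrative NumPy mirror of PositionalEncoding.positional_encoding()."""
    pos = np.arange(position)[:, np.newaxis]   # (position, 1)
    i = np.arange(d_model)[np.newaxis, :]      # (1, d_model)
    angles = pos / np.power(10000, (2 * (i // 2)) / d_model)
    # sin over even-indexed columns, cos over odd-indexed ones, concatenated
    return np.concatenate([np.sin(angles[:, 0::2]), np.cos(angles[:, 1::2])], axis=-1)

assert _sinusoidal_encoding_sketch(20, 32).shape == (20, 32)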


class TransformerModel:
    """
    Transformer Neural Network for time series analysis.

    This model uses self-attention mechanisms to capture relationships between
    different time points in the input data.
    """

    def __init__(self, ts_input_shape=(20, 5), feature_input_shape=64, output_size=1, model_dir="NN/models/saved"):
        """
        Initialize the Transformer model.

        Args:
            ts_input_shape (tuple): Shape of time series input data (sequence_length, features)
            feature_input_shape (int): Size of the additional feature input (e.g., from CNN)
            output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
            model_dir (str): Directory to save trained models
        """
        self.ts_input_shape = ts_input_shape
        self.feature_input_shape = feature_input_shape
        self.output_size = output_size
        self.model_dir = model_dir
        self.model = None
        self.history = None

        # Create model directory if it doesn't exist
        os.makedirs(self.model_dir, exist_ok=True)

        logger.info(f"Initialized Transformer model with TS input shape {ts_input_shape}, "
                    f"feature input shape {feature_input_shape}, and output size {output_size}")

    def build_model(self, embed_dim=32, num_heads=4, ff_dim=64, num_transformer_blocks=2, dropout_rate=0.1, learning_rate=0.001):
        """
        Build the Transformer model architecture.

        Args:
            embed_dim (int): Embedding dimension for the transformer
            num_heads (int): Number of attention heads
            ff_dim (int): Hidden dimension of the feed-forward network
            num_transformer_blocks (int): Number of transformer blocks
            dropout_rate (float): Dropout rate for regularization
            learning_rate (float): Learning rate for the Adam optimizer

        Returns:
            The compiled model
        """
        # Time series input
        ts_inputs = Input(shape=self.ts_input_shape, name="ts_input")

        # Additional feature input (e.g., from CNN)
        feature_inputs = Input(shape=(self.feature_input_shape,), name="feature_input")

        # Process the time series with the transformer.
        # First, project the input to the embedding dimension
        x = Conv1D(embed_dim, 1, activation="relu")(ts_inputs)

        # Add positional encoding
        x = PositionalEncoding(self.ts_input_shape[0], embed_dim)(x)

        # Add transformer blocks
        for _ in range(num_transformer_blocks):
            x = TransformerBlock(embed_dim, num_heads, ff_dim, dropout_rate)(x)

        # Global pooling to get a single vector representation
        x = GlobalAveragePooling1D()(x)
        x = Dropout(dropout_rate)(x)

        # Combine with additional features
        combined = Concatenate()([x, feature_inputs])

        # Dense layers for final classification/regression
        x = Dense(64, activation="relu")(combined)
        x = BatchNormalization()(x)
        x = Dropout(dropout_rate)(x)

        # Output layer
        if self.output_size == 1:
            # Binary classification (up/down)
            outputs = Dense(1, activation='sigmoid', name='output')(x)
            loss = 'binary_crossentropy'
            metrics = ['accuracy']
        elif self.output_size == 3:
            # Multi-class classification (buy/hold/sell)
            outputs = Dense(3, activation='softmax', name='output')(x)
            loss = 'categorical_crossentropy'
            metrics = ['accuracy']
        else:
            # Regression
            outputs = Dense(self.output_size, activation='linear', name='output')(x)
            loss = 'mse'
            metrics = ['mae']

        # Create and compile the model
        self.model = Model(inputs=[ts_inputs, feature_inputs], outputs=outputs)

        # Compile with the Adam optimizer
        self.model.compile(
            optimizer=Adam(learning_rate=learning_rate),
            loss=loss,
            metrics=metrics
        )

        # Log model summary
        self.model.summary(print_fn=lambda x: logger.info(x))

        return self.model

    def train(self, X_ts, X_features, y, batch_size=32, epochs=100, validation_split=0.2,
              callbacks=None, class_weights=None):
        """
        Train the Transformer model on the provided data.

        Args:
            X_ts (numpy.ndarray): Time series input features
            X_features (numpy.ndarray): Additional input features
            y (numpy.ndarray): Target labels
            batch_size (int): Batch size
            epochs (int): Number of epochs
            validation_split (float): Fraction of data to use for validation
            callbacks (list): List of Keras callbacks
            class_weights (dict): Class weights for imbalanced datasets

        Returns:
            History object containing training metrics
        """
        if self.model is None:
            self.build_model()

        # Default callbacks if none provided
        if callbacks is None:
            # Create a timestamp for model checkpoints
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

            callbacks = [
                EarlyStopping(
                    monitor='val_loss',
                    patience=10,
                    restore_best_weights=True
                ),
                ReduceLROnPlateau(
                    monitor='val_loss',
                    factor=0.5,
                    patience=5,
                    min_lr=1e-6
                ),
                ModelCheckpoint(
                    filepath=os.path.join(self.model_dir, f"transformer_model_{timestamp}.h5"),
                    monitor='val_loss',
                    save_best_only=True
                )
            ]

        # One-hot encode y for multi-class if it is still a 1-D label vector
        if self.output_size == 3 and len(y.shape) == 1:
            y = tf.keras.utils.to_categorical(y, num_classes=3)

        # Train the model
        logger.info(f"Training Transformer model with {len(X_ts)} samples, batch size {batch_size}, epochs {epochs}")
        self.history = self.model.fit(
            [X_ts, X_features], y,
            batch_size=batch_size,
            epochs=epochs,
            validation_split=validation_split,
            callbacks=callbacks,
            class_weight=class_weights,
            verbose=2
        )

        # Save the trained model
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        model_path = os.path.join(self.model_dir, f"transformer_model_final_{timestamp}.h5")
        self.model.save(model_path)
        logger.info(f"Model saved to {model_path}")

        # Save training history
        history_path = os.path.join(self.model_dir, f"transformer_model_history_{timestamp}.json")
        with open(history_path, 'w') as f:
            # Convert numpy values to Python native types for JSON serialization
            history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
            json.dump(history_dict, f, indent=2)

        return self.history

    def evaluate(self, X_ts, X_features, y):
        """
        Evaluate the model on test data.

        Args:
            X_ts (numpy.ndarray): Time series input features
            X_features (numpy.ndarray): Additional input features
            y (numpy.ndarray): Target labels

        Returns:
            dict: Evaluation metrics
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Convert y to one-hot encoding for multi-class
        if self.output_size == 3 and len(y.shape) == 1:
            y = tf.keras.utils.to_categorical(y, num_classes=3)

        # Evaluate the model
        logger.info(f"Evaluating Transformer model on {len(X_ts)} samples")
        eval_results = self.model.evaluate([X_ts, X_features], y, verbose=0)

        metrics = {}
        for metric, value in zip(self.model.metrics_names, eval_results):
            metrics[metric] = value
            logger.info(f"{metric}: {value:.4f}")

        return metrics

    def predict(self, X_ts, X_features=None):
        """
        Make predictions on new data.

        Args:
            X_ts (numpy.ndarray): Time series input features
            X_features (numpy.ndarray): Additional input features

        Returns:
            tuple: (y_pred, y_proba) where:
                y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
                y_proba is the class probability
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Ensure X_ts has the right shape
        if len(X_ts.shape) == 2:
            # Single sample, add batch dimension
            X_ts = np.expand_dims(X_ts, axis=0)

        # Ensure X_features has the right shape
        if X_features is None:
            # Extract features from time series data if no external features provided
            X_features = self._extract_features_from_timeseries(X_ts)
        elif len(X_features.shape) == 1:
            # Single sample, add batch dimension
            X_features = np.expand_dims(X_features, axis=0)

        # Get predictions
        y_proba = self.model.predict([X_ts, X_features])

        # Process based on output type
        if self.output_size == 1:
            # Binary classification
            y_pred = (y_proba > 0.5).astype(int).flatten()
            return y_pred, y_proba.flatten()
        elif self.output_size == 3:
            # Multi-class classification
            y_pred = np.argmax(y_proba, axis=1)
            return y_pred, y_proba
        else:
            # Regression
            return y_proba, y_proba

    def _extract_features_from_timeseries(self, X_ts: np.ndarray) -> np.ndarray:
        """Extract meaningful features from time series data instead of using dummy zeros"""
        # Each input channel yields 6 summary statistics plus slope and
        # rate-of-change (8 values per channel); the flat vector is then
        # zero-padded or truncated to feature_input_shape.
        try:
            batch_size = X_ts.shape[0]
            features = []

            for i in range(batch_size):
                sample = X_ts[i]  # Shape: (timesteps, features)

                # Extract statistical features from each feature dimension
                sample_features = []

                for feature_idx in range(sample.shape[1]):
                    feature_data = sample[:, feature_idx]

                    # Basic statistical features
                    sample_features.extend([
                        np.mean(feature_data),            # Mean
                        np.std(feature_data),             # Standard deviation
                        np.min(feature_data),             # Minimum
                        np.max(feature_data),             # Maximum
                        np.percentile(feature_data, 25),  # 25th percentile
                        np.percentile(feature_data, 75),  # 75th percentile
                    ])

                    # Trend features
                    if len(feature_data) > 1:
                        # Linear trend (slope)
                        x = np.arange(len(feature_data))
                        slope = np.polyfit(x, feature_data, 1)[0]
                        sample_features.append(slope)

                        # Rate of change
                        rate_of_change = (feature_data[-1] - feature_data[0]) / feature_data[0] if feature_data[0] != 0 else 0
                        sample_features.append(rate_of_change)
                    else:
                        sample_features.extend([0.0, 0.0])

                # Pad or truncate to the expected feature size
                while len(sample_features) < self.feature_input_shape:
                    sample_features.append(0.0)
                sample_features = sample_features[:self.feature_input_shape]

                features.append(sample_features)

            return np.array(features, dtype=np.float32)

        except Exception as e:
            logger.error(f"Error extracting features from time series: {e}")
            # Fall back to zeros if extraction fails
            return np.zeros((X_ts.shape[0], self.feature_input_shape), dtype=np.float32)

    def save(self, filepath=None):
        """
        Save the model to disk.

        Args:
            filepath (str): Path to save the model

        Returns:
            str: Path where the model was saved
        """
        if self.model is None:
            raise ValueError("Model has not been built yet")

        if filepath is None:
            # Create a default filepath with timestamp
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            filepath = os.path.join(self.model_dir, f"transformer_model_{timestamp}.h5")

        self.model.save(filepath)
        logger.info(f"Model saved to {filepath}")
        return filepath

    def load(self, filepath):
        """
        Load a saved model from disk.

        Args:
            filepath (str): Path to the saved model

        Returns:
            The loaded model
        """
        # Register custom layers
        custom_objects = {
            'TransformerBlock': TransformerBlock,
            'PositionalEncoding': PositionalEncoding
        }

        self.model = load_model(filepath, custom_objects=custom_objects)
        logger.info(f"Model loaded from {filepath}")
        return self.model

    def plot_training_history(self):
        """
        Plot training history (loss and metrics).

        Returns:
            str: Path to the saved plot
        """
        if self.history is None:
            raise ValueError("Model has not been trained yet")

        plt.figure(figsize=(12, 5))

        # Plot loss
        plt.subplot(1, 2, 1)
        plt.plot(self.history.history['loss'], label='Training Loss')
        if 'val_loss' in self.history.history:
            plt.plot(self.history.history['val_loss'], label='Validation Loss')
        plt.title('Model Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()

        # Plot accuracy (or MAE for regression)
        plt.subplot(1, 2, 2)

        if 'accuracy' in self.history.history:
            plt.plot(self.history.history['accuracy'], label='Training Accuracy')
            if 'val_accuracy' in self.history.history:
                plt.plot(self.history.history['val_accuracy'], label='Validation Accuracy')
            plt.title('Model Accuracy')
            plt.ylabel('Accuracy')
        elif 'mae' in self.history.history:
            plt.plot(self.history.history['mae'], label='Training MAE')
            if 'val_mae' in self.history.history:
                plt.plot(self.history.history['val_mae'], label='Validation MAE')
            plt.title('Model MAE')
            plt.ylabel('MAE')

        plt.xlabel('Epoch')
        plt.legend()

        plt.tight_layout()

        # Save the figure
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        fig_path = os.path.join(self.model_dir, f"transformer_training_history_{timestamp}.png")
        plt.savefig(fig_path)
        plt.close()

        logger.info(f"Training history plot saved to {fig_path}")
        return fig_path
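
A minimal end-to-end sketch of the class above on synthetic data (illustrative only; dimensions and the tiny epoch count are chosen arbitrarily, and real usage would pass market data). It relies on the numpy import at the top of this module:

_tm = TransformerModel(ts_input_shape=(20, 5), feature_input_shape=64, output_size=3)
_tm.build_model(embed_dim=32, num_heads=4, ff_dim=64)

_X_ts = np.random.rand(128, 20, 5).astype(np.float32)   # (batch, timesteps, features)
_X_feat = np.random.rand(128, 64).astype(np.float32)    # (batch, feature_input_shape)
_y = np.random.randint(0, 3, size=128)                   # integer labels; train() one-hot encodes

_tm.train(_X_ts, _X_feat, _y, batch_size=16, epochs=2)   # tiny run, just to exercise the path
_pred, _proba = _tm.predict(_X_ts[:4], _X_feat[:4])
assert _pred.shape == (4,) and _proba.shape == (4, 3)    # argmax classes + softmax probabilities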


class MixtureOfExpertsModel:
    """
    Mixture of Experts (MoE) model.

    This model combines predictions from multiple expert models (such as CNN and Transformer)
    using a weighted ensemble approach.
    """

    def __init__(self, output_size=1, model_dir="NN/models/saved"):
        """
        Initialize the MoE model.

        Args:
            output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
            model_dir (str): Directory to save trained models
        """
        self.output_size = output_size
        self.model_dir = model_dir
        self.model = None
        self.history = None
        self.experts = {}

        # Create model directory if it doesn't exist
        os.makedirs(self.model_dir, exist_ok=True)

        logger.info(f"Initialized Mixture of Experts model with output size {output_size}")

    def add_expert(self, name, model):
        """
        Add an expert model to the MoE.

        Args:
            name (str): Name of the expert model
            model: The expert model instance

        Returns:
            None
        """
        self.experts[name] = model
        logger.info(f"Added expert model '{name}' to MoE")

    def build_model(self, ts_input_shape=(20, 5), expert_weights=None, learning_rate=0.001):
        """
        Build the MoE model by combining expert models.

        Args:
            ts_input_shape (tuple): Shape of time series input data
            expert_weights (dict): Weights for each expert model
            learning_rate (float): Learning rate for the Adam optimizer

        Returns:
            The compiled model
        """
        # Time series input
        ts_inputs = Input(shape=ts_input_shape, name="ts_input")

        # Additional feature input (from CNN)
        feature_inputs = Input(shape=(64,), name="feature_input")  # Default size for features

        # Process with each expert model
        expert_outputs = []
        expert_names = []

        for name, expert in self.experts.items():
            # Skip if the expert model is not valid
            if expert is None:
                logger.warning(f"Expert model '{name}' is None, skipping")
                continue

            try:
                # Different handling based on model type
                if name == 'cnn':
                    # CNN expert takes only the time series input
                    expert_output = expert(ts_inputs)
                    expert_outputs.append(expert_output)
                    expert_names.append(name)
                elif name == 'transformer':
                    # Transformer expert takes both time series and feature inputs
                    expert_output = expert([ts_inputs, feature_inputs])
                    expert_outputs.append(expert_output)
                    expert_names.append(name)
                else:
                    logger.warning(f"Unknown expert model type: {name}")
            except Exception as e:
                logger.error(f"Error adding expert '{name}': {str(e)}")

        if not expert_outputs:
            logger.error("No valid expert models found")
            return None

        # Use expert weighting
        if expert_weights is None:
            # Equal weighting
            weights = [1.0 / len(expert_outputs)] * len(expert_outputs)
        else:
            # User-provided weights
            weights = [expert_weights.get(name, 1.0 / len(expert_outputs)) for name in expert_names]
            # Normalize weights
            weights = [w / sum(weights) for w in weights]

        # Combine expert outputs using a weighted average
        if len(expert_outputs) == 1:
            # Only one expert, use its output directly
            combined_output = expert_outputs[0]
        else:
            # Multiple experts, compute the weighted average
            weighted_outputs = [output * weight for output, weight in zip(expert_outputs, weights)]
            combined_output = Add()(weighted_outputs)

        # Create the MoE model
        moe_model = Model(inputs=[ts_inputs, feature_inputs], outputs=combined_output)

        # Compile the model
        if self.output_size == 1:
            # Binary classification
            moe_model.compile(
                optimizer=Adam(learning_rate=learning_rate),
                loss='binary_crossentropy',
                metrics=['accuracy']
            )
        elif self.output_size == 3:
            # Multi-class classification for BUY/HOLD/SELL
            moe_model.compile(
                optimizer=Adam(learning_rate=learning_rate),
                loss='categorical_crossentropy',
                metrics=['accuracy']
            )
        else:
            # Regression
            moe_model.compile(
                optimizer=Adam(learning_rate=learning_rate),
                loss='mse',
                metrics=['mae']
            )

        self.model = moe_model

        # Log model summary
        self.model.summary(print_fn=lambda x: logger.info(x))

        logger.info(f"Built MoE model with weights: {weights}")
        return self.model

    def train(self, X_ts, X_features, y, batch_size=32, epochs=100, validation_split=0.2,
              callbacks=None, class_weights=None):
        """
        Train the MoE model on the provided data.

        Args:
            X_ts (numpy.ndarray): Time series input features
            X_features (numpy.ndarray): Additional input features
            y (numpy.ndarray): Target labels
            batch_size (int): Batch size
            epochs (int): Number of epochs
            validation_split (float): Fraction of data to use for validation
            callbacks (list): List of Keras callbacks
            class_weights (dict): Class weights for imbalanced datasets

        Returns:
            History object containing training metrics
        """
        if self.model is None:
            logger.error("MoE model has not been built yet")
            return None

        # Default callbacks if none provided
        if callbacks is None:
            # Create a timestamp for model checkpoints
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

            callbacks = [
                EarlyStopping(
                    monitor='val_loss',
                    patience=10,
                    restore_best_weights=True
                ),
                ReduceLROnPlateau(
                    monitor='val_loss',
                    factor=0.5,
                    patience=5,
                    min_lr=1e-6
                ),
                ModelCheckpoint(
                    filepath=os.path.join(self.model_dir, f"moe_model_{timestamp}.h5"),
                    monitor='val_loss',
                    save_best_only=True
                )
            ]

        # One-hot encode y for multi-class if it is still a 1-D label vector
        if self.output_size == 3 and len(y.shape) == 1:
            y = tf.keras.utils.to_categorical(y, num_classes=3)

        # Train the model
        logger.info(f"Training MoE model with {len(X_ts)} samples, batch size {batch_size}, epochs {epochs}")
        self.history = self.model.fit(
            [X_ts, X_features], y,
            batch_size=batch_size,
            epochs=epochs,
            validation_split=validation_split,
            callbacks=callbacks,
            class_weight=class_weights,
            verbose=2
        )

        # Save the trained model
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        model_path = os.path.join(self.model_dir, f"moe_model_final_{timestamp}.h5")
        self.model.save(model_path)
        logger.info(f"Model saved to {model_path}")

        # Save training history
        history_path = os.path.join(self.model_dir, f"moe_model_history_{timestamp}.json")
        with open(history_path, 'w') as f:
            # Convert numpy values to Python native types for JSON serialization
            history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
            json.dump(history_dict, f, indent=2)

        return self.history

    def predict(self, X_ts, X_features=None):
        """
        Make predictions on new data.

        Args:
            X_ts (numpy.ndarray): Time series input features
            X_features (numpy.ndarray): Additional input features

        Returns:
            tuple: (y_pred, y_proba) where:
                y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
                y_proba is the class probability
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Ensure X_ts has the right shape
        if len(X_ts.shape) == 2:
            # Single sample, add batch dimension
            X_ts = np.expand_dims(X_ts, axis=0)

        # Ensure X_features has the right shape
        if X_features is None:
            # Create dummy features with zeros
            X_features = np.zeros((X_ts.shape[0], 64))  # Default size
        elif len(X_features.shape) == 1:
            # Single sample, add batch dimension
            X_features = np.expand_dims(X_features, axis=0)

        # Get predictions
        y_proba = self.model.predict([X_ts, X_features])

        # Process based on output type
        if self.output_size == 1:
            # Binary classification
            y_pred = (y_proba > 0.5).astype(int).flatten()
            return y_pred, y_proba.flatten()
        elif self.output_size == 3:
            # Multi-class classification
            y_pred = np.argmax(y_proba, axis=1)
            return y_pred, y_proba
        else:
            # Regression
            return y_proba, y_proba

    def save(self, filepath=None):
        """
        Save the model to disk.

        Args:
            filepath (str): Path to save the model

        Returns:
            str: Path where the model was saved
        """
        if self.model is None:
            raise ValueError("Model has not been built yet")

        if filepath is None:
            # Create a default filepath with timestamp
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            filepath = os.path.join(self.model_dir, f"moe_model_{timestamp}.h5")

        self.model.save(filepath)
        logger.info(f"Model saved to {filepath}")
        return filepath

    def load(self, filepath):
        """
        Load a saved model from disk.

        Args:
            filepath (str): Path to the saved model

        Returns:
            The loaded model
        """
        # Register custom layers
        custom_objects = {
            'TransformerBlock': TransformerBlock,
            'PositionalEncoding': PositionalEncoding
        }

        self.model = load_model(filepath, custom_objects=custom_objects)
        logger.info(f"Model loaded from {filepath}")
        return self.model
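
A sketch of wiring the ensemble (illustrative; the expert passed in is the compiled Keras model produced by the class above, keyed by the name build_model() expects):

_transformer = TransformerModel(ts_input_shape=(20, 5), feature_input_shape=64, output_size=3)
_transformer.build_model()

_moe = MixtureOfExpertsModel(output_size=3)
_moe.add_expert('transformer', _transformer.model)   # 'transformer' experts get both inputs
_moe.build_model(ts_input_shape=(20, 5), expert_weights={'transformer': 1.0})

With several experts, the user-provided weights are normalized into a convex blend before the outputs are summed, e.g. {'cnn': 2.0, 'transformer': 1.0} becomes [2/3, 1/3].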

# Example usage:
if __name__ == "__main__":
    # This would be a complete implementation in a real system
    print("Transformer and MoE models defined, but not implemented here.")