new nn wip

Dobromir Popov 2025-03-25 13:38:25 +02:00
parent 50eb50696b
commit 0042581275
18 changed files with 3358 additions and 294 deletions

.gitignore (vendored, 1 line added)

@@ -14,3 +14,4 @@ models/trading_agent_final.pt
models/trading_agent_final.pt.backup
*.pt
*.backup
logs/

.vscode/tasks.json (vendored, new file, 38 lines)

@@ -0,0 +1,38 @@
{
"version": "2.0.0",
"tasks": [
{
"label": "Start TensorBoard",
"type": "shell",
"command": "python",
"args": [
"-m",
"tensorboard.main",
"--logdir=NN/models/saved/logs",
"--port=6006",
"--host=localhost"
],
"isBackground": true,
"problemMatcher": {
"pattern": {
"regexp": "^.*$",
"file": 1,
"location": 2,
"message": 3
},
"background": {
"activeOnStart": true,
"beginsPattern": ".*TensorBoard.*",
"endsPattern": ".*TensorBoard.*"
}
},
"presentation": {
"reveal": "always",
"panel": "new"
},
"runOptions": {
"runOn": "folderOpen"
}
}
]
}
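
For reference, the task above simply runs TensorBoard with the arguments listed in the JSON, so the same server can be started manually from the repository root:
python -m tensorboard.main --logdir=NN/models/saved/logs --port=6006 --host=localhost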

Binary file not shown.

Binary file not shown.

NN/_notes.md (new file, 13 lines)

@@ -0,0 +1,13 @@
Great. realtime.py works. Now let's examine and continue with our 500M-parameter NN in an NN folder with separate modules. The first module will be an ~100M-parameter convolutional NN, the kind historically used for image recognition with great success because it detects features at multiple levels (a deep NN). Create the NN class and an integrated RL pipeline that uses historical data to retrospectively identify buy/sell opportunities and trains the module on them. Use the data from realtime.py (add an easy-to-use realtime data interface if the existing functions are not convenient enough).
Create a new main file in the NN folder for our new MoE model. We'll use one main NN module that orchestrates data flows. The CNN module should implement its training and inference pipelines internally, but the orchestrator will receive the realtime data and forward it; use a common interface (see the interface sketch after the example commands below). A later module will be a Transformer that takes as input both the raw data from the last hidden layers of the CNN (where high-level features are learned) and the CNN's output, and produces BUY/HOLD/SELL signals as well as key support/resistance trend lines.
# Train a CNN model
python -m NN.main --mode train --symbol BTC/USDT --timeframes 1h 4h --model-type cnn --epochs 100
# Make predictions with a trained model
python -m NN.main --mode predict --symbol BTC/USDT --timeframe 1h --model-type cnn
# Run real-time analysis
python -m NN.main --mode realtime --symbol BTC/USDT --timeframe 1h --inference-interval 60
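
A minimal sketch of the common interface mentioned in the note above, assuming each module (CNN, Transformer, MoE) exposes the same train/predict/feature-extraction surface to the orchestrator. The class and method names are illustrative, not the final API:

```python
from abc import ABC, abstractmethod
from typing import Tuple
import numpy as np

class TradingModule(ABC):
    """Illustrative common interface the orchestrator could program against."""

    @abstractmethod
    def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
        """Fit the module on windowed market data."""

    @abstractmethod
    def predict(self, X) -> Tuple[np.ndarray, np.ndarray]:
        """Return (signals, probabilities); signals use 0=BUY, 1=HOLD, 2=SELL."""

    @abstractmethod
    def extract_hidden_features(self, X) -> np.ndarray:
        """Expose last-hidden-layer features for downstream modules (e.g. the Transformer)."""

    @abstractmethod
    def save(self, filepath): ...

    @abstractmethod
    def load(self, filepath): ...
```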

NN/main.py (new file, 265 lines)

@@ -0,0 +1,265 @@
#!/usr/bin/env python3
"""
Neural Network Trading System Main Module
This module serves as the main entry point for the NN trading system,
coordinating data flow between different components and implementing
training and inference pipelines.
"""
import os
import sys
import logging
import argparse
from datetime import datetime
# Create logs directory before configuring the file handler, otherwise FileHandler fails on a fresh checkout
os.makedirs('logs', exist_ok=True)
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.StreamHandler(),
logging.FileHandler(os.path.join('logs', f'nn_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'))
]
)
logger = logging.getLogger('NN')
def parse_arguments():
"""Parse command line arguments"""
parser = argparse.ArgumentParser(description='Neural Network Trading System')
parser.add_argument('--mode', type=str, choices=['train', 'predict', 'realtime'], default='train',
help='Mode to run (train, predict, realtime)')
parser.add_argument('--symbol', type=str, default='BTC/USDT',
help='Trading pair symbol')
parser.add_argument('--timeframes', type=str, nargs='+', default=['1h', '4h'],
help='Timeframes to use')
parser.add_argument('--window-size', type=int, default=20,
help='Window size for input data')
parser.add_argument('--output-size', type=int, default=3,
help='Output size (1 for binary, 3 for BUY/HOLD/SELL)')
parser.add_argument('--batch-size', type=int, default=32,
help='Batch size for training')
parser.add_argument('--epochs', type=int, default=100,
help='Number of epochs for training')
parser.add_argument('--model-type', type=str, choices=['cnn', 'transformer', 'moe'], default='cnn',
help='Model type to use')
parser.add_argument('--framework', type=str, choices=['tensorflow', 'pytorch'], default='pytorch',
help='Deep learning framework to use')
return parser.parse_args()
def main():
"""Main entry point for the NN trading system"""
# Parse arguments
args = parse_arguments()
logger.info(f"Starting NN Trading System in {args.mode} mode")
logger.info(f"Configuration: Symbol={args.symbol}, Timeframes={args.timeframes}, "
f"Window Size={args.window_size}, Output Size={args.output_size}, "
f"Model Type={args.model_type}, Framework={args.framework}")
# Import the appropriate modules based on the framework
if args.framework == 'pytorch':
try:
import torch
logger.info(f"Using PyTorch {torch.__version__}")
# Import PyTorch-based modules
from NN.utils.data_interface import DataInterface
if args.model_type == 'cnn':
from NN.models.cnn_model_pytorch import CNNModelPyTorch as Model
elif args.model_type == 'transformer':
from NN.models.transformer_model_pytorch import TransformerModelPyTorchWrapper as Model
elif args.model_type == 'moe':
from NN.models.transformer_model_pytorch import MixtureOfExpertsModelPyTorch as Model
else:
logger.error(f"Unknown model type: {args.model_type}")
return
except ImportError as e:
logger.error(f"Failed to import PyTorch modules: {str(e)}")
logger.error("Please make sure PyTorch is installed or use the TensorFlow framework.")
return
elif args.framework == 'tensorflow':
try:
import tensorflow as tf
logger.info(f"Using TensorFlow {tf.__version__}")
# Import TensorFlow-based modules
from NN.utils.data_interface import DataInterface
if args.model_type == 'cnn':
from NN.models.cnn_model import CNNModel as Model
elif args.model_type == 'transformer':
from NN.models.transformer_model import TransformerModel as Model
elif args.model_type == 'moe':
from NN.models.transformer_model import MixtureOfExpertsModel as Model
else:
logger.error(f"Unknown model type: {args.model_type}")
return
except ImportError as e:
logger.error(f"Failed to import TensorFlow modules: {str(e)}")
logger.error("Please make sure TensorFlow is installed or use the PyTorch framework.")
return
else:
logger.error(f"Unknown framework: {args.framework}")
return
# Initialize data interface
try:
logger.info("Initializing data interface...")
data_interface = DataInterface(
symbol=args.symbol,
timeframes=args.timeframes,
window_size=args.window_size,
output_size=args.output_size
)
except Exception as e:
logger.error(f"Failed to initialize data interface: {str(e)}")
return
# Initialize model
try:
logger.info(f"Initializing {args.model_type.upper()} model...")
model = Model(
window_size=args.window_size,
num_features=data_interface.get_feature_count(),
output_size=args.output_size,
timeframes=args.timeframes
)
except Exception as e:
logger.error(f"Failed to initialize model: {str(e)}")
return
# Execute the requested mode
if args.mode == 'train':
train(data_interface, model, args)
elif args.mode == 'predict':
predict(data_interface, model, args)
elif args.mode == 'realtime':
realtime(data_interface, model, args)
else:
logger.error(f"Unknown mode: {args.mode}")
return
logger.info("Neural Network Trading System finished successfully")
def train(data_interface, model, args):
"""Train the model using the data interface"""
logger.info("Starting training mode...")
try:
# Prepare training data
logger.info("Preparing training data...")
X_train, y_train, X_val, y_val = data_interface.prepare_training_data()
# Train the model
logger.info("Training model...")
model.train(
X_train, y_train,
X_val, y_val,
batch_size=args.batch_size,
epochs=args.epochs
)
# Save the model
model_path = os.path.join(
'models',
f"{args.model_type}_{args.symbol.replace('/', '_')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
)
logger.info(f"Saving model to {model_path}...")
model.save(model_path)
# Evaluate the model
logger.info("Evaluating model...")
metrics = model.evaluate(X_val, y_val)
logger.info(f"Evaluation metrics: {metrics}")
except Exception as e:
logger.error(f"Error in training mode: {str(e)}")
return
def predict(data_interface, model, args):
"""Make predictions using the trained model"""
logger.info("Starting prediction mode...")
try:
# Load the latest model
model_dir = os.path.join('models')
model_files = [f for f in os.listdir(model_dir) if f.startswith(args.model_type)]
if not model_files:
logger.error(f"No saved model found for type {args.model_type}")
return
latest_model = sorted(model_files)[-1]
model_path = os.path.join(model_dir, latest_model)
logger.info(f"Loading model from {model_path}...")
model.load(model_path)
# Prepare prediction data
logger.info("Preparing prediction data...")
X_pred = data_interface.prepare_prediction_data()
# Make predictions
logger.info("Making predictions...")
predictions = model.predict(X_pred)
# Process and display predictions
logger.info("Processing predictions...")
data_interface.process_predictions(predictions)
except Exception as e:
logger.error(f"Error in prediction mode: {str(e)}")
return
def realtime(data_interface, model, args):
"""Run the model in real-time mode"""
logger.info("Starting real-time mode...")
try:
# Import realtime module
from NN.realtime import RealtimeAnalyzer
# Load the latest model
model_dir = os.path.join('models')
model_files = [f for f in os.listdir(model_dir) if f.startswith(args.model_type)]
if not model_files:
logger.error(f"No saved model found for type {args.model_type}")
return
latest_model = sorted(model_files)[-1]
model_path = os.path.join(model_dir, latest_model)
logger.info(f"Loading model from {model_path}...")
model.load(model_path)
# Initialize realtime analyzer
logger.info("Initializing real-time analyzer...")
realtime_analyzer = RealtimeAnalyzer(
data_interface=data_interface,
model=model,
symbol=args.symbol,
timeframes=args.timeframes
)
# Start real-time analysis
logger.info("Starting real-time analysis...")
realtime_analyzer.start()
except Exception as e:
logger.error(f"Error in real-time mode: {str(e)}")
return
if __name__ == "__main__":
main()
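
main.py imports DataInterface from NN.utils.data_interface, which is not part of this diff. Inferring from the calls above, that module needs roughly the surface below; this is a hypothetical stub of the expected contract, not the actual implementation:

```python
class DataInterface:
    """Hypothetical stub mirroring the methods main.py calls on NN.utils.data_interface.DataInterface."""

    def __init__(self, symbol, timeframes, window_size=20, output_size=3):
        self.symbol = symbol
        self.timeframes = timeframes
        self.window_size = window_size
        self.output_size = output_size

    def get_feature_count(self) -> int:
        # e.g. OHLCV per timeframe; the real implementation derives this from its data source
        return 5 * len(self.timeframes)

    def prepare_training_data(self):
        # Must return (X_train, y_train, X_val, y_val) with X shaped (samples, window_size, features)
        raise NotImplementedError

    def prepare_prediction_data(self):
        # Must return X_pred shaped (samples, window_size, features) for model.predict()
        raise NotImplementedError

    def process_predictions(self, predictions):
        # Consume model outputs (e.g. log or display BUY/HOLD/SELL signals)
        raise NotImplementedError
```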

NN/models/cnn_model.py (new file, 560 lines)

@@ -0,0 +1,560 @@
"""
Convolutional Neural Network for timeseries analysis
This module implements a deep CNN model for cryptocurrency price analysis.
The model uses multiple parallel convolutional pathways and LSTM layers
to detect patterns at different time scales.
"""
import os
import logging
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
Input, Conv1D, MaxPooling1D, Dense, Dropout, BatchNormalization,
LSTM, Bidirectional, Flatten, Concatenate, GlobalAveragePooling1D,
LeakyReLU, Attention
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.metrics import AUC
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
import datetime
import json
logger = logging.getLogger(__name__)
class CNNModel:
"""
Convolutional Neural Network for time series analysis.
This model uses a multi-pathway architecture with different filter sizes
to detect patterns at different time scales, combined with LSTM layers
for temporal dependencies.
"""
def __init__(self, input_shape=(20, 5), output_size=1, model_dir="NN/models/saved"):
"""
Initialize the CNN model.
Args:
input_shape (tuple): Shape of input data (sequence_length, features)
output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
model_dir (str): Directory to save trained models
"""
self.input_shape = input_shape
self.output_size = output_size
self.model_dir = model_dir
self.model = None
self.history = None
# Create model directory if it doesn't exist
os.makedirs(self.model_dir, exist_ok=True)
logger.info(f"Initialized CNN model with input shape {input_shape} and output size {output_size}")
def build_model(self, filters=(32, 64, 128), kernel_sizes=(3, 5, 7),
dropout_rate=0.3, learning_rate=0.001):
"""
Build the CNN model architecture.
Args:
filters (tuple): Number of filters for each convolutional pathway
kernel_sizes (tuple): Kernel sizes for each convolutional pathway
dropout_rate (float): Dropout rate for regularization
learning_rate (float): Learning rate for Adam optimizer
Returns:
The compiled model
"""
# Input layer
inputs = Input(shape=self.input_shape)
# Multiple parallel convolutional pathways with different kernel sizes
# to capture patterns at different time scales
conv_layers = []
for i, (filter_size, kernel_size) in enumerate(zip(filters, kernel_sizes)):
conv_path = Conv1D(
filters=filter_size,
kernel_size=kernel_size,
padding='same',
name=f'conv1d_{i+1}'
)(inputs)
conv_path = BatchNormalization()(conv_path)
conv_path = LeakyReLU(alpha=0.1)(conv_path)
conv_path = MaxPooling1D(pool_size=2, padding='same')(conv_path)
conv_path = Dropout(dropout_rate)(conv_path)
conv_layers.append(conv_path)
# Merge convolutional pathways
if len(conv_layers) > 1:
merged = Concatenate()(conv_layers)
else:
merged = conv_layers[0]
# Add another Conv1D layer after merging
x = Conv1D(filters=filters[-1], kernel_size=3, padding='same')(merged)
x = BatchNormalization()(x)
x = LeakyReLU(alpha=0.1)(x)
x = MaxPooling1D(pool_size=2, padding='same')(x)
x = Dropout(dropout_rate)(x)
# Bidirectional LSTM for temporal dependencies
x = Bidirectional(LSTM(128, return_sequences=True))(x)
x = Dropout(dropout_rate)(x)
# Attention mechanism to focus on important time steps
x = Bidirectional(LSTM(64, return_sequences=True))(x)
# Global average pooling to reduce parameters
x = GlobalAveragePooling1D()(x)
x = Dropout(dropout_rate)(x)
# Dense layers for final classification/regression
x = Dense(64, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(dropout_rate)(x)
# Output layer
if self.output_size == 1:
# Binary classification (up/down)
outputs = Dense(1, activation='sigmoid', name='output')(x)
loss = 'binary_crossentropy'
metrics = ['accuracy', AUC()]
elif self.output_size == 3:
# Multi-class classification (buy/hold/sell)
outputs = Dense(3, activation='softmax', name='output')(x)
loss = 'categorical_crossentropy'
metrics = ['accuracy']
else:
# Regression
outputs = Dense(self.output_size, activation='linear', name='output')(x)
loss = 'mse'
metrics = ['mae']
# Create and compile model
self.model = Model(inputs=inputs, outputs=outputs)
# Compile with Adam optimizer
self.model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss=loss,
metrics=metrics
)
# Log model summary
self.model.summary(print_fn=lambda x: logger.info(x))
return self.model
def train(self, X_train, y_train, batch_size=32, epochs=100, validation_split=0.2,
callbacks=None, class_weights=None):
"""
Train the CNN model on the provided data.
Args:
X_train (numpy.ndarray): Training features
y_train (numpy.ndarray): Training targets
batch_size (int): Batch size
epochs (int): Number of epochs
validation_split (float): Fraction of data to use for validation
callbacks (list): List of Keras callbacks
class_weights (dict): Class weights for imbalanced datasets
Returns:
History object containing training metrics
"""
if self.model is None:
self.build_model()
# Default callbacks if none provided
if callbacks is None:
# Create a timestamp for model checkpoints
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
callbacks = [
EarlyStopping(
monitor='val_loss',
patience=10,
restore_best_weights=True
),
ReduceLROnPlateau(
monitor='val_loss',
factor=0.5,
patience=5,
min_lr=1e-6
),
ModelCheckpoint(
filepath=os.path.join(self.model_dir, f"cnn_model_{timestamp}.h5"),
monitor='val_loss',
save_best_only=True
)
]
# Check if y_train needs to be one-hot encoded for multi-class
if self.output_size == 3 and len(y_train.shape) == 1:
y_train = tf.keras.utils.to_categorical(y_train, num_classes=3)
# Train the model
logger.info(f"Training CNN model with {len(X_train)} samples, batch size {batch_size}, epochs {epochs}")
self.history = self.model.fit(
X_train, y_train,
batch_size=batch_size,
epochs=epochs,
validation_split=validation_split,
callbacks=callbacks,
class_weight=class_weights,
verbose=2
)
# Save the trained model
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
model_path = os.path.join(self.model_dir, f"cnn_model_final_{timestamp}.h5")
self.model.save(model_path)
logger.info(f"Model saved to {model_path}")
# Save training history
history_path = os.path.join(self.model_dir, f"cnn_model_history_{timestamp}.json")
with open(history_path, 'w') as f:
# Convert numpy values to Python native types for JSON serialization
history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
json.dump(history_dict, f, indent=2)
return self.history
def evaluate(self, X_test, y_test, plot_results=False):
"""
Evaluate the model on test data.
Args:
X_test (numpy.ndarray): Test features
y_test (numpy.ndarray): Test targets
plot_results (bool): Whether to plot evaluation results
Returns:
dict: Evaluation metrics
"""
if self.model is None:
raise ValueError("Model has not been built or trained yet")
# Convert y_test to one-hot encoding for multi-class
y_test_original = y_test.copy()
if self.output_size == 3 and len(y_test.shape) == 1:
y_test = tf.keras.utils.to_categorical(y_test, num_classes=3)
# Evaluate model
logger.info(f"Evaluating CNN model on {len(X_test)} samples")
eval_results = self.model.evaluate(X_test, y_test, verbose=0)
metrics = {}
for metric, value in zip(self.model.metrics_names, eval_results):
metrics[metric] = value
logger.info(f"{metric}: {value:.4f}")
# Get predictions
y_pred_prob = self.model.predict(X_test)
# Different processing based on output type
if self.output_size == 1:
# Binary classification
y_pred = (y_pred_prob > 0.5).astype(int).flatten()
# Classification report
report = classification_report(y_test, y_pred)
logger.info(f"Classification Report:\n{report}")
# Confusion matrix
cm = confusion_matrix(y_test, y_pred)
logger.info(f"Confusion Matrix:\n{cm}")
# ROC curve and AUC
fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
roc_auc = auc(fpr, tpr)
metrics['auc'] = roc_auc
if plot_results:
self._plot_binary_results(y_test, y_pred, y_pred_prob, fpr, tpr, roc_auc)
elif self.output_size == 3:
# Multi-class classification
y_pred = np.argmax(y_pred_prob, axis=1)
# Classification report
report = classification_report(y_test_original, y_pred)
logger.info(f"Classification Report:\n{report}")
# Confusion matrix
cm = confusion_matrix(y_test_original, y_pred)
logger.info(f"Confusion Matrix:\n{cm}")
if plot_results:
self._plot_multiclass_results(y_test_original, y_pred, y_pred_prob)
return metrics
def predict(self, X):
"""
Make predictions on new data.
Args:
X (numpy.ndarray): Input features
Returns:
tuple: (y_pred, y_proba) where:
y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
y_proba is the class probability
"""
if self.model is None:
raise ValueError("Model has not been built or trained yet")
# Ensure X has the right shape
if len(X.shape) == 2:
# Single sample, add batch dimension
X = np.expand_dims(X, axis=0)
# Get predictions
y_proba = self.model.predict(X)
# Process based on output type
if self.output_size == 1:
# Binary classification
y_pred = (y_proba > 0.5).astype(int).flatten()
return y_pred, y_proba.flatten()
elif self.output_size == 3:
# Multi-class classification
y_pred = np.argmax(y_proba, axis=1)
return y_pred, y_proba
else:
# Regression
return y_proba, y_proba
def save(self, filepath=None):
"""
Save the model to disk.
Args:
filepath (str): Path to save the model
Returns:
str: Path where the model was saved
"""
if self.model is None:
raise ValueError("Model has not been built yet")
if filepath is None:
# Create a default filepath with timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
filepath = os.path.join(self.model_dir, f"cnn_model_{timestamp}.h5")
self.model.save(filepath)
logger.info(f"Model saved to {filepath}")
return filepath
def load(self, filepath):
"""
Load a saved model from disk.
Args:
filepath (str): Path to the saved model
Returns:
The loaded model
"""
self.model = load_model(filepath)
logger.info(f"Model loaded from {filepath}")
return self.model
def extract_hidden_features(self, X):
"""
Extract features from the last hidden layer of the CNN for transfer learning.
Args:
X (numpy.ndarray): Input data
Returns:
numpy.ndarray: Extracted features
"""
if self.model is None:
raise ValueError("Model has not been built or trained yet")
# Create a new model that outputs the features from the layer before the output
feature_layer_name = self.model.layers[-2].name
feature_extractor = Model(
inputs=self.model.input,
outputs=self.model.get_layer(feature_layer_name).output
)
# Extract features
features = feature_extractor.predict(X)
return features
def _plot_binary_results(self, y_true, y_pred, y_proba, fpr, tpr, roc_auc):
"""
Plot evaluation results for binary classification.
Args:
y_true (numpy.ndarray): True labels
y_pred (numpy.ndarray): Predicted labels
y_proba (numpy.ndarray): Prediction probabilities
fpr (numpy.ndarray): False positive rates for ROC curve
tpr (numpy.ndarray): True positive rates for ROC curve
roc_auc (float): Area under ROC curve
"""
plt.figure(figsize=(15, 5))
# Confusion Matrix
plt.subplot(1, 3, 1)
cm = confusion_matrix(y_true, y_pred)
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = [0, 1]
plt.xticks(tick_marks, ['0', '1'])
plt.yticks(tick_marks, ['0', '1'])
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
# Add text annotations to confusion matrix
thresh = cm.max() / 2.
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
plt.text(j, i, format(cm[i, j], 'd'),
horizontalalignment="center",
color="white" if cm[i, j] > thresh else "black")
# Histogram of prediction probabilities
plt.subplot(1, 3, 2)
plt.hist(y_proba[y_true == 0], alpha=0.5, label='Class 0')
plt.hist(y_proba[y_true == 1], alpha=0.5, label='Class 1')
plt.title('Prediction Probabilities')
plt.xlabel('Probability of Class 1')
plt.ylabel('Count')
plt.legend()
# ROC Curve
plt.subplot(1, 3, 3)
plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc:.3f})')
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.tight_layout()
# Save figure
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
fig_path = os.path.join(self.model_dir, f"cnn_evaluation_{timestamp}.png")
plt.savefig(fig_path)
plt.close()
logger.info(f"Evaluation plots saved to {fig_path}")
def _plot_multiclass_results(self, y_true, y_pred, y_proba):
"""
Plot evaluation results for multi-class classification.
Args:
y_true (numpy.ndarray): True labels
y_pred (numpy.ndarray): Predicted labels
y_proba (numpy.ndarray): Prediction probabilities
"""
plt.figure(figsize=(12, 5))
# Confusion Matrix
plt.subplot(1, 2, 1)
cm = confusion_matrix(y_true, y_pred)
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
classes = ['BUY', 'HOLD', 'SELL'] # Assumes classes are 0, 1, 2
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes)
plt.yticks(tick_marks, classes)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
# Add text annotations to confusion matrix
thresh = cm.max() / 2.
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
plt.text(j, i, format(cm[i, j], 'd'),
horizontalalignment="center",
color="white" if cm[i, j] > thresh else "black")
# Class probability distributions
plt.subplot(1, 2, 2)
for i, cls in enumerate(classes):
plt.hist(y_proba[y_true == i, i], alpha=0.5, label=f'Class {cls}')
plt.title('Class Probability Distributions')
plt.xlabel('Probability')
plt.ylabel('Count')
plt.legend()
plt.tight_layout()
# Save figure
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
fig_path = os.path.join(self.model_dir, f"cnn_multiclass_evaluation_{timestamp}.png")
plt.savefig(fig_path)
plt.close()
logger.info(f"Multiclass evaluation plots saved to {fig_path}")
def plot_training_history(self):
"""
Plot training history (loss and metrics).
Returns:
str: Path to the saved plot
"""
if self.history is None:
raise ValueError("Model has not been trained yet")
plt.figure(figsize=(12, 5))
# Plot loss
plt.subplot(1, 2, 1)
plt.plot(self.history.history['loss'], label='Training Loss')
if 'val_loss' in self.history.history:
plt.plot(self.history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
# Plot accuracy
plt.subplot(1, 2, 2)
if 'accuracy' in self.history.history:
plt.plot(self.history.history['accuracy'], label='Training Accuracy')
if 'val_accuracy' in self.history.history:
plt.plot(self.history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
elif 'mae' in self.history.history:
plt.plot(self.history.history['mae'], label='Training MAE')
if 'val_mae' in self.history.history:
plt.plot(self.history.history['val_mae'], label='Validation MAE')
plt.title('Model MAE')
plt.ylabel('MAE')
plt.xlabel('Epoch')
plt.legend()
plt.tight_layout()
# Save figure
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
fig_path = os.path.join(self.model_dir, f"cnn_training_history_{timestamp}.png")
plt.savefig(fig_path)
plt.close()
logger.info(f"Training history plot saved to {fig_path}")
return fig_path
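
A minimal usage sketch of the Keras CNNModel above, using random placeholder data purely to illustrate the call sequence (build, train, predict, feature extraction); shapes follow the default input_shape=(20, 5):

```python
import numpy as np
from NN.models.cnn_model import CNNModel

# Placeholder data: 256 windows of 20 timesteps x 5 features, 3-class labels (BUY/HOLD/SELL)
X = np.random.rand(256, 20, 5).astype("float32")
y = np.random.randint(0, 3, size=256)

model = CNNModel(input_shape=(20, 5), output_size=3)
model.build_model(filters=(32, 64, 128), kernel_sizes=(3, 5, 7), dropout_rate=0.3)
model.train(X, y, batch_size=32, epochs=5, validation_split=0.2)

signals, probabilities = model.predict(X[:8])   # class indices and per-class probabilities
hidden = model.extract_hidden_features(X[:8])   # features for a downstream Transformer/MoE module
```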

NN/models/cnn_model_pytorch.py (new file, 546 lines)

@@ -0,0 +1,546 @@
#!/usr/bin/env python3
"""
CNN Model - PyTorch Implementation
This module implements a CNN model using PyTorch for time series analysis.
The model consists of multiple convolutional pathways and LSTM layers.
"""
import os
import logging
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Configure logging
logger = logging.getLogger(__name__)
class CNNPyTorch(nn.Module):
"""PyTorch CNN model for time series analysis"""
def __init__(self, input_shape, output_size=3):
"""
Initialize the CNN model.
Args:
input_shape (tuple): Shape of input data (window_size, features)
output_size (int): Size of output (1 for regression, 3 for classification)
"""
super(CNNPyTorch, self).__init__()
window_size, num_features = input_shape
# Architecture parameters
filters = [32, 64, 128]
kernel_sizes = [3, 5, 7]
lstm_units = 100
dense_units = 64
dropout_rate = 0.3
# Create parallel convolutional pathways
self.conv_paths = nn.ModuleList()
for f, k in zip(filters, kernel_sizes):
path = nn.Sequential(
nn.Conv1d(num_features, f, kernel_size=k, padding='same'),
nn.ReLU(),
nn.BatchNorm1d(f),
nn.MaxPool1d(kernel_size=3, stride=1, padding=1),  # length-preserving pooling so the flattened LSTM output matches dense1
nn.Dropout(dropout_rate)
)
self.conv_paths.append(path)
# Calculate output size from conv paths
conv_output_size = sum(filters) * window_size
# LSTM layer
self.lstm = nn.LSTM(
input_size=sum(filters),
hidden_size=lstm_units,
batch_first=True,
bidirectional=True
)
# Dense layers
self.flatten = nn.Flatten()
self.dense1 = nn.Sequential(
nn.Linear(lstm_units * 2 * window_size, dense_units),
nn.ReLU(),
nn.BatchNorm1d(dense_units),
nn.Dropout(dropout_rate)
)
# Output layer
self.output = nn.Linear(dense_units, output_size)
# Activation based on output size
if output_size == 1:
self.activation = nn.Sigmoid() # Binary classification or regression
elif output_size > 1:
self.activation = nn.Softmax(dim=1) # Multi-class classification
else:
self.activation = nn.Identity() # No activation
def forward(self, x):
"""
Forward pass through the network.
Args:
x: Input tensor of shape [batch_size, window_size, features]
Returns:
Output tensor of shape [batch_size, output_size]
"""
batch_size, window_size, num_features = x.shape
# Transpose for conv1d: [batch, features, window]
x_t = x.transpose(1, 2)
# Process through parallel conv paths
conv_outputs = []
for path in self.conv_paths:
conv_outputs.append(path(x_t))
# Concatenate conv outputs
conv_concat = torch.cat(conv_outputs, dim=1)
# Transpose back for LSTM: [batch, window, features]
conv_concat = conv_concat.transpose(1, 2)
# LSTM processing
lstm_out, _ = self.lstm(conv_concat)
# Flatten
flattened = self.flatten(lstm_out)
# Dense processing
dense_out = self.dense1(flattened)
# Output
output = self.output(dense_out)
# Apply activation
return self.activation(output)
class CNNModelPyTorch:
"""
CNN model wrapper class for time series analysis using PyTorch.
This class provides methods for building, training, evaluating, and making
predictions with the CNN model.
"""
def __init__(self, window_size, num_features, output_size=3, timeframes=None):
"""
Initialize the CNN model.
Args:
window_size (int): Size of the input window
num_features (int): Number of features in the input data
output_size (int): Size of the output (1 for regression, 3 for classification)
timeframes (list): List of timeframes used (for logging)
"""
self.window_size = window_size
self.num_features = num_features
self.output_size = output_size
self.timeframes = timeframes or []
# Determine device (GPU or CPU)
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device: {self.device}")
# Initialize model
self.model = None
self.build_model()
# Initialize training history
self.history = {
'loss': [],
'val_loss': [],
'accuracy': [],
'val_accuracy': []
}
def build_model(self):
"""Build the CNN model architecture"""
logger.info(f"Building PyTorch CNN model with window_size={self.window_size}, "
f"num_features={self.num_features}, output_size={self.output_size}")
self.model = CNNPyTorch(
input_shape=(self.window_size, self.num_features),
output_size=self.output_size
).to(self.device)
# Initialize optimizer
self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
# Initialize loss function based on output size
if self.output_size == 1:
self.criterion = nn.BCELoss() # Binary classification
elif self.output_size > 1:
self.criterion = nn.CrossEntropyLoss() # Multi-class classification
else:
self.criterion = nn.MSELoss() # Regression
logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")
def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
"""
Train the CNN model.
Args:
X_train: Training input data
y_train: Training target data
X_val: Validation input data
y_val: Validation target data
batch_size: Batch size for training
epochs: Number of training epochs
Returns:
Training history
"""
logger.info(f"Training PyTorch CNN model with {len(X_train)} samples, "
f"batch_size={batch_size}, epochs={epochs}")
# Convert numpy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)
# Handle different output sizes for y_train
if self.output_size == 1:
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(self.device)
else:
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)
# Create DataLoader for training data
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Create DataLoader for validation data if provided
if X_val is not None and y_val is not None:
X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
if self.output_size == 1:
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
else:
y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
else:
val_loader = None
# Training loop
for epoch in range(epochs):
# Training phase
self.model.train()
running_loss = 0.0
correct = 0
total = 0
for inputs, targets in train_loader:
# Zero the parameter gradients
self.optimizer.zero_grad()
# Forward pass
outputs = self.model(inputs)
# Calculate loss
if self.output_size == 1:
loss = self.criterion(outputs, targets.unsqueeze(1))
else:
loss = self.criterion(outputs, targets)
# Backward pass and optimize
loss.backward()
self.optimizer.step()
# Statistics
running_loss += loss.item()
if self.output_size > 1:
_, predicted = torch.max(outputs, 1)
total += targets.size(0)
correct += (predicted == targets).sum().item()
epoch_loss = running_loss / len(train_loader)
epoch_acc = correct / total if total > 0 else 0
# Validation phase
if val_loader is not None:
val_loss, val_acc = self._validate(val_loader)
logger.info(f"Epoch {epoch+1}/{epochs} - "
f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")
# Update history
self.history['loss'].append(epoch_loss)
self.history['accuracy'].append(epoch_acc)
self.history['val_loss'].append(val_loss)
self.history['val_accuracy'].append(val_acc)
else:
logger.info(f"Epoch {epoch+1}/{epochs} - "
f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f}")
# Update history without validation
self.history['loss'].append(epoch_loss)
self.history['accuracy'].append(epoch_acc)
logger.info("Training completed")
return self.history
def _validate(self, val_loader):
"""Validate the model using the validation set"""
self.model.eval()
val_loss = 0.0
correct = 0
total = 0
with torch.no_grad():
for inputs, targets in val_loader:
# Forward pass
outputs = self.model(inputs)
# Calculate loss
if self.output_size == 1:
loss = self.criterion(outputs, targets.unsqueeze(1))
else:
loss = self.criterion(outputs, targets)
val_loss += loss.item()
# Calculate accuracy
if self.output_size > 1:
_, predicted = torch.max(outputs, 1)
total += targets.size(0)
correct += (predicted == targets).sum().item()
return val_loss / len(val_loader), correct / total if total > 0 else 0
def evaluate(self, X_test, y_test):
"""
Evaluate the model on test data.
Args:
X_test: Test input data
y_test: Test target data
Returns:
dict: Evaluation metrics
"""
logger.info(f"Evaluating model on {len(X_test)} samples")
# Convert to PyTorch tensors
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(self.device)
# Get predictions
self.model.eval()
with torch.no_grad():
y_pred = self.model(X_test_tensor)
if self.output_size > 1:
_, y_pred_class = torch.max(y_pred, 1)
y_pred_class = y_pred_class.cpu().numpy()
else:
y_pred_class = (y_pred.cpu().numpy() > 0.5).astype(int).flatten()
# Calculate metrics
if self.output_size > 1:
accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class, average='weighted')
recall = recall_score(y_test, y_pred_class, average='weighted')
f1 = f1_score(y_test, y_pred_class, average='weighted')
metrics = {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1_score': f1
}
else:
accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class)
recall = recall_score(y_test, y_pred_class)
f1 = f1_score(y_test, y_pred_class)
metrics = {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1_score': f1
}
logger.info(f"Evaluation metrics: {metrics}")
return metrics
def predict(self, X):
"""
Make predictions with the model.
Args:
X: Input data
Returns:
Predictions
"""
# Convert to PyTorch tensor
X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
# Get predictions
self.model.eval()
with torch.no_grad():
predictions = self.model(X_tensor)
if self.output_size > 1:
# Multi-class classification
probs = predictions.cpu().numpy()
_, class_preds = torch.max(predictions, 1)
class_preds = class_preds.cpu().numpy()
return class_preds, probs
else:
# Binary classification or regression
preds = predictions.cpu().numpy()
if self.output_size == 1:
# Binary classification
class_preds = (preds > 0.5).astype(int)
return class_preds.flatten(), preds.flatten()
else:
# Regression
return preds.flatten(), None
def save(self, filepath):
"""
Save the model to a file.
Args:
filepath: Path to save the model
"""
# Create directory if it doesn't exist
os.makedirs(os.path.dirname(filepath), exist_ok=True)
# Save the model state
model_state = {
'model_state_dict': self.model.state_dict(),
'optimizer_state_dict': self.optimizer.state_dict(),
'history': self.history,
'window_size': self.window_size,
'num_features': self.num_features,
'output_size': self.output_size,
'timeframes': self.timeframes
}
torch.save(model_state, f"{filepath}.pt")
logger.info(f"Model saved to {filepath}.pt")
def load(self, filepath):
"""
Load the model from a file.
Args:
filepath: Path to load the model from
"""
# Check if file exists
if not os.path.exists(f"{filepath}.pt"):
logger.error(f"Model file {filepath}.pt not found")
return False
# Load the model state
model_state = torch.load(f"{filepath}.pt", map_location=self.device)
# Update model parameters
self.window_size = model_state['window_size']
self.num_features = model_state['num_features']
self.output_size = model_state['output_size']
self.timeframes = model_state['timeframes']
# Rebuild the model
self.build_model()
# Load the model state
self.model.load_state_dict(model_state['model_state_dict'])
self.optimizer.load_state_dict(model_state['optimizer_state_dict'])
self.history = model_state['history']
logger.info(f"Model loaded from {filepath}.pt")
return True
def plot_training_history(self):
"""Plot the training history"""
if not self.history['loss']:
logger.warning("No training history to plot")
return
plt.figure(figsize=(12, 4))
# Plot loss
plt.subplot(1, 2, 1)
plt.plot(self.history['loss'], label='Training Loss')
if 'val_loss' in self.history and self.history['val_loss']:
plt.plot(self.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()
# Plot accuracy
plt.subplot(1, 2, 2)
plt.plot(self.history['accuracy'], label='Training Accuracy')
if 'val_accuracy' in self.history and self.history['val_accuracy']:
plt.plot(self.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend()
# Save the plot
os.makedirs('plots', exist_ok=True)
plt.savefig(os.path.join('plots', f"cnn_history_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"))
plt.close()
logger.info("Training history plots saved to plots directory")
def extract_hidden_features(self, X):
"""
Extract hidden features from the model.
Args:
X: Input data
Returns:
Hidden features
"""
# Convert to PyTorch tensor
X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
# Forward pass through the model up to the last hidden layer
self.model.eval()
with torch.no_grad():
# Get features before the output layer
x_t = X_tensor.transpose(1, 2)
# Process through parallel conv paths
conv_outputs = []
for path in self.model.conv_paths:
conv_outputs.append(path(x_t))
# Concatenate conv outputs
conv_concat = torch.cat(conv_outputs, dim=1)
# Transpose back for LSTM
conv_concat = conv_concat.transpose(1, 2)
# LSTM processing
lstm_out, _ = self.model.lstm(conv_concat)
# Flatten
flattened = self.model.flatten(lstm_out)
# Dense processing
hidden_features = self.model.dense1(flattened)
return hidden_features.cpu().numpy()
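
Equivalently, a usage sketch of the PyTorch wrapper above, again with random placeholder data; the constructor arguments mirror how NN/main.py instantiates the model:

```python
import numpy as np
from NN.models.cnn_model_pytorch import CNNModelPyTorch

# Placeholder data: 256 windows of 20 timesteps x 5 features, 3-class labels
X = np.random.rand(256, 20, 5).astype(np.float32)
y = np.random.randint(0, 3, size=256)

model = CNNModelPyTorch(window_size=20, num_features=5, output_size=3, timeframes=["1h", "4h"])
history = model.train(X[:200], y[:200], X_val=X[200:], y_val=y[200:], batch_size=32, epochs=5)

metrics = model.evaluate(X[200:], y[200:])      # accuracy / precision / recall / f1
signals, probabilities = model.predict(X[:8])
model.save("NN/models/saved/cnn_pytorch_demo")  # written as cnn_pytorch_demo.pt
```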

NN/models/transformer_model.py (modified)

@@ -1,45 +1,38 @@
"""
Transformer Neural Network for timeseries analysis
This module implements a Transformer model with attention mechanisms for cryptocurrency price analysis.
It also includes a Mixture of Experts model that combines predictions from multiple models.
"""
import os
import sys
import logging
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
Input, Dense, Dropout, LayerNormalization, MultiHeadAttention,
GlobalAveragePooling1D, Concatenate, Add, Activation, Flatten
Input, Dense, Dropout, BatchNormalization,
Concatenate, Layer, LayerNormalization, MultiHeadAttention,
Add, GlobalAveragePooling1D, Conv1D, Reshape
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import (
EarlyStopping, ModelCheckpoint, ReduceLROnPlateau,
TensorBoard, CSVLogger
)
import matplotlib.pyplot as plt
import logging
import time
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import datetime
import json
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.StreamHandler(),
logging.FileHandler('nn_transformer_model.log')
]
)
logger = logging.getLogger(__name__)
logger = logging.getLogger('transformer_model')
class TransformerBlock(tf.keras.layers.Layer):
class TransformerBlock(Layer):
"""
Transformer block with multi-head self-attention and feed-forward network
Transformer block implementation with multi-head attention and feed-forward networks.
"""
def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
super(TransformerBlock, self).__init__()
self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
self.ffn = tf.keras.Sequential([
Dense(ff_dim, activation="relu"),
Dense(embed_dim)
Dense(embed_dim),
])
self.layernorm1 = LayerNormalization(epsilon=1e-6)
self.layernorm2 = LayerNormalization(epsilon=1e-6)
@@ -47,33 +40,86 @@ class TransformerBlock(tf.keras.layers.Layer):
self.dropout2 = Dropout(rate)
def call(self, inputs, training=False):
# Normalization and attention
attn_output = self.att(inputs, inputs)
attn_output = self.dropout1(attn_output, training=training)
out1 = self.layernorm1(inputs + attn_output)
# Feed-forward network
ffn_output = self.ffn(out1)
ffn_output = self.dropout2(ffn_output, training=training)
# Skip connection and normalization
return self.layernorm2(out1 + ffn_output)
def get_config(self):
config = super().get_config()
config.update({
'att': self.att,
'ffn': self.ffn,
'layernorm1': self.layernorm1,
'layernorm2': self.layernorm2,
'dropout1': self.dropout1,
'dropout2': self.dropout2
})
return config
class PositionalEncoding(Layer):
"""
Positional encoding layer to add position information to input embeddings.
"""
def __init__(self, position, d_model):
super(PositionalEncoding, self).__init__()
self.position = position
self.d_model = d_model
self.pos_encoding = self.positional_encoding(position, d_model)
def get_angles(self, position, i, d_model):
angles = 1 / tf.pow(10000, (2 * (i // 2)) / tf.cast(d_model, tf.float32))
return position * angles
def positional_encoding(self, position, d_model):
angle_rads = self.get_angles(
position=tf.range(position, dtype=tf.float32)[:, tf.newaxis],
i=tf.range(d_model, dtype=tf.float32)[tf.newaxis, :],
d_model=d_model
)
# Apply sin to even indices in the array
sines = tf.math.sin(angle_rads[:, 0::2])
# Apply cos to odd indices in the array
cosines = tf.math.cos(angle_rads[:, 1::2])
pos_encoding = tf.concat([sines, cosines], axis=-1)
pos_encoding = pos_encoding[tf.newaxis, ...]
return tf.cast(pos_encoding, tf.float32)
def call(self, inputs):
return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]
def get_config(self):
config = super().get_config()
config.update({
'position': self.position,
'd_model': self.d_model,
'pos_encoding': self.pos_encoding
})
return config
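# Note: PositionalEncoding implements the standard sinusoidal scheme,
#   PE(pos, 2i)   = sin(pos / 10000^(2i/d_model))
#   PE(pos, 2i+1) = cos(pos / 10000^(2i/d_model))
# except that the sine and cosine halves are concatenated along the feature axis
# rather than interleaved, a common variant with the same effect of giving each
# time step a distinct, smoothly varying code that is added to the input projection.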
class TransformerModel:
"""
Transformer-based model for financial time series analysis.
This model processes both raw time series data and high-level features from the CNN model.
Transformer Neural Network for time series analysis.
This model uses self-attention mechanisms to capture relationships between
different time points in the input data.
"""
def __init__(self, ts_input_shape=(20, 5), feature_input_shape=128, output_size=3, model_dir='NN/models/saved'):
def __init__(self, ts_input_shape=(20, 5), feature_input_shape=64, output_size=1, model_dir="NN/models/saved"):
"""
Initialize the Transformer model
Initialize the Transformer model.
Args:
ts_input_shape: Shape of time series input data (sequence_length, features)
feature_input_shape: Shape of high-level feature input (from CNN)
output_size: Number of output classes or values
model_dir: Directory to save model files
ts_input_shape (tuple): Shape of time series input data (sequence_length, features)
feature_input_shape (int): Shape of additional feature input (e.g., from CNN)
output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
model_dir (str): Directory to save trained models
"""
self.ts_input_shape = ts_input_shape
self.feature_input_shape = feature_input_shape
@@ -83,341 +129,418 @@ class TransformerModel:
self.history = None
# Create model directory if it doesn't exist
os.makedirs(model_dir, exist_ok=True)
os.makedirs(self.model_dir, exist_ok=True)
logger.info(f"Initialized TransformerModel with time series input shape {ts_input_shape}, "
logger.info(f"Initialized Transformer model with TS input shape {ts_input_shape}, "
f"feature input shape {feature_input_shape}, and output size {output_size}")
def build_model(self, embed_dim=64, num_heads=4, ff_dim=128, num_transformer_blocks=2,
dropout_rate=0.2, learning_rate=0.001):
def build_model(self, embed_dim=32, num_heads=4, ff_dim=64, num_transformer_blocks=2, dropout_rate=0.1, learning_rate=0.001):
"""
Build the Transformer model architecture
Build the Transformer model architecture.
Args:
embed_dim: Embedding dimension for the transformer
num_heads: Number of attention heads
ff_dim: Hidden layer size in the feed-forward network
num_transformer_blocks: Number of transformer blocks to stack
dropout_rate: Dropout rate for regularization
learning_rate: Learning rate for the optimizer
embed_dim (int): Embedding dimension for transformer
num_heads (int): Number of attention heads
ff_dim (int): Hidden dimension of the feed forward network
num_transformer_blocks (int): Number of transformer blocks
dropout_rate (float): Dropout rate for regularization
learning_rate (float): Learning rate for Adam optimizer
Returns:
Compiled Keras model
The compiled model
"""
# Time series input (price and volume data)
ts_inputs = Input(shape=self.ts_input_shape, name='time_series_input')
# Time series input
ts_inputs = Input(shape=self.ts_input_shape, name="ts_input")
# High-level feature input (from CNN or other sources)
feature_inputs = Input(shape=(self.feature_input_shape,), name='feature_input')
# Additional feature input (e.g., from CNN)
feature_inputs = Input(shape=(self.feature_input_shape,), name="feature_input")
# Process time series with transformer blocks
x = ts_inputs
# Process time series with transformer
# First, project the input to the embedding dimension
x = Conv1D(embed_dim, 1, activation="relu")(ts_inputs)
# Add positional encoding
x = PositionalEncoding(self.ts_input_shape[0], embed_dim)(x)
# Add transformer blocks
for _ in range(num_transformer_blocks):
x = TransformerBlock(embed_dim, num_heads, ff_dim, dropout_rate)(x)
# Global pooling to get fixed-size representation
# Global pooling to get a single vector representation
x = GlobalAveragePooling1D()(x)
x = Dropout(dropout_rate)(x)
# Combine with the high-level features
# Combine with additional features
combined = Concatenate()([x, feature_inputs])
# Dense layers
dense1 = Dense(128, activation='relu')(combined)
dropout1 = Dropout(dropout_rate)(dense1)
dense2 = Dense(64, activation='relu')(dropout1)
dropout2 = Dropout(dropout_rate)(dense2)
# Dense layers for final classification/regression
x = Dense(64, activation="relu")(combined)
x = BatchNormalization()(x)
x = Dropout(dropout_rate)(x)
# Output layer
if self.output_size == 1:
# Binary classification
outputs = Dense(1, activation='sigmoid')(dropout2)
# Binary classification (up/down)
outputs = Dense(1, activation='sigmoid', name='output')(x)
loss = 'binary_crossentropy'
metrics = ['accuracy']
elif self.output_size == 3:
# For BUY/HOLD/SELL signals (3 classes)
outputs = Dense(3, activation='softmax')(dropout2)
else:
# Regression or multi-class classification
outputs = Dense(self.output_size, activation='linear')(dropout2)
# Create and compile the model
model = Model(inputs=[ts_inputs, feature_inputs], outputs=outputs)
if self.output_size == 1:
# Binary classification
model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss='binary_crossentropy',
metrics=['accuracy']
)
elif self.output_size == 3:
# Multi-class classification for BUY/HOLD/SELL
model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss='categorical_crossentropy',
metrics=['accuracy']
)
# Multi-class classification (buy/hold/sell)
outputs = Dense(3, activation='softmax', name='output')(x)
loss = 'categorical_crossentropy'
metrics = ['accuracy']
else:
# Regression
model.compile(
outputs = Dense(self.output_size, activation='linear', name='output')(x)
loss = 'mse'
metrics = ['mae']
# Create and compile model
self.model = Model(inputs=[ts_inputs, feature_inputs], outputs=outputs)
# Compile with Adam optimizer
self.model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss='mse',
metrics=['mae']
loss=loss,
metrics=metrics
)
self.model = model
logger.info(f"Model built with {model.count_params()} parameters")
model.summary(print_fn=logger.info)
# Log model summary
self.model.summary(print_fn=lambda x: logger.info(x))
return model
return self.model
def train(self, X_ts, X_features, y, batch_size=32, epochs=100, validation_split=0.2,
early_stopping_patience=20, reduce_lr_patience=10, verbose=1):
callbacks=None, class_weights=None):
"""
Train the Transformer model
Train the Transformer model on the provided data.
Args:
X_ts: Time series input data
X_features: High-level feature input data
y: Target values
batch_size: Batch size for training
epochs: Maximum number of epochs
validation_split: Fraction of data to use for validation
early_stopping_patience: Patience for early stopping
reduce_lr_patience: Patience for learning rate reduction
verbose: Verbosity level
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
y (numpy.ndarray): Target labels
batch_size (int): Batch size
epochs (int): Number of epochs
validation_split (float): Fraction of data to use for validation
callbacks (list): List of Keras callbacks
class_weights (dict): Class weights for imbalanced datasets
Returns:
Training history
History object containing training metrics
"""
if self.model is None:
logger.warning("Model not built yet, building with default parameters")
self.build_model()
# Create a timestamp for this training run
# Default callbacks if none provided
if callbacks is None:
# Create a timestamp for model checkpoints
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
model_name = f"transformer_model_{timestamp}"
# Set up callbacks
callbacks = [
# Early stopping to prevent overfitting
EarlyStopping(
monitor='val_loss',
patience=early_stopping_patience,
restore_best_weights=True,
verbose=1
patience=10,
restore_best_weights=True
),
# Reduce learning rate when training plateaus
ReduceLROnPlateau(
monitor='val_loss',
factor=0.5,
patience=reduce_lr_patience,
min_lr=1e-6,
verbose=1
patience=5,
min_lr=1e-6
),
# Save the best model
ModelCheckpoint(
filepath=os.path.join(self.model_dir, f"{model_name}_best.h5"),
filepath=os.path.join(self.model_dir, f"transformer_model_{timestamp}.h5"),
monitor='val_loss',
save_best_only=True,
verbose=1
),
# TensorBoard logging
TensorBoard(
log_dir=os.path.join(self.model_dir, 'logs', model_name),
histogram_freq=1
),
# CSV logging
CSVLogger(
filename=os.path.join(self.model_dir, f"{model_name}_training.csv"),
separator=',',
append=False
save_best_only=True
)
]
# Train the model
logger.info(f"Starting training with {len(X_ts)} samples, {epochs} max epochs")
# Check if y needs to be one-hot encoded for multi-class
if self.output_size == 3 and len(y.shape) == 1:
y = tf.keras.utils.to_categorical(y, num_classes=3)
start_time = time.time()
history = self.model.fit(
# Train the model
logger.info(f"Training Transformer model with {len(X_ts)} samples, batch size {batch_size}, epochs {epochs}")
self.history = self.model.fit(
[X_ts, X_features], y,
batch_size=batch_size,
epochs=epochs,
validation_split=validation_split,
callbacks=callbacks,
verbose=verbose
class_weight=class_weights,
verbose=2
)
# Calculate training time
training_time = time.time() - start_time
logger.info(f"Training completed in {training_time:.2f} seconds")
# Save the final model
self.model.save(os.path.join(self.model_dir, f"{model_name}_final.h5"))
logger.info(f"Model saved to {os.path.join(self.model_dir, model_name + '_final.h5')}")
# Save the trained model
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
model_path = os.path.join(self.model_dir, f"transformer_model_final_{timestamp}.h5")
self.model.save(model_path)
logger.info(f"Model saved to {model_path}")
# Save training history
hist_df = pd.DataFrame(history.history)
hist_df.to_csv(os.path.join(self.model_dir, f"{model_name}_history.csv"), index=False)
history_path = os.path.join(self.model_dir, f"transformer_model_history_{timestamp}.json")
with open(history_path, 'w') as f:
# Convert numpy values to Python native types for JSON serialization
history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
json.dump(history_dict, f, indent=2)
self.history = history
return history
return self.history
def predict(self, X_ts, X_features, threshold=0.5):
def evaluate(self, X_ts, X_features, y):
"""
Make predictions with the model
Evaluate the model on test data.
Args:
X_ts: Time series input data
X_features: High-level feature input data
threshold: Threshold for binary classification
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
y (numpy.ndarray): Target labels
Returns:
Predicted values or classes
dict: Evaluation metrics
"""
if self.model is None:
logger.error("Model not built or trained yet")
return None
raise ValueError("Model has not been built or trained yet")
# Get raw predictions
y_pred_proba = self.model.predict([X_ts, X_features])
# Convert y to one-hot encoding for multi-class
if self.output_size == 3 and len(y.shape) == 1:
y = tf.keras.utils.to_categorical(y, num_classes=3)
# Format predictions based on output type
# Evaluate model
logger.info(f"Evaluating Transformer model on {len(X_ts)} samples")
eval_results = self.model.evaluate([X_ts, X_features], y, verbose=0)
metrics = {}
for metric, value in zip(self.model.metrics_names, eval_results):
metrics[metric] = value
logger.info(f"{metric}: {value:.4f}")
return metrics
def predict(self, X_ts, X_features=None):
"""
Make predictions on new data.
Args:
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
Returns:
tuple: (y_pred, y_proba) where:
y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
y_proba is the class probability
"""
if self.model is None:
raise ValueError("Model has not been built or trained yet")
# Ensure X_ts has the right shape
if len(X_ts.shape) == 2:
# Single sample, add batch dimension
X_ts = np.expand_dims(X_ts, axis=0)
# Ensure X_features has the right shape
if X_features is None:
# Create dummy features with zeros
X_features = np.zeros((X_ts.shape[0], self.feature_input_shape))
elif len(X_features.shape) == 1:
# Single sample, add batch dimension
X_features = np.expand_dims(X_features, axis=0)
# Get predictions
y_proba = self.model.predict([X_ts, X_features])
# Process based on output type
if self.output_size == 1:
# Binary classification
y_pred = (y_pred_proba > threshold).astype(int).flatten()
return y_pred, y_pred_proba.flatten()
y_pred = (y_proba > 0.5).astype(int).flatten()
return y_pred, y_proba.flatten()
elif self.output_size == 3:
# Multi-class (BUY/HOLD/SELL)
y_pred = np.argmax(y_pred_proba, axis=1)
return y_pred, y_pred_proba
# Multi-class classification
y_pred = np.argmax(y_proba, axis=1)
return y_pred, y_proba
else:
# Regression
return y_pred_proba
return y_proba, y_proba
def save_model(self, filepath=None):
def save(self, filepath=None):
"""
Save the model to a file
Save the model to disk.
Args:
filepath: Path to save the model to
filepath (str): Path to save the model
Returns:
Path to the saved model
str: Path where the model was saved
"""
if self.model is None:
logger.error("Model not built or trained yet")
return None
raise ValueError("Model has not been built yet")
if filepath is None:
# Create a default filepath
# Create a default filepath with timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
filepath = os.path.join(self.model_dir, f"transformer_model_{timestamp}.h5")
self.model.save(filepath)
logger.info(f"Model saved to {filepath}")
return filepath
def load(self, filepath):
"""
Load a saved model from disk.
Args:
filepath (str): Path to the saved model
Returns:
The loaded model
"""
try:
# Register custom layers
custom_objects = {
'TransformerBlock': TransformerBlock,
'PositionalEncoding': PositionalEncoding
}
self.model = load_model(filepath, custom_objects=custom_objects)
logger.info(f"Model loaded from {filepath}")
return self.model
except Exception as e:
logger.error(f"Error loading model: {str(e)}")
return None
def plot_training_history(self):
"""
Plot training history (loss and metrics).
Returns:
str: Path to the saved plot
"""
if self.history is None:
raise ValueError("Model has not been trained yet")
plt.figure(figsize=(12, 5))
# Plot loss
plt.subplot(1, 2, 1)
plt.plot(self.history.history['loss'], label='Training Loss')
if 'val_loss' in self.history.history:
plt.plot(self.history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
# Plot accuracy
plt.subplot(1, 2, 2)
if 'accuracy' in self.history.history:
plt.plot(self.history.history['accuracy'], label='Training Accuracy')
if 'val_accuracy' in self.history.history:
plt.plot(self.history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
elif 'mae' in self.history.history:
plt.plot(self.history.history['mae'], label='Training MAE')
if 'val_mae' in self.history.history:
plt.plot(self.history.history['val_mae'], label='Validation MAE')
plt.title('Model MAE')
plt.ylabel('MAE')
plt.xlabel('Epoch')
plt.legend()
plt.tight_layout()
# Save figure
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
fig_path = os.path.join(self.model_dir, f"transformer_training_history_{timestamp}.png")
plt.savefig(fig_path)
plt.close()
logger.info(f"Training history plot saved to {fig_path}")
return fig_path
class MixtureOfExpertsModel:
"""
Mixture of Experts (MoE) model.
This model combines predictions from multiple expert models (such as CNN and Transformer)
using a weighted ensemble approach.
"""
def __init__(self, output_size=1, model_dir="NN/models/saved"):
"""
Initialize the MoE model.
Args:
output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
model_dir (str): Directory to save trained models
"""
self.output_size = output_size
self.model_dir = model_dir
self.model = None
self.history = None
self.experts = {}
# Create model directory if it doesn't exist
os.makedirs(self.model_dir, exist_ok=True)
logger.info(f"Initialized Mixture of Experts model with output size {output_size}")
def add_expert(self, name, model):
"""
Add an expert model to the MoE.
Args:
name (str): Name of the expert model
model: The expert model instance
Returns:
None
"""
self.experts[name] = model
logger.info(f"Added expert model '{name}' to MoE")
def build_model(self, ts_input_shape=(20, 5), expert_weights=None, learning_rate=0.001):
"""
Build the MoE model by combining expert models.
Args:
ts_input_shape (tuple): Shape of time series input data
expert_weights (dict): Weights for each expert model
learning_rate (float): Learning rate for Adam optimizer
Returns:
The compiled model
"""
if not self.experts:
logger.error("No expert models added to MoE")
return None
# Time series input
ts_inputs = Input(shape=ts_input_shape, name="ts_input")
# Additional feature input (from CNN)
feature_inputs = Input(shape=(64,), name="feature_input")  # Default size for features
# Process with each expert model
expert_outputs = []
expert_names = []
for name, expert in self.experts.items():
# Skip if the expert model is not valid
if expert is None:
logger.warning(f"Expert model '{name}' is None, skipping")
continue
try:
# Different handling based on model type
if name == 'cnn':
# CNN model takes only the time series input
expert_output = expert(ts_inputs)
expert_outputs.append(expert_output)
expert_names.append(name)
elif name == 'transformer':
# Transformer model takes both time series and feature inputs
expert_output = expert([ts_inputs, feature_inputs])
expert_outputs.append(expert_output)
expert_names.append(name)
else:
logger.warning(f"Unknown expert model type: {name}")
except Exception as e:
logger.error(f"Error adding expert '{name}': {str(e)}")
if not expert_outputs:
logger.error("No valid expert models found")
@@ -443,7 +566,7 @@ class MixtureOfExpertsModel:
combined_output = Add()(weighted_outputs)
# Create the MoE model
moe_model = Model(inputs=[ts_inputs, feature_inputs], outputs=combined_output)
# Compile the model
if self.output_size == 1:
@@ -469,83 +592,176 @@
)
self.model = moe_model
# Log model summary
self.model.summary(print_fn=lambda x: logger.info(x))
logger.info(f"Built MoE model with weights: {weights}")
return self.model
def train(self, X_ts, X_features, y, batch_size=32, epochs=100, validation_split=0.2,
callbacks=None, class_weights=None):
"""
Make predictions with the MoE model
Train the MoE model on the provided data.
Args:
X: Input data
threshold: Threshold for binary classification
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
y (numpy.ndarray): Target labels
batch_size (int): Batch size
epochs (int): Number of epochs
validation_split (float): Fraction of data to use for validation
callbacks (list): List of Keras callbacks
class_weights (dict): Class weights for imbalanced datasets
Returns:
Predicted values or classes
History object containing training metrics
"""
if self.model is None:
logger.error("MoE model not built yet")
logger.error("MoE model has not been built yet")
return None
# Get raw predictions
y_pred_proba = self.model.predict(X)
# Default callbacks if none provided
if callbacks is None:
# Create a timestamp for model checkpoints
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
# Format predictions based on output type
callbacks = [
EarlyStopping(
monitor='val_loss',
patience=10,
restore_best_weights=True
),
ReduceLROnPlateau(
monitor='val_loss',
factor=0.5,
patience=5,
min_lr=1e-6
),
ModelCheckpoint(
filepath=os.path.join(self.model_dir, f"moe_model_{timestamp}.h5"),
monitor='val_loss',
save_best_only=True
)
]
# Check if y needs to be one-hot encoded for multi-class
if self.output_size == 3 and len(y.shape) == 1:
y = tf.keras.utils.to_categorical(y, num_classes=3)
# Train the model
logger.info(f"Training MoE model with {len(X_ts)} samples, batch size {batch_size}, epochs {epochs}")
self.history = self.model.fit(
[X_ts, X_features], y,
batch_size=batch_size,
epochs=epochs,
validation_split=validation_split,
callbacks=callbacks,
class_weight=class_weights,
verbose=2
)
# Save the trained model
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
model_path = os.path.join(self.model_dir, f"moe_model_final_{timestamp}.h5")
self.model.save(model_path)
logger.info(f"Model saved to {model_path}")
# Save training history
history_path = os.path.join(self.model_dir, f"moe_model_history_{timestamp}.json")
with open(history_path, 'w') as f:
# Convert numpy values to Python native types for JSON serialization
history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
json.dump(history_dict, f, indent=2)
return self.history
def predict(self, X_ts, X_features=None):
"""
Make predictions on new data.
Args:
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
Returns:
tuple: (y_pred, y_proba) where:
y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
y_proba is the class probability
"""
if self.model is None:
raise ValueError("Model has not been built or trained yet")
# Ensure X_ts has the right shape
if len(X_ts.shape) == 2:
# Single sample, add batch dimension
X_ts = np.expand_dims(X_ts, axis=0)
# Ensure X_features has the right shape
if X_features is None:
# Create dummy features with zeros
X_features = np.zeros((X_ts.shape[0], 64)) # Default size
elif len(X_features.shape) == 1:
# Single sample, add batch dimension
X_features = np.expand_dims(X_features, axis=0)
# Get predictions
y_proba = self.model.predict([X_ts, X_features])
# Process based on output type
if self.output_size == 1:
# Binary classification
y_pred = (y_proba > 0.5).astype(int).flatten()
return y_pred, y_proba.flatten()
elif self.output_size == 3:
# Multi-class classification (BUY/HOLD/SELL)
y_pred = np.argmax(y_proba, axis=1)
return y_pred, y_proba
else:
# Regression
return y_proba, y_proba
def save(self, filepath=None):
"""
Save the model to disk.
Args:
filepath (str): Path to save the model
Returns:
str: Path where the model was saved
"""
if self.model is None:
raise ValueError("Model has not been built yet")
if filepath is None:
# Create a default filepath with timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
filepath = os.path.join(self.model_dir, f"moe_model_{timestamp}.h5")
self.model.save(filepath)
logger.info(f"Model saved to {filepath}")
return filepath
def load(self, filepath):
"""
Load a saved model from disk.
Args:
filepath (str): Path to the saved model
Returns:
The loaded model
"""
try:
# Register custom layers
custom_objects = {
'TransformerBlock': TransformerBlock,
'PositionalEncoding': PositionalEncoding
}
self.model = load_model(filepath, custom_objects=custom_objects)
logger.info(f"Model loaded from {filepath}")
return self.model
except Exception as e:
logger.error(f"Error loading MoE model: {str(e)}")
return None
# Example usage:
if __name__ == "__main__":
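    # Illustrative sketch only -- the body of this example block is not shown in the diff.
    # It exercises the Keras MixtureOfExpertsModel above with tiny stand-in experts; the
    # stand-in models, shapes, and random data below are assumptions, not the author's code.
    from tensorflow.keras.layers import Concatenate  # in case it is not already imported above

    # Stand-in "CNN" expert: maps a (20, 5) window straight to 3 class probabilities
    cnn_in = Input(shape=(20, 5))
    dummy_cnn = Model(cnn_in, Dense(3, activation='softmax')(Flatten()(cnn_in)))

    # Stand-in "transformer" expert: consumes the window plus a 64-dim feature vector
    ts_in = Input(shape=(20, 5))
    feat_in = Input(shape=(64,))
    merged = Concatenate()([Flatten()(ts_in), feat_in])
    dummy_transformer = Model([ts_in, feat_in], Dense(3, activation='softmax')(merged))

    moe = MixtureOfExpertsModel(output_size=3)
    moe.add_expert('cnn', dummy_cnn)
    moe.add_expert('transformer', dummy_transformer)
    moe.build_model(ts_input_shape=(20, 5))

    # Random data used only to show the expected shapes
    X_ts = np.random.rand(64, 20, 5)
    X_features = np.random.rand(64, 64)  # matches the 64-dim feature input in build_model
    y = np.random.randint(0, 3, size=64)

    moe.train(X_ts, X_features, y, batch_size=16, epochs=2)
    y_pred, y_proba = moe.predict(X_ts[:4], X_features[:4])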

View File

@@ -0,0 +1,653 @@
#!/usr/bin/env python3
"""
Transformer Model - PyTorch Implementation
This module implements a Transformer model using PyTorch for time series analysis.
The model consists of a Transformer encoder and a Mixture of Experts model.
"""
import os
import logging
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Configure logging
logger = logging.getLogger(__name__)
class TransformerBlock(nn.Module):
"""Transformer Block with self-attention mechanism"""
def __init__(self, input_dim, num_heads=4, ff_dim=64, dropout=0.1):
super(TransformerBlock, self).__init__()
self.attention = nn.MultiheadAttention(
embed_dim=input_dim,
num_heads=num_heads,
dropout=dropout,
batch_first=True
)
self.feed_forward = nn.Sequential(
nn.Linear(input_dim, ff_dim),
nn.ReLU(),
nn.Linear(ff_dim, input_dim)
)
self.layernorm1 = nn.LayerNorm(input_dim)
self.layernorm2 = nn.LayerNorm(input_dim)
self.dropout1 = nn.Dropout(dropout)
self.dropout2 = nn.Dropout(dropout)
def forward(self, x):
# Self-attention
attn_output, _ = self.attention(x, x, x)
x = x + self.dropout1(attn_output)
x = self.layernorm1(x)
# Feed forward
ff_output = self.feed_forward(x)
x = x + self.dropout2(ff_output)
x = self.layernorm2(x)
return x
class TransformerModelPyTorch(nn.Module):
"""PyTorch Transformer model for time series analysis"""
def __init__(self, input_shape, output_size=3, num_heads=4, ff_dim=64, num_transformer_blocks=2):
"""
Initialize the Transformer model.
Args:
input_shape (tuple): Shape of input data (window_size, features)
output_size (int): Size of output (1 for regression, 3 for classification)
num_heads (int): Number of attention heads
ff_dim (int): Feed forward dimension
num_transformer_blocks (int): Number of transformer blocks
"""
super(TransformerModelPyTorch, self).__init__()
window_size, num_features = input_shape
# Positional encoding
self.pos_encoding = nn.Parameter(
torch.zeros(1, window_size, num_features),
requires_grad=True
)
# Transformer blocks
self.transformer_blocks = nn.ModuleList([
TransformerBlock(
input_dim=num_features,
num_heads=num_heads,
ff_dim=ff_dim
) for _ in range(num_transformer_blocks)
])
# Global average pooling
self.global_avg_pool = nn.AdaptiveAvgPool1d(1)
# Dense layers
self.dense = nn.Sequential(
nn.Linear(num_features, 64),
nn.ReLU(),
nn.BatchNorm1d(64),
nn.Dropout(0.3),
nn.Linear(64, output_size)
)
# Activation based on output size
if output_size == 1:
self.activation = nn.Sigmoid() # Binary classification or regression
elif output_size > 1:
self.activation = nn.Softmax(dim=1) # Multi-class classification
else:
self.activation = nn.Identity() # No activation
def forward(self, x):
"""
Forward pass through the network.
Args:
x: Input tensor of shape [batch_size, window_size, features]
Returns:
Output tensor of shape [batch_size, output_size]
"""
# Add positional encoding
x = x + self.pos_encoding
# Apply transformer blocks
for transformer_block in self.transformer_blocks:
x = transformer_block(x)
# Global average pooling
x = x.transpose(1, 2) # [batch, features, window]
x = self.global_avg_pool(x) # [batch, features, 1]
x = x.squeeze(-1) # [batch, features]
# Dense layers
x = self.dense(x)
# Apply activation
return self.activation(x)
class TransformerModelPyTorchWrapper:
"""
Transformer model wrapper class for time series analysis using PyTorch.
This class provides methods for building, training, evaluating, and making
predictions with the Transformer model.
"""
def __init__(self, window_size, num_features, output_size=3, timeframes=None):
"""
Initialize the Transformer model.
Args:
window_size (int): Size of the input window
num_features (int): Number of features in the input data
output_size (int): Size of the output (1 for regression, 3 for classification)
timeframes (list): List of timeframes used (for logging)
"""
self.window_size = window_size
self.num_features = num_features
self.output_size = output_size
self.timeframes = timeframes or []
# Determine device (GPU or CPU)
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device: {self.device}")
# Initialize model
self.model = None
self.build_model()
# Initialize training history
self.history = {
'loss': [],
'val_loss': [],
'accuracy': [],
'val_accuracy': []
}
def build_model(self):
"""Build the Transformer model architecture"""
logger.info(f"Building PyTorch Transformer model with window_size={self.window_size}, "
f"num_features={self.num_features}, output_size={self.output_size}")
self.model = TransformerModelPyTorch(
input_shape=(self.window_size, self.num_features),
output_size=self.output_size
).to(self.device)
# Initialize optimizer
self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
# Initialize loss function based on output size
if self.output_size == 1:
self.criterion = nn.BCELoss() # Binary classification
elif self.output_size > 1:
self.criterion = nn.CrossEntropyLoss() # Multi-class classification
else:
self.criterion = nn.MSELoss() # Regression
logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")
def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
"""
Train the Transformer model.
Args:
X_train: Training input data
y_train: Training target data
X_val: Validation input data
y_val: Validation target data
batch_size: Batch size for training
epochs: Number of training epochs
Returns:
Training history
"""
logger.info(f"Training PyTorch Transformer model with {len(X_train)} samples, "
f"batch_size={batch_size}, epochs={epochs}")
# Convert numpy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)
# Handle different output sizes for y_train
if self.output_size == 1:
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(self.device)
else:
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)
# Create DataLoader for training data
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Create DataLoader for validation data if provided
if X_val is not None and y_val is not None:
X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
if self.output_size == 1:
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
else:
y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
else:
val_loader = None
# Training loop
for epoch in range(epochs):
# Training phase
self.model.train()
running_loss = 0.0
correct = 0
total = 0
for inputs, targets in train_loader:
# Zero the parameter gradients
self.optimizer.zero_grad()
# Forward pass
outputs = self.model(inputs)
# Calculate loss
if self.output_size == 1:
loss = self.criterion(outputs, targets.unsqueeze(1))
else:
loss = self.criterion(outputs, targets)
# Backward pass and optimize
loss.backward()
self.optimizer.step()
# Statistics
running_loss += loss.item()
if self.output_size > 1:
_, predicted = torch.max(outputs, 1)
total += targets.size(0)
correct += (predicted == targets).sum().item()
epoch_loss = running_loss / len(train_loader)
epoch_acc = correct / total if total > 0 else 0
# Validation phase
if val_loader is not None:
val_loss, val_acc = self._validate(val_loader)
logger.info(f"Epoch {epoch+1}/{epochs} - "
f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")
# Update history
self.history['loss'].append(epoch_loss)
self.history['accuracy'].append(epoch_acc)
self.history['val_loss'].append(val_loss)
self.history['val_accuracy'].append(val_acc)
else:
logger.info(f"Epoch {epoch+1}/{epochs} - "
f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f}")
# Update history without validation
self.history['loss'].append(epoch_loss)
self.history['accuracy'].append(epoch_acc)
logger.info("Training completed")
return self.history
def _validate(self, val_loader):
"""Validate the model using the validation set"""
self.model.eval()
val_loss = 0.0
correct = 0
total = 0
with torch.no_grad():
for inputs, targets in val_loader:
# Forward pass
outputs = self.model(inputs)
# Calculate loss
if self.output_size == 1:
loss = self.criterion(outputs, targets.unsqueeze(1))
else:
loss = self.criterion(outputs, targets)
val_loss += loss.item()
# Calculate accuracy
if self.output_size > 1:
_, predicted = torch.max(outputs, 1)
total += targets.size(0)
correct += (predicted == targets).sum().item()
return val_loss / len(val_loader), correct / total if total > 0 else 0
def evaluate(self, X_test, y_test):
"""
Evaluate the model on test data.
Args:
X_test: Test input data
y_test: Test target data
Returns:
dict: Evaluation metrics
"""
logger.info(f"Evaluating model on {len(X_test)} samples")
# Convert to PyTorch tensors
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(self.device)
# Get predictions
self.model.eval()
with torch.no_grad():
y_pred = self.model(X_test_tensor)
if self.output_size > 1:
_, y_pred_class = torch.max(y_pred, 1)
y_pred_class = y_pred_class.cpu().numpy()
else:
y_pred_class = (y_pred.cpu().numpy() > 0.5).astype(int).flatten()
# Calculate metrics
if self.output_size > 1:
accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class, average='weighted')
recall = recall_score(y_test, y_pred_class, average='weighted')
f1 = f1_score(y_test, y_pred_class, average='weighted')
metrics = {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1_score': f1
}
else:
accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class)
recall = recall_score(y_test, y_pred_class)
f1 = f1_score(y_test, y_pred_class)
metrics = {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1_score': f1
}
logger.info(f"Evaluation metrics: {metrics}")
return metrics
def predict(self, X):
"""
Make predictions with the model.
Args:
X: Input data
Returns:
Predictions
"""
# Convert to PyTorch tensor
X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
# Get predictions
self.model.eval()
with torch.no_grad():
predictions = self.model(X_tensor)
if self.output_size > 1:
# Multi-class classification
probs = predictions.cpu().numpy()
_, class_preds = torch.max(predictions, 1)
class_preds = class_preds.cpu().numpy()
return class_preds, probs
else:
# Binary classification or regression
preds = predictions.cpu().numpy()
if self.output_size == 1:
# Binary classification
class_preds = (preds > 0.5).astype(int)
return class_preds.flatten(), preds.flatten()
else:
# Regression
return preds.flatten(), None
def save(self, filepath):
"""
Save the model to a file.
Args:
filepath: Path to save the model
"""
# Create directory if it doesn't exist
os.makedirs(os.path.dirname(filepath), exist_ok=True)
# Save the model state
model_state = {
'model_state_dict': self.model.state_dict(),
'optimizer_state_dict': self.optimizer.state_dict(),
'history': self.history,
'window_size': self.window_size,
'num_features': self.num_features,
'output_size': self.output_size,
'timeframes': self.timeframes
}
torch.save(model_state, f"{filepath}.pt")
logger.info(f"Model saved to {filepath}.pt")
def load(self, filepath):
"""
Load the model from a file.
Args:
filepath: Path to load the model from
"""
# Check if file exists
if not os.path.exists(f"{filepath}.pt"):
logger.error(f"Model file {filepath}.pt not found")
return False
# Load the model state
model_state = torch.load(f"{filepath}.pt", map_location=self.device)
# Update model parameters
self.window_size = model_state['window_size']
self.num_features = model_state['num_features']
self.output_size = model_state['output_size']
self.timeframes = model_state['timeframes']
# Rebuild the model
self.build_model()
# Load the model state
self.model.load_state_dict(model_state['model_state_dict'])
self.optimizer.load_state_dict(model_state['optimizer_state_dict'])
self.history = model_state['history']
logger.info(f"Model loaded from {filepath}.pt")
return True
class MixtureOfExpertsModelPyTorch:
"""
Mixture of Experts model implementation using PyTorch.
This model combines predictions from multiple models (experts) using a
learned weighting scheme.
"""
def __init__(self, output_size=3, timeframes=None):
"""
Initialize the Mixture of Experts model.
Args:
output_size (int): Size of the output (1 for regression, 3 for classification)
timeframes (list): List of timeframes used (for logging)
"""
self.output_size = output_size
self.timeframes = timeframes or []
self.experts = {}
self.expert_weights = {}
# Determine device (GPU or CPU)
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device: {self.device}")
# Initialize model and training history
self.model = None
self.history = {
'loss': [],
'val_loss': [],
'accuracy': [],
'val_accuracy': []
}
def add_expert(self, name, model):
"""
Add an expert model.
Args:
name (str): Name of the expert
model: Expert model
"""
self.experts[name] = model
logger.info(f"Added expert: {name}")
def predict(self, X):
"""
Make predictions using all experts and combine them.
Args:
X: Input data
Returns:
Combined predictions
"""
if not self.experts:
logger.error("No experts added to the MoE model")
return None
# Get probability outputs from each expert (class labels cannot be meaningfully averaged)
expert_predictions = {}
for name, expert in self.experts.items():
_, proba = expert.predict(X)
expert_predictions[name] = proba
# Combine predictions based on weights
final_pred = None
for name, pred in expert_predictions.items():
weight = self.expert_weights.get(name, 1.0 / len(self.experts))
if final_pred is None:
final_pred = weight * pred
else:
final_pred += weight * pred
# For classification, convert to class indices
if self.output_size > 1:
# Get class with highest probability
class_pred = np.argmax(final_pred, axis=1)
return class_pred, final_pred
else:
# Binary classification
class_pred = (final_pred > 0.5).astype(int)
return class_pred, final_pred
def evaluate(self, X_test, y_test):
"""
Evaluate the model on test data.
Args:
X_test: Test input data
y_test: Test target data
Returns:
dict: Evaluation metrics
"""
logger.info(f"Evaluating MoE model on {len(X_test)} samples")
# Get predictions
y_pred_class, _ = self.predict(X_test)
# Calculate metrics
if self.output_size > 1:
accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class, average='weighted')
recall = recall_score(y_test, y_pred_class, average='weighted')
f1 = f1_score(y_test, y_pred_class, average='weighted')
metrics = {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1_score': f1
}
else:
accuracy = accuracy_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class)
recall = recall_score(y_test, y_pred_class)
f1 = f1_score(y_test, y_pred_class)
metrics = {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1_score': f1
}
logger.info(f"MoE evaluation metrics: {metrics}")
return metrics
def save(self, filepath):
"""
Save the model weights to a file.
Args:
filepath: Path to save the model
"""
# Create directory if it doesn't exist
os.makedirs(os.path.dirname(filepath), exist_ok=True)
# Save the model state
model_state = {
'expert_weights': self.expert_weights,
'output_size': self.output_size,
'timeframes': self.timeframes
}
torch.save(model_state, f"{filepath}_moe.pt")
logger.info(f"MoE model saved to {filepath}_moe.pt")
def load(self, filepath):
"""
Load the model from a file.
Args:
filepath: Path to load the model from
"""
# Check if file exists
if not os.path.exists(f"{filepath}_moe.pt"):
logger.error(f"MoE model file {filepath}_moe.pt not found")
return False
# Load the model state
model_state = torch.load(f"{filepath}_moe.pt", map_location=self.device)
# Update model parameters
self.expert_weights = model_state['expert_weights']
self.output_size = model_state['output_size']
self.timeframes = model_state['timeframes']
logger.info(f"MoE model loaded from {filepath}_moe.pt")
return True
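# Hedged usage sketch (added for illustration, not part of the original file): wiring the
# PyTorch wrapper and MoE classes above together. Shapes, sample data, and the save path
# are assumptions made for the example only.
if __name__ == "__main__":
    window_size, num_features = 20, 5
    X = np.random.rand(256, window_size, num_features).astype(np.float32)
    y = np.random.randint(0, 3, size=256)  # BUY/HOLD/SELL labels

    transformer = TransformerModelPyTorchWrapper(
        window_size=window_size,
        num_features=num_features,
        output_size=3,
        timeframes=['1h', '4h']
    )
    transformer.train(X[:200], y[:200], X_val=X[200:], y_val=y[200:], batch_size=32, epochs=5)
    print(transformer.evaluate(X[200:], y[200:]))

    moe = MixtureOfExpertsModelPyTorch(output_size=3, timeframes=['1h', '4h'])
    moe.add_expert('transformer', transformer)
    class_pred, proba = moe.predict(X[200:])
    moe.save('NN/models/saved/demo_pytorch')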

View File

@@ -1,13 +1,22 @@
# Main dependencies
numpy>=1.19.5
pandas>=1.3.0
matplotlib>=3.4.2
scikit-learn>=0.24.2

# PyTorch (primary framework)
torch
torchvision

# TensorFlow (optional)
# tensorflow>=2.5.0
# tensorflow-addons>=0.13.0

# Additional dependencies
plotly
h5py
tqdm
pyyaml
tensorboard
ccxt
requests

88
NN/start_tensorboard.py Normal file
View File

@@ -0,0 +1,88 @@
#!/usr/bin/env python
"""
Start TensorBoard for monitoring neural network training
"""
import os
import sys
import subprocess
import webbrowser
from time import sleep
def start_tensorboard(logdir="NN/models/saved/logs", port=6006, open_browser=True):
"""
Start TensorBoard in a subprocess
Args:
logdir: Directory containing TensorBoard logs
port: Port to run TensorBoard on
open_browser: Whether to open a browser automatically
"""
# Make sure the log directory exists
os.makedirs(logdir, exist_ok=True)
# Create command
cmd = [
sys.executable,
"-m",
"tensorboard.main",
f"--logdir={logdir}",
f"--port={port}",
"--bind_all"
]
print(f"Starting TensorBoard with logs from {logdir} on port {port}")
print(f"Command: {' '.join(cmd)}")
# Start TensorBoard in a subprocess
process = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
universal_newlines=True
)
# Wait for TensorBoard to start up
for line in process.stdout:
print(line.strip())
if "TensorBoard" in line and "http://" in line:
# TensorBoard is running, extract the URL
url = None
for part in line.split():
if part.startswith(("http://", "https://")):
url = part
break
# Open browser if requested and URL found
if open_browser and url:
print(f"Opening TensorBoard in browser: {url}")
webbrowser.open(url)
break
# Return the process for the caller to manage
return process
if __name__ == "__main__":
import argparse
# Parse command line arguments
parser = argparse.ArgumentParser(description="Start TensorBoard for NN training visualization")
parser.add_argument("--logdir", default="NN/models/saved/logs", help="Directory containing TensorBoard logs")
parser.add_argument("--port", type=int, default=6006, help="Port to run TensorBoard on")
parser.add_argument("--no-browser", action="store_true", help="Don't open browser automatically")
args = parser.parse_args()
# Start TensorBoard
process = start_tensorboard(args.logdir, args.port, not args.no_browser)
try:
# Keep the script running until Ctrl+C
print("TensorBoard is running. Press Ctrl+C to stop.")
while True:
sleep(1)
except KeyboardInterrupt:
print("Stopping TensorBoard...")
process.terminate()
process.wait()
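# Example invocation (assumed to be run from the repository root):
#   python NN/start_tensorboard.py --logdir NN/models/saved/logs --port 6006 --no-browser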

Binary file not shown.

Binary file not shown.

390
NN/utils/data_interface.py Normal file
View File

@@ -0,0 +1,390 @@
"""
Data Interface for Neural Network Trading System
This module provides functionality to fetch, process, and prepare data for the neural network models.
"""
import os
import logging
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import json
import pickle
from sklearn.preprocessing import MinMaxScaler
logger = logging.getLogger(__name__)
class DataInterface:
"""
Handles data collection, processing, and preparation for neural network models.
This class is responsible for:
1. Fetching historical data
2. Preprocessing data for neural network input
3. Generating training datasets
4. Handling real-time data integration
"""
def __init__(self, symbol="BTC/USDT", timeframes=None, data_dir="NN/data"):
"""
Initialize the data interface.
Args:
symbol (str): Trading pair symbol (e.g., "BTC/USDT")
timeframes (list): List of timeframes to use (e.g., ['1m', '5m', '1h', '4h', '1d'])
data_dir (str): Directory to store/load datasets
"""
self.symbol = symbol
self.timeframes = timeframes or ['1h', '4h', '1d']
self.data_dir = data_dir
self.scalers = {} # Store scalers for each timeframe
# Create data directory if it doesn't exist
os.makedirs(self.data_dir, exist_ok=True)
# Initialize empty dataframes for each timeframe
self.dataframes = {tf: None for tf in self.timeframes}
logger.info(f"DataInterface initialized for {symbol} with timeframes {timeframes}")
def get_historical_data(self, timeframe='1h', n_candles=1000, use_cache=True):
"""
Fetch historical price data for a given timeframe.
Args:
timeframe (str): Timeframe to fetch data for
n_candles (int): Number of candles to fetch
use_cache (bool): Whether to use cached data if available
Returns:
pd.DataFrame: DataFrame with OHLCV data
"""
cache_file = os.path.join(self.data_dir, f"{self.symbol.replace('/', '_')}_{timeframe}.csv")
# Check if cached data exists and is recent
if use_cache and os.path.exists(cache_file):
try:
df = pd.read_csv(cache_file, parse_dates=['timestamp'])
# If we have enough data and it's recent, use it
if len(df) >= n_candles:
logger.info(f"Using cached data for {self.symbol} {timeframe} ({len(df)} candles)")
self.dataframes[timeframe] = df
return df.tail(n_candles)
except Exception as e:
logger.error(f"Error reading cached data: {str(e)}")
# If we get here, we need to fetch data
# For now, we'll use a placeholder for fetching data from an exchange
try:
# In a real implementation, we would fetch data from an exchange or API here
# For this example, we'll create dummy data if we can't load from cache
logger.info(f"Fetching historical data for {self.symbol} {timeframe}")
# Placeholder for real data fetching
# In a real implementation, this would be replaced with API calls
self._fetch_data_from_exchange(timeframe, n_candles)
# Save to cache
if self.dataframes[timeframe] is not None:
self.dataframes[timeframe].to_csv(cache_file, index=False)
return self.dataframes[timeframe]
else:
# Create dummy data as fallback
logger.warning(f"Could not fetch data for {self.symbol} {timeframe}, using dummy data")
df = self._create_dummy_data(timeframe, n_candles)
self.dataframes[timeframe] = df
return df
except Exception as e:
logger.error(f"Error fetching data: {str(e)}")
return None
def _fetch_data_from_exchange(self, timeframe, n_candles):
"""
Placeholder method for fetching data from an exchange.
In a real implementation, this would connect to an exchange API.
"""
# This is a placeholder - in a real implementation this would make API calls
# to a cryptocurrency exchange to fetch OHLCV data
# For now, just generate dummy data
self.dataframes[timeframe] = self._create_dummy_data(timeframe, n_candles)
def _create_dummy_data(self, timeframe, n_candles):
"""
Create dummy OHLCV data for testing purposes.
Args:
timeframe (str): Timeframe to create data for
n_candles (int): Number of candles to create
Returns:
pd.DataFrame: DataFrame with dummy OHLCV data
"""
# Map timeframe to seconds
tf_seconds = {
'1m': 60,
'5m': 300,
'15m': 900,
'1h': 3600,
'4h': 14400,
'1d': 86400
}
seconds = tf_seconds.get(timeframe, 3600) # Default to 1h
# Create timestamps
end_time = datetime.now()
timestamps = [end_time - timedelta(seconds=seconds * i) for i in range(n_candles)]
timestamps.reverse() # Oldest first
# Generate random price data with realistic patterns
np.random.seed(42) # For reproducibility
# Start price
price = 50000 # For BTC/USDT
prices = []
volumes = []
for i in range(n_candles):
# Random walk with drift and volatility based on timeframe
drift = 0.0001 * (seconds / 3600)  # Larger drift for larger timeframes (scaled per hour so daily candles stay realistic)
volatility = 0.01 * np.sqrt(seconds / 3600) # Scale volatility by sqrt of time
# Daily/weekly patterns
if timeframe in ['1d', '4h']:
# Add some cyclical patterns
cycle = np.sin(i / 7 * np.pi) * 0.02 # Weekly cycle
else:
cycle = np.sin(i / 24 * np.pi) * 0.01 # Daily cycle
# Calculate price change with random walk + cycles
price_change = price * (drift + volatility * np.random.randn() + cycle)
price += price_change
# Generate OHLC from the price
open_price = price
high_price = price * (1 + abs(0.005 * np.random.randn()))
low_price = price * (1 - abs(0.005 * np.random.randn()))
close_price = price * (1 + 0.002 * np.random.randn())
# Ensure high >= open, close, low and low <= open, close
high_price = max(high_price, open_price, close_price)
low_price = min(low_price, open_price, close_price)
# Generate volume (higher for larger price movements)
volume = abs(price_change) * (10000 + 5000 * np.random.rand())
prices.append((open_price, high_price, low_price, close_price))
volumes.append(volume)
# Update price for next iteration
price = close_price
# Create DataFrame
df = pd.DataFrame(
[(t, o, h, l, c, v) for t, (o, h, l, c), v in zip(timestamps, prices, volumes)],
columns=['timestamp', 'open', 'high', 'low', 'close', 'volume']
)
return df
def prepare_nn_input(self, timeframes=None, n_candles=500, window_size=20):
"""
Prepare input data for neural network models.
Args:
timeframes (list): List of timeframes to use
n_candles (int): Number of candles to fetch for each timeframe
window_size (int): Size of the sliding window for feature creation
Returns:
tuple: (X, y, timestamps) where:
X is the input features array with shape (n_samples, window_size, n_features)
y is the target array with shape (n_samples,)
timestamps is an array of timestamps for each sample
"""
if timeframes is None:
timeframes = self.timeframes
# Get data for all requested timeframes
dfs = {}
for tf in timeframes:
df = self.get_historical_data(timeframe=tf, n_candles=n_candles)
if df is not None and not df.empty:
dfs[tf] = df
if not dfs:
logger.error("No data available for feature creation")
return None, None, None
# For simplicity, we'll use just one timeframe for now
# In a more complex implementation, we would merge multiple timeframes
primary_tf = timeframes[0]
if primary_tf not in dfs:
logger.error(f"Primary timeframe {primary_tf} not available")
return None, None, None
df = dfs[primary_tf]
# Create features
X, y, timestamps = self._create_features(df, window_size)
return X, y, timestamps
def _create_features(self, df, window_size):
"""
Create features from OHLCV data using a sliding window approach.
Args:
df (pd.DataFrame): DataFrame with OHLCV data
window_size (int): Size of the sliding window
Returns:
tuple: (X, y, timestamps) where:
X is the input features array
y is the target array
timestamps is an array of timestamps for each sample
"""
# Extract OHLCV columns
ohlcv = df[['open', 'high', 'low', 'close', 'volume']].values
# Scale the data
scaler = MinMaxScaler()
ohlcv_scaled = scaler.fit_transform(ohlcv)
# Store the scaler for later use
timeframe = next((tf for tf in self.timeframes if self.dataframes.get(tf) is not None and
self.dataframes[tf].equals(df)), 'unknown')
self.scalers[timeframe] = scaler
# Create sliding windows
X = []
y = []
timestamps = []
for i in range(len(ohlcv_scaled) - window_size):
# Input: window_size candles of OHLCV data
X.append(ohlcv_scaled[i:i+window_size])
# Target: binary classification - price goes up (1) or down (0)
# 1 if close price increases in the next candle, 0 otherwise
price_change = ohlcv[i+window_size, 3] - ohlcv[i+window_size-1, 3]
y.append(1 if price_change > 0 else 0)
# Store timestamp for reference
timestamps.append(df['timestamp'].iloc[i+window_size])
return np.array(X), np.array(y), np.array(timestamps)
def generate_training_dataset(self, timeframes=None, n_candles=1000, window_size=20):
"""
Generate and save a training dataset for neural network models.
Args:
timeframes (list): List of timeframes to use
n_candles (int): Number of candles to fetch for each timeframe
window_size (int): Size of the sliding window for feature creation
Returns:
dict: Dictionary of dataset file paths
"""
if timeframes is None:
timeframes = self.timeframes
# Prepare inputs
X, y, timestamps = self.prepare_nn_input(timeframes, n_candles, window_size)
if X is None or y is None:
logger.error("Failed to prepare input data for dataset")
return None
# Prepare output paths
timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
dataset_name = f"{self.symbol.replace('/', '_')}_{'_'.join(timeframes)}_{timestamp_str}"
X_path = os.path.join(self.data_dir, f"{dataset_name}_X.npy")
y_path = os.path.join(self.data_dir, f"{dataset_name}_y.npy")
timestamps_path = os.path.join(self.data_dir, f"{dataset_name}_timestamps.npy")
metadata_path = os.path.join(self.data_dir, f"{dataset_name}_metadata.json")
# Save arrays
np.save(X_path, X)
np.save(y_path, y)
np.save(timestamps_path, timestamps)
# Save metadata
metadata = {
'symbol': self.symbol,
'timeframes': timeframes,
'window_size': window_size,
'n_samples': len(X),
'feature_shape': X.shape[1:],
'created_at': datetime.now().isoformat(),
'dataset_name': dataset_name
}
with open(metadata_path, 'w') as f:
json.dump(metadata, f, indent=2)
# Save scalers
scaler_path = os.path.join(self.data_dir, f"{dataset_name}_scalers.pkl")
with open(scaler_path, 'wb') as f:
pickle.dump(self.scalers, f)
# Return dataset info
dataset_info = {
'X_path': X_path,
'y_path': y_path,
'timestamps_path': timestamps_path,
'metadata_path': metadata_path,
'scaler_path': scaler_path
}
logger.info(f"Dataset generated and saved: {dataset_name}")
return dataset_info
def prepare_realtime_input(self, timeframe='1h', n_candles=30, window_size=20):
"""
Prepare a single input sample from the most recent data for real-time inference.
Args:
timeframe (str): Timeframe to use
n_candles (int): Number of recent candles to fetch
window_size (int): Size of the sliding window
Returns:
tuple: (X, timestamp) where:
X is the input features array with shape (1, window_size, n_features)
timestamp is the timestamp of the most recent candle
"""
# Get recent data
df = self.get_historical_data(timeframe=timeframe, n_candles=n_candles, use_cache=False)
if df is None or len(df) < window_size:
logger.error(f"Not enough data for inference (need at least {window_size} candles)")
return None, None
# Extract features from the most recent window
ohlcv = df[['open', 'high', 'low', 'close', 'volume']].tail(window_size).values
# Scale the data
if timeframe in self.scalers:
# Use existing scaler
scaler = self.scalers[timeframe]
else:
# Create new scaler
scaler = MinMaxScaler()
# Fit on all available data
all_data = df[['open', 'high', 'low', 'close', 'volume']].values
scaler.fit(all_data)
self.scalers[timeframe] = scaler
ohlcv_scaled = scaler.transform(ohlcv)
# Reshape to (1, window_size, n_features)
X = np.array([ohlcv_scaled])
# Get timestamp of the most recent candle
timestamp = df['timestamp'].iloc[-1]
return X, timestamp
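# Hedged usage sketch (added for illustration, not part of the original file): the typical
# DataInterface flow -- build a windowed training set, persist it, then prepare one live sample.
if __name__ == "__main__":
    di = DataInterface(symbol="BTC/USDT", timeframes=['1h', '4h'])
    X, y, timestamps = di.prepare_nn_input(timeframes=['1h'], n_candles=500, window_size=20)
    print(X.shape, y.shape)  # e.g. (480, 20, 5) (480,)
    dataset_info = di.generate_training_dataset(timeframes=['1h'], n_candles=1000, window_size=20)
    X_live, ts = di.prepare_realtime_input(timeframe='1h', n_candles=30, window_size=20)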

232
run_nn.py Normal file
View File

@@ -0,0 +1,232 @@
#!/usr/bin/env python3
"""
Neural Network Training Runner Script
This script runs the Neural Network Trading System with the existing conda environment.
It detects which deep learning framework is available (TensorFlow or PyTorch) and
adjusts the implementation accordingly.
"""
import os
import sys
import subprocess
import argparse
import logging
from pathlib import Path
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('nn_runner')
def detect_framework():
"""Detect which deep learning framework is available in the environment"""
try:
import torch
torch_version = torch.__version__
logger.info(f"PyTorch {torch_version} detected")
return "pytorch", torch_version
except ImportError:
logger.warning("PyTorch not found in environment")
try:
import tensorflow as tf
tf_version = tf.__version__
logger.info(f"TensorFlow {tf_version} detected")
return "tensorflow", tf_version
except ImportError:
logger.error("Neither PyTorch nor TensorFlow is available in the environment")
return None, None
def check_dependencies():
"""Check for required dependencies and return if they are met"""
required_packages = ["numpy", "pandas", "matplotlib", "scikit-learn"]
missing_packages = []
for package in required_packages:
try:
__import__(package)
except ImportError:
missing_packages.append(package)
if missing_packages:
logger.warning(f"Missing required packages: {', '.join(missing_packages)}")
return False
return True
def create_run_command(args, framework):
"""Create the command to run the neural network based on the available framework"""
cmd = ["python", "-m", "NN.main"]
# Add mode
cmd.extend(["--mode", args.mode])
# Add symbol
if args.symbol:
cmd.extend(["--symbol", args.symbol])
# Add timeframes
if args.timeframes:
cmd.extend(["--timeframes"] + args.timeframes)
# Add window size
if args.window_size:
cmd.extend(["--window-size", str(args.window_size)])
# Add output size
if args.output_size:
cmd.extend(["--output-size", str(args.output_size)])
# Add batch size
if args.batch_size:
cmd.extend(["--batch-size", str(args.batch_size)])
# Add epochs
if args.epochs:
cmd.extend(["--epochs", str(args.epochs)])
# Add model type
if args.model_type:
cmd.extend(["--model-type", args.model_type])
# Add framework-specific flag
cmd.extend(["--framework", framework])
return cmd
def parse_arguments():
"""Parse command line arguments"""
parser = argparse.ArgumentParser(description='Neural Network Trading System Runner')
parser.add_argument('--mode', type=str, choices=['train', 'predict', 'realtime'], default='train',
help='Mode to run (train, predict, realtime)')
parser.add_argument('--symbol', type=str, default='BTC/USDT',
help='Trading pair symbol')
parser.add_argument('--timeframes', type=str, nargs='+', default=['1h', '4h'],
help='Timeframes to use')
parser.add_argument('--window-size', type=int, default=20,
help='Window size for input data')
parser.add_argument('--output-size', type=int, default=3,
help='Output size (1 for binary, 3 for BUY/HOLD/SELL)')
parser.add_argument('--batch-size', type=int, default=32,
help='Batch size for training')
parser.add_argument('--epochs', type=int, default=100,
help='Number of epochs for training')
parser.add_argument('--model-type', type=str, choices=['cnn', 'transformer', 'moe'], default='cnn',
help='Model type to use')
parser.add_argument('--conda-env', type=str, default='gpt-gpu',
help='Name of conda environment to use')
parser.add_argument('--no-conda', action='store_true',
help='Do not use conda environment activation')
parser.add_argument('--framework', type=str, choices=['tensorflow', 'pytorch'], default='pytorch',
help='Deep learning framework to use (default: pytorch)')
return parser.parse_args()
def main():
# Parse arguments
args = parse_arguments()
# Check if we should run with conda
if not args.no_conda and args.conda_env:
# Create conda activation command
if sys.platform == 'win32':
conda_cmd = f"conda activate {args.conda_env} && "
else:
conda_cmd = f"source activate {args.conda_env} && "
logger.info(f"Running with conda environment: {args.conda_env}")
# Create the run script
script_path = Path("run_nn_in_conda.bat" if sys.platform == 'win32' else "run_nn_in_conda.sh")
with open(script_path, 'w') as f:
if sys.platform == 'win32':
f.write("@echo off\n")
f.write(f"call conda activate {args.conda_env}\n")
f.write(f"python -m NN.main --mode {args.mode} --symbol {args.symbol}")
if args.timeframes:
f.write(f" --timeframes {' '.join(args.timeframes)}")
if args.window_size:
f.write(f" --window-size {args.window_size}")
if args.output_size:
f.write(f" --output-size {args.output_size}")
if args.batch_size:
f.write(f" --batch-size {args.batch_size}")
if args.epochs:
f.write(f" --epochs {args.epochs}")
if args.model_type:
f.write(f" --model-type {args.model_type}")
else:
f.write("#!/bin/bash\n")
f.write(f"source activate {args.conda_env}\n")
f.write(f"python -m NN.main --mode {args.mode} --symbol {args.symbol}")
if args.timeframes:
f.write(f" --timeframes {' '.join(args.timeframes)}")
if args.window_size:
f.write(f" --window-size {args.window_size}")
if args.output_size:
f.write(f" --output-size {args.output_size}")
if args.batch_size:
f.write(f" --batch-size {args.batch_size}")
if args.epochs:
f.write(f" --epochs {args.epochs}")
if args.model_type:
f.write(f" --model-type {args.model_type}")
# Make script executable on Unix
if sys.platform != 'win32':
os.chmod(script_path, 0o755)
# Run the script
logger.info(f"Created script: {script_path}")
logger.info("Run this script to execute the neural network with the conda environment")
if sys.platform == 'win32':
print("\nTo run the neural network, execute the following command:")
print(f" {script_path}")
else:
print("\nTo run the neural network, execute the following command:")
print(f" ./{script_path}")
else:
# Run directly without conda
# First detect available framework
framework, version = detect_framework()
if framework is None:
logger.error("Cannot run Neural Network - no deep learning framework available")
return
# Check dependencies
if not check_dependencies():
logger.error("Missing required dependencies - please install them first")
return
# Create command
cmd = create_run_command(args, framework)
# Run command
logger.info(f"Running command: {' '.join(cmd)}")
try:
subprocess.run(cmd, check=True)
except subprocess.CalledProcessError as e:
logger.error(f"Error running neural network: {str(e)}")
except Exception as e:
logger.error(f"Error: {str(e)}")
if __name__ == "__main__":
main()
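# Example invocations (assumptions about the local setup, shown for illustration only):
#   python run_nn.py --mode train --model-type cnn --symbol BTC/USDT --epochs 100 --conda-env gpt-gpu
#   python run_nn.py --mode realtime --model-type transformer --no-conda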

3
run_nn_in_conda.bat Normal file
View File

@@ -0,0 +1,3 @@
@echo off
call conda activate gpt-gpu
python -m NN.main --mode train --symbol BTC/USDT --timeframes 1h 4h --window-size 20 --output-size 3 --batch-size 32 --epochs 100 --model-type cnn --framework pytorch

50
run_pytorch_nn.bat Normal file
View File

@@ -0,0 +1,50 @@
@echo off
echo ============================================================
echo Neural Network Trading System - PyTorch Implementation
echo ============================================================
call conda activate gpt-gpu
REM Parse command-line arguments
set MODE=train
set MODEL_TYPE=cnn
set SYMBOL=BTC/USDT
set EPOCHS=100
:parse
if "%~1"=="" goto endparse
if /i "%~1"=="--mode" (
set MODE=%~2
shift
shift
goto parse
)
if /i "%~1"=="--model" (
set MODEL_TYPE=%~2
shift
shift
goto parse
)
if /i "%~1"=="--symbol" (
set SYMBOL=%~2
shift
shift
goto parse
)
if /i "%~1"=="--epochs" (
set EPOCHS=%~2
shift
shift
goto parse
)
shift
goto parse
:endparse
echo Running Neural Network in %MODE% mode with %MODEL_TYPE% model for %SYMBOL% for %EPOCHS% epochs
python -m NN.main --mode %MODE% --symbol %SYMBOL% --timeframes 1h 4h --window-size 20 --output-size 3 --batch-size 32 --epochs %EPOCHS% --model-type %MODEL_TYPE% --framework pytorch
echo ============================================================
echo Run completed.
echo ============================================================
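REM Illustrative invocation (not part of the original script); any of the parsed flags above can be overridden:
REM   run_pytorch_nn.bat --mode train --model transformer --symbol ETH/USDT --epochs 50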