new nn wip

parent 50eb50696b
commit 0042581275

.gitignore (vendored): 1 addition
@@ -14,3 +14,4 @@ models/trading_agent_final.pt
 models/trading_agent_final.pt.backup
 *.pt
 *.backup
+logs/
.vscode/tasks.json (vendored, new file): 38 additions

@@ -0,0 +1,38 @@
{
    "version": "2.0.0",
    "tasks": [
        {
            "label": "Start TensorBoard",
            "type": "shell",
            "command": "python",
            "args": [
                "-m",
                "tensorboard.main",
                "--logdir=NN/models/saved/logs",
                "--port=6006",
                "--host=localhost"
            ],
            "isBackground": true,
            "problemMatcher": {
                "pattern": {
                    "regexp": "^.*$",
                    "file": 1,
                    "location": 2,
                    "message": 3
                },
                "background": {
                    "activeOnStart": true,
                    "beginsPattern": ".*TensorBoard.*",
                    "endsPattern": ".*TensorBoard.*"
                }
            },
            "presentation": {
                "reveal": "always",
                "panel": "new"
            },
            "runOptions": {
                "runOn": "folderOpen"
            }
        }
    ]
}
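This task starts TensorBoard in the background when the folder is opened; it is equivalent to running python -m tensorboard.main --logdir=NN/models/saved/logs --port=6006 --host=localhost from the workspace root. The catch-all problemMatcher pattern and the begins/ends patterns only tell VS Code when the background process counts as started; they do not parse errors.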
NN/__pycache__/__init__.cpython-312.pyc (new binary file): not shown
NN/__pycache__/main.cpython-312.pyc (new binary file): not shown
NN/_notes.md (new file): 13 additions

@@ -0,0 +1,13 @@
Great, realtime.py works. Now let's examine and continue with our 500M-parameter NN in an NN folder with separate modules. The first module will be a roughly 100M-parameter convolutional NN, the deep architecture historically used with great success for image recognition because it detects features at multiple levels. Create the NN class and an integrated RL pipeline that uses historical data to retrospectively identify buy/sell opportunities and trains the module on them. Use the data from realtime.py (add an easy-to-use realtime data interface if the existing functions are not convenient enough).

Create a new main file in the NN folder for our new MoE model. We'll use one main NN module that orchestrates data flows. Our CNN module should implement its training and inference pipelines internally, but the orchestrator will fetch the realtime data and forward it; use a common interface. A later module will be a Transformer that takes as input raw data from the last hidden layers of the CNN, where high-level features are learned, together with the CNN output: BUY/HOLD/SELL signals and key support/resistance trend lines.


# Train a CNN model
python -m NN.main --mode train --symbol BTC/USDT --timeframes 1h 4h --model-type cnn --epochs 100

# Make predictions with a trained model
python -m NN.main --mode predict --symbol BTC/USDT --timeframe 1h --model-type cnn

# Run real-time analysis
python -m NN.main --mode realtime --symbol BTC/USDT --timeframe 1h --inference-interval 60
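The notes above describe a common interface shared by the orchestrator, the CNN module, and a later Transformer module. A minimal sketch of what that interface could look like, with names chosen here for illustration only (they are not part of this commit), is:

from abc import ABC, abstractmethod
import numpy as np

class TradingModule(ABC):
    """Common interface the orchestrator can call on any expert module (illustrative)."""

    @abstractmethod
    def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
        """Fit the module on windowed market data."""

    @abstractmethod
    def predict(self, X):
        """Return (actions, probabilities), with actions in {0: BUY, 1: HOLD, 2: SELL}."""

    def extract_hidden_features(self, X) -> np.ndarray:
        # Optional: expose last-hidden-layer features so a downstream Transformer
        # expert can consume them, as the notes describe.
        raise NotImplementedError

The concrete classes in this commit (CNNModel, CNNModelPyTorch) already expose train, predict, and extract_hidden_features with roughly this shape.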
NN/main.py (new file): 265 additions

@@ -0,0 +1,265 @@
#!/usr/bin/env python3
"""
Neural Network Trading System Main Module

This module serves as the main entry point for the NN trading system,
coordinating data flow between different components and implementing
training and inference pipelines.
"""

import os
import sys
import logging
import argparse
from datetime import datetime

# Create logs directory if it doesn't exist (must exist before the FileHandler below is created)
os.makedirs('logs', exist_ok=True)

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler(os.path.join('logs', f'nn_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'))
    ]
)

logger = logging.getLogger('NN')

def parse_arguments():
    """Parse command line arguments"""
    parser = argparse.ArgumentParser(description='Neural Network Trading System')

    parser.add_argument('--mode', type=str, choices=['train', 'predict', 'realtime'], default='train',
                        help='Mode to run (train, predict, realtime)')
    parser.add_argument('--symbol', type=str, default='BTC/USDT',
                        help='Trading pair symbol')
    parser.add_argument('--timeframes', type=str, nargs='+', default=['1h', '4h'],
                        help='Timeframes to use')
    parser.add_argument('--window-size', type=int, default=20,
                        help='Window size for input data')
    parser.add_argument('--output-size', type=int, default=3,
                        help='Output size (1 for binary, 3 for BUY/HOLD/SELL)')
    parser.add_argument('--batch-size', type=int, default=32,
                        help='Batch size for training')
    parser.add_argument('--epochs', type=int, default=100,
                        help='Number of epochs for training')
    parser.add_argument('--model-type', type=str, choices=['cnn', 'transformer', 'moe'], default='cnn',
                        help='Model type to use')
    parser.add_argument('--framework', type=str, choices=['tensorflow', 'pytorch'], default='pytorch',
                        help='Deep learning framework to use')

    return parser.parse_args()

def main():
    """Main entry point for the NN trading system"""
    # Parse arguments
    args = parse_arguments()

    logger.info(f"Starting NN Trading System in {args.mode} mode")
    logger.info(f"Configuration: Symbol={args.symbol}, Timeframes={args.timeframes}, "
                f"Window Size={args.window_size}, Output Size={args.output_size}, "
                f"Model Type={args.model_type}, Framework={args.framework}")

    # Import the appropriate modules based on the framework
    if args.framework == 'pytorch':
        try:
            import torch
            logger.info(f"Using PyTorch {torch.__version__}")

            # Import PyTorch-based modules
            from NN.utils.data_interface import DataInterface

            if args.model_type == 'cnn':
                from NN.models.cnn_model_pytorch import CNNModelPyTorch as Model
            elif args.model_type == 'transformer':
                from NN.models.transformer_model_pytorch import TransformerModelPyTorchWrapper as Model
            elif args.model_type == 'moe':
                from NN.models.transformer_model_pytorch import MixtureOfExpertsModelPyTorch as Model
            else:
                logger.error(f"Unknown model type: {args.model_type}")
                return

        except ImportError as e:
            logger.error(f"Failed to import PyTorch modules: {str(e)}")
            logger.error("Please make sure PyTorch is installed or use the TensorFlow framework.")
            return

    elif args.framework == 'tensorflow':
        try:
            import tensorflow as tf
            logger.info(f"Using TensorFlow {tf.__version__}")

            # Import TensorFlow-based modules
            from NN.utils.data_interface import DataInterface

            if args.model_type == 'cnn':
                from NN.models.cnn_model import CNNModel as Model
            elif args.model_type == 'transformer':
                from NN.models.transformer_model import TransformerModel as Model
            elif args.model_type == 'moe':
                from NN.models.transformer_model import MixtureOfExpertsModel as Model
            else:
                logger.error(f"Unknown model type: {args.model_type}")
                return

        except ImportError as e:
            logger.error(f"Failed to import TensorFlow modules: {str(e)}")
            logger.error("Please make sure TensorFlow is installed or use the PyTorch framework.")
            return
    else:
        logger.error(f"Unknown framework: {args.framework}")
        return

    # Initialize data interface
    try:
        logger.info("Initializing data interface...")
        data_interface = DataInterface(
            symbol=args.symbol,
            timeframes=args.timeframes,
            window_size=args.window_size,
            output_size=args.output_size
        )
    except Exception as e:
        logger.error(f"Failed to initialize data interface: {str(e)}")
        return

    # Initialize model
    try:
        logger.info(f"Initializing {args.model_type.upper()} model...")
        model = Model(
            window_size=args.window_size,
            num_features=data_interface.get_feature_count(),
            output_size=args.output_size,
            timeframes=args.timeframes
        )
    except Exception as e:
        logger.error(f"Failed to initialize model: {str(e)}")
        return

    # Execute the requested mode
    if args.mode == 'train':
        train(data_interface, model, args)
    elif args.mode == 'predict':
        predict(data_interface, model, args)
    elif args.mode == 'realtime':
        realtime(data_interface, model, args)
    else:
        logger.error(f"Unknown mode: {args.mode}")
        return

    logger.info("Neural Network Trading System finished successfully")

def train(data_interface, model, args):
    """Train the model using the data interface"""
    logger.info("Starting training mode...")

    try:
        # Prepare training data
        logger.info("Preparing training data...")
        X_train, y_train, X_val, y_val = data_interface.prepare_training_data()

        # Train the model
        logger.info("Training model...")
        model.train(
            X_train, y_train,
            X_val, y_val,
            batch_size=args.batch_size,
            epochs=args.epochs
        )

        # Save the model
        model_path = os.path.join(
            'models',
            f"{args.model_type}_{args.symbol.replace('/', '_')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        )
        logger.info(f"Saving model to {model_path}...")
        model.save(model_path)

        # Evaluate the model
        logger.info("Evaluating model...")
        metrics = model.evaluate(X_val, y_val)
        logger.info(f"Evaluation metrics: {metrics}")

    except Exception as e:
        logger.error(f"Error in training mode: {str(e)}")
        return

def predict(data_interface, model, args):
    """Make predictions using the trained model"""
    logger.info("Starting prediction mode...")

    try:
        # Load the latest model
        model_dir = os.path.join('models')
        model_files = [f for f in os.listdir(model_dir) if f.startswith(args.model_type)]

        if not model_files:
            logger.error(f"No saved model found for type {args.model_type}")
            return

        latest_model = sorted(model_files)[-1]
        model_path = os.path.join(model_dir, latest_model)

        logger.info(f"Loading model from {model_path}...")
        model.load(model_path)

        # Prepare prediction data
        logger.info("Preparing prediction data...")
        X_pred = data_interface.prepare_prediction_data()

        # Make predictions
        logger.info("Making predictions...")
        predictions = model.predict(X_pred)

        # Process and display predictions
        logger.info("Processing predictions...")
        data_interface.process_predictions(predictions)

    except Exception as e:
        logger.error(f"Error in prediction mode: {str(e)}")
        return

def realtime(data_interface, model, args):
    """Run the model in real-time mode"""
    logger.info("Starting real-time mode...")

    try:
        # Import realtime module
        from NN.realtime import RealtimeAnalyzer

        # Load the latest model
        model_dir = os.path.join('models')
        model_files = [f for f in os.listdir(model_dir) if f.startswith(args.model_type)]

        if not model_files:
            logger.error(f"No saved model found for type {args.model_type}")
            return

        latest_model = sorted(model_files)[-1]
        model_path = os.path.join(model_dir, latest_model)

        logger.info(f"Loading model from {model_path}...")
        model.load(model_path)

        # Initialize realtime analyzer
        logger.info("Initializing real-time analyzer...")
        realtime_analyzer = RealtimeAnalyzer(
            data_interface=data_interface,
            model=model,
            symbol=args.symbol,
            timeframes=args.timeframes
        )

        # Start real-time analysis
        logger.info("Starting real-time analysis...")
        realtime_analyzer.start()

    except Exception as e:
        logger.error(f"Error in real-time mode: {str(e)}")
        return

if __name__ == "__main__":
    main()
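NN/main.py relies on NN.utils.data_interface.DataInterface, which is not part of this diff. A stub with the surface that the call sites above assume might look like the following sketch; the constructor arguments, method names, and return shapes are inferred from those calls, not taken from the real implementation:

import numpy as np

class DataInterface:
    """Hypothetical stand-in matching the calls made from NN/main.py."""

    def __init__(self, symbol, timeframes, window_size=20, output_size=3):
        self.symbol = symbol
        self.timeframes = timeframes
        self.window_size = window_size
        self.output_size = output_size

    def get_feature_count(self) -> int:
        # e.g. OHLCV per timeframe; the real implementation decides this
        return 5 * len(self.timeframes)

    def prepare_training_data(self):
        # expected to return X_train, y_train, X_val, y_val as numpy arrays
        raise NotImplementedError

    def prepare_prediction_data(self) -> np.ndarray:
        # windowed features of shape (n_samples, window_size, n_features)
        raise NotImplementedError

    def process_predictions(self, predictions):
        # display or persist model output
        raise NotImplementedError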
NN/models/cnn_model.py (new file): 560 additions

@@ -0,0 +1,560 @@
"""
Convolutional Neural Network for timeseries analysis

This module implements a deep CNN model for cryptocurrency price analysis.
The model uses multiple parallel convolutional pathways and LSTM layers
to detect patterns at different time scales.
"""

import os
import logging
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
    Input, Conv1D, MaxPooling1D, Dense, Dropout, BatchNormalization,
    LSTM, Bidirectional, Flatten, Concatenate, GlobalAveragePooling1D,
    LeakyReLU, Attention
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.metrics import AUC
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
import datetime
import json

logger = logging.getLogger(__name__)

class CNNModel:
    """
    Convolutional Neural Network for time series analysis.

    This model uses a multi-pathway architecture with different filter sizes
    to detect patterns at different time scales, combined with LSTM layers
    for temporal dependencies.
    """

    def __init__(self, input_shape=(20, 5), output_size=1, model_dir="NN/models/saved"):
        """
        Initialize the CNN model.

        Args:
            input_shape (tuple): Shape of input data (sequence_length, features)
            output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
            model_dir (str): Directory to save trained models
        """
        self.input_shape = input_shape
        self.output_size = output_size
        self.model_dir = model_dir
        self.model = None
        self.history = None

        # Create model directory if it doesn't exist
        os.makedirs(self.model_dir, exist_ok=True)

        logger.info(f"Initialized CNN model with input shape {input_shape} and output size {output_size}")

    def build_model(self, filters=(32, 64, 128), kernel_sizes=(3, 5, 7),
                    dropout_rate=0.3, learning_rate=0.001):
        """
        Build the CNN model architecture.

        Args:
            filters (tuple): Number of filters for each convolutional pathway
            kernel_sizes (tuple): Kernel sizes for each convolutional pathway
            dropout_rate (float): Dropout rate for regularization
            learning_rate (float): Learning rate for Adam optimizer

        Returns:
            The compiled model
        """
        # Input layer
        inputs = Input(shape=self.input_shape)

        # Multiple parallel convolutional pathways with different kernel sizes
        # to capture patterns at different time scales
        conv_layers = []

        for i, (filter_size, kernel_size) in enumerate(zip(filters, kernel_sizes)):
            conv_path = Conv1D(
                filters=filter_size,
                kernel_size=kernel_size,
                padding='same',
                name=f'conv1d_{i+1}'
            )(inputs)
            conv_path = BatchNormalization()(conv_path)
            conv_path = LeakyReLU(alpha=0.1)(conv_path)
            conv_path = MaxPooling1D(pool_size=2, padding='same')(conv_path)
            conv_path = Dropout(dropout_rate)(conv_path)
            conv_layers.append(conv_path)

        # Merge convolutional pathways
        if len(conv_layers) > 1:
            merged = Concatenate()(conv_layers)
        else:
            merged = conv_layers[0]

        # Add another Conv1D layer after merging
        x = Conv1D(filters=filters[-1], kernel_size=3, padding='same')(merged)
        x = BatchNormalization()(x)
        x = LeakyReLU(alpha=0.1)(x)
        x = MaxPooling1D(pool_size=2, padding='same')(x)
        x = Dropout(dropout_rate)(x)

        # Bidirectional LSTM for temporal dependencies
        x = Bidirectional(LSTM(128, return_sequences=True))(x)
        x = Dropout(dropout_rate)(x)

        # Attention mechanism to focus on important time steps
        x = Bidirectional(LSTM(64, return_sequences=True))(x)

        # Global average pooling to reduce parameters
        x = GlobalAveragePooling1D()(x)
        x = Dropout(dropout_rate)(x)

        # Dense layers for final classification/regression
        x = Dense(64, activation='relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(dropout_rate)(x)

        # Output layer
        if self.output_size == 1:
            # Binary classification (up/down)
            outputs = Dense(1, activation='sigmoid', name='output')(x)
            loss = 'binary_crossentropy'
            metrics = ['accuracy', AUC()]
        elif self.output_size == 3:
            # Multi-class classification (buy/hold/sell)
            outputs = Dense(3, activation='softmax', name='output')(x)
            loss = 'categorical_crossentropy'
            metrics = ['accuracy']
        else:
            # Regression
            outputs = Dense(self.output_size, activation='linear', name='output')(x)
            loss = 'mse'
            metrics = ['mae']

        # Create and compile model
        self.model = Model(inputs=inputs, outputs=outputs)

        # Compile with Adam optimizer
        self.model.compile(
            optimizer=Adam(learning_rate=learning_rate),
            loss=loss,
            metrics=metrics
        )

        # Log model summary
        self.model.summary(print_fn=lambda x: logger.info(x))

        return self.model

    def train(self, X_train, y_train, batch_size=32, epochs=100, validation_split=0.2,
              callbacks=None, class_weights=None):
        """
        Train the CNN model on the provided data.

        Args:
            X_train (numpy.ndarray): Training features
            y_train (numpy.ndarray): Training targets
            batch_size (int): Batch size
            epochs (int): Number of epochs
            validation_split (float): Fraction of data to use for validation
            callbacks (list): List of Keras callbacks
            class_weights (dict): Class weights for imbalanced datasets

        Returns:
            History object containing training metrics
        """
        if self.model is None:
            self.build_model()

        # Default callbacks if none provided
        if callbacks is None:
            # Create a timestamp for model checkpoints
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

            callbacks = [
                EarlyStopping(
                    monitor='val_loss',
                    patience=10,
                    restore_best_weights=True
                ),
                ReduceLROnPlateau(
                    monitor='val_loss',
                    factor=0.5,
                    patience=5,
                    min_lr=1e-6
                ),
                ModelCheckpoint(
                    filepath=os.path.join(self.model_dir, f"cnn_model_{timestamp}.h5"),
                    monitor='val_loss',
                    save_best_only=True
                )
            ]

        # Check if y_train needs to be one-hot encoded for multi-class
        if self.output_size == 3 and len(y_train.shape) == 1:
            y_train = tf.keras.utils.to_categorical(y_train, num_classes=3)

        # Train the model
        logger.info(f"Training CNN model with {len(X_train)} samples, batch size {batch_size}, epochs {epochs}")
        self.history = self.model.fit(
            X_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_split=validation_split,
            callbacks=callbacks,
            class_weight=class_weights,
            verbose=2
        )

        # Save the trained model
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        model_path = os.path.join(self.model_dir, f"cnn_model_final_{timestamp}.h5")
        self.model.save(model_path)
        logger.info(f"Model saved to {model_path}")

        # Save training history
        history_path = os.path.join(self.model_dir, f"cnn_model_history_{timestamp}.json")
        with open(history_path, 'w') as f:
            # Convert numpy values to Python native types for JSON serialization
            history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
            json.dump(history_dict, f, indent=2)

        return self.history

    def evaluate(self, X_test, y_test, plot_results=False):
        """
        Evaluate the model on test data.

        Args:
            X_test (numpy.ndarray): Test features
            y_test (numpy.ndarray): Test targets
            plot_results (bool): Whether to plot evaluation results

        Returns:
            dict: Evaluation metrics
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Convert y_test to one-hot encoding for multi-class
        y_test_original = y_test.copy()
        if self.output_size == 3 and len(y_test.shape) == 1:
            y_test = tf.keras.utils.to_categorical(y_test, num_classes=3)

        # Evaluate model
        logger.info(f"Evaluating CNN model on {len(X_test)} samples")
        eval_results = self.model.evaluate(X_test, y_test, verbose=0)

        metrics = {}
        for metric, value in zip(self.model.metrics_names, eval_results):
            metrics[metric] = value
            logger.info(f"{metric}: {value:.4f}")

        # Get predictions
        y_pred_prob = self.model.predict(X_test)

        # Different processing based on output type
        if self.output_size == 1:
            # Binary classification
            y_pred = (y_pred_prob > 0.5).astype(int).flatten()

            # Classification report
            report = classification_report(y_test, y_pred)
            logger.info(f"Classification Report:\n{report}")

            # Confusion matrix
            cm = confusion_matrix(y_test, y_pred)
            logger.info(f"Confusion Matrix:\n{cm}")

            # ROC curve and AUC
            fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
            roc_auc = auc(fpr, tpr)
            metrics['auc'] = roc_auc

            if plot_results:
                self._plot_binary_results(y_test, y_pred, y_pred_prob, fpr, tpr, roc_auc)

        elif self.output_size == 3:
            # Multi-class classification
            y_pred = np.argmax(y_pred_prob, axis=1)

            # Classification report
            report = classification_report(y_test_original, y_pred)
            logger.info(f"Classification Report:\n{report}")

            # Confusion matrix
            cm = confusion_matrix(y_test_original, y_pred)
            logger.info(f"Confusion Matrix:\n{cm}")

            if plot_results:
                self._plot_multiclass_results(y_test_original, y_pred, y_pred_prob)

        return metrics

    def predict(self, X):
        """
        Make predictions on new data.

        Args:
            X (numpy.ndarray): Input features

        Returns:
            tuple: (y_pred, y_proba) where:
                y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
                y_proba is the class probability
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Ensure X has the right shape
        if len(X.shape) == 2:
            # Single sample, add batch dimension
            X = np.expand_dims(X, axis=0)

        # Get predictions
        y_proba = self.model.predict(X)

        # Process based on output type
        if self.output_size == 1:
            # Binary classification
            y_pred = (y_proba > 0.5).astype(int).flatten()
            return y_pred, y_proba.flatten()
        elif self.output_size == 3:
            # Multi-class classification
            y_pred = np.argmax(y_proba, axis=1)
            return y_pred, y_proba
        else:
            # Regression
            return y_proba, y_proba

    def save(self, filepath=None):
        """
        Save the model to disk.

        Args:
            filepath (str): Path to save the model

        Returns:
            str: Path where the model was saved
        """
        if self.model is None:
            raise ValueError("Model has not been built yet")

        if filepath is None:
            # Create a default filepath with timestamp
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            filepath = os.path.join(self.model_dir, f"cnn_model_{timestamp}.h5")

        self.model.save(filepath)
        logger.info(f"Model saved to {filepath}")
        return filepath

    def load(self, filepath):
        """
        Load a saved model from disk.

        Args:
            filepath (str): Path to the saved model

        Returns:
            The loaded model
        """
        self.model = load_model(filepath)
        logger.info(f"Model loaded from {filepath}")
        return self.model

    def extract_hidden_features(self, X):
        """
        Extract features from the last hidden layer of the CNN for transfer learning.

        Args:
            X (numpy.ndarray): Input data

        Returns:
            numpy.ndarray: Extracted features
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Create a new model that outputs the features from the layer before the output
        feature_layer_name = self.model.layers[-2].name
        feature_extractor = Model(
            inputs=self.model.input,
            outputs=self.model.get_layer(feature_layer_name).output
        )

        # Extract features
        features = feature_extractor.predict(X)

        return features

    def _plot_binary_results(self, y_true, y_pred, y_proba, fpr, tpr, roc_auc):
        """
        Plot evaluation results for binary classification.

        Args:
            y_true (numpy.ndarray): True labels
            y_pred (numpy.ndarray): Predicted labels
            y_proba (numpy.ndarray): Prediction probabilities
            fpr (numpy.ndarray): False positive rates for ROC curve
            tpr (numpy.ndarray): True positive rates for ROC curve
            roc_auc (float): Area under ROC curve
        """
        plt.figure(figsize=(15, 5))

        # Confusion Matrix
        plt.subplot(1, 3, 1)
        cm = confusion_matrix(y_true, y_pred)
        plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
        plt.title('Confusion Matrix')
        plt.colorbar()
        tick_marks = [0, 1]
        plt.xticks(tick_marks, ['0', '1'])
        plt.yticks(tick_marks, ['0', '1'])
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')

        # Add text annotations to confusion matrix
        thresh = cm.max() / 2.
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                plt.text(j, i, format(cm[i, j], 'd'),
                         horizontalalignment="center",
                         color="white" if cm[i, j] > thresh else "black")

        # Histogram of prediction probabilities
        plt.subplot(1, 3, 2)
        plt.hist(y_proba[y_true == 0], alpha=0.5, label='Class 0')
        plt.hist(y_proba[y_true == 1], alpha=0.5, label='Class 1')
        plt.title('Prediction Probabilities')
        plt.xlabel('Probability of Class 1')
        plt.ylabel('Count')
        plt.legend()

        # ROC Curve
        plt.subplot(1, 3, 3)
        plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc:.3f})')
        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic')
        plt.legend(loc="lower right")

        plt.tight_layout()

        # Save figure
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        fig_path = os.path.join(self.model_dir, f"cnn_evaluation_{timestamp}.png")
        plt.savefig(fig_path)
        plt.close()

        logger.info(f"Evaluation plots saved to {fig_path}")

    def _plot_multiclass_results(self, y_true, y_pred, y_proba):
        """
        Plot evaluation results for multi-class classification.

        Args:
            y_true (numpy.ndarray): True labels
            y_pred (numpy.ndarray): Predicted labels
            y_proba (numpy.ndarray): Prediction probabilities
        """
        plt.figure(figsize=(12, 5))

        # Confusion Matrix
        plt.subplot(1, 2, 1)
        cm = confusion_matrix(y_true, y_pred)
        plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
        plt.title('Confusion Matrix')
        plt.colorbar()
        classes = ['BUY', 'HOLD', 'SELL']  # Assumes classes are 0, 1, 2
        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes)
        plt.yticks(tick_marks, classes)
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')

        # Add text annotations to confusion matrix
        thresh = cm.max() / 2.
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                plt.text(j, i, format(cm[i, j], 'd'),
                         horizontalalignment="center",
                         color="white" if cm[i, j] > thresh else "black")

        # Class probability distributions
        plt.subplot(1, 2, 2)
        for i, cls in enumerate(classes):
            plt.hist(y_proba[y_true == i, i], alpha=0.5, label=f'Class {cls}')
        plt.title('Class Probability Distributions')
        plt.xlabel('Probability')
        plt.ylabel('Count')
        plt.legend()

        plt.tight_layout()

        # Save figure
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        fig_path = os.path.join(self.model_dir, f"cnn_multiclass_evaluation_{timestamp}.png")
        plt.savefig(fig_path)
        plt.close()

        logger.info(f"Multiclass evaluation plots saved to {fig_path}")

    def plot_training_history(self):
        """
        Plot training history (loss and metrics).

        Returns:
            str: Path to the saved plot
        """
        if self.history is None:
            raise ValueError("Model has not been trained yet")

        plt.figure(figsize=(12, 5))

        # Plot loss
        plt.subplot(1, 2, 1)
        plt.plot(self.history.history['loss'], label='Training Loss')
        if 'val_loss' in self.history.history:
            plt.plot(self.history.history['val_loss'], label='Validation Loss')
        plt.title('Model Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()

        # Plot accuracy
        plt.subplot(1, 2, 2)

        if 'accuracy' in self.history.history:
            plt.plot(self.history.history['accuracy'], label='Training Accuracy')
            if 'val_accuracy' in self.history.history:
                plt.plot(self.history.history['val_accuracy'], label='Validation Accuracy')
            plt.title('Model Accuracy')
            plt.ylabel('Accuracy')
        elif 'mae' in self.history.history:
            plt.plot(self.history.history['mae'], label='Training MAE')
            if 'val_mae' in self.history.history:
                plt.plot(self.history.history['val_mae'], label='Validation MAE')
            plt.title('Model MAE')
            plt.ylabel('MAE')

        plt.xlabel('Epoch')
        plt.legend()

        plt.tight_layout()

        # Save figure
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        fig_path = os.path.join(self.model_dir, f"cnn_training_history_{timestamp}.png")
        plt.savefig(fig_path)
        plt.close()

        logger.info(f"Training history plot saved to {fig_path}")
        return fig_path
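A short usage sketch for the Keras CNNModel above, assuming you already have windowed numpy arrays (the random data and small epoch count here are only for illustration):

import numpy as np
from NN.models.cnn_model import CNNModel

# Toy data matching the default input shape (20 timesteps, 5 features), 3 classes
X = np.random.rand(256, 20, 5).astype(np.float32)
y = np.random.randint(0, 3, size=256)

model = CNNModel(input_shape=(20, 5), output_size=3)
model.build_model()                      # multi-pathway CNN + BiLSTM head
model.train(X, y, batch_size=32, epochs=2)

y_pred, y_proba = model.predict(X[:8])   # class ids and softmax probabilities
features = model.extract_hidden_features(X[:8])  # inputs for a later Transformer expert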
NN/models/cnn_model_pytorch.py (new file): 546 additions

@@ -0,0 +1,546 @@
#!/usr/bin/env python3
"""
CNN Model - PyTorch Implementation

This module implements a CNN model using PyTorch for time series analysis.
The model consists of multiple convolutional pathways and LSTM layers.
"""

import os
import logging
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Configure logging
logger = logging.getLogger(__name__)

class CNNPyTorch(nn.Module):
    """PyTorch CNN model for time series analysis"""

    def __init__(self, input_shape, output_size=3):
        """
        Initialize the CNN model.

        Args:
            input_shape (tuple): Shape of input data (window_size, features)
            output_size (int): Size of output (1 for regression, 3 for classification)
        """
        super(CNNPyTorch, self).__init__()

        window_size, num_features = input_shape

        # Architecture parameters
        filters = [32, 64, 128]
        kernel_sizes = [3, 5, 7]
        lstm_units = 100
        dense_units = 64
        dropout_rate = 0.3

        # Create parallel convolutional pathways
        self.conv_paths = nn.ModuleList()

        for f, k in zip(filters, kernel_sizes):
            path = nn.Sequential(
                nn.Conv1d(num_features, f, kernel_size=k, padding='same'),
                nn.ReLU(),
                nn.BatchNorm1d(f),
                # kernel_size=3 with stride=1 and padding=1 keeps the sequence length
                # at window_size, so the dense layer size below stays consistent
                nn.MaxPool1d(kernel_size=3, stride=1, padding=1),
                nn.Dropout(dropout_rate)
            )
            self.conv_paths.append(path)

        # Calculate output size from conv paths
        conv_output_size = sum(filters) * window_size

        # LSTM layer
        self.lstm = nn.LSTM(
            input_size=sum(filters),
            hidden_size=lstm_units,
            batch_first=True,
            bidirectional=True
        )

        # Dense layers
        self.flatten = nn.Flatten()
        self.dense1 = nn.Sequential(
            nn.Linear(lstm_units * 2 * window_size, dense_units),
            nn.ReLU(),
            nn.BatchNorm1d(dense_units),
            nn.Dropout(dropout_rate)
        )

        # Output layer
        self.output = nn.Linear(dense_units, output_size)

        # Activation based on output size
        if output_size == 1:
            self.activation = nn.Sigmoid()  # Binary classification or regression
        elif output_size > 1:
            self.activation = nn.Softmax(dim=1)  # Multi-class classification
        else:
            self.activation = nn.Identity()  # No activation

    def forward(self, x):
        """
        Forward pass through the network.

        Args:
            x: Input tensor of shape [batch_size, window_size, features]

        Returns:
            Output tensor of shape [batch_size, output_size]
        """
        batch_size, window_size, num_features = x.shape

        # Transpose for conv1d: [batch, features, window]
        x_t = x.transpose(1, 2)

        # Process through parallel conv paths
        conv_outputs = []
        for path in self.conv_paths:
            conv_outputs.append(path(x_t))

        # Concatenate conv outputs
        conv_concat = torch.cat(conv_outputs, dim=1)

        # Transpose back for LSTM: [batch, window, features]
        conv_concat = conv_concat.transpose(1, 2)

        # LSTM processing
        lstm_out, _ = self.lstm(conv_concat)

        # Flatten
        flattened = self.flatten(lstm_out)

        # Dense processing
        dense_out = self.dense1(flattened)

        # Output
        output = self.output(dense_out)

        # Apply activation
        return self.activation(output)


class CNNModelPyTorch:
    """
    CNN model wrapper class for time series analysis using PyTorch.

    This class provides methods for building, training, evaluating, and making
    predictions with the CNN model.
    """

    def __init__(self, window_size, num_features, output_size=3, timeframes=None):
        """
        Initialize the CNN model.

        Args:
            window_size (int): Size of the input window
            num_features (int): Number of features in the input data
            output_size (int): Size of the output (1 for regression, 3 for classification)
            timeframes (list): List of timeframes used (for logging)
        """
        self.window_size = window_size
        self.num_features = num_features
        self.output_size = output_size
        self.timeframes = timeframes or []

        # Determine device (GPU or CPU)
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {self.device}")

        # Initialize model
        self.model = None
        self.build_model()

        # Initialize training history
        self.history = {
            'loss': [],
            'val_loss': [],
            'accuracy': [],
            'val_accuracy': []
        }

    def build_model(self):
        """Build the CNN model architecture"""
        logger.info(f"Building PyTorch CNN model with window_size={self.window_size}, "
                    f"num_features={self.num_features}, output_size={self.output_size}")

        self.model = CNNPyTorch(
            input_shape=(self.window_size, self.num_features),
            output_size=self.output_size
        ).to(self.device)

        # Initialize optimizer
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)

        # Initialize loss function based on output size
        if self.output_size == 1:
            self.criterion = nn.BCELoss()  # Binary classification
        elif self.output_size > 1:
            self.criterion = nn.CrossEntropyLoss()  # Multi-class classification
        else:
            self.criterion = nn.MSELoss()  # Regression

        logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")

    def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
        """
        Train the CNN model.

        Args:
            X_train: Training input data
            y_train: Training target data
            X_val: Validation input data
            y_val: Validation target data
            batch_size: Batch size for training
            epochs: Number of training epochs

        Returns:
            Training history
        """
        logger.info(f"Training PyTorch CNN model with {len(X_train)} samples, "
                    f"batch_size={batch_size}, epochs={epochs}")

        # Convert numpy arrays to PyTorch tensors
        X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)

        # Handle different output sizes for y_train
        if self.output_size == 1:
            y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(self.device)
        else:
            y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)

        # Create DataLoader for training data
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Create DataLoader for validation data if provided
        if X_val is not None and y_val is not None:
            X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
            if self.output_size == 1:
                y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
            else:
                y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)

            val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
            val_loader = DataLoader(val_dataset, batch_size=batch_size)
        else:
            val_loader = None

        # Training loop
        for epoch in range(epochs):
            # Training phase
            self.model.train()
            running_loss = 0.0
            correct = 0
            total = 0

            for inputs, targets in train_loader:
                # Zero the parameter gradients
                self.optimizer.zero_grad()

                # Forward pass
                outputs = self.model(inputs)

                # Calculate loss
                if self.output_size == 1:
                    loss = self.criterion(outputs, targets.unsqueeze(1))
                else:
                    loss = self.criterion(outputs, targets)

                # Backward pass and optimize
                loss.backward()
                self.optimizer.step()

                # Statistics
                running_loss += loss.item()
                if self.output_size > 1:
                    _, predicted = torch.max(outputs, 1)
                    total += targets.size(0)
                    correct += (predicted == targets).sum().item()

            epoch_loss = running_loss / len(train_loader)
            epoch_acc = correct / total if total > 0 else 0

            # Validation phase
            if val_loader is not None:
                val_loss, val_acc = self._validate(val_loader)

                logger.info(f"Epoch {epoch+1}/{epochs} - "
                            f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
                            f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")

                # Update history
                self.history['loss'].append(epoch_loss)
                self.history['accuracy'].append(epoch_acc)
                self.history['val_loss'].append(val_loss)
                self.history['val_accuracy'].append(val_acc)
            else:
                logger.info(f"Epoch {epoch+1}/{epochs} - "
                            f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f}")

                # Update history without validation
                self.history['loss'].append(epoch_loss)
                self.history['accuracy'].append(epoch_acc)

        logger.info("Training completed")
        return self.history

    def _validate(self, val_loader):
        """Validate the model using the validation set"""
        self.model.eval()
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, targets in val_loader:
                # Forward pass
                outputs = self.model(inputs)

                # Calculate loss
                if self.output_size == 1:
                    loss = self.criterion(outputs, targets.unsqueeze(1))
                else:
                    loss = self.criterion(outputs, targets)

                val_loss += loss.item()

                # Calculate accuracy
                if self.output_size > 1:
                    _, predicted = torch.max(outputs, 1)
                    total += targets.size(0)
                    correct += (predicted == targets).sum().item()

        return val_loss / len(val_loader), correct / total if total > 0 else 0

    def evaluate(self, X_test, y_test):
        """
        Evaluate the model on test data.

        Args:
            X_test: Test input data
            y_test: Test target data

        Returns:
            dict: Evaluation metrics
        """
        logger.info(f"Evaluating model on {len(X_test)} samples")

        # Convert to PyTorch tensors
        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(self.device)

        # Get predictions
        self.model.eval()
        with torch.no_grad():
            y_pred = self.model(X_test_tensor)

            if self.output_size > 1:
                _, y_pred_class = torch.max(y_pred, 1)
                y_pred_class = y_pred_class.cpu().numpy()
            else:
                y_pred_class = (y_pred.cpu().numpy() > 0.5).astype(int).flatten()

        # Calculate metrics
        if self.output_size > 1:
            accuracy = accuracy_score(y_test, y_pred_class)
            precision = precision_score(y_test, y_pred_class, average='weighted')
            recall = recall_score(y_test, y_pred_class, average='weighted')
            f1 = f1_score(y_test, y_pred_class, average='weighted')

            metrics = {
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1_score': f1
            }
        else:
            accuracy = accuracy_score(y_test, y_pred_class)
            precision = precision_score(y_test, y_pred_class)
            recall = recall_score(y_test, y_pred_class)
            f1 = f1_score(y_test, y_pred_class)

            metrics = {
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1_score': f1
            }

        logger.info(f"Evaluation metrics: {metrics}")
        return metrics

    def predict(self, X):
        """
        Make predictions with the model.

        Args:
            X: Input data

        Returns:
            Predictions
        """
        # Convert to PyTorch tensor
        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)

        # Get predictions
        self.model.eval()
        with torch.no_grad():
            predictions = self.model(X_tensor)

            if self.output_size > 1:
                # Multi-class classification
                probs = predictions.cpu().numpy()
                _, class_preds = torch.max(predictions, 1)
                class_preds = class_preds.cpu().numpy()
                return class_preds, probs
            else:
                # Binary classification or regression
                preds = predictions.cpu().numpy()
                if self.output_size == 1:
                    # Binary classification
                    class_preds = (preds > 0.5).astype(int)
                    return class_preds.flatten(), preds.flatten()
                else:
                    # Regression
                    return preds.flatten(), None

    def save(self, filepath):
        """
        Save the model to a file.

        Args:
            filepath: Path to save the model
        """
        # Create directory if it doesn't exist
        os.makedirs(os.path.dirname(filepath), exist_ok=True)

        # Save the model state
        model_state = {
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'history': self.history,
            'window_size': self.window_size,
            'num_features': self.num_features,
            'output_size': self.output_size,
            'timeframes': self.timeframes
        }

        torch.save(model_state, f"{filepath}.pt")
        logger.info(f"Model saved to {filepath}.pt")

    def load(self, filepath):
        """
        Load the model from a file.

        Args:
            filepath: Path to load the model from
        """
        # Check if file exists
        if not os.path.exists(f"{filepath}.pt"):
            logger.error(f"Model file {filepath}.pt not found")
            return False

        # Load the model state
        model_state = torch.load(f"{filepath}.pt", map_location=self.device)

        # Update model parameters
        self.window_size = model_state['window_size']
        self.num_features = model_state['num_features']
        self.output_size = model_state['output_size']
        self.timeframes = model_state['timeframes']

        # Rebuild the model
        self.build_model()

        # Load the model state
        self.model.load_state_dict(model_state['model_state_dict'])
        self.optimizer.load_state_dict(model_state['optimizer_state_dict'])
        self.history = model_state['history']

        logger.info(f"Model loaded from {filepath}.pt")
        return True

    def plot_training_history(self):
        """Plot the training history"""
        if not self.history['loss']:
            logger.warning("No training history to plot")
            return

        plt.figure(figsize=(12, 4))

        # Plot loss
        plt.subplot(1, 2, 1)
        plt.plot(self.history['loss'], label='Training Loss')
        if 'val_loss' in self.history and self.history['val_loss']:
            plt.plot(self.history['val_loss'], label='Validation Loss')
        plt.title('Model Loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend()

        # Plot accuracy
        plt.subplot(1, 2, 2)
        plt.plot(self.history['accuracy'], label='Training Accuracy')
        if 'val_accuracy' in self.history and self.history['val_accuracy']:
            plt.plot(self.history['val_accuracy'], label='Validation Accuracy')
        plt.title('Model Accuracy')
        plt.ylabel('Accuracy')
        plt.xlabel('Epoch')
        plt.legend()

        # Save the plot
        os.makedirs('plots', exist_ok=True)
        plt.savefig(os.path.join('plots', f"cnn_history_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"))
        plt.close()

        logger.info("Training history plots saved to plots directory")

    def extract_hidden_features(self, X):
        """
        Extract hidden features from the model.

        Args:
            X: Input data

        Returns:
            Hidden features
        """
        # Convert to PyTorch tensor
        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)

        # Forward pass through the model up to the last hidden layer
        self.model.eval()
        with torch.no_grad():
            # Get features before the output layer
            x_t = X_tensor.transpose(1, 2)

            # Process through parallel conv paths
            conv_outputs = []
            for path in self.model.conv_paths:
                conv_outputs.append(path(x_t))

            # Concatenate conv outputs
            conv_concat = torch.cat(conv_outputs, dim=1)

            # Transpose back for LSTM
            conv_concat = conv_concat.transpose(1, 2)

            # LSTM processing
            lstm_out, _ = self.model.lstm(conv_concat)

            # Flatten
            flattened = self.model.flatten(lstm_out)

            # Dense processing
            hidden_features = self.model.dense1(flattened)

        return hidden_features.cpu().numpy()
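A brief usage sketch of the PyTorch wrapper above, assuming windowed numpy inputs (values and paths are illustrative only):

import numpy as np
from NN.models.cnn_model_pytorch import CNNModelPyTorch

X = np.random.rand(128, 20, 5).astype(np.float32)
y = np.random.randint(0, 3, size=128)            # 0=BUY, 1=HOLD, 2=SELL

model = CNNModelPyTorch(window_size=20, num_features=5, output_size=3, timeframes=['1h', '4h'])
model.train(X, y, batch_size=32, epochs=2)

actions, probs = model.predict(X[:8])            # class ids and per-class probabilities
model.save('NN/models/saved/cnn_pytorch_demo')   # writes cnn_pytorch_demo.pt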
File diff suppressed because it is too large
653
NN/models/transformer_model_pytorch.py
Normal file
653
NN/models/transformer_model_pytorch.py
Normal file
@ -0,0 +1,653 @@
#!/usr/bin/env python3
"""
Transformer Model - PyTorch Implementation

This module implements a Transformer model using PyTorch for time series analysis.
The model consists of a Transformer encoder and a Mixture of Experts model.
"""

import os
import logging
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Configure logging
logger = logging.getLogger(__name__)


class TransformerBlock(nn.Module):
    """Transformer Block with self-attention mechanism"""

    def __init__(self, input_dim, num_heads=4, ff_dim=64, dropout=0.1):
        super(TransformerBlock, self).__init__()

        self.attention = nn.MultiheadAttention(
            embed_dim=input_dim,
            num_heads=num_heads,
            dropout=dropout,
            batch_first=True
        )

        self.feed_forward = nn.Sequential(
            nn.Linear(input_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, input_dim)
        )

        self.layernorm1 = nn.LayerNorm(input_dim)
        self.layernorm2 = nn.LayerNorm(input_dim)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, x):
        # Self-attention
        attn_output, _ = self.attention(x, x, x)
        x = x + self.dropout1(attn_output)
        x = self.layernorm1(x)

        # Feed forward
        ff_output = self.feed_forward(x)
        x = x + self.dropout2(ff_output)
        x = self.layernorm2(x)

        return x


class TransformerModelPyTorch(nn.Module):
    """PyTorch Transformer model for time series analysis"""

    def __init__(self, input_shape, output_size=3, num_heads=4, ff_dim=64, num_transformer_blocks=2):
        """
        Initialize the Transformer model.

        Args:
            input_shape (tuple): Shape of input data (window_size, features)
            output_size (int): Size of output (1 for regression, 3 for classification)
            num_heads (int): Number of attention heads
            ff_dim (int): Feed forward dimension
            num_transformer_blocks (int): Number of transformer blocks
        """
        super(TransformerModelPyTorch, self).__init__()

        window_size, num_features = input_shape

        # Positional encoding
        self.pos_encoding = nn.Parameter(
            torch.zeros(1, window_size, num_features),
            requires_grad=True
        )

        # Transformer blocks
        self.transformer_blocks = nn.ModuleList([
            TransformerBlock(
                input_dim=num_features,
                num_heads=num_heads,
                ff_dim=ff_dim
            ) for _ in range(num_transformer_blocks)
        ])

        # Global average pooling
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)

        # Dense layers
        self.dense = nn.Sequential(
            nn.Linear(num_features, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Dropout(0.3),
            nn.Linear(64, output_size)
        )

        # Activation based on output size
        if output_size == 1:
            self.activation = nn.Sigmoid()  # Binary classification or regression
        elif output_size > 1:
            self.activation = nn.Softmax(dim=1)  # Multi-class classification
        else:
            self.activation = nn.Identity()  # No activation

    def forward(self, x):
        """
        Forward pass through the network.

        Args:
            x: Input tensor of shape [batch_size, window_size, features]

        Returns:
            Output tensor of shape [batch_size, output_size]
        """
        # Add positional encoding
        x = x + self.pos_encoding

        # Apply transformer blocks
        for transformer_block in self.transformer_blocks:
            x = transformer_block(x)

        # Global average pooling
        x = x.transpose(1, 2)        # [batch, features, window]
        x = self.global_avg_pool(x)  # [batch, features, 1]
        x = x.squeeze(-1)            # [batch, features]

        # Dense layers
        x = self.dense(x)

        # Apply activation
        return self.activation(x)


class TransformerModelPyTorchWrapper:
    """
    Transformer model wrapper class for time series analysis using PyTorch.

    This class provides methods for building, training, evaluating, and making
    predictions with the Transformer model.
    """

    def __init__(self, window_size, num_features, output_size=3, timeframes=None):
        """
        Initialize the Transformer model.

        Args:
            window_size (int): Size of the input window
            num_features (int): Number of features in the input data
            output_size (int): Size of the output (1 for regression, 3 for classification)
            timeframes (list): List of timeframes used (for logging)
        """
        self.window_size = window_size
        self.num_features = num_features
        self.output_size = output_size
        self.timeframes = timeframes or []

        # Determine device (GPU or CPU)
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {self.device}")

        # Initialize model
        self.model = None
        self.build_model()

        # Initialize training history
        self.history = {
            'loss': [],
            'val_loss': [],
            'accuracy': [],
            'val_accuracy': []
        }

    def build_model(self):
        """Build the Transformer model architecture"""
        logger.info(f"Building PyTorch Transformer model with window_size={self.window_size}, "
                    f"num_features={self.num_features}, output_size={self.output_size}")

        self.model = TransformerModelPyTorch(
            input_shape=(self.window_size, self.num_features),
            output_size=self.output_size
        ).to(self.device)

        # Initialize optimizer
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)

        # Initialize loss function based on output size
        if self.output_size == 1:
            self.criterion = nn.BCELoss()  # Binary classification
        elif self.output_size > 1:
            self.criterion = nn.CrossEntropyLoss()  # Multi-class classification
        else:
            self.criterion = nn.MSELoss()  # Regression

        logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")

    def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
        """
        Train the Transformer model.

        Args:
            X_train: Training input data
            y_train: Training target data
            X_val: Validation input data
            y_val: Validation target data
            batch_size: Batch size for training
            epochs: Number of training epochs

        Returns:
            Training history
        """
        logger.info(f"Training PyTorch Transformer model with {len(X_train)} samples, "
                    f"batch_size={batch_size}, epochs={epochs}")

        # Convert numpy arrays to PyTorch tensors
        X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)

        # Handle different output sizes for y_train
        if self.output_size == 1:
            y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(self.device)
        else:
            y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)

        # Create DataLoader for training data
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Create DataLoader for validation data if provided
        if X_val is not None and y_val is not None:
            X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
            if self.output_size == 1:
                y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
            else:
                y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)

            val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
            val_loader = DataLoader(val_dataset, batch_size=batch_size)
        else:
            val_loader = None

        # Training loop
        for epoch in range(epochs):
            # Training phase
            self.model.train()
            running_loss = 0.0
            correct = 0
            total = 0

            for inputs, targets in train_loader:
                # Zero the parameter gradients
                self.optimizer.zero_grad()

                # Forward pass
                outputs = self.model(inputs)

                # Calculate loss
                if self.output_size == 1:
                    loss = self.criterion(outputs, targets.unsqueeze(1))
                else:
                    loss = self.criterion(outputs, targets)

                # Backward pass and optimize
                loss.backward()
                self.optimizer.step()

                # Statistics
                running_loss += loss.item()
                if self.output_size > 1:
                    _, predicted = torch.max(outputs, 1)
                    total += targets.size(0)
                    correct += (predicted == targets).sum().item()

            epoch_loss = running_loss / len(train_loader)
            epoch_acc = correct / total if total > 0 else 0

            # Validation phase
            if val_loader is not None:
                val_loss, val_acc = self._validate(val_loader)

                logger.info(f"Epoch {epoch+1}/{epochs} - "
                            f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
                            f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")

                # Update history
                self.history['loss'].append(epoch_loss)
                self.history['accuracy'].append(epoch_acc)
                self.history['val_loss'].append(val_loss)
                self.history['val_accuracy'].append(val_acc)
            else:
                logger.info(f"Epoch {epoch+1}/{epochs} - "
                            f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f}")

                # Update history without validation
                self.history['loss'].append(epoch_loss)
                self.history['accuracy'].append(epoch_acc)

        logger.info("Training completed")
        return self.history

    def _validate(self, val_loader):
        """Validate the model using the validation set"""
        self.model.eval()
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, targets in val_loader:
                # Forward pass
                outputs = self.model(inputs)

                # Calculate loss
                if self.output_size == 1:
                    loss = self.criterion(outputs, targets.unsqueeze(1))
                else:
                    loss = self.criterion(outputs, targets)

                val_loss += loss.item()

                # Calculate accuracy
                if self.output_size > 1:
                    _, predicted = torch.max(outputs, 1)
                    total += targets.size(0)
                    correct += (predicted == targets).sum().item()

        return val_loss / len(val_loader), correct / total if total > 0 else 0

    def evaluate(self, X_test, y_test):
        """
        Evaluate the model on test data.

        Args:
            X_test: Test input data
            y_test: Test target data

        Returns:
            dict: Evaluation metrics
        """
        logger.info(f"Evaluating model on {len(X_test)} samples")

        # Convert to PyTorch tensors
        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(self.device)

        # Get predictions
        self.model.eval()
        with torch.no_grad():
            y_pred = self.model(X_test_tensor)

            if self.output_size > 1:
                _, y_pred_class = torch.max(y_pred, 1)
                y_pred_class = y_pred_class.cpu().numpy()
            else:
                y_pred_class = (y_pred.cpu().numpy() > 0.5).astype(int).flatten()

        # Calculate metrics
        if self.output_size > 1:
            accuracy = accuracy_score(y_test, y_pred_class)
            precision = precision_score(y_test, y_pred_class, average='weighted')
            recall = recall_score(y_test, y_pred_class, average='weighted')
            f1 = f1_score(y_test, y_pred_class, average='weighted')

            metrics = {
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1_score': f1
            }
        else:
            accuracy = accuracy_score(y_test, y_pred_class)
            precision = precision_score(y_test, y_pred_class)
            recall = recall_score(y_test, y_pred_class)
            f1 = f1_score(y_test, y_pred_class)

            metrics = {
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1_score': f1
            }

        logger.info(f"Evaluation metrics: {metrics}")
        return metrics

    def predict(self, X):
        """
        Make predictions with the model.

        Args:
            X: Input data

        Returns:
            Predictions
        """
        # Convert to PyTorch tensor
        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)

        # Get predictions
        self.model.eval()
        with torch.no_grad():
            predictions = self.model(X_tensor)

            if self.output_size > 1:
                # Multi-class classification
                probs = predictions.cpu().numpy()
                _, class_preds = torch.max(predictions, 1)
                class_preds = class_preds.cpu().numpy()
                return class_preds, probs
            else:
                # Binary classification or regression
                preds = predictions.cpu().numpy()
                if self.output_size == 1:
                    # Binary classification
                    class_preds = (preds > 0.5).astype(int)
                    return class_preds.flatten(), preds.flatten()
                else:
                    # Regression
                    return preds.flatten(), None

    def save(self, filepath):
        """
        Save the model to a file.

        Args:
            filepath: Path to save the model
        """
        # Create directory if it doesn't exist
        os.makedirs(os.path.dirname(filepath), exist_ok=True)

        # Save the model state
        model_state = {
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'history': self.history,
            'window_size': self.window_size,
            'num_features': self.num_features,
            'output_size': self.output_size,
            'timeframes': self.timeframes
        }

        torch.save(model_state, f"{filepath}.pt")
        logger.info(f"Model saved to {filepath}.pt")

    def load(self, filepath):
        """
        Load the model from a file.

        Args:
            filepath: Path to load the model from
        """
        # Check if file exists
        if not os.path.exists(f"{filepath}.pt"):
            logger.error(f"Model file {filepath}.pt not found")
            return False

        # Load the model state
        model_state = torch.load(f"{filepath}.pt", map_location=self.device)

        # Update model parameters
        self.window_size = model_state['window_size']
        self.num_features = model_state['num_features']
        self.output_size = model_state['output_size']
        self.timeframes = model_state['timeframes']

        # Rebuild the model
        self.build_model()

        # Load the model state
        self.model.load_state_dict(model_state['model_state_dict'])
        self.optimizer.load_state_dict(model_state['optimizer_state_dict'])
        self.history = model_state['history']

        logger.info(f"Model loaded from {filepath}.pt")
        return True


class MixtureOfExpertsModelPyTorch:
    """
    Mixture of Experts model implementation using PyTorch.

    This model combines predictions from multiple models (experts) using a
    learned weighting scheme.
    """

    def __init__(self, output_size=3, timeframes=None):
        """
        Initialize the Mixture of Experts model.

        Args:
            output_size (int): Size of the output (1 for regression, 3 for classification)
            timeframes (list): List of timeframes used (for logging)
        """
        self.output_size = output_size
        self.timeframes = timeframes or []
        self.experts = {}
        self.expert_weights = {}

        # Determine device (GPU or CPU)
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {self.device}")

        # Initialize model and training history
        self.model = None
        self.history = {
            'loss': [],
            'val_loss': [],
            'accuracy': [],
            'val_accuracy': []
        }

    def add_expert(self, name, model):
        """
        Add an expert model.

        Args:
            name (str): Name of the expert
            model: Expert model
        """
        self.experts[name] = model
        logger.info(f"Added expert: {name}")

    def predict(self, X):
        """
        Make predictions using all experts and combine them.

        Args:
            X: Input data

        Returns:
            Combined predictions
        """
        if not self.experts:
            logger.error("No experts added to the MoE model")
            return None

        # Get predictions from each expert
        expert_predictions = {}
        for name, expert in self.experts.items():
            pred, _ = expert.predict(X)
            expert_predictions[name] = pred

        # Combine predictions based on weights
        final_pred = None
        for name, pred in expert_predictions.items():
            weight = self.expert_weights.get(name, 1.0 / len(self.experts))
            if final_pred is None:
                final_pred = weight * pred
            else:
                final_pred += weight * pred

        # For classification, convert to class indices
        if self.output_size > 1:
            # Get class with highest probability
            class_pred = np.argmax(final_pred, axis=1)
            return class_pred, final_pred
        else:
            # Binary classification
            class_pred = (final_pred > 0.5).astype(int)
            return class_pred, final_pred

    def evaluate(self, X_test, y_test):
        """
        Evaluate the model on test data.

        Args:
            X_test: Test input data
            y_test: Test target data

        Returns:
            dict: Evaluation metrics
        """
        logger.info(f"Evaluating MoE model on {len(X_test)} samples")

        # Get predictions
        y_pred_class, _ = self.predict(X_test)

        # Calculate metrics
        if self.output_size > 1:
            accuracy = accuracy_score(y_test, y_pred_class)
            precision = precision_score(y_test, y_pred_class, average='weighted')
            recall = recall_score(y_test, y_pred_class, average='weighted')
            f1 = f1_score(y_test, y_pred_class, average='weighted')

            metrics = {
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1_score': f1
            }
        else:
            accuracy = accuracy_score(y_test, y_pred_class)
            precision = precision_score(y_test, y_pred_class)
            recall = recall_score(y_test, y_pred_class)
            f1 = f1_score(y_test, y_pred_class)

            metrics = {
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1_score': f1
            }

        logger.info(f"MoE evaluation metrics: {metrics}")
        return metrics

    def save(self, filepath):
        """
        Save the model weights to a file.

        Args:
            filepath: Path to save the model
        """
        # Create directory if it doesn't exist
        os.makedirs(os.path.dirname(filepath), exist_ok=True)

        # Save the model state
        model_state = {
            'expert_weights': self.expert_weights,
            'output_size': self.output_size,
            'timeframes': self.timeframes
        }

        torch.save(model_state, f"{filepath}_moe.pt")
        logger.info(f"MoE model saved to {filepath}_moe.pt")

    def load(self, filepath):
        """
        Load the model from a file.

        Args:
            filepath: Path to load the model from
        """
        # Check if file exists
        if not os.path.exists(f"{filepath}_moe.pt"):
            logger.error(f"MoE model file {filepath}_moe.pt not found")
            return False

        # Load the model state
        model_state = torch.load(f"{filepath}_moe.pt", map_location=self.device)

        # Update model parameters
        self.expert_weights = model_state['expert_weights']
        self.output_size = model_state['output_size']
        self.timeframes = model_state['timeframes']

        logger.info(f"MoE model loaded from {filepath}_moe.pt")
        return True
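A minimal usage sketch of the wrapper above. Array shapes follow the docstrings; the sample count, train/val split, and save path are illustrative assumptions only:

# Usage sketch with assumed sizes and an illustrative save path
import numpy as np
from NN.models.transformer_model_pytorch import TransformerModelPyTorchWrapper

window_size, num_features = 20, 5
X = np.random.rand(200, window_size, num_features).astype(np.float32)  # dummy OHLCV windows
y = np.random.randint(0, 3, size=200)                                  # BUY/HOLD/SELL class labels

model = TransformerModelPyTorchWrapper(window_size=window_size,
                                        num_features=num_features,
                                        output_size=3,
                                        timeframes=['1h', '4h'])
model.train(X[:160], y[:160], X_val=X[160:], y_val=y[160:], batch_size=32, epochs=5)
class_preds, probs = model.predict(X[160:])
model.save("NN/models/saved/transformer_demo")  # assumed output location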
@@ -1,13 +1,22 @@
-tensorflow>=2.5.0
+# Main dependencies
 numpy>=1.19.5
 pandas>=1.3.0
 matplotlib>=3.4.2
 scikit-learn>=0.24.2
-tensorflow-addons>=0.13.0
-plotly>=5.1.0
-h5py>=3.1.0
-tqdm>=4.61.1
-pyyaml>=5.4.1
-tensorboard>=2.5.0
-ccxt>=1.50.0
-requests>=2.25.1
+
+# PyTorch (primary framework)
+torch
+torchvision
+
+# TensorFlow (optional)
+# tensorflow>=2.5.0
+# tensorflow-addons>=0.13.0
+
+# Additional dependencies
+plotly
+h5py
+tqdm
+pyyaml
+tensorboard
+ccxt
+requests
88 NN/start_tensorboard.py Normal file
@@ -0,0 +1,88 @@
#!/usr/bin/env python
"""
Start TensorBoard for monitoring neural network training
"""

import os
import sys
import subprocess
import webbrowser
from time import sleep

def start_tensorboard(logdir="NN/models/saved/logs", port=6006, open_browser=True):
    """
    Start TensorBoard in a subprocess

    Args:
        logdir: Directory containing TensorBoard logs
        port: Port to run TensorBoard on
        open_browser: Whether to open a browser automatically
    """
    # Make sure the log directory exists
    os.makedirs(logdir, exist_ok=True)

    # Create command
    cmd = [
        sys.executable,
        "-m",
        "tensorboard.main",
        f"--logdir={logdir}",
        f"--port={port}",
        "--bind_all"
    ]

    print(f"Starting TensorBoard with logs from {logdir} on port {port}")
    print(f"Command: {' '.join(cmd)}")

    # Start TensorBoard in a subprocess
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True
    )

    # Wait for TensorBoard to start up
    for line in process.stdout:
        print(line.strip())
        if "TensorBoard" in line and "http://" in line:
            # TensorBoard is running, extract the URL
            url = None
            for part in line.split():
                if part.startswith(("http://", "https://")):
                    url = part
                    break

            # Open browser if requested and URL found
            if open_browser and url:
                print(f"Opening TensorBoard in browser: {url}")
                webbrowser.open(url)

            break

    # Return the process for the caller to manage
    return process

if __name__ == "__main__":
    import argparse

    # Parse command line arguments
    parser = argparse.ArgumentParser(description="Start TensorBoard for NN training visualization")
    parser.add_argument("--logdir", default="NN/models/saved/logs", help="Directory containing TensorBoard logs")
    parser.add_argument("--port", type=int, default=6006, help="Port to run TensorBoard on")
    parser.add_argument("--no-browser", action="store_true", help="Don't open browser automatically")

    args = parser.parse_args()

    # Start TensorBoard
    process = start_tensorboard(args.logdir, args.port, not args.no_browser)

    try:
        # Keep the script running until Ctrl+C
        print("TensorBoard is running. Press Ctrl+C to stop.")
        while True:
            sleep(1)
    except KeyboardInterrupt:
        print("Stopping TensorBoard...")
        process.terminate()
        process.wait()
BIN NN/utils/__pycache__/__init__.cpython-312.pyc Normal file
Binary file not shown.
BIN NN/utils/__pycache__/data_interface.cpython-312.pyc Normal file
Binary file not shown.

390 NN/utils/data_interface.py Normal file
@@ -0,0 +1,390 @@
"""
Data Interface for Neural Network Trading System

This module provides functionality to fetch, process, and prepare data for the neural network models.
"""

import os
import logging
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import json
import pickle
from sklearn.preprocessing import MinMaxScaler

logger = logging.getLogger(__name__)

class DataInterface:
    """
    Handles data collection, processing, and preparation for neural network models.

    This class is responsible for:
    1. Fetching historical data
    2. Preprocessing data for neural network input
    3. Generating training datasets
    4. Handling real-time data integration
    """

    def __init__(self, symbol="BTC/USDT", timeframes=None, data_dir="NN/data"):
        """
        Initialize the data interface.

        Args:
            symbol (str): Trading pair symbol (e.g., "BTC/USDT")
            timeframes (list): List of timeframes to use (e.g., ['1m', '5m', '1h', '4h', '1d'])
            data_dir (str): Directory to store/load datasets
        """
        self.symbol = symbol
        self.timeframes = timeframes or ['1h', '4h', '1d']
        self.data_dir = data_dir
        self.scalers = {}  # Store scalers for each timeframe

        # Create data directory if it doesn't exist
        os.makedirs(self.data_dir, exist_ok=True)

        # Initialize empty dataframes for each timeframe
        self.dataframes = {tf: None for tf in self.timeframes}

        logger.info(f"DataInterface initialized for {symbol} with timeframes {timeframes}")

    def get_historical_data(self, timeframe='1h', n_candles=1000, use_cache=True):
        """
        Fetch historical price data for a given timeframe.

        Args:
            timeframe (str): Timeframe to fetch data for
            n_candles (int): Number of candles to fetch
            use_cache (bool): Whether to use cached data if available

        Returns:
            pd.DataFrame: DataFrame with OHLCV data
        """
        cache_file = os.path.join(self.data_dir, f"{self.symbol.replace('/', '_')}_{timeframe}.csv")

        # Check if cached data exists and is recent
        if use_cache and os.path.exists(cache_file):
            try:
                df = pd.read_csv(cache_file, parse_dates=['timestamp'])
                # If we have enough data and it's recent, use it
                if len(df) >= n_candles:
                    logger.info(f"Using cached data for {self.symbol} {timeframe} ({len(df)} candles)")
                    self.dataframes[timeframe] = df
                    return df.tail(n_candles)
            except Exception as e:
                logger.error(f"Error reading cached data: {str(e)}")

        # If we get here, we need to fetch data
        # For now, we'll use a placeholder for fetching data from an exchange
        try:
            # In a real implementation, we would fetch data from an exchange or API here
            # For this example, we'll create dummy data if we can't load from cache
            logger.info(f"Fetching historical data for {self.symbol} {timeframe}")

            # Placeholder for real data fetching
            # In a real implementation, this would be replaced with API calls
            self._fetch_data_from_exchange(timeframe, n_candles)

            # Save to cache
            if self.dataframes[timeframe] is not None:
                self.dataframes[timeframe].to_csv(cache_file, index=False)
                return self.dataframes[timeframe]
            else:
                # Create dummy data as fallback
                logger.warning(f"Could not fetch data for {self.symbol} {timeframe}, using dummy data")
                df = self._create_dummy_data(timeframe, n_candles)
                self.dataframes[timeframe] = df
                return df
        except Exception as e:
            logger.error(f"Error fetching data: {str(e)}")
            return None

    def _fetch_data_from_exchange(self, timeframe, n_candles):
        """
        Placeholder method for fetching data from an exchange.
        In a real implementation, this would connect to an exchange API.
        """
        # This is a placeholder - in a real implementation this would make API calls
        # to a cryptocurrency exchange to fetch OHLCV data

        # For now, just generate dummy data
        self.dataframes[timeframe] = self._create_dummy_data(timeframe, n_candles)

    def _create_dummy_data(self, timeframe, n_candles):
        """
        Create dummy OHLCV data for testing purposes.

        Args:
            timeframe (str): Timeframe to create data for
            n_candles (int): Number of candles to create

        Returns:
            pd.DataFrame: DataFrame with dummy OHLCV data
        """
        # Map timeframe to seconds
        tf_seconds = {
            '1m': 60,
            '5m': 300,
            '15m': 900,
            '1h': 3600,
            '4h': 14400,
            '1d': 86400
        }
        seconds = tf_seconds.get(timeframe, 3600)  # Default to 1h

        # Create timestamps
        end_time = datetime.now()
        timestamps = [end_time - timedelta(seconds=seconds * i) for i in range(n_candles)]
        timestamps.reverse()  # Oldest first

        # Generate random price data with realistic patterns
        np.random.seed(42)  # For reproducibility

        # Start price
        price = 50000  # For BTC/USDT
        prices = []
        volumes = []

        for i in range(n_candles):
            # Random walk with drift and volatility based on timeframe
            drift = 0.0001 * seconds  # Larger drift for larger timeframes
            volatility = 0.01 * np.sqrt(seconds / 3600)  # Scale volatility by sqrt of time

            # Daily/weekly patterns
            if timeframe in ['1d', '4h']:
                # Add some cyclical patterns
                cycle = np.sin(i / 7 * np.pi) * 0.02  # Weekly cycle
            else:
                cycle = np.sin(i / 24 * np.pi) * 0.01  # Daily cycle

            # Calculate price change with random walk + cycles
            price_change = price * (drift + volatility * np.random.randn() + cycle)
            price += price_change

            # Generate OHLC from the price
            open_price = price
            high_price = price * (1 + abs(0.005 * np.random.randn()))
            low_price = price * (1 - abs(0.005 * np.random.randn()))
            close_price = price * (1 + 0.002 * np.random.randn())

            # Ensure high >= open, close, low and low <= open, close
            high_price = max(high_price, open_price, close_price)
            low_price = min(low_price, open_price, close_price)

            # Generate volume (higher for larger price movements)
            volume = abs(price_change) * (10000 + 5000 * np.random.rand())

            prices.append((open_price, high_price, low_price, close_price))
            volumes.append(volume)

            # Update price for next iteration
            price = close_price

        # Create DataFrame
        df = pd.DataFrame(
            [(t, o, h, l, c, v) for t, (o, h, l, c), v in zip(timestamps, prices, volumes)],
            columns=['timestamp', 'open', 'high', 'low', 'close', 'volume']
        )

        return df

    def prepare_nn_input(self, timeframes=None, n_candles=500, window_size=20):
        """
        Prepare input data for neural network models.

        Args:
            timeframes (list): List of timeframes to use
            n_candles (int): Number of candles to fetch for each timeframe
            window_size (int): Size of the sliding window for feature creation

        Returns:
            tuple: (X, y, timestamps) where:
                X is the input features array with shape (n_samples, window_size, n_features)
                y is the target array with shape (n_samples,)
                timestamps is an array of timestamps for each sample
        """
        if timeframes is None:
            timeframes = self.timeframes

        # Get data for all requested timeframes
        dfs = {}
        for tf in timeframes:
            df = self.get_historical_data(timeframe=tf, n_candles=n_candles)
            if df is not None and not df.empty:
                dfs[tf] = df

        if not dfs:
            logger.error("No data available for feature creation")
            return None, None, None

        # For simplicity, we'll use just one timeframe for now
        # In a more complex implementation, we would merge multiple timeframes
        primary_tf = timeframes[0]
        if primary_tf not in dfs:
            logger.error(f"Primary timeframe {primary_tf} not available")
            return None, None, None

        df = dfs[primary_tf]

        # Create features
        X, y, timestamps = self._create_features(df, window_size)

        return X, y, timestamps

    def _create_features(self, df, window_size):
        """
        Create features from OHLCV data using a sliding window approach.

        Args:
            df (pd.DataFrame): DataFrame with OHLCV data
            window_size (int): Size of the sliding window

        Returns:
            tuple: (X, y, timestamps) where:
                X is the input features array
                y is the target array
                timestamps is an array of timestamps for each sample
        """
        # Extract OHLCV columns
        ohlcv = df[['open', 'high', 'low', 'close', 'volume']].values

        # Scale the data
        scaler = MinMaxScaler()
        ohlcv_scaled = scaler.fit_transform(ohlcv)

        # Store the scaler for later use
        timeframe = next((tf for tf in self.timeframes if self.dataframes.get(tf) is not None and
                          self.dataframes[tf].equals(df)), 'unknown')
        self.scalers[timeframe] = scaler

        # Create sliding windows
        X = []
        y = []
        timestamps = []

        for i in range(len(ohlcv_scaled) - window_size):
            # Input: window_size candles of OHLCV data
            X.append(ohlcv_scaled[i:i+window_size])

            # Target: binary classification - price goes up (1) or down (0)
            # 1 if close price increases in the next candle, 0 otherwise
            price_change = ohlcv[i+window_size, 3] - ohlcv[i+window_size-1, 3]
            y.append(1 if price_change > 0 else 0)

            # Store timestamp for reference
            timestamps.append(df['timestamp'].iloc[i+window_size])

        return np.array(X), np.array(y), np.array(timestamps)

    def generate_training_dataset(self, timeframes=None, n_candles=1000, window_size=20):
        """
        Generate and save a training dataset for neural network models.

        Args:
            timeframes (list): List of timeframes to use
            n_candles (int): Number of candles to fetch for each timeframe
            window_size (int): Size of the sliding window for feature creation

        Returns:
            dict: Dictionary of dataset file paths
        """
        if timeframes is None:
            timeframes = self.timeframes

        # Prepare inputs
        X, y, timestamps = self.prepare_nn_input(timeframes, n_candles, window_size)

        if X is None or y is None:
            logger.error("Failed to prepare input data for dataset")
            return None

        # Prepare output paths
        timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
        dataset_name = f"{self.symbol.replace('/', '_')}_{'_'.join(timeframes)}_{timestamp_str}"

        X_path = os.path.join(self.data_dir, f"{dataset_name}_X.npy")
        y_path = os.path.join(self.data_dir, f"{dataset_name}_y.npy")
        timestamps_path = os.path.join(self.data_dir, f"{dataset_name}_timestamps.npy")
        metadata_path = os.path.join(self.data_dir, f"{dataset_name}_metadata.json")

        # Save arrays
        np.save(X_path, X)
        np.save(y_path, y)
        np.save(timestamps_path, timestamps)

        # Save metadata
        metadata = {
            'symbol': self.symbol,
            'timeframes': timeframes,
            'window_size': window_size,
            'n_samples': len(X),
            'feature_shape': X.shape[1:],
            'created_at': datetime.now().isoformat(),
            'dataset_name': dataset_name
        }

        with open(metadata_path, 'w') as f:
            json.dump(metadata, f, indent=2)

        # Save scalers
        scaler_path = os.path.join(self.data_dir, f"{dataset_name}_scalers.pkl")
        with open(scaler_path, 'wb') as f:
            pickle.dump(self.scalers, f)

        # Return dataset info
        dataset_info = {
            'X_path': X_path,
            'y_path': y_path,
            'timestamps_path': timestamps_path,
            'metadata_path': metadata_path,
            'scaler_path': scaler_path
        }

        logger.info(f"Dataset generated and saved: {dataset_name}")
        return dataset_info

    def prepare_realtime_input(self, timeframe='1h', n_candles=30, window_size=20):
        """
        Prepare a single input sample from the most recent data for real-time inference.

        Args:
            timeframe (str): Timeframe to use
            n_candles (int): Number of recent candles to fetch
            window_size (int): Size of the sliding window

        Returns:
            tuple: (X, timestamp) where:
                X is the input features array with shape (1, window_size, n_features)
                timestamp is the timestamp of the most recent candle
        """
        # Get recent data
        df = self.get_historical_data(timeframe=timeframe, n_candles=n_candles, use_cache=False)

        if df is None or len(df) < window_size:
            logger.error(f"Not enough data for inference (need at least {window_size} candles)")
            return None, None

        # Extract features from the most recent window
        ohlcv = df[['open', 'high', 'low', 'close', 'volume']].tail(window_size).values

        # Scale the data
        if timeframe in self.scalers:
            # Use existing scaler
            scaler = self.scalers[timeframe]
        else:
            # Create new scaler
            scaler = MinMaxScaler()
            # Fit on all available data
            all_data = df[['open', 'high', 'low', 'close', 'volume']].values
            scaler.fit(all_data)
            self.scalers[timeframe] = scaler

        ohlcv_scaled = scaler.transform(ohlcv)

        # Reshape to (1, window_size, n_features)
        X = np.array([ohlcv_scaled])

        # Get timestamp of the most recent candle
        timestamp = df['timestamp'].iloc[-1]

        return X, timestamp
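A minimal sketch of driving this interface end to end. It relies on the dummy-data fallback above, so it runs offline; the shapes noted in the comments are taken from the docstrings and the parameter values are illustrative:

# Usage sketch for DataInterface (dummy-data fallback, illustrative parameters)
from NN.utils.data_interface import DataInterface

di = DataInterface(symbol="BTC/USDT", timeframes=['1h', '4h'])

# Windowed training arrays: X ~ (n_samples, 20, 5), y ~ (n_samples,)
X, y, timestamps = di.prepare_nn_input(timeframes=['1h'], n_candles=500, window_size=20)

# Persist a reusable dataset (arrays, metadata, scalers) under NN/data
dataset_info = di.generate_training_dataset(timeframes=['1h'], n_candles=1000, window_size=20)

# Single most-recent window for real-time inference: X_live ~ (1, 20, 5)
X_live, last_ts = di.prepare_realtime_input(timeframe='1h', n_candles=30, window_size=20)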
232 run_nn.py Normal file
@@ -0,0 +1,232 @@
#!/usr/bin/env python3
"""
Neural Network Training Runner Script

This script runs the Neural Network Trading System with the existing conda environment.
It detects which deep learning framework is available (TensorFlow or PyTorch) and
adjusts the implementation accordingly.
"""

import os
import sys
import subprocess
import argparse
import logging
from pathlib import Path

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('nn_runner')

def detect_framework():
    """Detect which deep learning framework is available in the environment"""
    try:
        import torch
        torch_version = torch.__version__
        logger.info(f"PyTorch {torch_version} detected")
        return "pytorch", torch_version
    except ImportError:
        logger.warning("PyTorch not found in environment")
        try:
            import tensorflow as tf
            tf_version = tf.__version__
            logger.info(f"TensorFlow {tf_version} detected")
            return "tensorflow", tf_version
        except ImportError:
            logger.error("Neither PyTorch nor TensorFlow is available in the environment")
            return None, None

def check_dependencies():
    """Check for required dependencies and return if they are met"""
    required_packages = ["numpy", "pandas", "matplotlib", "scikit-learn"]
    missing_packages = []

    for package in required_packages:
        try:
            __import__(package)
        except ImportError:
            missing_packages.append(package)

    if missing_packages:
        logger.warning(f"Missing required packages: {', '.join(missing_packages)}")
        return False

    return True

def create_run_command(args, framework):
    """Create the command to run the neural network based on the available framework"""
    cmd = ["python", "-m", "NN.main"]

    # Add mode
    cmd.extend(["--mode", args.mode])

    # Add symbol
    if args.symbol:
        cmd.extend(["--symbol", args.symbol])

    # Add timeframes
    if args.timeframes:
        cmd.extend(["--timeframes"] + args.timeframes)

    # Add window size
    if args.window_size:
        cmd.extend(["--window-size", str(args.window_size)])

    # Add output size
    if args.output_size:
        cmd.extend(["--output-size", str(args.output_size)])

    # Add batch size
    if args.batch_size:
        cmd.extend(["--batch-size", str(args.batch_size)])

    # Add epochs
    if args.epochs:
        cmd.extend(["--epochs", str(args.epochs)])

    # Add model type
    if args.model_type:
        cmd.extend(["--model-type", args.model_type])

    # Add framework-specific flag
    cmd.extend(["--framework", framework])

    return cmd

def parse_arguments():
    """Parse command line arguments"""
    parser = argparse.ArgumentParser(description='Neural Network Trading System Runner')

    parser.add_argument('--mode', type=str, choices=['train', 'predict', 'realtime'], default='train',
                        help='Mode to run (train, predict, realtime)')
    parser.add_argument('--symbol', type=str, default='BTC/USDT',
                        help='Trading pair symbol')
    parser.add_argument('--timeframes', type=str, nargs='+', default=['1h', '4h'],
                        help='Timeframes to use')
    parser.add_argument('--window-size', type=int, default=20,
                        help='Window size for input data')
    parser.add_argument('--output-size', type=int, default=3,
                        help='Output size (1 for binary, 3 for BUY/HOLD/SELL)')
    parser.add_argument('--batch-size', type=int, default=32,
                        help='Batch size for training')
    parser.add_argument('--epochs', type=int, default=100,
                        help='Number of epochs for training')
    parser.add_argument('--model-type', type=str, choices=['cnn', 'transformer', 'moe'], default='cnn',
                        help='Model type to use')
    parser.add_argument('--conda-env', type=str, default='gpt-gpu',
                        help='Name of conda environment to use')
    parser.add_argument('--no-conda', action='store_true',
                        help='Do not use conda environment activation')
    parser.add_argument('--framework', type=str, choices=['tensorflow', 'pytorch'], default='pytorch',
                        help='Deep learning framework to use (default: pytorch)')

    return parser.parse_args()

def main():
    # Parse arguments
    args = parse_arguments()

    # Check if we should run with conda
    if not args.no_conda and args.conda_env:
        # Create conda activation command
        if sys.platform == 'win32':
            conda_cmd = f"conda activate {args.conda_env} && "
        else:
            conda_cmd = f"source activate {args.conda_env} && "

        logger.info(f"Running with conda environment: {args.conda_env}")

        # Create the run script
        script_path = Path("run_nn_in_conda.bat" if sys.platform == 'win32' else "run_nn_in_conda.sh")

        with open(script_path, 'w') as f:
            if sys.platform == 'win32':
                f.write("@echo off\n")
                f.write(f"call conda activate {args.conda_env}\n")
                f.write(f"python -m NN.main --mode {args.mode} --symbol {args.symbol}")

                if args.timeframes:
                    f.write(f" --timeframes {' '.join(args.timeframes)}")

                if args.window_size:
                    f.write(f" --window-size {args.window_size}")

                if args.output_size:
                    f.write(f" --output-size {args.output_size}")

                if args.batch_size:
                    f.write(f" --batch-size {args.batch_size}")

                if args.epochs:
                    f.write(f" --epochs {args.epochs}")

                if args.model_type:
                    f.write(f" --model-type {args.model_type}")
            else:
                f.write("#!/bin/bash\n")
                f.write(f"source activate {args.conda_env}\n")
                f.write(f"python -m NN.main --mode {args.mode} --symbol {args.symbol}")

                if args.timeframes:
                    f.write(f" --timeframes {' '.join(args.timeframes)}")

                if args.window_size:
                    f.write(f" --window-size {args.window_size}")

                if args.output_size:
                    f.write(f" --output-size {args.output_size}")

                if args.batch_size:
                    f.write(f" --batch-size {args.batch_size}")

                if args.epochs:
                    f.write(f" --epochs {args.epochs}")

                if args.model_type:
                    f.write(f" --model-type {args.model_type}")

        # Make script executable on Unix
        if sys.platform != 'win32':
            os.chmod(script_path, 0o755)

        # Run the script
        logger.info(f"Created script: {script_path}")
        logger.info("Run this script to execute the neural network with the conda environment")

        if sys.platform == 'win32':
            print("\nTo run the neural network, execute the following command:")
            print(f"  {script_path}")
        else:
            print("\nTo run the neural network, execute the following command:")
            print(f"  ./{script_path}")
    else:
        # Run directly without conda
        # First detect available framework
        framework, version = detect_framework()

        if framework is None:
            logger.error("Cannot run Neural Network - no deep learning framework available")
            return

        # Check dependencies
        if not check_dependencies():
            logger.error("Missing required dependencies - please install them first")
            return

        # Create command
        cmd = create_run_command(args, framework)

        # Run command
        logger.info(f"Running command: {' '.join(cmd)}")
        try:
            subprocess.run(cmd, check=True)
        except subprocess.CalledProcessError as e:
            logger.error(f"Error running neural network: {str(e)}")
        except Exception as e:
            logger.error(f"Error: {str(e)}")

if __name__ == "__main__":
    main()
3 run_nn_in_conda.bat Normal file
@@ -0,0 +1,3 @@
@echo off
call conda activate gpt-gpu
python -m NN.main --mode train --symbol BTC/USDT --timeframes 1h 4h --window-size 20 --output-size 3 --batch-size 32 --epochs 100 --model-type cnn --framework pytorch
50 run_pytorch_nn.bat Normal file
@@ -0,0 +1,50 @@
@echo off
echo ============================================================
echo Neural Network Trading System - PyTorch Implementation
echo ============================================================

call conda activate gpt-gpu

REM Parse command-line arguments
set MODE=train
set MODEL_TYPE=cnn
set SYMBOL=BTC/USDT
set EPOCHS=100

:parse
if "%~1"=="" goto endparse
if /i "%~1"=="--mode" (
    set MODE=%~2
    shift
    shift
    goto parse
)
if /i "%~1"=="--model" (
    set MODEL_TYPE=%~2
    shift
    shift
    goto parse
)
if /i "%~1"=="--symbol" (
    set SYMBOL=%~2
    shift
    shift
    goto parse
)
if /i "%~1"=="--epochs" (
    set EPOCHS=%~2
    shift
    shift
    goto parse
)
shift
goto parse
:endparse

echo Running Neural Network in %MODE% mode with %MODEL_TYPE% model for %SYMBOL% for %EPOCHS% epochs

python -m NN.main --mode %MODE% --symbol %SYMBOL% --timeframes 1h 4h --window-size 20 --output-size 3 --batch-size 32 --epochs %EPOCHS% --model-type %MODEL_TYPE% --framework pytorch

echo ============================================================
echo Run completed.
echo ============================================================