Dobromir Popov
2025-03-29 04:09:03 +02:00
parent 43803caaf1
commit 8b3db10a85
3 changed files with 307 additions and 267 deletions


@@ -11,6 +11,7 @@ import logging
 import numpy as np
 import matplotlib.pyplot as plt
 from datetime import datetime
+import math
 import torch
 import torch.nn as nn
@@ -24,79 +25,84 @@ logger = logging.getLogger(__name__)
 class CNNPyTorch(nn.Module):
     """PyTorch CNN model for time series analysis"""
-    def __init__(self, input_shape, output_size=5):
+    def __init__(self, input_shape, output_size=3):
         """
-        Initialize the enhanced CNN model.
+        Initialize the CNN model.
         Args:
             input_shape (tuple): Shape of input data (window_size, features)
-            output_size (int): Always 5 for our trading signals
+            output_size (int): Size of the output (3 for BUY/HOLD/SELL)
         """
         super(CNNPyTorch, self).__init__()
         window_size, num_features = input_shape
-        kernel_size = 5
+        kernel_size = min(5, window_size)  # Ensure kernel size doesn't exceed window size
         dropout_rate = 0.3
-        # Enhanced CNN Architecture
+        # Calculate initial channel size based on number of features
+        initial_channels = max(32, num_features * 2)  # Scale channels with features
+        # CNN Architecture
         self.conv_layers = nn.Sequential(
             # Block 1
-            nn.Conv1d(num_features, 64, kernel_size, padding='same'),
-            nn.BatchNorm1d(64),
+            nn.Conv1d(num_features, initial_channels, kernel_size, padding='same'),
+            nn.BatchNorm1d(initial_channels),
             nn.ReLU(),
             nn.Dropout(dropout_rate),
             # Block 2
-            nn.Conv1d(64, 128, kernel_size, padding='same'),
-            nn.BatchNorm1d(128),
+            nn.Conv1d(initial_channels, initial_channels * 2, kernel_size, padding='same'),
+            nn.BatchNorm1d(initial_channels * 2),
             nn.ReLU(),
             nn.MaxPool1d(2),
             nn.Dropout(dropout_rate),
             # Block 3
-            nn.Conv1d(128, 256, kernel_size, padding='same'),
-            nn.BatchNorm1d(256),
+            nn.Conv1d(initial_channels * 2, initial_channels * 4, kernel_size, padding='same'),
+            nn.BatchNorm1d(initial_channels * 4),
             nn.ReLU(),
             nn.Dropout(dropout_rate),
             # Block 4
-            nn.Conv1d(256, 512, kernel_size, padding='same'),
-            nn.BatchNorm1d(512),
+            nn.Conv1d(initial_channels * 4, initial_channels * 8, kernel_size, padding='same'),
+            nn.BatchNorm1d(initial_channels * 8),
             nn.ReLU(),
-            nn.MaxPool1d(2)
+            nn.MaxPool1d(2),
+            nn.Dropout(dropout_rate)
         )
         # Calculate flattened size after conv and pooling
-        conv_output_size = 512 * (window_size // 4)
-        # Enhanced dense layers
+        conv_output_size = (initial_channels * 8) * (window_size // 4)
+        # Dense layers with scaled sizes
+        dense_size = min(2048, conv_output_size)  # Cap dense layer size
         self.dense_block = nn.Sequential(
             nn.Flatten(),
-            nn.Linear(conv_output_size, 512),
-            nn.BatchNorm1d(512),
+            nn.Linear(conv_output_size, dense_size),
+            nn.BatchNorm1d(dense_size),
             nn.ReLU(),
             nn.Dropout(dropout_rate),
-            nn.Linear(512, 256),
-            nn.BatchNorm1d(256),
+            nn.Linear(dense_size, dense_size // 2),
+            nn.BatchNorm1d(dense_size // 2),
             nn.ReLU(),
             nn.Dropout(dropout_rate),
-            nn.Linear(256, 128),
-            nn.BatchNorm1d(128),
+            nn.Linear(dense_size // 2, dense_size // 4),
+            nn.BatchNorm1d(dense_size // 4),
             nn.ReLU(),
             nn.Dropout(dropout_rate),
-            nn.Linear(128, output_size)
+            nn.Linear(dense_size // 4, output_size)
         )
-        # Activation based on output size
-        if output_size == 1:
-            self.activation = nn.Sigmoid()  # Binary classification or regression
-        elif output_size > 1:
-            self.activation = nn.Softmax(dim=1)  # Multi-class classification
-        else:
-            self.activation = nn.Identity()  # No activation
+        # Activation for output
+        self.activation = nn.Softmax(dim=1)
     def forward(self, x):
         """
-        Forward pass through enhanced network.
+        Forward pass through the network.
         Args:
             x: Input tensor of shape [batch_size, window_size, features]
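Note on the sizing arithmetic introduced above: the two MaxPool1d(2) layers halve the time dimension twice, which is where the window_size // 4 factor comes from. A minimal sketch of the resulting sizes, using hypothetical inputs (window_size=20, num_features=10) that are not part of this commit:

    initial_channels = max(32, 10 * 2)                 # 32 channels in block 1
    conv_output_size = (32 * 8) * (20 // 4)            # 256 channels * 5 time steps = 1280
    dense_size = min(2048, conv_output_size)           # 1280, under the 2048 cap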
@@ -107,14 +113,16 @@ class CNNPyTorch(nn.Module):
         # Transpose for conv1d: [batch, features, window]
         x_t = x.transpose(1, 2)
-        # Process through all CNN layers
+        # Process through CNN layers
         conv_out = self.conv_layers(x_t)
         # Process through dense layers
-        output = self.dense_block(conv_out)
-        return self.activation(output)
+        dense_out = self.dense_block(conv_out)
+        # Apply activation
+        output = self.activation(dense_out)
+        return output
 class CNNModelPyTorch:
     """
@@ -124,14 +132,14 @@
     predictions with the CNN model.
     """
-    def __init__(self, window_size, num_features, output_size=5, timeframes=None):
+    def __init__(self, window_size, num_features, output_size=3, timeframes=None):
         """
         Initialize the CNN model.
         Args:
             window_size (int): Size of the input window
             num_features (int): Number of features in the input data
-            output_size (int): Size of the output (1 for regression, 3 for classification)
+            output_size (int): Size of the output (default: 3 for BUY/HOLD/SELL)
             timeframes (list): List of timeframes used (for logging)
         """
         # Action tracking
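For reference, a hypothetical instantiation of the wrapper under the new default; the argument values are illustrative only:

    model = CNNModelPyTorch(window_size=20, num_features=10,
                            output_size=3, timeframes=['1m', '5m', '1h'])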
@@ -171,27 +179,23 @@
             output_size=self.output_size
         ).to(self.device)
-        # Initialize optimizer
+        # Initialize optimizer with learning rate schedule
         self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
+        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
+            self.optimizer, mode='max', factor=0.5, patience=10, verbose=True
+        )
-        # Initialize loss function based on output size
-        if self.output_size == 1:
-            self.criterion = nn.BCELoss()  # Binary classification
-        elif self.output_size > 1:
-            self.criterion = nn.CrossEntropyLoss()  # Multi-class classification
-        else:
-            self.criterion = nn.MSELoss()  # Regression
+        # Initialize loss function with class weights
+        class_weights = torch.tensor([1.0, 0.5, 1.0]).to(self.device)  # Lower weight for HOLD
+        self.criterion = nn.CrossEntropyLoss(weight=class_weights)
         logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")
-    def train_epoch(self, X_train, y_train, batch_size=32):
+    def train_epoch(self, X_train, y_train, future_prices=None, batch_size=32):
         """Train for one epoch and return loss and accuracy"""
         # Convert to PyTorch tensors
         X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)
-        if self.output_size == 1:
-            y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(self.device)
-        else:
-            y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)
+        y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)
         # Create DataLoader
         train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
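Two details in this hunk are easy to miss: the scheduler uses mode='max' because it is stepped with accuracy (a quantity to maximize), and the weight vector [1.0, 0.5, 1.0] halves the penalty for HOLD. A small sketch of the weighting effect, assuming the label encoding 0=BUY, 1=HOLD, 2=SELL implied by the "Lower weight for HOLD" comment; reduction='none' is used here only to expose the per-sample weights (the committed criterion keeps the default mean reduction):

    import torch
    import torch.nn as nn

    weights = torch.tensor([1.0, 0.5, 1.0])
    criterion = nn.CrossEntropyLoss(weight=weights, reduction='none')

    logits = torch.tensor([[0.3, 0.1, 2.0]])           # a confident SELL prediction
    print(criterion(logits, torch.tensor([1])))        # HOLD target: loss scaled by 0.5
    print(criterion(logits, torch.tensor([0])))        # BUY target: full-weight loss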
@@ -210,40 +214,44 @@
                 outputs = self.model(inputs)
                 # Calculate loss
-                if self.output_size == 1:
-                    loss = self.criterion(outputs, targets.unsqueeze(1))
-                else:
-                    loss = self.criterion(outputs, targets)
+                loss = self.criterion(outputs, targets)
                 # Backward pass and optimize
                 loss.backward()
+                # Clip gradients to prevent exploding gradients
+                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
                 self.optimizer.step()
                 # Statistics
                 running_loss += loss.item()
-                if self.output_size > 1:
-                    _, predicted = torch.max(outputs, 1)
-                    total += targets.size(0)
-                    correct += (predicted == targets).sum().item()
+                _, predicted = torch.max(outputs, 1)
+                total += targets.size(0)
+                correct += (predicted == targets).sum().item()
         epoch_loss = running_loss / len(train_loader)
         epoch_acc = correct / total if total > 0 else 0
-        return epoch_loss, epoch_acc
+        # Update learning rate scheduler
+        self.scheduler.step(epoch_acc)
+        # To maintain compatibility with the updated training code, we'll return 3 values
+        # But the price_loss will be zero since we're not using that in this model
+        return epoch_loss, 0.0, epoch_acc
-    def evaluate(self, X_val, y_val):
+    def evaluate(self, X_val, y_val, future_prices=None):
         """Evaluate on validation data and return loss and accuracy"""
         # Convert to PyTorch tensors
         X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
-        if self.output_size == 1:
-            y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
-        else:
-            y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)
+        y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)
         # Create DataLoader
         val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
         val_loader = DataLoader(val_dataset, batch_size=32)
         self.model.eval()
-        val_loss = 0.0
+        running_loss = 0.0
         correct = 0
         total = 0
@@ -253,20 +261,20 @@
                 outputs = self.model(inputs)
                 # Calculate loss
-                if self.output_size == 1:
-                    loss = self.criterion(outputs, targets.unsqueeze(1))
-                else:
-                    loss = self.criterion(outputs, targets)
-                val_loss += loss.item()
+                loss = self.criterion(outputs, targets)
+                running_loss += loss.item()
                 # Calculate accuracy
-                if self.output_size > 1:
-                    _, predicted = torch.max(outputs, 1)
-                    total += targets.size(0)
-                    correct += (predicted == targets).sum().item()
+                _, predicted = torch.max(outputs, 1)
+                total += targets.size(0)
+                correct += (predicted == targets).sum().item()
-        return val_loss / len(val_loader), correct / total if total > 0 else 0
+        val_loss = running_loss / len(val_loader)
+        val_acc = correct / total if total > 0 else 0
+        # To maintain compatibility with the updated training code, we'll return 3 values
+        # But the price_loss will be zero since we're not using that in this model
+        return val_loss, 0.0, val_acc
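With this change both train_epoch and evaluate return (loss, price_loss, accuracy), with price_loss fixed at 0.0. A hypothetical driver loop under the new contract; X_train, y_train, X_val and y_val are assumed to be numpy arrays of shape [n, window_size, features] with integer class labels:

    for epoch in range(10):
        train_loss, _, train_acc = model.train_epoch(X_train, y_train)
        val_loss, _, val_acc = model.evaluate(X_val, y_val)
        print(f"epoch {epoch}: loss={train_loss:.4f} acc={train_acc:.2%} "
              f"val_loss={val_loss:.4f} val_acc={val_acc:.2%}")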
     def predict(self, X):
         """Make predictions on input data"""
@@ -275,15 +283,13 @@
         with torch.no_grad():
             outputs = self.model(X_tensor)
-            if self.output_size > 1:
-                _, predicted = torch.max(outputs, 1)
-                return predicted.cpu().numpy()
-            else:
-                return outputs.cpu().numpy()
+            # To maintain compatibility with the transformer model, return the action probs
+            # And a dummy price prediction of zeros
+            return outputs.cpu().numpy(), np.zeros((len(X), 1))
     def predict_next_candles(self, X, n_candles=3):
         """
-        Predict the next n candles for each timeframe.
+        Predict the next n candles.
         Args:
             X: Input data of shape [batch_size, window_size, features]
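predict now returns a tuple, so call sites that expected a single array need updating. A hedged usage sketch; the BUY/HOLD/SELL index order is an assumption inferred from the class-weight comment earlier in this commit:

    import numpy as np

    action_probs, price_preds = model.predict(X)       # price_preds is always zeros here
    actions = np.argmax(action_probs, axis=1)          # assumed 0=BUY, 1=HOLD, 2=SELL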
@@ -296,33 +302,14 @@
         X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
         with torch.no_grad():
-            # Get the last window of data
-            last_window = X_tensor[-1:]  # [1, window_size, features]
+            # Get predictions for the input window
+            action_probs = self.model(X_tensor)
-            # Initialize predictions
+            # For compatibility, we'll return a dictionary with the timeframes
             predictions = {}
-            # For each timeframe, predict next n candles
             for i, tf in enumerate(self.timeframes):
-                # Extract features for this timeframe
-                tf_features = last_window[:, :, i*5:(i+1)*5]  # [1, window_size, 5]
-                # Predict next n candles
-                tf_predictions = []
-                current_window = tf_features
-                for _ in range(n_candles):
-                    # Get prediction for next candle
-                    output = self.model(current_window)
-                    tf_predictions.append(output.cpu().numpy())
-                    # Update window for next prediction
-                    current_window = torch.cat([
-                        current_window[:, 1:, :],
-                        output.unsqueeze(1)
-                    ], dim=1)
-                predictions[tf] = np.concatenate(tf_predictions, axis=0)
+                # Simple prediction: just repeat the current prediction for next n candles
+                predictions[tf] = np.tile(action_probs.cpu().numpy(), (n_candles, 1))
         return predictions
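The autoregressive rollout is replaced by a flat repeat: every timeframe now receives the same probabilities tiled n_candles times. A minimal numpy illustration of the np.tile call, with made-up values:

    import numpy as np

    action_probs = np.array([[0.7, 0.2, 0.1]])         # one window's BUY/HOLD/SELL probs
    tiled = np.tile(action_probs, (3, 1))              # shape (3, 3): identical rows per future candle

For a batch of b windows the tiled array has shape (3 * b, 3), since np.tile repeats along axis 0.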