Merge commit 'd49a473ed6f4aef55bfdd47d6370e53582be6b7b' into cleanup

Dobromir Popov
2025-10-01 00:32:19 +03:00
353 changed files with 81004 additions and 35899 deletions

View File

@@ -1,21 +0,0 @@
"""
Neural Network Models
=====================
This package contains the neural network models used in the trading system:
- CNN Model: Deep convolutional neural network for feature extraction
- DQN Agent: Deep Q-Network for reinforcement learning
- COB RL Model: Specialized RL model for order book data
- Advanced Transformer: High-performance transformer for trading
PyTorch implementation only.
"""
from NN.models.cnn_model import EnhancedCNNModel as CNNModel
from NN.models.dqn_agent import DQNAgent
from NN.models.cob_rl_model import MassiveRLNetwork, COBRLModelInterface
from NN.models.advanced_transformer_trading import AdvancedTradingTransformer, TradingTransformerConfig
from NN.models.model_interfaces import ModelInterface, CNNModelInterface, RLAgentInterface, ExtremaTrainerInterface
__all__ = ['CNNModel', 'DQNAgent', 'MassiveRLNetwork', 'COBRLModelInterface', 'AdvancedTradingTransformer', 'TradingTransformerConfig',
'ModelInterface', 'CNNModelInterface', 'RLAgentInterface', 'ExtremaTrainerInterface']
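For reference, a minimal sketch of how these (now-removed) package exports were consumed; note that CNNModel is only an alias, so both names resolve to the same class:

# Hypothetical usage sketch of the package-level exports (constructor arguments elided).
from NN.models import CNNModel, DQNAgent, COBRLModelInterface
from NN.models.cnn_model import EnhancedCNNModel

# CNNModel is an alias for EnhancedCNNModel, so both names refer to the same class:
assert CNNModel is EnhancedCNNModel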

View File

@@ -267,7 +267,17 @@ class COBRLModelInterface(ModelInterface):
logger.info(f"COB RL Model Interface initialized on {self.device}")
def to(self, device):
"""PyTorch-style device movement method"""
self.device = device
self.model = self.model.to(device)
return self
def predict(self, cob_features: np.ndarray) -> Dict[str, Any]:
"""Make prediction using the model"""
self.model.eval()
with torch.no_grad():
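The hunk is truncated above, but the added to() method follows the standard PyTorch convention of returning self, so it composes directly with predict. A minimal sketch (the 2000-dim feature vector is an assumed placeholder, not a documented input size):

# Sketch: move the interface's underlying model to GPU, then predict once.
import numpy as np
import torch
from NN.models.cob_rl_model import COBRLModelInterface

interface = COBRLModelInterface()  # constructor arguments assumed default here
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
interface = interface.to(device)   # returns self, PyTorch-style

cob_features = np.zeros(2000, dtype=np.float32)  # placeholder feature vector
result = interface.predict(cob_features)         # -> Dict[str, Any]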

File diff suppressed because it is too large

View File

@@ -3,6 +3,7 @@ import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
import time
import logging
import torch.nn.functional as F
from typing import List, Tuple, Dict, Any, Optional, Union
@@ -80,6 +81,9 @@ class EnhancedCNN(nn.Module):
self.n_actions = n_actions
self.confidence_threshold = confidence_threshold
# Training data storage
self.training_data = []
# Calculate input dimensions
if isinstance(input_shape, (list, tuple)):
if len(input_shape) == 3: # [channels, height, width]
@@ -265,8 +269,9 @@ class EnhancedCNN(nn.Module):
nn.Linear(256, 3) # 0=bottom, 1=top, 2=neither
)
# ULTRA MASSIVE price direction prediction head
# Outputs single direction and confidence values
self.price_direction_head = nn.Sequential(
nn.Linear(1024, 1024), # Increased from 512
nn.ReLU(),
nn.Dropout(0.3),
@@ -275,32 +280,13 @@ class EnhancedCNN(nn.Module):
nn.Dropout(0.3),
nn.Linear(512, 256), # Increased from 128
nn.ReLU(),
nn.Linear(256, 2) # [direction, confidence]
)
# Direction activation (tanh for -1 to 1)
self.direction_activation = nn.Tanh()
# Confidence activation (sigmoid for 0 to 1)
self.confidence_activation = nn.Sigmoid()
# ULTRA MASSIVE value prediction with ensemble approaches
self.price_pred_value = nn.Sequential(
@@ -371,21 +357,45 @@ class EnhancedCNN(nn.Module):
nn.Linear(128, 4) # Low risk, medium risk, high risk, extreme risk
)
def _memory_barrier(self, tensor: torch.Tensor) -> torch.Tensor:
"""Create a memory barrier to prevent in-place operation issues"""
return tensor.detach().clone().requires_grad_(tensor.requires_grad)
def _check_rebuild_network(self, features):
"""Check if network needs to be rebuilt for different feature dimensions"""
"""DEPRECATED: Network should have fixed architecture - no runtime rebuilding"""
if features != self.feature_dim:
logger.info(f"Rebuilding network for new feature dimension: {features} (was {self.feature_dim})")
self.feature_dim = features
self._build_network()
# Move to device after rebuilding
self.to(self.device)
return True
logger.error(f"CRITICAL: Input feature dimension mismatch! Expected {self.feature_dim}, got {features}")
logger.error("This indicates a bug in data preprocessing - input should be fixed size!")
logger.error("Network architecture should NOT change at runtime!")
raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {features}")
return False
def forward(self, x):
"""Forward pass through the ULTRA MASSIVE network"""
batch_size = x.size(0)
# Validate input dimensions to prevent zero-element tensor issues
if x.numel() == 0:
logger.error(f"Forward pass received empty tensor with shape {x.shape}")
# Return default outputs for all 5 expected values to prevent crash
default_q_values = torch.zeros(batch_size, self.n_actions, device=x.device)
default_extrema = torch.zeros(batch_size, 3, device=x.device) # bottom/top/neither
default_price_pred = torch.zeros(batch_size, 2, device=x.device) # [direction, confidence]
default_features = torch.zeros(batch_size, 1024, device=x.device)
default_advanced = torch.zeros(batch_size, 1, device=x.device)
return default_q_values, default_extrema, default_price_pred, default_features, default_advanced
# Check for zero feature dimensions
if len(x.shape) > 1 and any(dim == 0 for dim in x.shape[1:]):
logger.error(f"Forward pass received tensor with zero feature dimensions: {x.shape}")
# Return default outputs for all 5 expected values to prevent crash
default_q_values = torch.zeros(batch_size, self.n_actions, device=x.device)
default_extrema = torch.zeros(batch_size, 3, device=x.device) # bottom/top/neither
default_price_pred = torch.zeros(batch_size, 2, device=x.device) # [direction, confidence]
default_features = torch.zeros(batch_size, 1024, device=x.device)
default_advanced = torch.zeros(batch_size, 1, device=x.device)
return default_q_values, default_extrema, default_price_pred, default_features, default_advanced
# Process different input shapes
if len(x.shape) > 2:
# Handle 4D input [batch, timeframes, window, features] or 3D input [batch, timeframes, features]
@@ -397,10 +407,11 @@ class EnhancedCNN(nn.Module):
# Now x is 3D: [batch, timeframes, features]
x_reshaped = x
# Validate input dimensions (should be fixed)
total_features = x_reshaped.size(1) * x_reshaped.size(2)
if total_features != self.feature_dim:
logger.error(f"Input dimension mismatch: expected {self.feature_dim}, got {total_features}")
raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {total_features}")
# Apply ultra massive convolutions
x_conv = self.conv_layers(x_reshaped)
@@ -413,9 +424,10 @@ class EnhancedCNN(nn.Module):
# For 2D input [batch, features]
x_flat = x
# Validate input dimensions (should be fixed)
if x_flat.size(1) != self.feature_dim:
logger.error(f"Input dimension mismatch: expected {self.feature_dim}, got {x_flat.size(1)}")
raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {x_flat.size(1)}")
# Apply ULTRA MASSIVE FC layers to get base features
features = self.fc_layers(x_flat) # [batch, 1024]
@@ -464,10 +476,14 @@ class EnhancedCNN(nn.Module):
# Extrema predictions (bottom/top/neither detection)
extrema_pred = self.extrema_head(features_refined)
# Price direction predictions
price_direction_raw = self.price_direction_head(features_refined)
# Apply separate activations to direction and confidence
direction = self.direction_activation(price_direction_raw[:, 0:1]) # -1 to 1
confidence = self.confidence_activation(price_direction_raw[:, 1:2]) # 0 to 1
price_direction_pred = torch.cat([direction, confidence], dim=1) # [batch, 2]
price_values = self.price_pred_value(features_refined)
# Additional specialized predictions for enhanced accuracy
@@ -476,38 +492,42 @@ class EnhancedCNN(nn.Module):
market_regime_pred = self.market_regime_head(features_refined)
risk_pred = self.risk_head(features_refined)
# Use the price direction prediction directly (already [batch, 2])
price_direction_tensor = price_direction_pred
# Package additional predictions into a single tensor (use volatility as primary)
# For compatibility with DQN agent, we return volatility_pred as the advanced prediction tensor
advanced_pred_tensor = volatility_pred
return q_values, extrema_pred, price_direction_tensor, features_refined, advanced_pred_tensor
def act(self, state, explore=True) -> Tuple[int, float, List[float]]:
"""Enhanced action selection with ultra massive model predictions"""
if explore and np.random.random() < 0.1: # 10% random exploration
# Return the full (action, confidence, probabilities) tuple on the random path too
action = int(np.random.choice(self.n_actions))
uniform = 1.0 / self.n_actions
return action, uniform, [uniform] * self.n_actions
self.eval()
# Accept both NumPy arrays and already-built torch tensors
if isinstance(state, torch.Tensor):
state_tensor = state.detach().to(self.device)
if state_tensor.dim() == 1:
state_tensor = state_tensor.unsqueeze(0)
else:
# Convert to tensor **directly on the target device** to avoid intermediate CPU copies
state_tensor = torch.as_tensor(state, dtype=torch.float32, device=self.device)
if state_tensor.dim() == 1:
state_tensor = state_tensor.unsqueeze(0)
with torch.no_grad():
q_values, extrema_pred, price_direction_predictions, features, advanced_predictions = self(state_tensor)
# Process price direction predictions
if price_direction_predictions is not None:
self.process_price_direction_predictions(price_direction_predictions)
# Apply softmax to get action probabilities
action_probs_tensor = torch.softmax(q_values, dim=1)
action_idx = int(torch.argmax(action_probs_tensor, dim=1).item())
confidence = float(action_probs_tensor[0, action_idx].item()) # Confidence of the chosen action
action_probs = action_probs_tensor.squeeze(0).tolist() # Convert to list of floats for return
# Log advanced predictions for better decision making
if hasattr(self, '_log_predictions') and self._log_predictions:
@@ -537,7 +557,180 @@ class EnhancedCNN(nn.Module):
logger.info(f" Market Regime: {regime_labels[regime_class]} ({regime[regime_class]:.3f})")
logger.info(f" Risk Level: {risk_labels[risk_class]} ({risk[risk_class]:.3f})")
return action_idx, confidence, action_probs
def process_price_direction_predictions(self, price_direction_pred: torch.Tensor) -> Dict[str, float]:
"""
Process price direction predictions and convert to standardized format
Args:
price_direction_pred: Tensor of shape (batch_size, 2) containing [direction, confidence]
Returns:
Dict with direction (-1 to 1) and confidence (0 to 1)
"""
try:
if price_direction_pred is None or price_direction_pred.numel() == 0:
return {}
# Extract direction and confidence values
direction_value = float(price_direction_pred[0, 0].item()) # -1 to 1
confidence_value = float(price_direction_pred[0, 1].item()) # 0 to 1
processed_directions = {
'direction': direction_value,
'confidence': confidence_value
}
# Store for later access
self.last_price_direction = processed_directions
return processed_directions
except Exception as e:
logger.error(f"Error processing price direction predictions: {e}")
return {}
def get_price_direction_vector(self) -> Dict[str, float]:
"""
Get the current price direction and confidence
Returns:
Dict with direction (-1 to 1) and confidence (0 to 1)
"""
return getattr(self, 'last_price_direction', {})
def get_price_direction_summary(self) -> Dict[str, Any]:
"""
Get a summary of price direction prediction
Returns:
Dict containing direction and confidence information
"""
try:
last_direction = getattr(self, 'last_price_direction', {})
if not last_direction:
return {
'direction_value': 0.0,
'confidence_value': 0.0,
'direction_label': "SIDEWAYS",
'discrete_direction': 0,
'strength': 0.0,
'weighted_strength': 0.0
}
direction_value = last_direction['direction']
confidence_value = last_direction['confidence']
# Convert to discrete direction
if direction_value > 0.1:
direction_label = "UP"
discrete_direction = 1
elif direction_value < -0.1:
direction_label = "DOWN"
discrete_direction = -1
else:
direction_label = "SIDEWAYS"
discrete_direction = 0
return {
'direction_value': float(direction_value),
'confidence_value': float(confidence_value),
'direction_label': direction_label,
'discrete_direction': discrete_direction,
'strength': abs(float(direction_value)),
'weighted_strength': abs(float(direction_value)) * float(confidence_value)
}
except Exception as e:
logger.error(f"Error calculating price direction summary: {e}")
return {
'direction_value': 0.0,
'confidence_value': 0.0,
'direction_label': "SIDEWAYS",
'discrete_direction': 0,
'strength': 0.0,
'weighted_strength': 0.0
}
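To make the thresholding concrete, a small worked example (model is assumed to be an instantiated EnhancedCNN):

# Worked example of the summary logic above (0.42 * 0.87 = 0.3654):
raw = torch.tensor([[0.42, 0.87]])            # [direction, confidence]
model.process_price_direction_predictions(raw)
summary = model.get_price_direction_summary()
# summary: direction_label 'UP' (0.42 > 0.1), discrete_direction 1,
#          strength 0.42, weighted_strength 0.3654
# A raw direction of -0.05 would fall inside [-0.1, 0.1] -> 'SIDEWAYS', discrete 0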
def add_training_data(self, state, action, reward, position_pnl=0.0, has_position=False):
"""
Add training data to the model's training buffer with position-based reward enhancement
Args:
state: Input state
action: Action taken
reward: Base reward received
position_pnl: Current position P&L (0.0 if no position)
has_position: Whether we currently have an open position
"""
try:
# Enhance reward based on position status
enhanced_reward = self._calculate_position_enhanced_reward(
reward, action, position_pnl, has_position
)
self.training_data.append({
'state': state,
'action': action,
'reward': enhanced_reward,
'base_reward': reward, # Keep original reward for analysis
'position_pnl': position_pnl,
'has_position': has_position,
'timestamp': time.time()
})
# Keep only the last 1000 training samples
if len(self.training_data) > 1000:
self.training_data = self.training_data[-1000:]
except Exception as e:
logger.error(f"Error adding training data: {e}")
def _calculate_position_enhanced_reward(self, base_reward, action, position_pnl, has_position):
"""
Calculate position-enhanced reward to incentivize profitable trades and closing losing ones
Args:
base_reward: Original reward from price prediction accuracy
action: Action taken ('BUY', 'SELL', 'HOLD')
position_pnl: Current position P&L
has_position: Whether we have an open position
Returns:
Enhanced reward that incentivizes profitable behavior
"""
try:
enhanced_reward = base_reward
if has_position and position_pnl != 0.0:
# Position-based reward adjustments
pnl_factor = position_pnl / 100.0 # Normalize P&L to reasonable scale
if position_pnl > 0: # Profitable position
if action == "HOLD":
# Reward holding profitable positions (let winners run)
enhanced_reward += abs(pnl_factor) * 0.5
elif action in ["BUY", "SELL"]:
# Moderate reward for taking action on profitable positions
enhanced_reward += abs(pnl_factor) * 0.3
elif position_pnl < 0: # Losing position
if action == "HOLD":
# Penalty for holding losing positions (cut losses)
enhanced_reward -= abs(pnl_factor) * 0.8
elif action in ["BUY", "SELL"]:
# Reward for taking action to close losing positions
enhanced_reward += abs(pnl_factor) * 0.6
# Ensure reward doesn't become extreme
enhanced_reward = max(-5.0, min(5.0, enhanced_reward))
return enhanced_reward
except Exception as e:
logger.error(f"Error calculating position-enhanced reward: {e}")
return base_reward
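A quick numeric check of the shaping above, assuming a base reward of 0.2 and an open position at -50 P&L (so pnl_factor = -0.5):

# HOLD on the losing position: 0.2 - |-0.5| * 0.8 = -0.2 (penalized: cut losses)
# SELL on the losing position: 0.2 + |-0.5| * 0.6 =  0.5 (rewarded for closing)
# Both results lie inside the final [-5.0, 5.0] clamp.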
def save(self, path):
"""Save model weights and architecture"""

View File

@@ -1 +0,0 @@
{"best_reward": 4791516.572471984, "best_episode": 3250, "best_pnl": 826842167451289.1, "best_win_rate": 0.47368421052631576, "date": "2025-04-01 10:19:16"}

View File

@@ -1,20 +0,0 @@
{
"supervised": {
"epochs_completed": 22650,
"best_val_pnl": 0.0,
"best_epoch": 50,
"best_win_rate": 0
},
"reinforcement": {
"episodes_completed": 0,
"best_reward": -Infinity,
"best_episode": 0,
"best_win_rate": 0
},
"hybrid": {
"iterations_completed": 453,
"best_combined_score": 0.0,
"training_started": "2025-04-09T10:30:42.510856",
"last_update": "2025-04-09T10:40:02.217840"
}
}

View File

@@ -1,326 +0,0 @@
{
"epochs_completed": 8,
"best_val_pnl": 0.0,
"best_epoch": 1,
"best_win_rate": 0.0,
"training_started": "2025-04-02T10:43:58.946682",
"last_update": "2025-04-02T10:44:10.940892",
"epochs": [
{
"epoch": 1,
"train_loss": 1.0950355529785156,
"val_loss": 1.1657923062642415,
"train_acc": 0.3255208333333333,
"val_acc": 0.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:01.840889",
"data_age": 2,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 2,
"train_loss": 1.0831659038861592,
"val_loss": 1.1212460199991863,
"train_acc": 0.390625,
"val_acc": 0.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:03.134833",
"data_age": 4,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 3,
"train_loss": 1.0740693012873332,
"val_loss": 1.0992945830027263,
"train_acc": 0.4739583333333333,
"val_acc": 0.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:04.425272",
"data_age": 5,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 4,
"train_loss": 1.0747728943824768,
"val_loss": 1.0821794271469116,
"train_acc": 0.4609375,
"val_acc": 0.3229166666666667,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:05.716421",
"data_age": 6,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 5,
"train_loss": 1.0489931503931682,
"val_loss": 1.0669521888097127,
"train_acc": 0.5833333333333334,
"val_acc": 1.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:07.007935",
"data_age": 8,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 6,
"train_loss": 1.0533669590950012,
"val_loss": 1.0505590836207073,
"train_acc": 0.5104166666666666,
"val_acc": 1.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:08.296061",
"data_age": 9,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 7,
"train_loss": 1.0456886688868205,
"val_loss": 1.0351698795954387,
"train_acc": 0.5651041666666666,
"val_acc": 1.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:09.607584",
"data_age": 10,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 8,
"train_loss": 1.040040671825409,
"val_loss": 1.0227736632029216,
"train_acc": 0.6119791666666666,
"val_acc": 1.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:10.940892",
"data_age": 11,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
}
],
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"total_wins": {
"train": 0,
"val": 0
}
}

View File

@@ -1,192 +0,0 @@
{
"epochs_completed": 7,
"best_val_pnl": 0.002028853100759435,
"best_epoch": 6,
"best_win_rate": 0.5157894736842106,
"training_started": "2025-03-31T02:50:10.418670",
"last_update": "2025-03-31T02:50:15.227593",
"epochs": [
{
"epoch": 1,
"train_loss": 1.1206786036491394,
"val_loss": 1.0542699098587036,
"train_acc": 0.11197916666666667,
"val_acc": 0.25,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
},
"val": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
}
},
"timestamp": "2025-03-31T02:50:12.881423",
"data_age": 2
},
{
"epoch": 2,
"train_loss": 1.1266120672225952,
"val_loss": 1.072133183479309,
"train_acc": 0.1171875,
"val_acc": 0.25,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
},
"val": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
}
},
"timestamp": "2025-03-31T02:50:13.186840",
"data_age": 2
},
{
"epoch": 3,
"train_loss": 1.1415620843569438,
"val_loss": 1.1701548099517822,
"train_acc": 0.1015625,
"val_acc": 0.5208333333333334,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
},
"val": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
}
},
"timestamp": "2025-03-31T02:50:13.442018",
"data_age": 3
},
{
"epoch": 4,
"train_loss": 1.1331567962964375,
"val_loss": 1.070081114768982,
"train_acc": 0.09375,
"val_acc": 0.22916666666666666,
"train_pnl": 0.010650217327384765,
"val_pnl": -0.0007049481907895126,
"train_win_rate": 0.49279538904899134,
"val_win_rate": 0.40625,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.9036458333333334,
"HOLD": 0.09635416666666667
},
"val": {
"BUY": 0.0,
"SELL": 0.3333333333333333,
"HOLD": 0.6666666666666666
}
},
"timestamp": "2025-03-31T02:50:13.739899",
"data_age": 3
},
{
"epoch": 5,
"train_loss": 1.10965762535731,
"val_loss": 1.0485950708389282,
"train_acc": 0.12239583333333333,
"val_acc": 0.17708333333333334,
"train_pnl": 0.011924086862580204,
"val_pnl": 0.0,
"train_win_rate": 0.5070422535211268,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.7395833333333334,
"HOLD": 0.2604166666666667
},
"val": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
}
},
"timestamp": "2025-03-31T02:50:14.073439",
"data_age": 3
},
{
"epoch": 6,
"train_loss": 1.1272419293721516,
"val_loss": 1.084235429763794,
"train_acc": 0.1015625,
"val_acc": 0.22916666666666666,
"train_pnl": 0.014825159601390072,
"val_pnl": 0.00405770620151887,
"train_win_rate": 0.4908616187989556,
"val_win_rate": 0.5157894736842106,
"best_position_size": 2.0,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 1.0,
"HOLD": 0.0
},
"val": {
"BUY": 0.0,
"SELL": 1.0,
"HOLD": 0.0
}
},
"timestamp": "2025-03-31T02:50:14.658295",
"data_age": 4
},
{
"epoch": 7,
"train_loss": 1.1171108484268188,
"val_loss": 1.0741244554519653,
"train_acc": 0.1171875,
"val_acc": 0.22916666666666666,
"train_pnl": 0.0059474696523706605,
"val_pnl": 0.00405770620151887,
"train_win_rate": 0.4838709677419355,
"val_win_rate": 0.5157894736842106,
"best_position_size": 2.0,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.7291666666666666,
"HOLD": 0.2708333333333333
},
"val": {
"BUY": 0.0,
"SELL": 1.0,
"HOLD": 0.0
}
},
"timestamp": "2025-03-31T02:50:15.227593",
"data_age": 4
}
]
}

View File

@@ -0,0 +1,512 @@
"""
Standardized CNN Model for Multi-Modal Trading System
This module extends the existing EnhancedCNN to work with standardized BaseDataInput format
and provides ModelOutput for cross-model feeding.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import logging
from datetime import datetime
from typing import Dict, List, Optional, Any, Tuple
import sys
import os
# Add the project root to the path to import core modules
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from core.data_models import BaseDataInput, ModelOutput, create_model_output
from .enhanced_cnn import EnhancedCNN, SelfAttention, ResidualBlock
logger = logging.getLogger(__name__)
class StandardizedCNN(nn.Module):
"""
Standardized CNN Model that accepts BaseDataInput and outputs ModelOutput
Features:
- Accepts standardized BaseDataInput format
- Processes COB+OHLCV data: 300 frames (1s,1m,1h,1d) ETH + 300s 1s BTC
- Includes COB ±20 buckets and MA (1s,5s,15s,60s) of COB imbalance ±5 buckets
- Outputs BUY/SELL trading action with confidence scores
- Provides hidden states for cross-model feeding
- Integrates with checkpoint management system
"""
def __init__(self, model_name: str = "standardized_cnn_v1", confidence_threshold: float = 0.6):
"""
Initialize the standardized CNN model
Args:
model_name: Name identifier for this model instance
confidence_threshold: Minimum confidence threshold for predictions
"""
super(StandardizedCNN, self).__init__()
self.model_name = model_name
self.model_type = "cnn"
self.confidence_threshold = confidence_threshold
# Calculate expected input dimensions from BaseDataInput
self.expected_feature_dim = self._calculate_expected_features()
# Initialize the underlying enhanced CNN with calculated dimensions
self.enhanced_cnn = EnhancedCNN(
input_shape=self.expected_feature_dim,
n_actions=3, # BUY, SELL, HOLD
confidence_threshold=confidence_threshold
)
# Additional layers for processing BaseDataInput structure
self.input_processor = self._build_input_processor()
# Output processing layers
self.output_processor = self._build_output_processor()
# Optional numeric return head (predicts percent change for 1s,1m,1h,1d)
# Uses cnn_features (1024) to regress predicted returns per timeframe
self.return_head = nn.Sequential(
nn.Linear(1024, 256),
nn.ReLU(),
nn.Dropout(0.1),
nn.Linear(256, 4) # [return_1s, return_1m, return_1h, return_1d]
)
# Device management
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.to(self.device)
try:
import torch.backends.cudnn as cudnn
cudnn.benchmark = True
except Exception:
pass
logger.info(f"StandardizedCNN '{model_name}' initialized")
logger.info(f"Expected feature dimension: {self.expected_feature_dim}")
logger.info(f"Device: {self.device}")
def _calculate_expected_features(self) -> int:
"""
Calculate expected feature dimension from BaseDataInput structure
Based on actual BaseDataInput.get_feature_vector():
- OHLCV ETH: 300 frames x 4 timeframes x 5 features = 6000
- OHLCV BTC: 300 frames x 5 features = 1500
- COB features: ~184 features (actual from implementation)
- Technical indicators: 100 features (padded)
- Last predictions: 50 features (padded)
Total: ~7834 features (as measured from the actual implementation)
"""
return 7834 # Based on actual BaseDataInput.get_feature_vector() measurement
def _build_input_processor(self) -> nn.Module:
"""
Build input processing layers for BaseDataInput
Returns:
nn.Module: Input processing layers
"""
return nn.Sequential(
# Initial processing of raw BaseDataInput features
nn.Linear(self.expected_feature_dim, 4096),
nn.ReLU(),
nn.Dropout(0.2),
nn.BatchNorm1d(4096),
# Feature refinement
nn.Linear(4096, 2048),
nn.ReLU(),
nn.Dropout(0.2),
nn.BatchNorm1d(2048),
# Final feature extraction
nn.Linear(2048, 1024),
nn.ReLU(),
nn.Dropout(0.1)
)
def _build_output_processor(self) -> nn.Module:
"""
Build output processing layers for standardized ModelOutput
Returns:
nn.Module: Output processing layers
"""
return nn.Sequential(
# Process CNN outputs for standardized format
nn.Linear(1024, 512),
nn.ReLU(),
nn.Dropout(0.2),
# Final action prediction
nn.Linear(512, 3), # BUY, SELL, HOLD
nn.Softmax(dim=1)
)
def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], torch.Tensor]:
"""
Forward pass through the standardized CNN
Args:
x: Input tensor from BaseDataInput.get_feature_vector()
Returns:
Tuple of (action_probabilities, hidden_states_dict, predicted_returns)
"""
batch_size = x.size(0)
# Validate input dimensions
if x.size(1) != self.expected_feature_dim:
logger.warning(f"Input dimension mismatch: expected {self.expected_feature_dim}, got {x.size(1)}")
# Pad or truncate as needed
if x.size(1) < self.expected_feature_dim:
padding = torch.zeros(batch_size, self.expected_feature_dim - x.size(1), device=x.device)
x = torch.cat([x, padding], dim=1)
else:
x = x[:, :self.expected_feature_dim]
# Process input through input processor
processed_features = self.input_processor(x) # [batch, 1024]
# Get enhanced CNN predictions (using processed features as input)
# We need to reshape for the enhanced CNN which expects different input format
cnn_input = processed_features.unsqueeze(1) # Add sequence dimension
try:
q_values, extrema_pred, price_pred, cnn_features, advanced_pred = self.enhanced_cnn(cnn_input)
except Exception as e:
logger.warning(f"Enhanced CNN forward pass failed: {e}, using fallback")
# Fallback to direct processing
cnn_features = processed_features
q_values = torch.zeros(batch_size, 3, device=x.device)
extrema_pred = torch.zeros(batch_size, 3, device=x.device)
price_pred = torch.zeros(batch_size, 2, device=x.device) # [direction, confidence]
advanced_pred = torch.zeros(batch_size, 5, device=x.device)
# Process outputs for standardized format
action_probs = self.output_processor(cnn_features) # [batch, 3]
# Predict numeric returns per timeframe from cnn_features
predicted_returns = self.return_head(cnn_features) # [batch, 4]
# Prepare hidden states for cross-model feeding
hidden_states = {
'processed_features': processed_features.detach(),
'cnn_features': cnn_features.detach(),
'q_values': q_values.detach(),
'extrema_predictions': extrema_pred.detach(),
'price_predictions': price_pred.detach(),
'advanced_predictions': advanced_pred.detach(),
'attention_weights': torch.ones(batch_size, 1, device=x.device) # Placeholder
}
return action_probs, hidden_states, predicted_returns.detach()
def predict_from_base_input(self, base_input: BaseDataInput) -> ModelOutput:
"""
Make prediction from BaseDataInput and return standardized ModelOutput
Args:
base_input: Standardized input data
Returns:
ModelOutput: Standardized model output
"""
try:
# Convert BaseDataInput to feature vector
feature_vector = base_input.get_feature_vector()
# Convert to tensor and add batch dimension
input_tensor = torch.tensor(feature_vector, dtype=torch.float32, device=self.device).unsqueeze(0)
# Set model to evaluation mode
self.eval()
with torch.no_grad():
# Forward pass
action_probs, hidden_states, predicted_returns = self.forward(input_tensor)
# Get action and confidence
action_probs_np = action_probs.squeeze(0).cpu().numpy()
action_idx = np.argmax(action_probs_np)
confidence = float(action_probs_np[action_idx])
# Map action index to action name
action_names = ['BUY', 'SELL', 'HOLD']
action = action_names[action_idx]
# Prepare predictions dictionary
predictions = {
'action': action,
'buy_probability': float(action_probs_np[0]),
'sell_probability': float(action_probs_np[1]),
'hold_probability': float(action_probs_np[2]),
'action_probabilities': action_probs_np.tolist(),
'extrema_detected': self._interpret_extrema(hidden_states.get('extrema_predictions')),
'price_direction': self._interpret_price_direction(hidden_states.get('price_predictions')),
'market_conditions': self._interpret_advanced_predictions(hidden_states.get('advanced_predictions'))
}
# Add numeric predicted returns per timeframe if available
try:
pr = predicted_returns.squeeze(0).cpu().numpy().tolist()
# Ensure length 4; if not, safely handle
if isinstance(pr, list) and len(pr) >= 4:
predictions['predicted_returns'] = pr[:4]
predictions['predicted_return_1s'] = float(pr[0])
predictions['predicted_return_1m'] = float(pr[1])
predictions['predicted_return_1h'] = float(pr[2])
predictions['predicted_return_1d'] = float(pr[3])
except Exception:
pass
# Prepare hidden states for cross-model feeding (convert tensors to numpy)
cross_model_states = {}
for key, tensor in hidden_states.items():
if isinstance(tensor, torch.Tensor):
cross_model_states[key] = tensor.squeeze(0).cpu().numpy().tolist()
else:
cross_model_states[key] = tensor
# Create metadata
metadata = {
'model_version': '1.0',
'confidence_threshold': self.confidence_threshold,
'feature_dimension': self.expected_feature_dim,
'processing_time_ms': 0, # Could add timing if needed
'input_validation': base_input.validate()
}
# Create standardized ModelOutput
model_output = ModelOutput(
model_type=self.model_type,
model_name=self.model_name,
symbol=base_input.symbol,
timestamp=datetime.now(),
confidence=confidence,
predictions=predictions,
hidden_states=cross_model_states,
metadata=metadata
)
return model_output
except Exception as e:
logger.error(f"Error in CNN prediction: {e}")
# Return default output
return self._create_default_output(base_input.symbol)
def _interpret_extrema(self, extrema_tensor: Optional[torch.Tensor]) -> str:
"""Interpret extrema predictions"""
if extrema_tensor is None:
return "unknown"
try:
extrema_probs = torch.softmax(extrema_tensor.squeeze(0), dim=0)
extrema_idx = torch.argmax(extrema_probs).item()
extrema_labels = ['bottom', 'top', 'neither']
return extrema_labels[extrema_idx]
except Exception:
return "unknown"
def _interpret_price_direction(self, price_tensor: Optional[torch.Tensor]) -> str:
"""Interpret price direction predictions ([direction, confidence] format)"""
if price_tensor is None:
return "unknown"
try:
# EnhancedCNN now emits [direction, confidence]; index 0 is direction in (-1, 1)
direction_value = float(price_tensor.squeeze(0)[0].item())
if direction_value > 0.1:
return "up"
if direction_value < -0.1:
return "down"
return "sideways"
except Exception:
return "unknown"
def _interpret_advanced_predictions(self, advanced_tensor: Optional[torch.Tensor]) -> Dict[str, str]:
"""Interpret advanced market predictions"""
if advanced_tensor is None:
return {"volatility": "unknown", "risk": "unknown"}
try:
# Assuming advanced predictions include volatility (5 classes)
if advanced_tensor.size(-1) >= 5:
volatility_probs = torch.softmax(advanced_tensor.squeeze(0)[:5], dim=0)
volatility_idx = torch.argmax(volatility_probs).item()
volatility_labels = ['very_low', 'low', 'medium', 'high', 'very_high']
volatility = volatility_labels[volatility_idx]
else:
volatility = "unknown"
return {
"volatility": volatility,
"risk": "medium" # Placeholder
}
except Exception:
return {"volatility": "unknown", "risk": "unknown"}
def _create_default_output(self, symbol: str) -> ModelOutput:
"""Create default ModelOutput for error cases"""
return create_model_output(
model_type=self.model_type,
model_name=self.model_name,
symbol=symbol,
action='HOLD',
confidence=0.5,
metadata={'error': True, 'default_output': True}
)
def train_step(self, base_inputs: List[BaseDataInput], targets: List[str],
optimizer: torch.optim.Optimizer) -> float:
"""
Perform a single training step
Args:
base_inputs: List of BaseDataInput for training
targets: List of target actions ('BUY', 'SELL', 'HOLD')
optimizer: PyTorch optimizer
Returns:
float: Training loss
"""
self.train()
try:
# Convert inputs to tensors
feature_vectors = []
for base_input in base_inputs:
feature_vector = base_input.get_feature_vector()
feature_vectors.append(feature_vector)
input_tensor = torch.tensor(np.array(feature_vectors), dtype=torch.float32, device=self.device)
# Convert targets to tensor
action_to_idx = {'BUY': 0, 'SELL': 1, 'HOLD': 2}
target_indices = [action_to_idx.get(target, 2) for target in targets]
target_tensor = torch.tensor(target_indices, dtype=torch.long, device=self.device)
# Forward pass
action_probs, _, _ = self.forward(input_tensor)
# Calculate loss
loss = F.cross_entropy(action_probs, target_tensor)
# Backward pass
optimizer.zero_grad()
loss.backward()
optimizer.step()
return float(loss.item())
except Exception as e:
logger.error(f"Error in training step: {e}")
return float('inf')
def evaluate(self, base_inputs: List[BaseDataInput], targets: List[str]) -> Dict[str, float]:
"""
Evaluate model performance
Args:
base_inputs: List of BaseDataInput for evaluation
targets: List of target actions
Returns:
Dict containing evaluation metrics
"""
self.eval()
try:
correct = 0
total = len(base_inputs)
total_confidence = 0.0
with torch.no_grad():
for base_input, target in zip(base_inputs, targets):
model_output = self.predict_from_base_input(base_input)
predicted_action = model_output.predictions['action']
if predicted_action == target:
correct += 1
total_confidence += model_output.confidence
accuracy = correct / total if total > 0 else 0.0
avg_confidence = total_confidence / total if total > 0 else 0.0
return {
'accuracy': accuracy,
'avg_confidence': avg_confidence,
'correct_predictions': correct,
'total_predictions': total
}
except Exception as e:
logger.error(f"Error in evaluation: {e}")
return {'accuracy': 0.0, 'avg_confidence': 0.0, 'correct_predictions': 0, 'total_predictions': 0}
def save_checkpoint(self, filepath: str, metadata: Optional[Dict[str, Any]] = None):
"""
Save model checkpoint
Args:
filepath: Path to save checkpoint
metadata: Optional metadata to save with checkpoint
"""
try:
checkpoint = {
'model_state_dict': self.state_dict(),
'model_name': self.model_name,
'model_type': self.model_type,
'confidence_threshold': self.confidence_threshold,
'expected_feature_dim': self.expected_feature_dim,
'metadata': metadata or {},
'timestamp': datetime.now().isoformat()
}
torch.save(checkpoint, filepath)
logger.info(f"Checkpoint saved to {filepath}")
except Exception as e:
logger.error(f"Error saving checkpoint: {e}")
def load_checkpoint(self, filepath: str) -> bool:
"""
Load model checkpoint
Args:
filepath: Path to checkpoint file
Returns:
bool: True if loaded successfully, False otherwise
"""
try:
checkpoint = torch.load(filepath, map_location=self.device)
# Load model state
self.load_state_dict(checkpoint['model_state_dict'])
# Load configuration
self.model_name = checkpoint.get('model_name', self.model_name)
self.confidence_threshold = checkpoint.get('confidence_threshold', self.confidence_threshold)
self.expected_feature_dim = checkpoint.get('expected_feature_dim', self.expected_feature_dim)
logger.info(f"Checkpoint loaded from {filepath}")
return True
except Exception as e:
logger.error(f"Error loading checkpoint: {e}")
return False
def get_model_info(self) -> Dict[str, Any]:
"""Get model information"""
return {
'model_name': self.model_name,
'model_type': self.model_type,
'confidence_threshold': self.confidence_threshold,
'expected_feature_dim': self.expected_feature_dim,
'device': str(self.device),
'parameter_count': sum(p.numel() for p in self.parameters()),
'trainable_parameters': sum(p.numel() for p in self.parameters() if p.requires_grad)
}
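A minimal end-to-end sketch of the API above; base_input is assumed to be a populated BaseDataInput obtained from the data provider, and the checkpoint path is illustrative:

import torch

model = StandardizedCNN(model_name="standardized_cnn_v1")

# Inference path: BaseDataInput -> standardized ModelOutput
output = model.predict_from_base_input(base_input)
print(output.predictions['action'], output.confidence)

# Training path: one supervised step against a labeled action
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss = model.train_step([base_input], ['BUY'], optimizer)

# Persistence via the checkpoint helpers
model.save_checkpoint("checkpoints/standardized_cnn.pt", metadata={'loss': loss})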

View File

@@ -1,821 +0,0 @@
"""
Transformer Neural Network for timeseries analysis
This module implements a Transformer model with attention mechanisms for cryptocurrency price analysis.
It also includes a Mixture of Experts model that combines predictions from multiple models.
"""
import os
import logging
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
Input, Dense, Dropout, BatchNormalization,
Concatenate, Layer, LayerNormalization, MultiHeadAttention,
Add, GlobalAveragePooling1D, Conv1D, Reshape
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import datetime
import json
logger = logging.getLogger(__name__)
class TransformerBlock(Layer):
"""
Transformer block implementation with multi-head attention and feed-forward networks.
"""
def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
super(TransformerBlock, self).__init__()
# Store hyperparameters so get_config() can serialize the layer
self.embed_dim = embed_dim
self.num_heads = num_heads
self.ff_dim = ff_dim
self.rate = rate
self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
self.ffn = tf.keras.Sequential([
Dense(ff_dim, activation="relu"),
Dense(embed_dim),
])
self.layernorm1 = LayerNormalization(epsilon=1e-6)
self.layernorm2 = LayerNormalization(epsilon=1e-6)
self.dropout1 = Dropout(rate)
self.dropout2 = Dropout(rate)
def call(self, inputs, training=False):
attn_output = self.att(inputs, inputs)
attn_output = self.dropout1(attn_output, training=training)
out1 = self.layernorm1(inputs + attn_output)
ffn_output = self.ffn(out1)
ffn_output = self.dropout2(ffn_output, training=training)
return self.layernorm2(out1 + ffn_output)
def get_config(self):
# Serialize hyperparameters, not sublayer objects (layers are not JSON-serializable
# and __init__ rebuilds them from these values on load)
config = super().get_config()
config.update({
'embed_dim': self.embed_dim,
'num_heads': self.num_heads,
'ff_dim': self.ff_dim,
'rate': self.rate
})
return config
class PositionalEncoding(Layer):
"""
Positional encoding layer to add position information to input embeddings.
"""
def __init__(self, position, d_model):
super(PositionalEncoding, self).__init__()
self.position = position
self.d_model = d_model
self.pos_encoding = self.positional_encoding(position, d_model)
def get_angles(self, position, i, d_model):
angles = 1 / tf.pow(10000, (2 * (i // 2)) / tf.cast(d_model, tf.float32))
return position * angles
def positional_encoding(self, position, d_model):
angle_rads = self.get_angles(
position=tf.range(position, dtype=tf.float32)[:, tf.newaxis],
i=tf.range(d_model, dtype=tf.float32)[tf.newaxis, :],
d_model=d_model
)
# Apply sin to even indices in the array
sines = tf.math.sin(angle_rads[:, 0::2])
# Apply cos to odd indices in the array
cosines = tf.math.cos(angle_rads[:, 1::2])
pos_encoding = tf.concat([sines, cosines], axis=-1)
pos_encoding = pos_encoding[tf.newaxis, ...]
return tf.cast(pos_encoding, tf.float32)
def call(self, inputs):
return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]
def get_config(self):
# pos_encoding is recomputed from position and d_model in __init__, so only
# those two values need to be serialized
config = super().get_config()
config.update({
'position': self.position,
'd_model': self.d_model
})
return config
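For reference, get_angles and positional_encoding implement the standard sinusoidal scheme from the original Transformer paper (note this variant concatenates the sine and cosine halves along the feature axis rather than interleaving them):

PE_{(pos,\,2i)} = \sin\!\left(pos / 10000^{2i/d_{\mathrm{model}}}\right), \qquad
PE_{(pos,\,2i+1)} = \cos\!\left(pos / 10000^{2i/d_{\mathrm{model}}}\right)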
class TransformerModel:
"""
Transformer Neural Network for time series analysis.
This model uses self-attention mechanisms to capture relationships between
different time points in the input data.
"""
def __init__(self, ts_input_shape=(20, 5), feature_input_shape=64, output_size=1, model_dir="NN/models/saved"):
"""
Initialize the Transformer model.
Args:
ts_input_shape (tuple): Shape of time series input data (sequence_length, features)
feature_input_shape (int): Shape of additional feature input (e.g., from CNN)
output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
model_dir (str): Directory to save trained models
"""
self.ts_input_shape = ts_input_shape
self.feature_input_shape = feature_input_shape
self.output_size = output_size
self.model_dir = model_dir
self.model = None
self.history = None
# Create model directory if it doesn't exist
os.makedirs(self.model_dir, exist_ok=True)
logger.info(f"Initialized Transformer model with TS input shape {ts_input_shape}, "
f"feature input shape {feature_input_shape}, and output size {output_size}")
def build_model(self, embed_dim=32, num_heads=4, ff_dim=64, num_transformer_blocks=2, dropout_rate=0.1, learning_rate=0.001):
"""
Build the Transformer model architecture.
Args:
embed_dim (int): Embedding dimension for transformer
num_heads (int): Number of attention heads
ff_dim (int): Hidden dimension of the feed forward network
num_transformer_blocks (int): Number of transformer blocks
dropout_rate (float): Dropout rate for regularization
learning_rate (float): Learning rate for Adam optimizer
Returns:
The compiled model
"""
# Time series input
ts_inputs = Input(shape=self.ts_input_shape, name="ts_input")
# Additional feature input (e.g., from CNN)
feature_inputs = Input(shape=(self.feature_input_shape,), name="feature_input")
# Process time series with transformer
# First, project the input to the embedding dimension
x = Conv1D(embed_dim, 1, activation="relu")(ts_inputs)
# Add positional encoding
x = PositionalEncoding(self.ts_input_shape[0], embed_dim)(x)
# Add transformer blocks
for _ in range(num_transformer_blocks):
x = TransformerBlock(embed_dim, num_heads, ff_dim, dropout_rate)(x)
# Global pooling to get a single vector representation
x = GlobalAveragePooling1D()(x)
x = Dropout(dropout_rate)(x)
# Combine with additional features
combined = Concatenate()([x, feature_inputs])
# Dense layers for final classification/regression
x = Dense(64, activation="relu")(combined)
x = BatchNormalization()(x)
x = Dropout(dropout_rate)(x)
# Output layer
if self.output_size == 1:
# Binary classification (up/down)
outputs = Dense(1, activation='sigmoid', name='output')(x)
loss = 'binary_crossentropy'
metrics = ['accuracy']
elif self.output_size == 3:
# Multi-class classification (buy/hold/sell)
outputs = Dense(3, activation='softmax', name='output')(x)
loss = 'categorical_crossentropy'
metrics = ['accuracy']
else:
# Regression
outputs = Dense(self.output_size, activation='linear', name='output')(x)
loss = 'mse'
metrics = ['mae']
# Create and compile model
self.model = Model(inputs=[ts_inputs, feature_inputs], outputs=outputs)
# Compile with Adam optimizer
self.model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss=loss,
metrics=metrics
)
# Log model summary
self.model.summary(print_fn=lambda x: logger.info(x))
return self.model
def train(self, X_ts, X_features, y, batch_size=32, epochs=100, validation_split=0.2,
callbacks=None, class_weights=None):
"""
Train the Transformer model on the provided data.
Args:
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
y (numpy.ndarray): Target labels
batch_size (int): Batch size
epochs (int): Number of epochs
validation_split (float): Fraction of data to use for validation
callbacks (list): List of Keras callbacks
class_weights (dict): Class weights for imbalanced datasets
Returns:
History object containing training metrics
"""
if self.model is None:
self.build_model()
# Default callbacks if none provided
if callbacks is None:
# Create a timestamp for model checkpoints
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
callbacks = [
EarlyStopping(
monitor='val_loss',
patience=10,
restore_best_weights=True
),
ReduceLROnPlateau(
monitor='val_loss',
factor=0.5,
patience=5,
min_lr=1e-6
),
ModelCheckpoint(
filepath=os.path.join(self.model_dir, f"transformer_model_{timestamp}.h5"),
monitor='val_loss',
save_best_only=True
)
]
# Check if y needs to be one-hot encoded for multi-class
if self.output_size == 3 and len(y.shape) == 1:
y = tf.keras.utils.to_categorical(y, num_classes=3)
# Train the model
logger.info(f"Training Transformer model with {len(X_ts)} samples, batch size {batch_size}, epochs {epochs}")
self.history = self.model.fit(
[X_ts, X_features], y,
batch_size=batch_size,
epochs=epochs,
validation_split=validation_split,
callbacks=callbacks,
class_weight=class_weights,
verbose=2
)
# Save the trained model
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
model_path = os.path.join(self.model_dir, f"transformer_model_final_{timestamp}.h5")
self.model.save(model_path)
logger.info(f"Model saved to {model_path}")
# Save training history
history_path = os.path.join(self.model_dir, f"transformer_model_history_{timestamp}.json")
with open(history_path, 'w') as f:
# Convert numpy values to Python native types for JSON serialization
history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
json.dump(history_dict, f, indent=2)
return self.history
def evaluate(self, X_ts, X_features, y):
"""
Evaluate the model on test data.
Args:
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
y (numpy.ndarray): Target labels
Returns:
dict: Evaluation metrics
"""
if self.model is None:
raise ValueError("Model has not been built or trained yet")
# Convert y to one-hot encoding for multi-class
if self.output_size == 3 and len(y.shape) == 1:
y = tf.keras.utils.to_categorical(y, num_classes=3)
# Evaluate model
logger.info(f"Evaluating Transformer model on {len(X_ts)} samples")
eval_results = self.model.evaluate([X_ts, X_features], y, verbose=0)
metrics = {}
for metric, value in zip(self.model.metrics_names, eval_results):
metrics[metric] = value
logger.info(f"{metric}: {value:.4f}")
return metrics
def predict(self, X_ts, X_features=None):
"""
Make predictions on new data.
Args:
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
Returns:
tuple: (y_pred, y_proba) where:
y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
y_proba is the class probability
"""
if self.model is None:
raise ValueError("Model has not been built or trained yet")
# Ensure X_ts has the right shape
if len(X_ts.shape) == 2:
# Single sample, add batch dimension
X_ts = np.expand_dims(X_ts, axis=0)
# Ensure X_features has the right shape
if X_features is None:
# Extract features from time series data if no external features provided
X_features = self._extract_features_from_timeseries(X_ts)
elif len(X_features.shape) == 1:
# Single sample, add batch dimension
X_features = np.expand_dims(X_features, axis=0)
# Get predictions
y_proba = self.model.predict([X_ts, X_features])
# Process based on output type
if self.output_size == 1:
# Binary classification
y_pred = (y_proba > 0.5).astype(int).flatten()
return y_pred, y_proba.flatten()
elif self.output_size == 3:
# Multi-class classification
y_pred = np.argmax(y_proba, axis=1)
return y_pred, y_proba
else:
# Regression
return y_proba, y_proba
def _extract_features_from_timeseries(self, X_ts: np.ndarray) -> np.ndarray:
"""Extract meaningful features from time series data instead of using dummy zeros"""
try:
batch_size = X_ts.shape[0]
features = []
for i in range(batch_size):
sample = X_ts[i] # Shape: (timesteps, features)
# Extract statistical features from each feature dimension
sample_features = []
for feature_idx in range(sample.shape[1]):
feature_data = sample[:, feature_idx]
# Basic statistical features
sample_features.extend([
np.mean(feature_data), # Mean
np.std(feature_data), # Standard deviation
np.min(feature_data), # Minimum
np.max(feature_data), # Maximum
np.percentile(feature_data, 25), # 25th percentile
np.percentile(feature_data, 75), # 75th percentile
])
# Trend features
if len(feature_data) > 1:
# Linear trend (slope)
x = np.arange(len(feature_data))
slope = np.polyfit(x, feature_data, 1)[0]
sample_features.append(slope)
# Rate of change
rate_of_change = (feature_data[-1] - feature_data[0]) / feature_data[0] if feature_data[0] != 0 else 0
sample_features.append(rate_of_change)
else:
sample_features.extend([0.0, 0.0])
# Pad or truncate to expected feature size
while len(sample_features) < self.feature_input_shape:
sample_features.append(0.0)
sample_features = sample_features[:self.feature_input_shape]
features.append(sample_features)
return np.array(features, dtype=np.float32)
except Exception as e:
logger.error(f"Error extracting features from time series: {e}")
# Fallback to zeros if extraction fails
return np.zeros((X_ts.shape[0], self.feature_input_shape), dtype=np.float32)
def save(self, filepath=None):
"""
Save the model to disk.
Args:
filepath (str): Path to save the model
Returns:
str: Path where the model was saved
"""
if self.model is None:
raise ValueError("Model has not been built yet")
if filepath is None:
# Create a default filepath with timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
filepath = os.path.join(self.model_dir, f"transformer_model_{timestamp}.h5")
self.model.save(filepath)
logger.info(f"Model saved to {filepath}")
return filepath
def load(self, filepath):
"""
Load a saved model from disk.
Args:
filepath (str): Path to the saved model
Returns:
The loaded model
"""
# Register custom layers
custom_objects = {
'TransformerBlock': TransformerBlock,
'PositionalEncoding': PositionalEncoding
}
self.model = load_model(filepath, custom_objects=custom_objects)
logger.info(f"Model loaded from {filepath}")
return self.model
def plot_training_history(self):
"""
Plot training history (loss and metrics).
Returns:
str: Path to the saved plot
"""
if self.history is None:
raise ValueError("Model has not been trained yet")
plt.figure(figsize=(12, 5))
# Plot loss
plt.subplot(1, 2, 1)
plt.plot(self.history.history['loss'], label='Training Loss')
if 'val_loss' in self.history.history:
plt.plot(self.history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
# Plot accuracy
plt.subplot(1, 2, 2)
if 'accuracy' in self.history.history:
plt.plot(self.history.history['accuracy'], label='Training Accuracy')
if 'val_accuracy' in self.history.history:
plt.plot(self.history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
elif 'mae' in self.history.history:
plt.plot(self.history.history['mae'], label='Training MAE')
if 'val_mae' in self.history.history:
plt.plot(self.history.history['val_mae'], label='Validation MAE')
plt.title('Model MAE')
plt.ylabel('MAE')
plt.xlabel('Epoch')
plt.legend()
plt.tight_layout()
# Save figure
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
fig_path = os.path.join(self.model_dir, f"transformer_training_history_{timestamp}.png")
plt.savefig(fig_path)
plt.close()
logger.info(f"Training history plot saved to {fig_path}")
return fig_path
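A sketch of the typical TransformerModel lifecycle, using the default shapes documented above with synthetic data (the sample counts and hyperparameters are illustrative):

import numpy as np

model = TransformerModel(ts_input_shape=(20, 5), feature_input_shape=64, output_size=3)
model.build_model(embed_dim=32, num_heads=4, ff_dim=64)

X_ts = np.random.randn(256, 20, 5).astype(np.float32)
X_feat = np.random.randn(256, 64).astype(np.float32)
y = np.random.randint(0, 3, size=256)   # BUY/HOLD/SELL labels; one-hot handled in train()

model.train(X_ts, X_feat, y, batch_size=32, epochs=5)
y_pred, y_proba = model.predict(X_ts[:4], X_feat[:4])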
class MixtureOfExpertsModel:
"""
Mixture of Experts (MoE) model.
This model combines predictions from multiple expert models (such as CNN and Transformer)
using a weighted ensemble approach.
"""
def __init__(self, output_size=1, model_dir="NN/models/saved"):
"""
Initialize the MoE model.
Args:
output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
model_dir (str): Directory to save trained models
"""
self.output_size = output_size
self.model_dir = model_dir
self.model = None
self.history = None
self.experts = {}
# Create model directory if it doesn't exist
os.makedirs(self.model_dir, exist_ok=True)
logger.info(f"Initialized Mixture of Experts model with output size {output_size}")
def add_expert(self, name, model):
"""
Add an expert model to the MoE.
Args:
name (str): Name of the expert model
model: The expert model instance
Returns:
None
"""
self.experts[name] = model
logger.info(f"Added expert model '{name}' to MoE")
def build_model(self, ts_input_shape=(20, 5), expert_weights=None, learning_rate=0.001):
"""
Build the MoE model by combining expert models.
Args:
ts_input_shape (tuple): Shape of time series input data
expert_weights (dict): Weights for each expert model
learning_rate (float): Learning rate for Adam optimizer
Returns:
The compiled model
"""
# Time series input
ts_inputs = Input(shape=ts_input_shape, name="ts_input")
# Additional feature input (from CNN)
feature_inputs = Input(shape=(64,), name="feature_input") # Default size for features
# Process with each expert model
expert_outputs = []
expert_names = []
for name, expert in self.experts.items():
# Skip if expert model is not valid or doesn't have a call/predict method
if expert is None:
logger.warning(f"Expert model '{name}' is None, skipping")
continue
try:
# Different handling based on model type
if name == 'cnn':
# CNN model takes only time series input
expert_output = expert(ts_inputs)
expert_outputs.append(expert_output)
expert_names.append(name)
elif name == 'transformer':
# Transformer model takes both time series and feature inputs
expert_output = expert([ts_inputs, feature_inputs])
expert_outputs.append(expert_output)
expert_names.append(name)
else:
logger.warning(f"Unknown expert model type: {name}")
except Exception as e:
logger.error(f"Error adding expert '{name}': {str(e)}")
if not expert_outputs:
logger.error("No valid expert models found")
return None
# Use expert weighting
if expert_weights is None:
# Equal weighting
weights = [1.0 / len(expert_outputs)] * len(expert_outputs)
else:
# User-provided weights
weights = [expert_weights.get(name, 1.0 / len(expert_outputs)) for name in expert_names]
# Normalize weights
weights = [w / sum(weights) for w in weights]
# Combine expert outputs using weighted average
if len(expert_outputs) == 1:
# Only one expert, use its output directly
combined_output = expert_outputs[0]
else:
# Multiple experts, compute weighted average
weighted_outputs = [output * weight for output, weight in zip(expert_outputs, weights)]
combined_output = Add()(weighted_outputs)
# Create the MoE model
moe_model = Model(inputs=[ts_inputs, feature_inputs], outputs=combined_output)
# Compile the model
if self.output_size == 1:
# Binary classification
moe_model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss='binary_crossentropy',
metrics=['accuracy']
)
elif self.output_size == 3:
# Multi-class classification for BUY/HOLD/SELL
moe_model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss='categorical_crossentropy',
metrics=['accuracy']
)
else:
# Regression
moe_model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss='mse',
metrics=['mae']
)
self.model = moe_model
# Log model summary
self.model.summary(print_fn=lambda x: logger.info(x))
logger.info(f"Built MoE model with weights: {weights}")
return self.model
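A worked example of the weight handling above: with expert_weights = {'cnn': 3.0, 'transformer': 1.0}, normalization yields [0.75, 0.25], so the ensemble output is 0.75 * cnn_out + 0.25 * transformer_out:

# Normalization exactly as in build_model (sum is taken over the original list):
weights = [3.0, 1.0]
weights = [w / sum(weights) for w in weights]  # -> [0.75, 0.25]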
def train(self, X_ts, X_features, y, batch_size=32, epochs=100, validation_split=0.2,
callbacks=None, class_weights=None):
"""
Train the MoE model on the provided data.
Args:
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
y (numpy.ndarray): Target labels
batch_size (int): Batch size
epochs (int): Number of epochs
validation_split (float): Fraction of data to use for validation
callbacks (list): List of Keras callbacks
class_weights (dict): Class weights for imbalanced datasets
Returns:
History object containing training metrics
"""
if self.model is None:
logger.error("MoE model has not been built yet")
return None
# Default callbacks if none provided
if callbacks is None:
# Create a timestamp for model checkpoints
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
callbacks = [
EarlyStopping(
monitor='val_loss',
patience=10,
restore_best_weights=True
),
ReduceLROnPlateau(
monitor='val_loss',
factor=0.5,
patience=5,
min_lr=1e-6
),
ModelCheckpoint(
filepath=os.path.join(self.model_dir, f"moe_model_{timestamp}.h5"),
monitor='val_loss',
save_best_only=True
)
]
# Check if y needs to be one-hot encoded for multi-class
if self.output_size == 3 and len(y.shape) == 1:
y = tf.keras.utils.to_categorical(y, num_classes=3)
# Train the model
logger.info(f"Training MoE model with {len(X_ts)} samples, batch size {batch_size}, epochs {epochs}")
self.history = self.model.fit(
[X_ts, X_features], y,
batch_size=batch_size,
epochs=epochs,
validation_split=validation_split,
callbacks=callbacks,
class_weight=class_weights,
verbose=2
)
# Save the trained model
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
model_path = os.path.join(self.model_dir, f"moe_model_final_{timestamp}.h5")
self.model.save(model_path)
logger.info(f"Model saved to {model_path}")
# Save training history
history_path = os.path.join(self.model_dir, f"moe_model_history_{timestamp}.json")
with open(history_path, 'w') as f:
# Convert numpy values to Python native types for JSON serialization
history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
json.dump(history_dict, f, indent=2)
return self.history
def predict(self, X_ts, X_features=None):
"""
Make predictions on new data.
Args:
X_ts (numpy.ndarray): Time series input features
X_features (numpy.ndarray): Additional input features
Returns:
tuple: (y_pred, y_proba) where:
y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
y_proba is the class probability
"""
if self.model is None:
raise ValueError("Model has not been built or trained yet")
# Ensure X_ts has the right shape
if len(X_ts.shape) == 2:
# Single sample, add batch dimension
X_ts = np.expand_dims(X_ts, axis=0)
# Ensure X_features has the right shape
if X_features is None:
# Create dummy features with zeros
X_features = np.zeros((X_ts.shape[0], 64)) # Default size
elif len(X_features.shape) == 1:
# Single sample, add batch dimension
X_features = np.expand_dims(X_features, axis=0)
# Get predictions
y_proba = self.model.predict([X_ts, X_features])
# Process based on output type
if self.output_size == 1:
# Binary classification
y_pred = (y_proba > 0.5).astype(int).flatten()
return y_pred, y_proba.flatten()
elif self.output_size == 3:
# Multi-class classification
y_pred = np.argmax(y_proba, axis=1)
return y_pred, y_proba
else:
# Regression
return y_proba, y_proba
def save(self, filepath=None):
"""
Save the model to disk.
Args:
filepath (str): Path to save the model
Returns:
str: Path where the model was saved
"""
if self.model is None:
raise ValueError("Model has not been built yet")
if filepath is None:
# Create a default filepath with timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
filepath = os.path.join(self.model_dir, f"moe_model_{timestamp}.h5")
self.model.save(filepath)
logger.info(f"Model saved to {filepath}")
return filepath
def load(self, filepath):
"""
Load a saved model from disk.
Args:
filepath (str): Path to the saved model
Returns:
The loaded model
"""
# Register custom layers
custom_objects = {
'TransformerBlock': TransformerBlock,
'PositionalEncoding': PositionalEncoding
}
self.model = load_model(filepath, custom_objects=custom_objects)
logger.info(f"Model loaded from {filepath}")
return self.model
# Example usage:
if __name__ == "__main__":
# Importing this module defines the classes; instantiate them to use the models
print("Transformer and MoE models defined. Instantiate TransformerModel or MixtureOfExpertsModel to use them.")