Merge commit 'd49a473ed6f4aef55bfdd47d6370e53582be6b7b' into cleanup

@@ -1,21 +0,0 @@
"""
Neural Network Models
=====================

This package contains the neural network models used in the trading system:

- CNN Model: Deep convolutional neural network for feature extraction
- DQN Agent: Deep Q-Network for reinforcement learning
- COB RL Model: Specialized RL model for order book data
- Advanced Transformer: High-performance transformer for trading

PyTorch implementation only.
"""

from NN.models.cnn_model import EnhancedCNNModel as CNNModel
from NN.models.dqn_agent import DQNAgent
from NN.models.cob_rl_model import MassiveRLNetwork, COBRLModelInterface
from NN.models.advanced_transformer_trading import AdvancedTradingTransformer, TradingTransformerConfig
from NN.models.model_interfaces import ModelInterface, CNNModelInterface, RLAgentInterface, ExtremaTrainerInterface

__all__ = ['CNNModel', 'DQNAgent', 'MassiveRLNetwork', 'COBRLModelInterface', 'AdvancedTradingTransformer', 'TradingTransformerConfig',
           'ModelInterface', 'CNNModelInterface', 'RLAgentInterface', 'ExtremaTrainerInterface']
@@ -267,7 +267,17 @@ class COBRLModelInterface(ModelInterface):

        logger.info(f"COB RL Model Interface initialized on {self.device}")

    def to(self, device):
        """PyTorch-style device movement method"""
        self.device = device
        self.model = self.model.to(device)
        return self

    def predict(self, cob_features: np.ndarray) -> Dict[str, Any]:
        """Make prediction using the model"""
        self.model.eval()
        with torch.no_grad():
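The resolved hunk above adopts the d49a473 side: a PyTorch-style to() method plus a type-annotated predict. A minimal usage sketch, assuming the default constructor and a feature width the underlying MassiveRLNetwork actually expects (both assumptions, not shown in this diff):

import numpy as np
import torch

from NN.models.cob_rl_model import COBRLModelInterface

COB_FEATURE_DIM = 2000  # hypothetical width; the real value comes from the model config

iface = COBRLModelInterface().to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
cob_features = np.random.randn(COB_FEATURE_DIM).astype(np.float32)
result = iface.predict(cob_features)  # Dict[str, Any] per the signature above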
File diff suppressed because it is too large
@@ -3,6 +3,7 @@ import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
import time
import logging
import torch.nn.functional as F
from typing import List, Tuple, Dict, Any, Optional, Union
@@ -80,6 +81,9 @@ class EnhancedCNN(nn.Module):
        self.n_actions = n_actions
        self.confidence_threshold = confidence_threshold

        # Training data storage
        self.training_data = []

        # Calculate input dimensions
        if isinstance(input_shape, (list, tuple)):
            if len(input_shape) == 3:  # [channels, height, width]
@@ -265,8 +269,9 @@ class EnhancedCNN(nn.Module):
             nn.Linear(256, 3)  # 0=bottom, 1=top, 2=neither
         )

-        # ULTRA MASSIVE multi-timeframe price prediction heads
-        self.price_pred_immediate = nn.Sequential(
+        # ULTRA MASSIVE price direction prediction head
+        # Outputs single direction and confidence values
+        self.price_direction_head = nn.Sequential(
             nn.Linear(1024, 1024),  # Increased from 512
             nn.ReLU(),
             nn.Dropout(0.3),
@@ -275,32 +280,13 @@ class EnhancedCNN(nn.Module):
             nn.Dropout(0.3),
             nn.Linear(512, 256),  # Increased from 128
             nn.ReLU(),
-            nn.Linear(256, 3)  # Up, Down, Sideways
+            nn.Linear(256, 2)  # [direction, confidence]
         )

-        self.price_pred_midterm = nn.Sequential(
-            nn.Linear(1024, 1024),  # Increased from 512
-            nn.ReLU(),
-            nn.Dropout(0.3),
-            nn.Linear(1024, 512),  # Increased from 256
-            nn.ReLU(),
-            nn.Dropout(0.3),
-            nn.Linear(512, 256),  # Increased from 128
-            nn.ReLU(),
-            nn.Linear(256, 3)  # Up, Down, Sideways
-        )
-
-        self.price_pred_longterm = nn.Sequential(
-            nn.Linear(1024, 1024),  # Increased from 512
-            nn.ReLU(),
-            nn.Dropout(0.3),
-            nn.Linear(1024, 512),  # Increased from 256
-            nn.ReLU(),
-            nn.Dropout(0.3),
-            nn.Linear(512, 256),  # Increased from 128
-            nn.ReLU(),
-            nn.Linear(256, 3)  # Up, Down, Sideways
-        )
+        # Direction activation (tanh for -1 to 1)
+        self.direction_activation = nn.Tanh()
+        # Confidence activation (sigmoid for 0 to 1)
+        self.confidence_activation = nn.Sigmoid()

         # ULTRA MASSIVE value prediction with ensemble approaches
         self.price_pred_value = nn.Sequential(
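# The refactor above replaces three 3-class heads with one 2-output head, so
# decoding changes: direction becomes a signed scalar and confidence a
# separate probability. Minimal standalone sketch of that split (toy shapes,
# not the full EnhancedCNN):
#
#   import torch
#   import torch.nn as nn
#   head = nn.Sequential(nn.Linear(1024, 256), nn.ReLU(), nn.Linear(256, 2))
#   raw = head(torch.randn(4, 1024))                  # [batch, 2]
#   direction = torch.tanh(raw[:, 0:1])               # -1 (down) .. 1 (up)
#   confidence = torch.sigmoid(raw[:, 1:2])           # 0 .. 1
#   pred = torch.cat([direction, confidence], dim=1)  # [batch, 2]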
@@ -371,21 +357,45 @@ class EnhancedCNN(nn.Module):
             nn.Linear(128, 4)  # Low risk, medium risk, high risk, extreme risk
         )

+    def _memory_barrier(self, tensor: torch.Tensor) -> torch.Tensor:
+        """Create a memory barrier to prevent in-place operation issues"""
+        return tensor.detach().clone().requires_grad_(tensor.requires_grad)
+
     def _check_rebuild_network(self, features):
-        """Check if network needs to be rebuilt for different feature dimensions"""
+        """DEPRECATED: Network should have fixed architecture - no runtime rebuilding"""
         if features != self.feature_dim:
-            logger.info(f"Rebuilding network for new feature dimension: {features} (was {self.feature_dim})")
-            self.feature_dim = features
-            self._build_network()
-            # Move to device after rebuilding
-            self.to(self.device)
-            return True
+            logger.error(f"CRITICAL: Input feature dimension mismatch! Expected {self.feature_dim}, got {features}")
+            logger.error("This indicates a bug in data preprocessing - input should be fixed size!")
+            logger.error("Network architecture should NOT change at runtime!")
+            raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {features}")
         return False

     def forward(self, x):
         """Forward pass through the ULTRA MASSIVE network"""
         batch_size = x.size(0)

+        # Validate input dimensions to prevent zero-element tensor issues
+        if x.numel() == 0:
+            logger.error(f"Forward pass received empty tensor with shape {x.shape}")
+            # Return default outputs for all 5 expected values to prevent crash
+            default_q_values = torch.zeros(batch_size, self.n_actions, device=x.device)
+            default_extrema = torch.zeros(batch_size, 3, device=x.device)  # bottom/top/neither
+            default_price_pred = torch.zeros(batch_size, 1, device=x.device)
+            default_features = torch.zeros(batch_size, 1024, device=x.device)
+            default_advanced = torch.zeros(batch_size, 1, device=x.device)
+            return default_q_values, default_extrema, default_price_pred, default_features, default_advanced
+
+        # Check for zero feature dimensions
+        if len(x.shape) > 1 and any(dim == 0 for dim in x.shape[1:]):
+            logger.error(f"Forward pass received tensor with zero feature dimensions: {x.shape}")
+            # Return default outputs for all 5 expected values to prevent crash
+            default_q_values = torch.zeros(batch_size, self.n_actions, device=x.device)
+            default_extrema = torch.zeros(batch_size, 3, device=x.device)  # bottom/top/neither
+            default_price_pred = torch.zeros(batch_size, 1, device=x.device)
+            default_features = torch.zeros(batch_size, 1024, device=x.device)
+            default_advanced = torch.zeros(batch_size, 1, device=x.device)
+            return default_q_values, default_extrema, default_price_pred, default_features, default_advanced
+
         # Process different input shapes
         if len(x.shape) > 2:
             # Handle 4D input [batch, timeframes, window, features] or 3D input [batch, timeframes, features]
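# The two guard branches above duplicate the same five default tensors, and
# they still emit a [batch, 1] price tensor even though the happy path now
# returns the [batch, 2] direction/confidence pair. One possible helper to
# keep the contract in one place (my naming, not from this commit):
#
#   def _default_outputs(self, batch_size, device):
#       return (
#           torch.zeros(batch_size, self.n_actions, device=device),  # q_values
#           torch.zeros(batch_size, 3, device=device),     # extrema: bottom/top/neither
#           torch.zeros(batch_size, 2, device=device),     # direction/confidence
#           torch.zeros(batch_size, 1024, device=device),  # refined features
#           torch.zeros(batch_size, 1, device=device),     # advanced placeholder
#       )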
@@ -397,10 +407,11 @@ class EnhancedCNN(nn.Module):
             # Now x is 3D: [batch, timeframes, features]
             x_reshaped = x

-            # Check if the feature dimension has changed and rebuild if necessary
-            if x_reshaped.size(1) * x_reshaped.size(2) != self.feature_dim:
-                total_features = x_reshaped.size(1) * x_reshaped.size(2)
-                self._check_rebuild_network(total_features)
+            # Validate input dimensions (should be fixed)
+            total_features = x_reshaped.size(1) * x_reshaped.size(2)
+            if total_features != self.feature_dim:
+                logger.error(f"Input dimension mismatch: expected {self.feature_dim}, got {total_features}")
+                raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {total_features}")

             # Apply ultra massive convolutions
             x_conv = self.conv_layers(x_reshaped)
@@ -413,9 +424,10 @@ class EnhancedCNN(nn.Module):
             # For 2D input [batch, features]
             x_flat = x

-            # Check if dimensions have changed
+            # Validate input dimensions (should be fixed)
             if x_flat.size(1) != self.feature_dim:
-                self._check_rebuild_network(x_flat.size(1))
+                logger.error(f"Input dimension mismatch: expected {self.feature_dim}, got {x_flat.size(1)}")
+                raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {x_flat.size(1)}")

         # Apply ULTRA MASSIVE FC layers to get base features
         features = self.fc_layers(x_flat)  # [batch, 1024]
@@ -464,10 +476,14 @@ class EnhancedCNN(nn.Module):
         # Extrema predictions (bottom/top/neither detection)
         extrema_pred = self.extrema_head(features_refined)

-        # Multi-timeframe price movement predictions
-        price_immediate = self.price_pred_immediate(features_refined)
-        price_midterm = self.price_pred_midterm(features_refined)
-        price_longterm = self.price_pred_longterm(features_refined)
+        # Price direction predictions
+        price_direction_raw = self.price_direction_head(features_refined)
+
+        # Apply separate activations to direction and confidence
+        direction = self.direction_activation(price_direction_raw[:, 0:1])    # -1 to 1
+        confidence = self.confidence_activation(price_direction_raw[:, 1:2])  # 0 to 1
+        price_direction_pred = torch.cat([direction, confidence], dim=1)      # [batch, 2]

         price_values = self.price_pred_value(features_refined)

         # Additional specialized predictions for enhanced accuracy
@@ -476,38 +492,42 @@ class EnhancedCNN(nn.Module):
         market_regime_pred = self.market_regime_head(features_refined)
         risk_pred = self.risk_head(features_refined)

-        # Package all price predictions
-        price_predictions = {
-            'immediate': price_immediate,
-            'midterm': price_midterm,
-            'longterm': price_longterm,
-            'values': price_values
-        }
+        # Use the price direction prediction directly (already [batch, 2])
+        price_direction_tensor = price_direction_pred

-        # Package additional predictions for enhanced decision making
-        advanced_predictions = {
-            'volatility': volatility_pred,
-            'support_resistance': support_resistance_pred,
-            'market_regime': market_regime_pred,
-            'risk_assessment': risk_pred
-        }
+        # Package additional predictions into a single tensor (use volatility as primary)
+        # For compatibility with DQN agent, we return volatility_pred as the advanced prediction tensor
+        advanced_pred_tensor = volatility_pred

-        return q_values, extrema_pred, price_predictions, features_refined, advanced_predictions
+        return q_values, extrema_pred, price_direction_tensor, features_refined, advanced_pred_tensor

-    def act(self, state, explore=True):
+    def act(self, state, explore=True) -> Tuple[int, float, List[float]]:
         """Enhanced action selection with ultra massive model predictions"""
         if explore and np.random.random() < 0.1:  # 10% random exploration
             return np.random.choice(self.n_actions)

         self.eval()
-        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
+
+        # Accept both NumPy arrays and already-built torch tensors
+        if isinstance(state, torch.Tensor):
+            state_tensor = state.detach().to(self.device)
+            if state_tensor.dim() == 1:
+                state_tensor = state_tensor.unsqueeze(0)
+        else:
+            # Convert to tensor directly on the target device to avoid intermediate CPU copies
+            state_tensor = torch.as_tensor(state, dtype=torch.float32, device=self.device)
+            if state_tensor.dim() == 1:
+                state_tensor = state_tensor.unsqueeze(0)

         with torch.no_grad():
-            q_values, extrema_pred, price_predictions, features, advanced_predictions = self(state_tensor)
+            q_values, extrema_pred, price_direction_predictions, features, advanced_predictions = self(state_tensor)
+
+            # Process price direction predictions
+            if price_direction_predictions is not None:
+                self.process_price_direction_predictions(price_direction_predictions)

             # Apply softmax to get action probabilities
-            action_probs = torch.softmax(q_values, dim=1)
-            action = torch.argmax(action_probs, dim=1).item()
+            action_probs_tensor = torch.softmax(q_values, dim=1)
+            action_idx = int(torch.argmax(action_probs_tensor, dim=1).item())
+            confidence = float(action_probs_tensor[0, action_idx].item())  # Confidence of the chosen action
+            action_probs = action_probs_tensor.squeeze(0).tolist()  # Convert to list of floats for return

         # Log advanced predictions for better decision making
         if hasattr(self, '_log_predictions') and self._log_predictions:
@@ -537,7 +557,180 @@ class EnhancedCNN(nn.Module):
             logger.info(f"  Market Regime: {regime_labels[regime_class]} ({regime[regime_class]:.3f})")
             logger.info(f"  Risk Level: {risk_labels[risk_class]} ({risk[risk_class]:.3f})")

-        return action
+        return action_idx, confidence, action_probs
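    # NOTE on the new act() contract: callers now unpack a triple, but the 10%
    # exploration branch above still returns a bare index, so pass explore=False
    # (or handle both shapes) when three values are always expected.
    # Usage sketch -- `model` is an EnhancedCNN and the state construction is
    # illustrative only:
    #
    #   state = np.random.randn(model.feature_dim).astype(np.float32)
    #   action_idx, confidence, action_probs = model.act(state, explore=False)
    #   if confidence >= model.confidence_threshold:
    #       execute_action(action_idx)  # hypothetical downstream hook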
    def process_price_direction_predictions(self, price_direction_pred: torch.Tensor) -> Dict[str, float]:
        """
        Process price direction predictions and convert to standardized format

        Args:
            price_direction_pred: Tensor of shape (batch_size, 2) containing [direction, confidence]

        Returns:
            Dict with direction (-1 to 1) and confidence (0 to 1)
        """
        try:
            if price_direction_pred is None or price_direction_pred.numel() == 0:
                return {}

            # Extract direction and confidence values
            direction_value = float(price_direction_pred[0, 0].item())   # -1 to 1
            confidence_value = float(price_direction_pred[0, 1].item())  # 0 to 1

            processed_directions = {
                'direction': direction_value,
                'confidence': confidence_value
            }

            # Store for later access
            self.last_price_direction = processed_directions

            return processed_directions

        except Exception as e:
            logger.error(f"Error processing price direction predictions: {e}")
            return {}

    def get_price_direction_vector(self) -> Dict[str, float]:
        """
        Get the current price direction and confidence

        Returns:
            Dict with direction (-1 to 1) and confidence (0 to 1)
        """
        return getattr(self, 'last_price_direction', {})

    def get_price_direction_summary(self) -> Dict[str, Any]:
        """
        Get a summary of price direction prediction

        Returns:
            Dict containing direction and confidence information
        """
        try:
            last_direction = getattr(self, 'last_price_direction', {})
            if not last_direction:
                return {
                    'direction_value': 0.0,
                    'confidence_value': 0.0,
                    'direction_label': "SIDEWAYS",
                    'discrete_direction': 0,
                    'strength': 0.0,
                    'weighted_strength': 0.0
                }

            direction_value = last_direction['direction']
            confidence_value = last_direction['confidence']

            # Convert to discrete direction
            if direction_value > 0.1:
                direction_label = "UP"
                discrete_direction = 1
            elif direction_value < -0.1:
                direction_label = "DOWN"
                discrete_direction = -1
            else:
                direction_label = "SIDEWAYS"
                discrete_direction = 0

            return {
                'direction_value': float(direction_value),
                'confidence_value': float(confidence_value),
                'direction_label': direction_label,
                'discrete_direction': discrete_direction,
                'strength': abs(float(direction_value)),
                'weighted_strength': abs(float(direction_value)) * float(confidence_value)
            }

        except Exception as e:
            logger.error(f"Error calculating price direction summary: {e}")
            return {
                'direction_value': 0.0,
                'confidence_value': 0.0,
                'direction_label': "SIDEWAYS",
                'discrete_direction': 0,
                'strength': 0.0,
                'weighted_strength': 0.0
            }

    def add_training_data(self, state, action, reward, position_pnl=0.0, has_position=False):
        """
        Add training data to the model's training buffer with position-based reward enhancement

        Args:
            state: Input state
            action: Action taken
            reward: Base reward received
            position_pnl: Current position P&L (0.0 if no position)
            has_position: Whether we currently have an open position
        """
        try:
            # Enhance reward based on position status
            enhanced_reward = self._calculate_position_enhanced_reward(
                reward, action, position_pnl, has_position
            )

            self.training_data.append({
                'state': state,
                'action': action,
                'reward': enhanced_reward,
                'base_reward': reward,  # Keep original reward for analysis
                'position_pnl': position_pnl,
                'has_position': has_position,
                'timestamp': time.time()
            })

            # Keep only the last 1000 training samples
            if len(self.training_data) > 1000:
                self.training_data = self.training_data[-1000:]

        except Exception as e:
            logger.error(f"Error adding training data: {e}")

    def _calculate_position_enhanced_reward(self, base_reward, action, position_pnl, has_position):
        """
        Calculate position-enhanced reward to incentivize profitable trades and closing losing ones

        Args:
            base_reward: Original reward from price prediction accuracy
            action: Action taken ('BUY', 'SELL', 'HOLD')
            position_pnl: Current position P&L
            has_position: Whether we have an open position

        Returns:
            Enhanced reward that incentivizes profitable behavior
        """
        try:
            enhanced_reward = base_reward

            if has_position and position_pnl != 0.0:
                # Position-based reward adjustments
                pnl_factor = position_pnl / 100.0  # Normalize P&L to reasonable scale

                if position_pnl > 0:  # Profitable position
                    if action == "HOLD":
                        # Reward holding profitable positions (let winners run)
                        enhanced_reward += abs(pnl_factor) * 0.5
                    elif action in ["BUY", "SELL"]:
                        # Moderate reward for taking action on profitable positions
                        enhanced_reward += abs(pnl_factor) * 0.3

                elif position_pnl < 0:  # Losing position
                    if action == "HOLD":
                        # Penalty for holding losing positions (cut losses)
                        enhanced_reward -= abs(pnl_factor) * 0.8
                    elif action in ["BUY", "SELL"]:
                        # Reward for taking action to close losing positions
                        enhanced_reward += abs(pnl_factor) * 0.6

            # Ensure reward doesn't become extreme
            enhanced_reward = max(-5.0, min(5.0, enhanced_reward))

            return enhanced_reward

        except Exception as e:
            logger.error(f"Error calculating position-enhanced reward: {e}")
            return base_reward

    def save(self, path):
        """Save model weights and architecture"""
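    # Worked example of the reward shaping above: with base_reward=0.2 and an
    # open position at position_pnl=-50.0, pnl_factor=-0.5, so HOLD yields
    # 0.2 - 0.5*0.8 = -0.2 (holding a loser is penalized) while SELL yields
    # 0.2 + 0.5*0.6 = 0.5 (closing a loser is rewarded), both inside the
    # [-5, 5] clamp. Assuming a constructed EnhancedCNN instance `model`:
    #
    #   r_hold = model._calculate_position_enhanced_reward(0.2, "HOLD", -50.0, True)
    #   r_sell = model._calculate_position_enhanced_reward(0.2, "SELL", -50.0, True)
    #   assert abs(r_hold - (-0.2)) < 1e-9 and abs(r_sell - 0.5) < 1e-9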
@@ -1 +0,0 @@
{"best_reward": 4791516.572471984, "best_episode": 3250, "best_pnl": 826842167451289.1, "best_win_rate": 0.47368421052631576, "date": "2025-04-01 10:19:16"}
@@ -1,20 +0,0 @@
{
    "supervised": {
        "epochs_completed": 22650,
        "best_val_pnl": 0.0,
        "best_epoch": 50,
        "best_win_rate": 0
    },
    "reinforcement": {
        "episodes_completed": 0,
        "best_reward": -Infinity,
        "best_episode": 0,
        "best_win_rate": 0
    },
    "hybrid": {
        "iterations_completed": 453,
        "best_combined_score": 0.0,
        "training_started": "2025-04-09T10:30:42.510856",
        "last_update": "2025-04-09T10:40:02.217840"
    }
}
@@ -1,326 +0,0 @@
{
  "epochs_completed": 8,
  "best_val_pnl": 0.0,
  "best_epoch": 1,
  "best_win_rate": 0.0,
  "training_started": "2025-04-02T10:43:58.946682",
  "last_update": "2025-04-02T10:44:10.940892",
  "epochs": [
    {"epoch": 1, "train_loss": 1.0950355529785156, "val_loss": 1.1657923062642415, "train_acc": 0.3255208333333333, "val_acc": 0.0, "train_pnl": 0.0, "val_pnl": 0.0, "train_win_rate": 0.0, "val_win_rate": 0.0, "best_position_size": 0.1, "signal_distribution": {"train": {"BUY": 1.0, "SELL": 0.0, "HOLD": 0.0}, "val": {"BUY": 1.0, "SELL": 0.0, "HOLD": 0.0}}, "timestamp": "2025-04-02T10:44:01.840889", "data_age": 2, "cumulative_pnl": {"train": 0.0, "val": 0.0}, "total_trades": {"train": 0, "val": 0}, "overall_win_rate": {"train": 0.0, "val": 0.0}},
    {"epoch": 2, "train_loss": 1.0831659038861592, "val_loss": 1.1212460199991863, "train_acc": 0.390625, "val_acc": 0.0, "train_pnl": 0.0, "val_pnl": 0.0, "train_win_rate": 0.0, "val_win_rate": 0.0, "best_position_size": 0.1, "signal_distribution": {"train": {"BUY": 1.0, "SELL": 0.0, "HOLD": 0.0}, "val": {"BUY": 1.0, "SELL": 0.0, "HOLD": 0.0}}, "timestamp": "2025-04-02T10:44:03.134833", "data_age": 4, "cumulative_pnl": {"train": 0.0, "val": 0.0}, "total_trades": {"train": 0, "val": 0}, "overall_win_rate": {"train": 0.0, "val": 0.0}},
    {"epoch": 3, "train_loss": 1.0740693012873332, "val_loss": 1.0992945830027263, "train_acc": 0.4739583333333333, "val_acc": 0.0, "train_pnl": 0.0, "val_pnl": 0.0, "train_win_rate": 0.0, "val_win_rate": 0.0, "best_position_size": 0.1, "signal_distribution": {"train": {"BUY": 1.0, "SELL": 0.0, "HOLD": 0.0}, "val": {"BUY": 1.0, "SELL": 0.0, "HOLD": 0.0}}, "timestamp": "2025-04-02T10:44:04.425272", "data_age": 5, "cumulative_pnl": {"train": 0.0, "val": 0.0}, "total_trades": {"train": 0, "val": 0}, "overall_win_rate": {"train": 0.0, "val": 0.0}},
    {"epoch": 4, "train_loss": 1.0747728943824768, "val_loss": 1.0821794271469116, "train_acc": 0.4609375, "val_acc": 0.3229166666666667, "train_pnl": 0.0, "val_pnl": 0.0, "train_win_rate": 0.0, "val_win_rate": 0.0, "best_position_size": 0.1, "signal_distribution": {"train": {"BUY": 1.0, "SELL": 0.0, "HOLD": 0.0}, "val": {"BUY": 1.0, "SELL": 0.0, "HOLD": 0.0}}, "timestamp": "2025-04-02T10:44:05.716421", "data_age": 6, "cumulative_pnl": {"train": 0.0, "val": 0.0}, "total_trades": {"train": 0, "val": 0}, "overall_win_rate": {"train": 0.0, "val": 0.0}},
    {"epoch": 5, "train_loss": 1.0489931503931682, "val_loss": 1.0669521888097127, "train_acc": 0.5833333333333334, "val_acc": 1.0, "train_pnl": 0.0, "val_pnl": 0.0, "train_win_rate": 0.0, "val_win_rate": 0.0, "best_position_size": 0.1, "signal_distribution": {"train": {"BUY": 1.0, "SELL": 0.0, "HOLD": 0.0}, "val": {"BUY": 1.0, "SELL": 0.0, "HOLD": 0.0}}, "timestamp": "2025-04-02T10:44:07.007935", "data_age": 8, "cumulative_pnl": {"train": 0.0, "val": 0.0}, "total_trades": {"train": 0, "val": 0}, "overall_win_rate": {"train": 0.0, "val": 0.0}},
    {"epoch": 6, "train_loss": 1.0533669590950012, "val_loss": 1.0505590836207073, "train_acc": 0.5104166666666666, "val_acc": 1.0, "train_pnl": 0.0, "val_pnl": 0.0, "train_win_rate": 0.0, "val_win_rate": 0.0, "best_position_size": 0.1, "signal_distribution": {"train": {"BUY": 1.0, "SELL": 0.0, "HOLD": 0.0}, "val": {"BUY": 1.0, "SELL": 0.0, "HOLD": 0.0}}, "timestamp": "2025-04-02T10:44:08.296061", "data_age": 9, "cumulative_pnl": {"train": 0.0, "val": 0.0}, "total_trades": {"train": 0, "val": 0}, "overall_win_rate": {"train": 0.0, "val": 0.0}},
    {"epoch": 7, "train_loss": 1.0456886688868205, "val_loss": 1.0351698795954387, "train_acc": 0.5651041666666666, "val_acc": 1.0, "train_pnl": 0.0, "val_pnl": 0.0, "train_win_rate": 0.0, "val_win_rate": 0.0, "best_position_size": 0.1, "signal_distribution": {"train": {"BUY": 1.0, "SELL": 0.0, "HOLD": 0.0}, "val": {"BUY": 1.0, "SELL": 0.0, "HOLD": 0.0}}, "timestamp": "2025-04-02T10:44:09.607584", "data_age": 10, "cumulative_pnl": {"train": 0.0, "val": 0.0}, "total_trades": {"train": 0, "val": 0}, "overall_win_rate": {"train": 0.0, "val": 0.0}},
    {"epoch": 8, "train_loss": 1.040040671825409, "val_loss": 1.0227736632029216, "train_acc": 0.6119791666666666, "val_acc": 1.0, "train_pnl": 0.0, "val_pnl": 0.0, "train_win_rate": 0.0, "val_win_rate": 0.0, "best_position_size": 0.1, "signal_distribution": {"train": {"BUY": 1.0, "SELL": 0.0, "HOLD": 0.0}, "val": {"BUY": 1.0, "SELL": 0.0, "HOLD": 0.0}}, "timestamp": "2025-04-02T10:44:10.940892", "data_age": 11, "cumulative_pnl": {"train": 0.0, "val": 0.0}, "total_trades": {"train": 0, "val": 0}, "overall_win_rate": {"train": 0.0, "val": 0.0}}
  ],
  "cumulative_pnl": {"train": 0.0, "val": 0.0},
  "total_trades": {"train": 0, "val": 0},
  "total_wins": {"train": 0, "val": 0}
}
@@ -1,192 +0,0 @@
{
  "epochs_completed": 7,
  "best_val_pnl": 0.002028853100759435,
  "best_epoch": 6,
  "best_win_rate": 0.5157894736842106,
  "training_started": "2025-03-31T02:50:10.418670",
  "last_update": "2025-03-31T02:50:15.227593",
  "epochs": [
    {"epoch": 1, "train_loss": 1.1206786036491394, "val_loss": 1.0542699098587036, "train_acc": 0.11197916666666667, "val_acc": 0.25, "train_pnl": 0.0, "val_pnl": 0.0, "train_win_rate": 0.0, "val_win_rate": 0.0, "best_position_size": 0.1, "signal_distribution": {"train": {"BUY": 0.0, "SELL": 0.0, "HOLD": 1.0}, "val": {"BUY": 0.0, "SELL": 0.0, "HOLD": 1.0}}, "timestamp": "2025-03-31T02:50:12.881423", "data_age": 2},
    {"epoch": 2, "train_loss": 1.1266120672225952, "val_loss": 1.072133183479309, "train_acc": 0.1171875, "val_acc": 0.25, "train_pnl": 0.0, "val_pnl": 0.0, "train_win_rate": 0.0, "val_win_rate": 0.0, "best_position_size": 0.1, "signal_distribution": {"train": {"BUY": 0.0, "SELL": 0.0, "HOLD": 1.0}, "val": {"BUY": 0.0, "SELL": 0.0, "HOLD": 1.0}}, "timestamp": "2025-03-31T02:50:13.186840", "data_age": 2},
    {"epoch": 3, "train_loss": 1.1415620843569438, "val_loss": 1.1701548099517822, "train_acc": 0.1015625, "val_acc": 0.5208333333333334, "train_pnl": 0.0, "val_pnl": 0.0, "train_win_rate": 0.0, "val_win_rate": 0.0, "best_position_size": 0.1, "signal_distribution": {"train": {"BUY": 0.0, "SELL": 0.0, "HOLD": 1.0}, "val": {"BUY": 0.0, "SELL": 0.0, "HOLD": 1.0}}, "timestamp": "2025-03-31T02:50:13.442018", "data_age": 3},
    {"epoch": 4, "train_loss": 1.1331567962964375, "val_loss": 1.070081114768982, "train_acc": 0.09375, "val_acc": 0.22916666666666666, "train_pnl": 0.010650217327384765, "val_pnl": -0.0007049481907895126, "train_win_rate": 0.49279538904899134, "val_win_rate": 0.40625, "best_position_size": 0.1, "signal_distribution": {"train": {"BUY": 0.0, "SELL": 0.9036458333333334, "HOLD": 0.09635416666666667}, "val": {"BUY": 0.0, "SELL": 0.3333333333333333, "HOLD": 0.6666666666666666}}, "timestamp": "2025-03-31T02:50:13.739899", "data_age": 3},
    {"epoch": 5, "train_loss": 1.10965762535731, "val_loss": 1.0485950708389282, "train_acc": 0.12239583333333333, "val_acc": 0.17708333333333334, "train_pnl": 0.011924086862580204, "val_pnl": 0.0, "train_win_rate": 0.5070422535211268, "val_win_rate": 0.0, "best_position_size": 0.1, "signal_distribution": {"train": {"BUY": 0.0, "SELL": 0.7395833333333334, "HOLD": 0.2604166666666667}, "val": {"BUY": 0.0, "SELL": 0.0, "HOLD": 1.0}}, "timestamp": "2025-03-31T02:50:14.073439", "data_age": 3},
    {"epoch": 6, "train_loss": 1.1272419293721516, "val_loss": 1.084235429763794, "train_acc": 0.1015625, "val_acc": 0.22916666666666666, "train_pnl": 0.014825159601390072, "val_pnl": 0.00405770620151887, "train_win_rate": 0.4908616187989556, "val_win_rate": 0.5157894736842106, "best_position_size": 2.0, "signal_distribution": {"train": {"BUY": 0.0, "SELL": 1.0, "HOLD": 0.0}, "val": {"BUY": 0.0, "SELL": 1.0, "HOLD": 0.0}}, "timestamp": "2025-03-31T02:50:14.658295", "data_age": 4},
    {"epoch": 7, "train_loss": 1.1171108484268188, "val_loss": 1.0741244554519653, "train_acc": 0.1171875, "val_acc": 0.22916666666666666, "train_pnl": 0.0059474696523706605, "val_pnl": 0.00405770620151887, "train_win_rate": 0.4838709677419355, "val_win_rate": 0.5157894736842106, "best_position_size": 2.0, "signal_distribution": {"train": {"BUY": 0.0, "SELL": 0.7291666666666666, "HOLD": 0.2708333333333333}, "val": {"BUY": 0.0, "SELL": 1.0, "HOLD": 0.0}}, "timestamp": "2025-03-31T02:50:15.227593", "data_age": 4}
  ]
}
NN/models/standardized_cnn.py (new file, 512 lines)
@@ -0,0 +1,512 @@
"""
|
||||
Standardized CNN Model for Multi-Modal Trading System
|
||||
|
||||
This module extends the existing EnhancedCNN to work with standardized BaseDataInput format
|
||||
and provides ModelOutput for cross-model feeding.
|
||||
"""
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import numpy as np
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Any, Tuple
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add the project root to the path to import core modules
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from core.data_models import BaseDataInput, ModelOutput, create_model_output
|
||||
from .enhanced_cnn import EnhancedCNN, SelfAttention, ResidualBlock
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class StandardizedCNN(nn.Module):
|
||||
"""
|
||||
Standardized CNN Model that accepts BaseDataInput and outputs ModelOutput
|
||||
|
||||
Features:
|
||||
- Accepts standardized BaseDataInput format
|
||||
- Processes COB+OHLCV data: 300 frames (1s,1m,1h,1d) ETH + 300s 1s BTC
|
||||
- Includes COB ±20 buckets and MA (1s,5s,15s,60s) of COB imbalance ±5 buckets
|
||||
- Outputs BUY/SELL trading action with confidence scores
|
||||
- Provides hidden states for cross-model feeding
|
||||
- Integrates with checkpoint management system
|
||||
"""
|
||||
|
||||
def __init__(self, model_name: str = "standardized_cnn_v1", confidence_threshold: float = 0.6):
|
||||
"""
|
||||
Initialize the standardized CNN model
|
||||
|
||||
Args:
|
||||
model_name: Name identifier for this model instance
|
||||
confidence_threshold: Minimum confidence threshold for predictions
|
||||
"""
|
||||
super(StandardizedCNN, self).__init__()
|
||||
|
||||
self.model_name = model_name
|
||||
self.model_type = "cnn"
|
||||
self.confidence_threshold = confidence_threshold
|
||||
|
||||
# Calculate expected input dimensions from BaseDataInput
|
||||
self.expected_feature_dim = self._calculate_expected_features()
|
||||
|
||||
# Initialize the underlying enhanced CNN with calculated dimensions
|
||||
self.enhanced_cnn = EnhancedCNN(
|
||||
input_shape=self.expected_feature_dim,
|
||||
n_actions=3, # BUY, SELL, HOLD
|
||||
confidence_threshold=confidence_threshold
|
||||
)
|
||||
|
||||
# Additional layers for processing BaseDataInput structure
|
||||
self.input_processor = self._build_input_processor()
|
||||
|
||||
# Output processing layers
|
||||
self.output_processor = self._build_output_processor()
|
||||
|
||||
# Optional numeric return head (predicts percent change for 1s,1m,1h,1d)
|
||||
# Uses cnn_features (1024) to regress predicted returns per timeframe
|
||||
self.return_head = nn.Sequential(
|
||||
nn.Linear(1024, 256),
|
||||
nn.ReLU(),
|
||||
nn.Dropout(0.1),
|
||||
nn.Linear(256, 4) # [return_1s, return_1m, return_1h, return_1d]
|
||||
)
|
||||
|
||||
# Device management
|
||||
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
self.to(self.device)
|
||||
try:
|
||||
import torch.backends.cudnn as cudnn
|
||||
cudnn.benchmark = True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
logger.info(f"StandardizedCNN '{model_name}' initialized")
|
||||
logger.info(f"Expected feature dimension: {self.expected_feature_dim}")
|
||||
logger.info(f"Device: {self.device}")
|
||||
|
||||
def _calculate_expected_features(self) -> int:
|
||||
"""
|
||||
Calculate expected feature dimension from BaseDataInput structure
|
||||
|
||||
Based on actual BaseDataInput.get_feature_vector():
|
||||
- OHLCV ETH: 300 frames x 4 timeframes x 5 features = 6000
|
||||
- OHLCV BTC: 300 frames x 5 features = 1500
|
||||
- COB features: ~184 features (actual from implementation)
|
||||
- Technical indicators: 100 features (padded)
|
||||
- Last predictions: 50 features (padded)
|
||||
Total: ~7834 features (actual measured)
|
||||
"""
|
||||
return 7834 # Based on actual BaseDataInput.get_feature_vector() measurement
|
||||
|
||||
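    # Sanity check for the 7834 figure above (illustrative arithmetic, not
    # executed as part of the class):
    #   300*4*5 (ETH OHLCV) + 300*5 (BTC OHLCV) + 184 (COB)
    #     + 100 (indicators) + 50 (prior predictions)
    #   = 6000 + 1500 + 184 + 100 + 50 = 7834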
    def _build_input_processor(self) -> nn.Module:
        """
        Build input processing layers for BaseDataInput

        Returns:
            nn.Module: Input processing layers
        """
        return nn.Sequential(
            # Initial processing of raw BaseDataInput features
            nn.Linear(self.expected_feature_dim, 4096),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.BatchNorm1d(4096),

            # Feature refinement
            nn.Linear(4096, 2048),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.BatchNorm1d(2048),

            # Final feature extraction
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.Dropout(0.1)
        )

    def _build_output_processor(self) -> nn.Module:
        """
        Build output processing layers for standardized ModelOutput

        Returns:
            nn.Module: Output processing layers
        """
        return nn.Sequential(
            # Process CNN outputs for standardized format
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.2),

            # Final action prediction
            nn.Linear(512, 3),  # BUY, SELL, HOLD
            nn.Softmax(dim=1)
        )

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], torch.Tensor]:
        """
        Forward pass through the standardized CNN

        Args:
            x: Input tensor from BaseDataInput.get_feature_vector()

        Returns:
            Tuple of (action_probabilities, hidden_states_dict, predicted_returns)
        """
        batch_size = x.size(0)

        # Validate input dimensions
        if x.size(1) != self.expected_feature_dim:
            logger.warning(f"Input dimension mismatch: expected {self.expected_feature_dim}, got {x.size(1)}")
            # Pad or truncate as needed
            if x.size(1) < self.expected_feature_dim:
                padding = torch.zeros(batch_size, self.expected_feature_dim - x.size(1), device=x.device)
                x = torch.cat([x, padding], dim=1)
            else:
                x = x[:, :self.expected_feature_dim]

        # Process input through input processor
        processed_features = self.input_processor(x)  # [batch, 1024]

        # Get enhanced CNN predictions (using processed features as input)
        # We need to reshape for the enhanced CNN which expects different input format
        cnn_input = processed_features.unsqueeze(1)  # Add sequence dimension

        try:
            q_values, extrema_pred, price_pred, cnn_features, advanced_pred = self.enhanced_cnn(cnn_input)
        except Exception as e:
            logger.warning(f"Enhanced CNN forward pass failed: {e}, using fallback")
            # Fallback to direct processing
            cnn_features = processed_features
            q_values = torch.zeros(batch_size, 3, device=x.device)
            extrema_pred = torch.zeros(batch_size, 3, device=x.device)
            price_pred = torch.zeros(batch_size, 3, device=x.device)
            advanced_pred = torch.zeros(batch_size, 5, device=x.device)

        # Process outputs for standardized format
        action_probs = self.output_processor(cnn_features)  # [batch, 3]

        # Predict numeric returns per timeframe from cnn_features
        predicted_returns = self.return_head(cnn_features)  # [batch, 4]

        # Prepare hidden states for cross-model feeding
        hidden_states = {
            'processed_features': processed_features.detach(),
            'cnn_features': cnn_features.detach(),
            'q_values': q_values.detach(),
            'extrema_predictions': extrema_pred.detach(),
            'price_predictions': price_pred.detach(),
            'advanced_predictions': advanced_pred.detach(),
            'attention_weights': torch.ones(batch_size, 1, device=x.device)  # Placeholder
        }

        return action_probs, hidden_states, predicted_returns.detach()
    def predict_from_base_input(self, base_input: BaseDataInput) -> ModelOutput:
        """
        Make prediction from BaseDataInput and return standardized ModelOutput

        Args:
            base_input: Standardized input data

        Returns:
            ModelOutput: Standardized model output
        """
        try:
            # Convert BaseDataInput to feature vector
            feature_vector = base_input.get_feature_vector()

            # Convert to tensor and add batch dimension
            input_tensor = torch.tensor(feature_vector, dtype=torch.float32, device=self.device).unsqueeze(0)

            # Set model to evaluation mode
            self.eval()

            with torch.no_grad():
                # Forward pass
                action_probs, hidden_states, predicted_returns = self.forward(input_tensor)

                # Get action and confidence
                action_probs_np = action_probs.squeeze(0).cpu().numpy()
                action_idx = np.argmax(action_probs_np)
                confidence = float(action_probs_np[action_idx])

                # Map action index to action name
                action_names = ['BUY', 'SELL', 'HOLD']
                action = action_names[action_idx]

                # Prepare predictions dictionary
                predictions = {
                    'action': action,
                    'buy_probability': float(action_probs_np[0]),
                    'sell_probability': float(action_probs_np[1]),
                    'hold_probability': float(action_probs_np[2]),
                    'action_probabilities': action_probs_np.tolist(),
                    'extrema_detected': self._interpret_extrema(hidden_states.get('extrema_predictions')),
                    'price_direction': self._interpret_price_direction(hidden_states.get('price_predictions')),
                    'market_conditions': self._interpret_advanced_predictions(hidden_states.get('advanced_predictions'))
                }

                # Add numeric predicted returns per timeframe if available
                try:
                    pr = predicted_returns.squeeze(0).cpu().numpy().tolist()
                    # Ensure length 4; if not, safely handle
                    if isinstance(pr, list) and len(pr) >= 4:
                        predictions['predicted_returns'] = pr[:4]
                        predictions['predicted_return_1s'] = float(pr[0])
                        predictions['predicted_return_1m'] = float(pr[1])
                        predictions['predicted_return_1h'] = float(pr[2])
                        predictions['predicted_return_1d'] = float(pr[3])
                except Exception:
                    pass

                # Prepare hidden states for cross-model feeding (convert tensors to numpy)
                cross_model_states = {}
                for key, tensor in hidden_states.items():
                    if isinstance(tensor, torch.Tensor):
                        cross_model_states[key] = tensor.squeeze(0).cpu().numpy().tolist()
                    else:
                        cross_model_states[key] = tensor

                # Create metadata
                metadata = {
                    'model_version': '1.0',
                    'confidence_threshold': self.confidence_threshold,
                    'feature_dimension': self.expected_feature_dim,
                    'processing_time_ms': 0,  # Could add timing if needed
                    'input_validation': base_input.validate()
                }

                # Create standardized ModelOutput
                model_output = ModelOutput(
                    model_type=self.model_type,
                    model_name=self.model_name,
                    symbol=base_input.symbol,
                    timestamp=datetime.now(),
                    confidence=confidence,
                    predictions=predictions,
                    hidden_states=cross_model_states,
                    metadata=metadata
                )

                return model_output

        except Exception as e:
            logger.error(f"Error in CNN prediction: {e}")
            # Return default output
            return self._create_default_output(base_input.symbol)
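    # Caller sketch for predict_from_base_input; `build_base_data_input` is a
    # hypothetical helper, not part of this module -- the real pipeline
    # constructs BaseDataInput itself:
    #
    #   model = StandardizedCNN()
    #   base_input = build_base_data_input(symbol='ETH/USDT')  # hypothetical
    #   output = model.predict_from_base_input(base_input)
    #   print(output.predictions['action'], output.confidence)
    #   print(output.predictions.get('predicted_return_1m'))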
    def _interpret_extrema(self, extrema_tensor: Optional[torch.Tensor]) -> str:
        """Interpret extrema predictions"""
        if extrema_tensor is None:
            return "unknown"

        try:
            extrema_probs = torch.softmax(extrema_tensor.squeeze(0), dim=0)
            extrema_idx = torch.argmax(extrema_probs).item()
            extrema_labels = ['bottom', 'top', 'neither']
            return extrema_labels[extrema_idx]
        except Exception:
            return "unknown"

    def _interpret_price_direction(self, price_tensor: Optional[torch.Tensor]) -> str:
        """Interpret price direction predictions"""
        if price_tensor is None:
            return "unknown"

        try:
            price_probs = torch.softmax(price_tensor.squeeze(0), dim=0)
            price_idx = torch.argmax(price_probs).item()
            price_labels = ['up', 'down', 'sideways']
            return price_labels[price_idx]
        except Exception:
            return "unknown"

    def _interpret_advanced_predictions(self, advanced_tensor: Optional[torch.Tensor]) -> Dict[str, str]:
        """Interpret advanced market predictions"""
        if advanced_tensor is None:
            return {"volatility": "unknown", "risk": "unknown"}

        try:
            # Assuming advanced predictions include volatility (5 classes)
            if advanced_tensor.size(-1) >= 5:
                volatility_probs = torch.softmax(advanced_tensor.squeeze(0)[:5], dim=0)
                volatility_idx = torch.argmax(volatility_probs).item()
                volatility_labels = ['very_low', 'low', 'medium', 'high', 'very_high']
                volatility = volatility_labels[volatility_idx]
            else:
                volatility = "unknown"

            return {
                "volatility": volatility,
                "risk": "medium"  # Placeholder
            }
        except Exception:
            return {"volatility": "unknown", "risk": "unknown"}

    def _create_default_output(self, symbol: str) -> ModelOutput:
        """Create default ModelOutput for error cases"""
        return create_model_output(
            model_type=self.model_type,
            model_name=self.model_name,
            symbol=symbol,
            action='HOLD',
            confidence=0.5,
            metadata={'error': True, 'default_output': True}
        )
    def train_step(self, base_inputs: List[BaseDataInput], targets: List[str],
                   optimizer: torch.optim.Optimizer) -> float:
        """
        Perform a single training step

        Args:
            base_inputs: List of BaseDataInput for training
            targets: List of target actions ('BUY', 'SELL', 'HOLD')
            optimizer: PyTorch optimizer

        Returns:
            float: Training loss
        """
        self.train()

        try:
            # Convert inputs to tensors
            feature_vectors = []
            for base_input in base_inputs:
                feature_vector = base_input.get_feature_vector()
                feature_vectors.append(feature_vector)

            input_tensor = torch.tensor(np.array(feature_vectors), dtype=torch.float32, device=self.device)

            # Convert targets to tensor
            action_to_idx = {'BUY': 0, 'SELL': 1, 'HOLD': 2}
            target_indices = [action_to_idx.get(target, 2) for target in targets]
            target_tensor = torch.tensor(target_indices, dtype=torch.long, device=self.device)

            # Forward pass (forward returns a 3-tuple; the returns head is unused here)
            action_probs, _, _ = self.forward(input_tensor)

            # Calculate loss
            loss = F.cross_entropy(action_probs, target_tensor)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            return float(loss.item())

        except Exception as e:
            logger.error(f"Error in training step: {e}")
            return float('inf')
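    # Training-loop sketch around train_step; `training_batches` (a list of
    # (base_inputs, targets) pairs) is assumed prepared upstream:
    #
    #   model = StandardizedCNN()
    #   optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    #   for epoch in range(10):
    #       losses = [model.train_step(bi, tg, optimizer) for bi, tg in training_batches]
    #       logger.info(f"epoch {epoch}: mean loss {sum(losses) / max(len(losses), 1):.4f}")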
    def evaluate(self, base_inputs: List[BaseDataInput], targets: List[str]) -> Dict[str, float]:
        """
        Evaluate model performance

        Args:
            base_inputs: List of BaseDataInput for evaluation
            targets: List of target actions

        Returns:
            Dict containing evaluation metrics
        """
        self.eval()

        try:
            correct = 0
            total = len(base_inputs)
            total_confidence = 0.0

            with torch.no_grad():
                for base_input, target in zip(base_inputs, targets):
                    model_output = self.predict_from_base_input(base_input)
                    predicted_action = model_output.predictions['action']

                    if predicted_action == target:
                        correct += 1

                    total_confidence += model_output.confidence

            accuracy = correct / total if total > 0 else 0.0
            avg_confidence = total_confidence / total if total > 0 else 0.0

            return {
                'accuracy': accuracy,
                'avg_confidence': avg_confidence,
                'correct_predictions': correct,
                'total_predictions': total
            }

        except Exception as e:
            logger.error(f"Error in evaluation: {e}")
            return {'accuracy': 0.0, 'avg_confidence': 0.0, 'correct_predictions': 0, 'total_predictions': 0}

    def save_checkpoint(self, filepath: str, metadata: Optional[Dict[str, Any]] = None):
        """
        Save model checkpoint

        Args:
            filepath: Path to save checkpoint
            metadata: Optional metadata to save with checkpoint
        """
        try:
            checkpoint = {
                'model_state_dict': self.state_dict(),
                'model_name': self.model_name,
                'model_type': self.model_type,
                'confidence_threshold': self.confidence_threshold,
                'expected_feature_dim': self.expected_feature_dim,
                'metadata': metadata or {},
                'timestamp': datetime.now().isoformat()
            }

            torch.save(checkpoint, filepath)
            logger.info(f"Checkpoint saved to {filepath}")

        except Exception as e:
            logger.error(f"Error saving checkpoint: {e}")

    def load_checkpoint(self, filepath: str) -> bool:
        """
        Load model checkpoint

        Args:
            filepath: Path to checkpoint file

        Returns:
            bool: True if loaded successfully, False otherwise
        """
        try:
            checkpoint = torch.load(filepath, map_location=self.device)

            # Load model state
            self.load_state_dict(checkpoint['model_state_dict'])

            # Load configuration
            self.model_name = checkpoint.get('model_name', self.model_name)
            self.confidence_threshold = checkpoint.get('confidence_threshold', self.confidence_threshold)
            self.expected_feature_dim = checkpoint.get('expected_feature_dim', self.expected_feature_dim)

            logger.info(f"Checkpoint loaded from {filepath}")
            return True

        except Exception as e:
            logger.error(f"Error loading checkpoint: {e}")
            return False

    def get_model_info(self) -> Dict[str, Any]:
        """Get model information"""
        return {
            'model_name': self.model_name,
            'model_type': self.model_type,
            'confidence_threshold': self.confidence_threshold,
            'expected_feature_dim': self.expected_feature_dim,
            'device': str(self.device),
            'parameter_count': sum(p.numel() for p in self.parameters()),
            'trainable_parameters': sum(p.numel() for p in self.parameters() if p.requires_grad)
        }
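Checkpoint round trip for the class above, as a short sketch (the path is illustrative):

model = StandardizedCNN(model_name="standardized_cnn_v1")
model.save_checkpoint("/tmp/standardized_cnn_v1.pt", metadata={"run": "example"})

restored = StandardizedCNN()
assert restored.load_checkpoint("/tmp/standardized_cnn_v1.pt")
print(restored.get_model_info())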
@@ -1,821 +0,0 @@
"""
Transformer Neural Network for timeseries analysis

This module implements a Transformer model with attention mechanisms for cryptocurrency price analysis.
It also includes a Mixture of Experts model that combines predictions from multiple models.
"""

import os
import logging
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
    Input, Dense, Dropout, BatchNormalization,
    Concatenate, Layer, LayerNormalization, MultiHeadAttention,
    Add, GlobalAveragePooling1D, Conv1D, Reshape
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import datetime
import json

logger = logging.getLogger(__name__)

class TransformerBlock(Layer):
    """
    Transformer block implementation with multi-head attention and feed-forward networks.
    """
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(embed_dim),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=False):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        config = super().get_config()
        config.update({
            'att': self.att,
            'ffn': self.ffn,
            'layernorm1': self.layernorm1,
            'layernorm2': self.layernorm2,
            'dropout1': self.dropout1,
            'dropout2': self.dropout2
        })
        return config
class PositionalEncoding(Layer):
    """
    Positional encoding layer to add position information to input embeddings.
    """
    def __init__(self, position, d_model):
        super(PositionalEncoding, self).__init__()
        self.position = position
        self.d_model = d_model
        self.pos_encoding = self.positional_encoding(position, d_model)

    def get_angles(self, position, i, d_model):
        angles = 1 / tf.pow(10000, (2 * (i // 2)) / tf.cast(d_model, tf.float32))
        return position * angles

    def positional_encoding(self, position, d_model):
        angle_rads = self.get_angles(
            position=tf.range(position, dtype=tf.float32)[:, tf.newaxis],
            i=tf.range(d_model, dtype=tf.float32)[tf.newaxis, :],
            d_model=d_model
        )

        # Apply sin to even indices in the array
        sines = tf.math.sin(angle_rads[:, 0::2])

        # Apply cos to odd indices in the array
        cosines = tf.math.cos(angle_rads[:, 1::2])

        pos_encoding = tf.concat([sines, cosines], axis=-1)
        pos_encoding = pos_encoding[tf.newaxis, ...]

        return tf.cast(pos_encoding, tf.float32)

    def call(self, inputs):
        return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]

    def get_config(self):
        config = super().get_config()
        config.update({
            'position': self.position,
            'd_model': self.d_model,
            'pos_encoding': self.pos_encoding
        })
        return config
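# The angle computation above implements the standard sinusoidal encoding:
#   PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
#   PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))
# Note this variant concatenates all sine channels followed by all cosine
# channels along the last axis, rather than interleaving them as in the
# original Transformer formulation.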
class TransformerModel:
|
||||
"""
|
||||
Transformer Neural Network for time series analysis.
|
||||
|
||||
This model uses self-attention mechanisms to capture relationships between
|
||||
different time points in the input data.
|
||||
"""
|
||||
|
||||
def __init__(self, ts_input_shape=(20, 5), feature_input_shape=64, output_size=1, model_dir="NN/models/saved"):
|
||||
"""
|
||||
Initialize the Transformer model.
|
||||
|
||||
Args:
|
||||
ts_input_shape (tuple): Shape of time series input data (sequence_length, features)
|
||||
feature_input_shape (int): Shape of additional feature input (e.g., from CNN)
|
||||
output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
|
||||
model_dir (str): Directory to save trained models
|
||||
"""
|
||||
self.ts_input_shape = ts_input_shape
|
||||
self.feature_input_shape = feature_input_shape
|
||||
self.output_size = output_size
|
||||
self.model_dir = model_dir
|
||||
self.model = None
|
||||
self.history = None
|
||||
|
||||
# Create model directory if it doesn't exist
|
||||
os.makedirs(self.model_dir, exist_ok=True)
|
||||
|
||||
logger.info(f"Initialized Transformer model with TS input shape {ts_input_shape}, "
|
||||
f"feature input shape {feature_input_shape}, and output size {output_size}")
|
||||
|
||||
    def build_model(self, embed_dim=32, num_heads=4, ff_dim=64, num_transformer_blocks=2, dropout_rate=0.1, learning_rate=0.001):
        """
        Build the Transformer model architecture.

        Args:
            embed_dim (int): Embedding dimension for transformer
            num_heads (int): Number of attention heads
            ff_dim (int): Hidden dimension of the feed forward network
            num_transformer_blocks (int): Number of transformer blocks
            dropout_rate (float): Dropout rate for regularization
            learning_rate (float): Learning rate for Adam optimizer

        Returns:
            The compiled model
        """
        # Time series input
        ts_inputs = Input(shape=self.ts_input_shape, name="ts_input")

        # Additional feature input (e.g., from CNN)
        feature_inputs = Input(shape=(self.feature_input_shape,), name="feature_input")

        # Process time series with transformer:
        # first, project the input to the embedding dimension
        x = Conv1D(embed_dim, 1, activation="relu")(ts_inputs)

        # Add positional encoding
        x = PositionalEncoding(self.ts_input_shape[0], embed_dim)(x)

        # Add transformer blocks
        for _ in range(num_transformer_blocks):
            x = TransformerBlock(embed_dim, num_heads, ff_dim, dropout_rate)(x)

        # Global pooling to get a single vector representation
        x = GlobalAveragePooling1D()(x)
        x = Dropout(dropout_rate)(x)

        # Combine with additional features
        combined = Concatenate()([x, feature_inputs])

        # Dense layers for final classification/regression
        x = Dense(64, activation="relu")(combined)
        x = BatchNormalization()(x)
        x = Dropout(dropout_rate)(x)

        # Output layer
        if self.output_size == 1:
            # Binary classification (up/down)
            outputs = Dense(1, activation='sigmoid', name='output')(x)
            loss = 'binary_crossentropy'
            metrics = ['accuracy']
        elif self.output_size == 3:
            # Multi-class classification (buy/hold/sell)
            outputs = Dense(3, activation='softmax', name='output')(x)
            loss = 'categorical_crossentropy'
            metrics = ['accuracy']
        else:
            # Regression
            outputs = Dense(self.output_size, activation='linear', name='output')(x)
            loss = 'mse'
            metrics = ['mae']

        # Create and compile model
        self.model = Model(inputs=[ts_inputs, feature_inputs], outputs=outputs)

        # Compile with Adam optimizer
        self.model.compile(
            optimizer=Adam(learning_rate=learning_rate),
            loss=loss,
            metrics=metrics
        )

        # Log model summary
        self.model.summary(print_fn=lambda x: logger.info(x))

        return self.model

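    # Sketch of building the network directly (hypothetical hyperparameter
    # values; the defaults above are used when omitted):
    #
    #     model = TransformerModel(ts_input_shape=(20, 5), feature_input_shape=64, output_size=3)
    #     model.build_model(embed_dim=32, num_heads=4, ff_dim=64, num_transformer_blocks=2)
    #
    # The 1x1 Conv1D projects the 5 raw input features at each timestep up
    # to embed_dim before positional encoding and attention are applied.
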
    def train(self, X_ts, X_features, y, batch_size=32, epochs=100, validation_split=0.2,
              callbacks=None, class_weights=None):
        """
        Train the Transformer model on the provided data.

        Args:
            X_ts (numpy.ndarray): Time series input features
            X_features (numpy.ndarray): Additional input features
            y (numpy.ndarray): Target labels
            batch_size (int): Batch size
            epochs (int): Number of epochs
            validation_split (float): Fraction of data to use for validation
            callbacks (list): List of Keras callbacks
            class_weights (dict): Class weights for imbalanced datasets

        Returns:
            History object containing training metrics
        """
        if self.model is None:
            self.build_model()

        # Default callbacks if none provided
        if callbacks is None:
            # Create a timestamp for model checkpoints
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

            callbacks = [
                EarlyStopping(
                    monitor='val_loss',
                    patience=10,
                    restore_best_weights=True
                ),
                ReduceLROnPlateau(
                    monitor='val_loss',
                    factor=0.5,
                    patience=5,
                    min_lr=1e-6
                ),
                ModelCheckpoint(
                    filepath=os.path.join(self.model_dir, f"transformer_model_{timestamp}.h5"),
                    monitor='val_loss',
                    save_best_only=True
                )
            ]

        # Check if y needs to be one-hot encoded for multi-class
        if self.output_size == 3 and len(y.shape) == 1:
            y = tf.keras.utils.to_categorical(y, num_classes=3)

        # Train the model
        logger.info(f"Training Transformer model with {len(X_ts)} samples, batch size {batch_size}, epochs {epochs}")
        self.history = self.model.fit(
            [X_ts, X_features], y,
            batch_size=batch_size,
            epochs=epochs,
            validation_split=validation_split,
            callbacks=callbacks,
            class_weight=class_weights,
            verbose=2
        )

        # Save the trained model
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        model_path = os.path.join(self.model_dir, f"transformer_model_final_{timestamp}.h5")
        self.model.save(model_path)
        logger.info(f"Model saved to {model_path}")

        # Save training history
        history_path = os.path.join(self.model_dir, f"transformer_model_history_{timestamp}.json")
        with open(history_path, 'w') as f:
            # Convert numpy values to Python native types for JSON serialization
            history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
            json.dump(history_dict, f, indent=2)

        return self.history

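    # class_weights example (hypothetical numbers): to upweight rare
    # BUY/SELL labels relative to HOLD in the 3-class setup, pass e.g.
    #
    #     model.train(X_ts, X_feat, y, class_weights={0: 2.0, 1: 0.5, 2: 2.0})
    #
    # Keras multiplies each sample's loss by the weight of its class.
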
    def evaluate(self, X_ts, X_features, y):
        """
        Evaluate the model on test data.

        Args:
            X_ts (numpy.ndarray): Time series input features
            X_features (numpy.ndarray): Additional input features
            y (numpy.ndarray): Target labels

        Returns:
            dict: Evaluation metrics
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Convert y to one-hot encoding for multi-class
        if self.output_size == 3 and len(y.shape) == 1:
            y = tf.keras.utils.to_categorical(y, num_classes=3)

        # Evaluate model
        logger.info(f"Evaluating Transformer model on {len(X_ts)} samples")
        eval_results = self.model.evaluate([X_ts, X_features], y, verbose=0)

        metrics = {}
        for metric, value in zip(self.model.metrics_names, eval_results):
            metrics[metric] = value
            logger.info(f"{metric}: {value:.4f}")

        return metrics

    def predict(self, X_ts, X_features=None):
        """
        Make predictions on new data.

        Args:
            X_ts (numpy.ndarray): Time series input features
            X_features (numpy.ndarray): Additional input features

        Returns:
            tuple: (y_pred, y_proba) where:
                y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
                y_proba is the class probability
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Ensure X_ts has the right shape
        if len(X_ts.shape) == 2:
            # Single sample, add batch dimension
            X_ts = np.expand_dims(X_ts, axis=0)

        # Ensure X_features has the right shape
        if X_features is None:
            # Extract features from time series data if no external features provided
            X_features = self._extract_features_from_timeseries(X_ts)
        elif len(X_features.shape) == 1:
            # Single sample, add batch dimension
            X_features = np.expand_dims(X_features, axis=0)

        # Get predictions
        y_proba = self.model.predict([X_ts, X_features])

        # Process based on output type
        if self.output_size == 1:
            # Binary classification
            y_pred = (y_proba > 0.5).astype(int).flatten()
            return y_pred, y_proba.flatten()
        elif self.output_size == 3:
            # Multi-class classification
            y_pred = np.argmax(y_proba, axis=1)
            return y_pred, y_proba
        else:
            # Regression
            return y_proba, y_proba

    def _extract_features_from_timeseries(self, X_ts: np.ndarray) -> np.ndarray:
        """Extract meaningful features from time series data instead of using dummy zeros"""
        try:
            batch_size = X_ts.shape[0]
            features = []

            for i in range(batch_size):
                sample = X_ts[i]  # Shape: (timesteps, features)

                # Extract statistical features from each feature dimension
                sample_features = []

                for feature_idx in range(sample.shape[1]):
                    feature_data = sample[:, feature_idx]

                    # Basic statistical features
                    sample_features.extend([
                        np.mean(feature_data),            # Mean
                        np.std(feature_data),             # Standard deviation
                        np.min(feature_data),             # Minimum
                        np.max(feature_data),             # Maximum
                        np.percentile(feature_data, 25),  # 25th percentile
                        np.percentile(feature_data, 75),  # 75th percentile
                    ])

                    # Trend features
                    if len(feature_data) > 1:
                        # Linear trend (slope)
                        x = np.arange(len(feature_data))
                        slope = np.polyfit(x, feature_data, 1)[0]
                        sample_features.append(slope)

                        # Rate of change
                        rate_of_change = (feature_data[-1] - feature_data[0]) / feature_data[0] if feature_data[0] != 0 else 0
                        sample_features.append(rate_of_change)
                    else:
                        sample_features.extend([0.0, 0.0])

                # Pad or truncate to the expected feature size
                while len(sample_features) < self.feature_input_shape:
                    sample_features.append(0.0)
                sample_features = sample_features[:self.feature_input_shape]

                features.append(sample_features)

            return np.array(features, dtype=np.float32)

        except Exception as e:
            logger.error(f"Error extracting features from time series: {e}")
            # Fall back to zeros if extraction fails
            return np.zeros((X_ts.shape[0], self.feature_input_shape), dtype=np.float32)

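    # Feature layout produced by _extract_features_from_timeseries: each
    # input feature dimension contributes 6 distribution statistics (mean,
    # std, min, max, 25th/75th percentile) plus 2 trend values (slope and
    # rate of change), i.e. 8 numbers per dimension. With the default
    # (20, 5) time series input that is 5 * 8 = 40 values, zero-padded up
    # to feature_input_shape (64 by default).
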
    def save(self, filepath=None):
        """
        Save the model to disk.

        Args:
            filepath (str): Path to save the model

        Returns:
            str: Path where the model was saved
        """
        if self.model is None:
            raise ValueError("Model has not been built yet")

        if filepath is None:
            # Create a default filepath with timestamp
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            filepath = os.path.join(self.model_dir, f"transformer_model_{timestamp}.h5")

        self.model.save(filepath)
        logger.info(f"Model saved to {filepath}")
        return filepath

    def load(self, filepath):
        """
        Load a saved model from disk.

        Args:
            filepath (str): Path to the saved model

        Returns:
            The loaded model
        """
        # Register custom layers
        custom_objects = {
            'TransformerBlock': TransformerBlock,
            'PositionalEncoding': PositionalEncoding
        }

        self.model = load_model(filepath, custom_objects=custom_objects)
        logger.info(f"Model loaded from {filepath}")
        return self.model

    def plot_training_history(self):
        """
        Plot training history (loss and metrics).

        Returns:
            str: Path to the saved plot
        """
        if self.history is None:
            raise ValueError("Model has not been trained yet")

        plt.figure(figsize=(12, 5))

        # Plot loss
        plt.subplot(1, 2, 1)
        plt.plot(self.history.history['loss'], label='Training Loss')
        if 'val_loss' in self.history.history:
            plt.plot(self.history.history['val_loss'], label='Validation Loss')
        plt.title('Model Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()

        # Plot accuracy
        plt.subplot(1, 2, 2)

        if 'accuracy' in self.history.history:
            plt.plot(self.history.history['accuracy'], label='Training Accuracy')
            if 'val_accuracy' in self.history.history:
                plt.plot(self.history.history['val_accuracy'], label='Validation Accuracy')
            plt.title('Model Accuracy')
            plt.ylabel('Accuracy')
        elif 'mae' in self.history.history:
            plt.plot(self.history.history['mae'], label='Training MAE')
            if 'val_mae' in self.history.history:
                plt.plot(self.history.history['val_mae'], label='Validation MAE')
            plt.title('Model MAE')
            plt.ylabel('MAE')

        plt.xlabel('Epoch')
        plt.legend()

        plt.tight_layout()

        # Save figure
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        fig_path = os.path.join(self.model_dir, f"transformer_training_history_{timestamp}.png")
        plt.savefig(fig_path)
        plt.close()

        logger.info(f"Training history plot saved to {fig_path}")
        return fig_path


class MixtureOfExpertsModel:
    """
    Mixture of Experts (MoE) model.

    This model combines predictions from multiple expert models (such as CNN and Transformer)
    using a weighted ensemble approach.
    """

    def __init__(self, output_size=1, model_dir="NN/models/saved"):
        """
        Initialize the MoE model.

        Args:
            output_size (int): Number of output classes (1 for binary, 3 for buy/hold/sell)
            model_dir (str): Directory to save trained models
        """
        self.output_size = output_size
        self.model_dir = model_dir
        self.model = None
        self.history = None
        self.experts = {}

        # Create model directory if it doesn't exist
        os.makedirs(self.model_dir, exist_ok=True)

        logger.info(f"Initialized Mixture of Experts model with output size {output_size}")

    def add_expert(self, name, model):
        """
        Add an expert model to the MoE.

        Args:
            name (str): Name of the expert model
            model: The expert model instance

        Returns:
            None
        """
        self.experts[name] = model
        logger.info(f"Added expert model '{name}' to MoE")

    def build_model(self, ts_input_shape=(20, 5), expert_weights=None, learning_rate=0.001):
        """
        Build the MoE model by combining expert models.

        Args:
            ts_input_shape (tuple): Shape of time series input data
            expert_weights (dict): Weights for each expert model
            learning_rate (float): Learning rate for Adam optimizer

        Returns:
            The compiled model
        """
        # Time series input
        ts_inputs = Input(shape=ts_input_shape, name="ts_input")

        # Additional feature input (from CNN)
        feature_inputs = Input(shape=(64,), name="feature_input")  # Default size for features

        # Process with each expert model
        expert_outputs = []
        expert_names = []

        for name, expert in self.experts.items():
            # Skip if expert model is not valid or doesn't have a call/predict method
            if expert is None:
                logger.warning(f"Expert model '{name}' is None, skipping")
                continue

            try:
                # Different handling based on model type
                if name == 'cnn':
                    # CNN model takes only time series input
                    expert_output = expert(ts_inputs)
                    expert_outputs.append(expert_output)
                    expert_names.append(name)
                elif name == 'transformer':
                    # Transformer model takes both time series and feature inputs
                    expert_output = expert([ts_inputs, feature_inputs])
                    expert_outputs.append(expert_output)
                    expert_names.append(name)
                else:
                    logger.warning(f"Unknown expert model type: {name}")
            except Exception as e:
                logger.error(f"Error adding expert '{name}': {str(e)}")

        if not expert_outputs:
            logger.error("No valid expert models found")
            return None

        # Use expert weighting
        if expert_weights is None:
            # Equal weighting
            weights = [1.0 / len(expert_outputs)] * len(expert_outputs)
        else:
            # User-provided weights, normalized to sum to 1
            weights = [expert_weights.get(name, 1.0 / len(expert_outputs)) for name in expert_names]
            weights = [w / sum(weights) for w in weights]

        # Combine expert outputs using weighted average
        if len(expert_outputs) == 1:
            # Only one expert, use its output directly
            combined_output = expert_outputs[0]
        else:
            # Multiple experts, compute weighted average
            weighted_outputs = [output * weight for output, weight in zip(expert_outputs, weights)]
            combined_output = Add()(weighted_outputs)

        # Create the MoE model
        moe_model = Model(inputs=[ts_inputs, feature_inputs], outputs=combined_output)

        # Compile the model
        if self.output_size == 1:
            # Binary classification
            moe_model.compile(
                optimizer=Adam(learning_rate=learning_rate),
                loss='binary_crossentropy',
                metrics=['accuracy']
            )
        elif self.output_size == 3:
            # Multi-class classification for BUY/HOLD/SELL
            moe_model.compile(
                optimizer=Adam(learning_rate=learning_rate),
                loss='categorical_crossentropy',
                metrics=['accuracy']
            )
        else:
            # Regression
            moe_model.compile(
                optimizer=Adam(learning_rate=learning_rate),
                loss='mse',
                metrics=['mae']
            )

        self.model = moe_model

        # Log model summary
        self.model.summary(print_fn=lambda x: logger.info(x))

        logger.info(f"Built MoE model with weights: {weights}")
        return self.model

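    # Example of the weighting above (hypothetical numbers): with
    # expert_weights = {'cnn': 3.0, 'transformer': 1.0} the raw weights
    # [3.0, 1.0] normalize to [0.75, 0.25], so the ensemble output is
    # 0.75 * cnn_output + 0.25 * transformer_output.
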
    def train(self, X_ts, X_features, y, batch_size=32, epochs=100, validation_split=0.2,
              callbacks=None, class_weights=None):
        """
        Train the MoE model on the provided data.

        Args:
            X_ts (numpy.ndarray): Time series input features
            X_features (numpy.ndarray): Additional input features
            y (numpy.ndarray): Target labels
            batch_size (int): Batch size
            epochs (int): Number of epochs
            validation_split (float): Fraction of data to use for validation
            callbacks (list): List of Keras callbacks
            class_weights (dict): Class weights for imbalanced datasets

        Returns:
            History object containing training metrics
        """
        if self.model is None:
            logger.error("MoE model has not been built yet")
            return None

        # Default callbacks if none provided
        if callbacks is None:
            # Create a timestamp for model checkpoints
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

            callbacks = [
                EarlyStopping(
                    monitor='val_loss',
                    patience=10,
                    restore_best_weights=True
                ),
                ReduceLROnPlateau(
                    monitor='val_loss',
                    factor=0.5,
                    patience=5,
                    min_lr=1e-6
                ),
                ModelCheckpoint(
                    filepath=os.path.join(self.model_dir, f"moe_model_{timestamp}.h5"),
                    monitor='val_loss',
                    save_best_only=True
                )
            ]

        # Check if y needs to be one-hot encoded for multi-class
        if self.output_size == 3 and len(y.shape) == 1:
            y = tf.keras.utils.to_categorical(y, num_classes=3)

        # Train the model
        logger.info(f"Training MoE model with {len(X_ts)} samples, batch size {batch_size}, epochs {epochs}")
        self.history = self.model.fit(
            [X_ts, X_features], y,
            batch_size=batch_size,
            epochs=epochs,
            validation_split=validation_split,
            callbacks=callbacks,
            class_weight=class_weights,
            verbose=2
        )

        # Save the trained model
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        model_path = os.path.join(self.model_dir, f"moe_model_final_{timestamp}.h5")
        self.model.save(model_path)
        logger.info(f"Model saved to {model_path}")

        # Save training history
        history_path = os.path.join(self.model_dir, f"moe_model_history_{timestamp}.json")
        with open(history_path, 'w') as f:
            # Convert numpy values to Python native types for JSON serialization
            history_dict = {key: [float(value) for value in values] for key, values in self.history.history.items()}
            json.dump(history_dict, f, indent=2)

        return self.history

    def predict(self, X_ts, X_features=None):
        """
        Make predictions on new data.

        Args:
            X_ts (numpy.ndarray): Time series input features
            X_features (numpy.ndarray): Additional input features

        Returns:
            tuple: (y_pred, y_proba) where:
                y_pred is the predicted class (0/1 for binary, 0/1/2 for multi-class)
                y_proba is the class probability
        """
        if self.model is None:
            raise ValueError("Model has not been built or trained yet")

        # Ensure X_ts has the right shape
        if len(X_ts.shape) == 2:
            # Single sample, add batch dimension
            X_ts = np.expand_dims(X_ts, axis=0)

        # Ensure X_features has the right shape
        if X_features is None:
            # Create dummy features with zeros
            X_features = np.zeros((X_ts.shape[0], 64))  # Default size
        elif len(X_features.shape) == 1:
            # Single sample, add batch dimension
            X_features = np.expand_dims(X_features, axis=0)

        # Get predictions
        y_proba = self.model.predict([X_ts, X_features])

        # Process based on output type
        if self.output_size == 1:
            # Binary classification
            y_pred = (y_proba > 0.5).astype(int).flatten()
            return y_pred, y_proba.flatten()
        elif self.output_size == 3:
            # Multi-class classification
            y_pred = np.argmax(y_proba, axis=1)
            return y_pred, y_proba
        else:
            # Regression
            return y_proba, y_proba

    def save(self, filepath=None):
        """
        Save the model to disk.

        Args:
            filepath (str): Path to save the model

        Returns:
            str: Path where the model was saved
        """
        if self.model is None:
            raise ValueError("Model has not been built yet")

        if filepath is None:
            # Create a default filepath with timestamp
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            filepath = os.path.join(self.model_dir, f"moe_model_{timestamp}.h5")

        self.model.save(filepath)
        logger.info(f"Model saved to {filepath}")
        return filepath

    def load(self, filepath):
        """
        Load a saved model from disk.

        Args:
            filepath (str): Path to the saved model

        Returns:
            The loaded model
        """
        # Register custom layers
        custom_objects = {
            'TransformerBlock': TransformerBlock,
            'PositionalEncoding': PositionalEncoding
        }

        self.model = load_model(filepath, custom_objects=custom_objects)
        logger.info(f"Model loaded from {filepath}")
        return self.model


# Example usage:
if __name__ == "__main__":
    # A real system would wire these models into the full training
    # pipeline; here we only run a quick smoke test.
    print("Transformer and MoE models defined; running a quick smoke test.")
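    # Minimal smoke test on synthetic data (a sketch: the shapes, sample
    # count, and label scheme below are illustrative assumptions, not
    # values taken from the real trading pipeline).
    rng = np.random.default_rng(42)
    X_ts = rng.standard_normal((8, 20, 5)).astype(np.float32)   # (samples, timesteps, features)
    X_feat = rng.standard_normal((8, 64)).astype(np.float32)    # auxiliary feature vectors

    transformer = TransformerModel(ts_input_shape=(20, 5), feature_input_shape=64, output_size=3)
    transformer.build_model()

    # Untrained weights, so predictions are arbitrary; this only checks
    # that the graph wires together and produces sane shapes.
    y_pred, y_proba = transformer.predict(X_ts, X_feat)
    print(f"Transformer classes: {y_pred}, probability matrix shape: {y_proba.shape}")

    # Wrap the built Keras model as a single 'transformer' expert in the MoE.
    moe = MixtureOfExpertsModel(output_size=3)
    moe.add_expert('transformer', transformer.model)
    moe.build_model(ts_input_shape=(20, 5))
    moe_pred, moe_proba = moe.predict(X_ts, X_feat)
    print(f"MoE classes: {moe_pred}")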