models overhaul

This commit is contained in:
Dobromir Popov
2025-07-29 19:22:04 +03:00
parent 0b5fa07498
commit b1ae557843
7 changed files with 465 additions and 430 deletions

View File

@ -1,201 +1,201 @@
""" # """
Legacy CNN Model Compatibility Layer # Legacy CNN Model Compatibility Layer
This module provides compatibility redirects to the unified StandardizedCNN model. # This module provides compatibility redirects to the unified StandardizedCNN model.
All legacy models (EnhancedCNNModel, CNNModelTrainer, CNNModel) have been retired # All legacy models (EnhancedCNNModel, CNNModelTrainer, CNNModel) have been retired
in favor of the StandardizedCNN architecture. # in favor of the StandardizedCNN architecture.
""" # """
import logging # import logging
import warnings # import warnings
from typing import Tuple, Dict, Any, Optional # from typing import Tuple, Dict, Any, Optional
import torch # import torch
import numpy as np # import numpy as np
# Import the standardized CNN model # # Import the standardized CNN model
from .standardized_cnn import StandardizedCNN # from .standardized_cnn import StandardizedCNN
logger = logging.getLogger(__name__) # logger = logging.getLogger(__name__)
# Compatibility aliases and wrappers # # Compatibility aliases and wrappers
class EnhancedCNNModel: # class EnhancedCNNModel:
"""Legacy compatibility wrapper - redirects to StandardizedCNN""" # """Legacy compatibility wrapper - redirects to StandardizedCNN"""
def __init__(self, *args, **kwargs): # def __init__(self, *args, **kwargs):
warnings.warn( # warnings.warn(
"EnhancedCNNModel is deprecated. Use StandardizedCNN instead.", # "EnhancedCNNModel is deprecated. Use StandardizedCNN instead.",
DeprecationWarning, # DeprecationWarning,
stacklevel=2 # stacklevel=2
) # )
# Create StandardizedCNN with default parameters # # Create StandardizedCNN with default parameters
self.standardized_cnn = StandardizedCNN() # self.standardized_cnn = StandardizedCNN()
logger.warning("EnhancedCNNModel compatibility wrapper created - please migrate to StandardizedCNN") # logger.warning("EnhancedCNNModel compatibility wrapper created - please migrate to StandardizedCNN")
def __getattr__(self, name): # def __getattr__(self, name):
"""Delegate all method calls to StandardizedCNN""" # """Delegate all method calls to StandardizedCNN"""
return getattr(self.standardized_cnn, name) # return getattr(self.standardized_cnn, name)
class CNNModelTrainer: # class CNNModelTrainer:
"""Legacy compatibility wrapper for CNN training""" # """Legacy compatibility wrapper for CNN training"""
def __init__(self, model=None, *args, **kwargs): # def __init__(self, model=None, *args, **kwargs):
warnings.warn( # warnings.warn(
"CNNModelTrainer is deprecated. Use StandardizedCNN.train_step() instead.", # "CNNModelTrainer is deprecated. Use StandardizedCNN.train_step() instead.",
DeprecationWarning, # DeprecationWarning,
stacklevel=2 # stacklevel=2
) # )
if isinstance(model, EnhancedCNNModel): # if isinstance(model, EnhancedCNNModel):
self.model = model.standardized_cnn # self.model = model.standardized_cnn
else: # else:
self.model = StandardizedCNN() # self.model = StandardizedCNN()
logger.warning("CNNModelTrainer compatibility wrapper created - please use StandardizedCNN.train_step()") # logger.warning("CNNModelTrainer compatibility wrapper created - please use StandardizedCNN.train_step()")
def train_step(self, x, y, *args, **kwargs): # def train_step(self, x, y, *args, **kwargs):
"""Legacy train step wrapper""" # """Legacy train step wrapper"""
try: # try:
# Convert to BaseDataInput format if needed # # Convert to BaseDataInput format if needed
if hasattr(x, 'get_feature_vector'): # if hasattr(x, 'get_feature_vector'):
# Already BaseDataInput # # Already BaseDataInput
base_input = x # base_input = x
else: # else:
# Create mock BaseDataInput for legacy compatibility # # Create mock BaseDataInput for legacy compatibility
from core.data_models import BaseDataInput # from core.data_models import BaseDataInput
base_input = BaseDataInput() # base_input = BaseDataInput()
# Set mock feature vector # # Set mock feature vector
if isinstance(x, torch.Tensor): # if isinstance(x, torch.Tensor):
feature_vector = x.flatten().cpu().numpy() # feature_vector = x.flatten().cpu().numpy()
else: # else:
feature_vector = np.array(x).flatten() # feature_vector = np.array(x).flatten()
# Pad or truncate to expected size # # Pad or truncate to expected size
expected_size = self.model.expected_feature_dim # expected_size = self.model.expected_feature_dim
if len(feature_vector) < expected_size: # if len(feature_vector) < expected_size:
padding = np.zeros(expected_size - len(feature_vector)) # padding = np.zeros(expected_size - len(feature_vector))
feature_vector = np.concatenate([feature_vector, padding]) # feature_vector = np.concatenate([feature_vector, padding])
else: # else:
feature_vector = feature_vector[:expected_size] # feature_vector = feature_vector[:expected_size]
base_input._feature_vector = feature_vector # base_input._feature_vector = feature_vector
# Convert target to string format # # Convert target to string format
if isinstance(y, torch.Tensor): # if isinstance(y, torch.Tensor):
y_val = y.item() if y.numel() == 1 else y.argmax().item() # y_val = y.item() if y.numel() == 1 else y.argmax().item()
else: # else:
y_val = int(y) if np.isscalar(y) else int(np.argmax(y)) # y_val = int(y) if np.isscalar(y) else int(np.argmax(y))
target_map = {0: 'BUY', 1: 'SELL', 2: 'HOLD'} # target_map = {0: 'BUY', 1: 'SELL', 2: 'HOLD'}
target = target_map.get(y_val, 'HOLD') # target = target_map.get(y_val, 'HOLD')
# Use StandardizedCNN training # # Use StandardizedCNN training
optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001) # optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)
loss = self.model.train_step([base_input], [target], optimizer) # loss = self.model.train_step([base_input], [target], optimizer)
return {'total_loss': loss, 'main_loss': loss, 'accuracy': 0.5} # return {'total_loss': loss, 'main_loss': loss, 'accuracy': 0.5}
except Exception as e: # except Exception as e:
logger.error(f"Legacy train_step error: {e}") # logger.error(f"Legacy train_step error: {e}")
return {'total_loss': 0.0, 'main_loss': 0.0, 'accuracy': 0.5} # return {'total_loss': 0.0, 'main_loss': 0.0, 'accuracy': 0.5}
class CNNModel: # # class CNNModel:
"""Legacy compatibility wrapper for CNN model interface""" # # """Legacy compatibility wrapper for CNN model interface"""
def __init__(self, input_shape=(900, 50), output_size=3, model_path=None): # # def __init__(self, input_shape=(900, 50), output_size=3, model_path=None):
warnings.warn( # # warnings.warn(
"CNNModel is deprecated. Use StandardizedCNN directly.", # # "CNNModel is deprecated. Use StandardizedCNN directly.",
DeprecationWarning, # # DeprecationWarning,
stacklevel=2 # # stacklevel=2
) # # )
self.input_shape = input_shape # # self.input_shape = input_shape
self.output_size = output_size # # self.output_size = output_size
self.standardized_cnn = StandardizedCNN() # # self.standardized_cnn = StandardizedCNN()
self.trainer = CNNModelTrainer(self.standardized_cnn) # # self.trainer = CNNModelTrainer(self.standardized_cnn)
logger.warning("CNNModel compatibility wrapper created - please migrate to StandardizedCNN") # # logger.warning("CNNModel compatibility wrapper created - please migrate to StandardizedCNN")
def build_model(self, **kwargs): # # def build_model(self, **kwargs):
"""Legacy build method - no-op for StandardizedCNN""" # # """Legacy build method - no-op for StandardizedCNN"""
return self # # return self
def predict(self, X): # # def predict(self, X):
"""Legacy predict method""" # # """Legacy predict method"""
try: # # try:
# Convert input to BaseDataInput # # # Convert input to BaseDataInput
from core.data_models import BaseDataInput # # from core.data_models import BaseDataInput
base_input = BaseDataInput() # # base_input = BaseDataInput()
if isinstance(X, np.ndarray): # # if isinstance(X, np.ndarray):
feature_vector = X.flatten() # # feature_vector = X.flatten()
else: # # else:
feature_vector = np.array(X).flatten() # # feature_vector = np.array(X).flatten()
# Pad or truncate to expected size # # # Pad or truncate to expected size
expected_size = self.standardized_cnn.expected_feature_dim # # expected_size = self.standardized_cnn.expected_feature_dim
if len(feature_vector) < expected_size: # # if len(feature_vector) < expected_size:
padding = np.zeros(expected_size - len(feature_vector)) # # padding = np.zeros(expected_size - len(feature_vector))
feature_vector = np.concatenate([feature_vector, padding]) # # feature_vector = np.concatenate([feature_vector, padding])
else: # # else:
feature_vector = feature_vector[:expected_size] # # feature_vector = feature_vector[:expected_size]
base_input._feature_vector = feature_vector # # base_input._feature_vector = feature_vector
# Get prediction from StandardizedCNN # # # Get prediction from StandardizedCNN
result = self.standardized_cnn.predict_from_base_input(base_input) # # result = self.standardized_cnn.predict_from_base_input(base_input)
# Convert to legacy format # # # Convert to legacy format
action_map = {'BUY': 0, 'SELL': 1, 'HOLD': 2} # # action_map = {'BUY': 0, 'SELL': 1, 'HOLD': 2}
pred_class = np.array([action_map.get(result.predictions['action'], 2)]) # # pred_class = np.array([action_map.get(result.predictions['action'], 2)])
pred_proba = np.array([result.predictions['action_probabilities']]) # # pred_proba = np.array([result.predictions['action_probabilities']])
return pred_class, pred_proba # # return pred_class, pred_proba
except Exception as e: # # except Exception as e:
logger.error(f"Legacy predict error: {e}") # # logger.error(f"Legacy predict error: {e}")
# Return safe defaults # # # Return safe defaults
pred_class = np.array([2]) # HOLD # # pred_class = np.array([2]) # HOLD
pred_proba = np.array([[0.33, 0.33, 0.34]]) # # pred_proba = np.array([[0.33, 0.33, 0.34]])
return pred_class, pred_proba # # return pred_class, pred_proba
def fit(self, X, y, **kwargs): # # def fit(self, X, y, **kwargs):
"""Legacy fit method""" # # """Legacy fit method"""
try: # # try:
return self.trainer.train_step(X, y) # # return self.trainer.train_step(X, y)
except Exception as e: # # except Exception as e:
logger.error(f"Legacy fit error: {e}") # # logger.error(f"Legacy fit error: {e}")
return self # # return self
def save(self, filepath: str): # # def save(self, filepath: str):
"""Legacy save method""" # # """Legacy save method"""
try: # # try:
torch.save(self.standardized_cnn.state_dict(), filepath) # # torch.save(self.standardized_cnn.state_dict(), filepath)
logger.info(f"StandardizedCNN saved to {filepath}") # # logger.info(f"StandardizedCNN saved to {filepath}")
except Exception as e: # # except Exception as e:
logger.error(f"Error saving model: {e}") # # logger.error(f"Error saving model: {e}")
def create_enhanced_cnn_model(input_size: int = 60, # def create_enhanced_cnn_model(input_size: int = 60,
feature_dim: int = 50, # feature_dim: int = 50,
output_size: int = 3, # output_size: int = 3,
base_channels: int = 256, # base_channels: int = 256,
device: str = 'cuda') -> Tuple[StandardizedCNN, CNNModelTrainer]: # device: str = 'cuda') -> Tuple[StandardizedCNN, CNNModelTrainer]:
"""Legacy compatibility function - returns StandardizedCNN""" # """Legacy compatibility function - returns StandardizedCNN"""
warnings.warn( # warnings.warn(
"create_enhanced_cnn_model is deprecated. Use StandardizedCNN() directly.", # "create_enhanced_cnn_model is deprecated. Use StandardizedCNN() directly.",
DeprecationWarning, # DeprecationWarning,
stacklevel=2 # stacklevel=2
) # )
model = StandardizedCNN() # model = StandardizedCNN()
trainer = CNNModelTrainer(model) # trainer = CNNModelTrainer(model)
logger.warning("Legacy create_enhanced_cnn_model called - please use StandardizedCNN directly") # logger.warning("Legacy create_enhanced_cnn_model called - please use StandardizedCNN directly")
return model, trainer # return model, trainer
# Export compatibility symbols # # Export compatibility symbols
__all__ = [ # __all__ = [
'EnhancedCNNModel', # 'EnhancedCNNModel',
'CNNModelTrainer', # 'CNNModelTrainer',
'CNNModel', # # 'CNNModel',
'create_enhanced_cnn_model' # 'create_enhanced_cnn_model'
] # ]

View File

@ -23,11 +23,11 @@ logger = logging.getLogger(__name__)
class DQNNetwork(nn.Module): class DQNNetwork(nn.Module):
""" """
Massive Deep Q-Network specifically designed for RL trading with unified BaseDataInput features Configurable Deep Q-Network specifically designed for RL trading with unified BaseDataInput features
Handles 7850 input features from multi-timeframe, multi-asset data Handles 7850 input features from multi-timeframe, multi-asset data
TARGET: 50M parameters for enhanced learning capacity Architecture is configurable via the network_architecture section of the RL config (rl: in models.yml)
""" """
def __init__(self, input_dim: int, n_actions: int): def __init__(self, input_dim: int, n_actions: int, config: dict = None):
super(DQNNetwork, self).__init__() super(DQNNetwork, self).__init__()
# Handle different input dimension formats # Handle different input dimension formats
@ -41,59 +41,65 @@ class DQNNetwork(nn.Module):
self.n_actions = n_actions self.n_actions = n_actions
# MASSIVE network architecture optimized for trading features # Get network architecture from config or use defaults
# Target: ~50M parameters if config and 'network_architecture' in config:
self.feature_extractor = nn.Sequential( arch_config = config['network_architecture']
# Initial feature extraction with massive width feature_layers = arch_config.get('feature_layers', [4096, 3072, 2048, 1536, 1024])
nn.Linear(self.input_size, 8192), # 7850 -> 8192 = ~64M weights regime_head = arch_config.get('regime_head', [512, 256])
nn.LayerNorm(8192), price_direction_head = arch_config.get('price_direction_head', [512, 256])
nn.ReLU(inplace=True), volatility_head = arch_config.get('volatility_head', [512, 128])
nn.Dropout(0.1), value_head = arch_config.get('value_head', [512, 256])
advantage_head = arch_config.get('advantage_head', [512, 256])
dropout_rate = arch_config.get('dropout_rate', 0.1)
use_layer_norm = arch_config.get('use_layer_norm', True)
else:
# Default reduced architecture (half the original size)
feature_layers = [4096, 3072, 2048, 1536, 1024]
regime_head = [512, 256]
price_direction_head = [512, 256]
volatility_head = [512, 128]
value_head = [512, 256]
advantage_head = [512, 256]
dropout_rate = 0.1
use_layer_norm = True
# Deep feature processing layers # Build configurable feature extractor
nn.Linear(8192, 6144), # 8192 -> 6144 = ~50M weights feature_layers_list = []
nn.LayerNorm(6144), prev_size = self.input_size
nn.ReLU(inplace=True),
nn.Dropout(0.1),
nn.Linear(6144, 4096), # 6144 -> 4096 = ~25M weights for layer_size in feature_layers:
nn.LayerNorm(4096), feature_layers_list.append(nn.Linear(prev_size, layer_size))
nn.ReLU(inplace=True), if use_layer_norm:
nn.Dropout(0.1), feature_layers_list.append(nn.LayerNorm(layer_size))
feature_layers_list.append(nn.ReLU(inplace=True))
feature_layers_list.append(nn.Dropout(dropout_rate))
prev_size = layer_size
nn.Linear(4096, 3072), # 4096 -> 3072 = ~12M weights self.feature_extractor = nn.Sequential(*feature_layers_list)
nn.LayerNorm(3072), self.feature_size = feature_layers[-1] # Final feature size
nn.ReLU(inplace=True),
nn.Dropout(0.1),
nn.Linear(3072, 2048), # 3072 -> 2048 = ~6M weights # Build configurable network heads
nn.LayerNorm(2048), def build_head_layers(input_size, layer_sizes, output_size):
nn.ReLU(inplace=True), layers = []
nn.Dropout(0.1), prev_size = input_size
) for layer_size in layer_sizes:
layers.append(nn.Linear(prev_size, layer_size))
if use_layer_norm:
layers.append(nn.LayerNorm(layer_size))
layers.append(nn.ReLU(inplace=True))
layers.append(nn.Dropout(dropout_rate))
prev_size = layer_size
layers.append(nn.Linear(prev_size, output_size))
return nn.Sequential(*layers)
# Market regime detection head # Market regime detection head
self.regime_head = nn.Sequential( self.regime_head = build_head_layers(
nn.Linear(2048, 1024), self.feature_size, regime_head, 4 # trending, ranging, volatile, mixed
nn.LayerNorm(1024),
nn.ReLU(inplace=True),
nn.Dropout(0.1),
nn.Linear(1024, 512),
nn.LayerNorm(512),
nn.ReLU(inplace=True),
nn.Linear(512, 4) # trending, ranging, volatile, mixed
) )
# Price direction prediction head - outputs direction and confidence # Price direction prediction head - outputs direction and confidence
self.price_direction_head = nn.Sequential( self.price_direction_head = build_head_layers(
nn.Linear(2048, 1024), self.feature_size, price_direction_head, 2 # [direction, confidence]
nn.LayerNorm(1024),
nn.ReLU(inplace=True),
nn.Dropout(0.1),
nn.Linear(1024, 512),
nn.LayerNorm(512),
nn.ReLU(inplace=True),
nn.Linear(512, 2) # [direction, confidence]
) )
# Direction activation (tanh for -1 to 1) # Direction activation (tanh for -1 to 1)
@ -102,38 +108,18 @@ class DQNNetwork(nn.Module):
self.confidence_activation = nn.Sigmoid() self.confidence_activation = nn.Sigmoid()
# Volatility prediction head # Volatility prediction head
self.volatility_head = nn.Sequential( self.volatility_head = build_head_layers(
nn.Linear(2048, 1024), self.feature_size, volatility_head, 4 # predicted volatility for 4 timeframes
nn.LayerNorm(1024),
nn.ReLU(inplace=True),
nn.Dropout(0.1),
nn.Linear(1024, 256),
nn.LayerNorm(256),
nn.ReLU(inplace=True),
nn.Linear(256, 4) # predicted volatility for 4 timeframes
) )
# Main Q-value head (dueling architecture) # Main Q-value head (dueling architecture)
self.value_head = nn.Sequential( self.value_head = build_head_layers(
nn.Linear(2048, 1024), self.feature_size, value_head, 1 # Single value for dueling architecture
nn.LayerNorm(1024),
nn.ReLU(inplace=True),
nn.Dropout(0.1),
nn.Linear(1024, 512),
nn.LayerNorm(512),
nn.ReLU(inplace=True),
nn.Linear(512, 1) # State value
) )
self.advantage_head = nn.Sequential( # Advantage head (dueling architecture)
nn.Linear(2048, 1024), self.advantage_head = build_head_layers(
nn.LayerNorm(1024), self.feature_size, advantage_head, n_actions # Action advantages
nn.ReLU(inplace=True),
nn.Dropout(0.1),
nn.Linear(1024, 512),
nn.LayerNorm(512),
nn.ReLU(inplace=True),
nn.Linear(512, n_actions) # Action advantages
) )
# Initialize weights # Initialize weights
@ -248,7 +234,8 @@ class DQNAgent:
priority_memory: bool = True, priority_memory: bool = True,
device=None, device=None,
model_name: str = "dqn_agent", model_name: str = "dqn_agent",
enable_checkpoints: bool = True): enable_checkpoints: bool = True,
config: dict = None):
# Checkpoint management # Checkpoint management
self.model_name = model_name self.model_name = model_name
@ -292,8 +279,8 @@ class DQNAgent:
logger.info(f"DQN Agent using device: {self.device}") logger.info(f"DQN Agent using device: {self.device}")
# Initialize models with RL-specific network architecture # Initialize models with RL-specific network architecture
self.policy_net = DQNNetwork(self.state_dim, self.n_actions).to(self.device) self.policy_net = DQNNetwork(self.state_dim, self.n_actions, config).to(self.device)
self.target_net = DQNNetwork(self.state_dim, self.n_actions).to(self.device) self.target_net = DQNNetwork(self.state_dim, self.n_actions, config).to(self.device)
# Ensure models are on the correct device # Ensure models are on the correct device
self.policy_net = self.policy_net.to(self.device) self.policy_net = self.policy_net.to(self.device)

View File

@ -88,119 +88,14 @@ data:
market_regime_detection: true market_regime_detection: true
volatility_analysis: true volatility_analysis: true
# Enhanced CNN Configuration # Model configurations have been moved to models.yml for better organization
cnn: # See models.yml for all model-specific settings including:
window_size: 20 # - CNN configuration (currently commented out in models.yml)
features: ["open", "high", "low", "close", "volume"] # - RL/DQN configuration
timeframes: ["1m", "5m", "15m", "1h", "4h", "1d"] # - Orchestrator settings
hidden_layers: [64, 128, 256] # - Training configuration
dropout: 0.2 # - Enhanced training system
learning_rate: 0.001 # - Real-time RL COB trader
batch_size: 32
epochs: 100
confidence_threshold: 0.6
early_stopping_patience: 10
model_dir: "models/enhanced_cnn" # Ultra-fast scalping weights (500x leverage)
timeframe_importance:
"1s": 0.60 # Primary scalping signal
"1m": 0.20 # Short-term confirmation
"1h": 0.15 # Medium-term trend
"1d": 0.05 # Long-term direction (minimal)
# Enhanced RL Agent Configuration
rl:
state_size: 100 # Will be calculated dynamically based on features
action_space: 3 # BUY, HOLD, SELL
hidden_size: 256
epsilon: 1.0
epsilon_decay: 0.995
epsilon_min: 0.01
learning_rate: 0.0001
gamma: 0.99
memory_size: 10000
batch_size: 64
target_update_freq: 1000
buffer_size: 10000
model_dir: "models/enhanced_rl"
# Market regime adaptation
market_regime_weights:
trending: 1.2 # Higher confidence in trending markets
ranging: 0.8 # Lower confidence in ranging markets
volatile: 0.6 # Much lower confidence in volatile markets
# Prioritized experience replay
replay_alpha: 0.6 # Priority exponent
replay_beta: 0.4 # Importance sampling exponent
# Enhanced Orchestrator Settings
orchestrator:
# Model weights for decision combination
cnn_weight: 0.7 # Weight for CNN predictions
rl_weight: 0.3 # Weight for RL decisions
confidence_threshold: 0.45
confidence_threshold_close: 0.35
decision_frequency: 30
# Multi-symbol coordination
symbol_correlation_matrix:
"ETH/USDT-BTC/USDT": 0.85 # ETH-BTC correlation
# Perfect move marking
perfect_move_threshold: 0.02 # 2% price change to mark as significant
perfect_move_buffer_size: 10000
# RL evaluation settings
evaluation_delay: 3600 # Evaluate actions after 1 hour
reward_calculation:
success_multiplier: 10 # Reward for correct predictions
failure_penalty: 5 # Penalty for wrong predictions
confidence_scaling: true # Scale rewards by confidence
# Entry aggressiveness: 0.0 = very conservative (fewer, higher quality trades), 1.0 = very aggressive (more trades)
entry_aggressiveness: 0.5
# Exit aggressiveness: 0.0 = very conservative (let profits run), 1.0 = very aggressive (quick exits)
exit_aggressiveness: 0.5
# Decision Fusion Configuration
decision_fusion:
enabled: true # Use neural network decision fusion instead of programmatic
mode: "neural" # "neural" or "programmatic"
input_size: 128 # Size of input features for decision fusion network
hidden_size: 256 # Hidden layer size
history_length: 20 # Number of recent decisions to include
training_interval: 10 # Train decision fusion every 10 decisions in programmatic mode
learning_rate: 0.001 # Learning rate for decision fusion network
batch_size: 32 # Training batch size
min_samples_for_training: 20 # Lower threshold for faster training in programmatic mode
# Training Configuration
training:
learning_rate: 0.001
batch_size: 32
epochs: 100
validation_split: 0.2
early_stopping_patience: 10
# CNN specific training
cnn_training_interval: 3600 # Train CNN every hour (was 6 hours)
min_perfect_moves: 50 # Reduced from 200 for faster learning
# RL specific training
rl_training_interval: 300 # Train RL every 5 minutes (was 1 hour)
min_experiences: 50 # Reduced from 100 for faster learning
training_steps_per_cycle: 20 # Increased from 10 for more learning
model_type: "optimized_short_term"
use_realtime: true
use_ticks: true
checkpoint_dir: "NN/models/saved/realtime_ticks_checkpoints"
save_best_model: true
save_final_model: false # We only want to keep the best performing model
# Continuous learning settings
continuous_learning: true
learning_from_trades: true
pattern_recognition: true
retrospective_learning: true
# Universal Trading Configuration (applies to all exchanges) # Universal Trading Configuration (applies to all exchanges)
trading: trading:
@ -227,69 +122,7 @@ memory:
model_limit_gb: 4.0 # Per-model memory limit model_limit_gb: 4.0 # Per-model memory limit
cleanup_interval: 1800 # Memory cleanup every 30 minutes cleanup_interval: 1800 # Memory cleanup every 30 minutes
# Enhanced Training System Configuration # Enhanced training and real-time RL configurations moved to models.yml
enhanced_training:
enabled: true # Enable enhanced real-time training
auto_start: true # Automatically start training when orchestrator starts
training_intervals:
cob_rl_training_interval: 1 # Train COB RL every 1 second (HIGHEST PRIORITY)
dqn_training_interval: 5 # Train DQN every 5 seconds
cnn_training_interval: 10 # Train CNN every 10 seconds
validation_interval: 60 # Validate every minute
batch_size: 64 # Training batch size
memory_size: 10000 # Experience buffer size
min_training_samples: 100 # Minimum samples before training starts
adaptation_threshold: 0.1 # Performance threshold for adaptation
forward_looking_predictions: true # Enable forward-looking prediction validation
# COB RL Priority Settings (since order book imbalance predicts price moves)
cob_rl_priority: true # Enable COB RL as highest priority model
cob_rl_batch_size: 16 # Smaller batches for faster COB updates
cob_rl_min_samples: 5 # Lower threshold for COB training
# Real-time RL COB Trader Configuration
realtime_rl:
# Model parameters for 400M parameter network (faster startup)
model:
input_size: 2000 # COB feature dimensions
hidden_size: 2048 # Optimized hidden layer size for 400M params
num_layers: 8 # Efficient transformer layers for faster training
learning_rate: 0.0001 # Higher learning rate for faster convergence
weight_decay: 0.00001 # Balanced L2 regularization
# Inference configuration
inference_interval_ms: 200 # Inference every 200ms
min_confidence_threshold: 0.7 # Minimum confidence for signal accumulation
required_confident_predictions: 3 # Need 3 confident predictions for trade
# Training configuration
training_interval_s: 1.0 # Train every second
batch_size: 32 # Training batch size
replay_buffer_size: 1000 # Store last 1000 predictions for training
# Signal accumulation
signal_buffer_size: 10 # Buffer size for signal accumulation
consensus_threshold: 3 # Need 3 signals in same direction
# Model checkpointing
model_checkpoint_dir: "models/realtime_rl_cob"
save_interval_s: 300 # Save models every 5 minutes
# COB integration
symbols: ["BTC/USDT", "ETH/USDT"] # Symbols to trade
cob_feature_normalization: "robust" # Feature normalization method
# Reward engineering for RL
reward_structure:
correct_direction_base: 1.0 # Base reward for correct prediction
confidence_scaling: true # Scale reward by confidence
magnitude_bonus: 0.5 # Bonus for predicting magnitude accurately
overconfidence_penalty: 1.5 # Penalty multiplier for wrong high-confidence predictions
trade_execution_multiplier: 10.0 # Higher weight for actual trade outcomes
# Performance monitoring
statistics_interval_s: 60 # Print stats every minute
detailed_logging: true # Enable detailed performance logging
# Web Dashboard # Web Dashboard
web: web:

View File

@ -24,16 +24,31 @@ class Config:
self._setup_directories() self._setup_directories()
def _load_config(self) -> Dict[str, Any]: def _load_config(self) -> Dict[str, Any]:
"""Load configuration from YAML file""" """Load configuration from YAML files (config.yaml + models.yml)"""
try: try:
# Load main config
if not self.config_path.exists(): if not self.config_path.exists():
logger.warning(f"Config file {self.config_path} not found, using defaults") logger.warning(f"Config file {self.config_path} not found, using defaults")
return self._get_default_config() config = self._get_default_config()
else:
with open(self.config_path, 'r') as f:
config = yaml.safe_load(f)
logger.info(f"Loaded main configuration from {self.config_path}")
with open(self.config_path, 'r') as f: # Load models config
config = yaml.safe_load(f) models_config_path = Path("models.yml")
if models_config_path.exists():
try:
with open(models_config_path, 'r') as f:
models_config = yaml.safe_load(f)
# Merge models config into main config
config.update(models_config)
logger.info(f"Loaded models configuration from {models_config_path}")
except Exception as e:
logger.warning(f"Error loading models.yml: {e}, using main config only")
else:
logger.info("models.yml not found, using main config only")
logger.info(f"Loaded configuration from {self.config_path}")
return config return config
except Exception as e: except Exception as e:

View File

@ -605,7 +605,9 @@ class TradingOrchestrator:
action_size = self.config.rl.get("action_space", 3) action_size = self.config.rl.get("action_space", 3)
self.rl_agent = DQNAgent( self.rl_agent = DQNAgent(
state_shape=actual_state_size, n_actions=action_size state_shape=actual_state_size,
n_actions=action_size,
config=self.config.rl
) )
self.rl_agent.to(self.device) # Move DQN agent to the determined device self.rl_agent.to(self.device) # Move DQN agent to the determined device

View File

@ -14,7 +14,7 @@
}, },
"decision_fusion": { "decision_fusion": {
"inference_enabled": false, "inference_enabled": false,
"training_enabled": true "training_enabled": false
}, },
"transformer": { "transformer": {
"inference_enabled": false, "inference_enabled": false,
@ -25,5 +25,5 @@
"training_enabled": true "training_enabled": true
} }
}, },
"timestamp": "2025-07-29T18:37:29.759605" "timestamp": "2025-07-29T19:17:32.971226"
} }

198
models.yml Normal file
View File

@ -0,0 +1,198 @@
# Model Configurations
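# This file contains all model-specific configurations to keep the main config.yaml clean.
# NOTE: the loader merges this file into the main config with a shallow dict update, so a
# top-level section defined here replaces the same-named section of config.yaml as a whole.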
# Enhanced CNN Configuration (the CNN does not currently read its settings from this YAML file)
# cnn:
# window_size: 20
# features: ["open", "high", "low", "close", "volume"]
# timeframes: ["1s", "1m", "1h", "1d"]
# hidden_layers: [64, 128, 256]
# dropout: 0.2
# learning_rate: 0.001
# batch_size: 32
# epochs: 100
# confidence_threshold: 0.6
# early_stopping_patience: 10
# model_dir: "models/enhanced_cnn"
# Ultra-fast scalping weights (500x leverage)
# timeframe_importance:
# "1s": 0.60 # Primary scalping signal
# "1m": 0.20 # Short-term confirmation
# "1h": 0.15 # Medium-term trend
# "1d": 0.05 # Long-term direction (minimal)
# Enhanced RL Agent Configuration
rl:
state_size: 100 # Will be calculated dynamically based on features
action_space: 3 # BUY, HOLD, SELL
hidden_size: 256
epsilon: 1.0
epsilon_decay: 0.995
epsilon_min: 0.01
learning_rate: 0.0001
gamma: 0.99
memory_size: 10000
batch_size: 64
target_update_freq: 1000
buffer_size: 10000
model_dir: "models/enhanced_rl"
# DQN Network Architecture Configuration
network_architecture:
# Feature extractor layers (reduced by half from original)
feature_layers: [4096, 3072, 2048, 1536, 1024] # Reduced from [8192, 6144, 4096, 3072, 2048]
# Market regime detection head
regime_head: [512, 256] # Reduced from [1024, 512]
# Price direction prediction head
price_direction_head: [512, 256] # Reduced from [1024, 512]
# Volatility prediction head
volatility_head: [512, 128] # Reduced from [1024, 256]
# Main Q-value head (dueling architecture)
value_head: [512, 256] # Reduced from [1024, 512]
advantage_head: [512, 256] # Reduced from [1024, 512]
# Dropout rate
dropout_rate: 0.1
# Layer normalization
use_layer_norm: true
# Market regime adaptation
market_regime_weights:
trending: 1.2 # Higher confidence in trending markets
ranging: 0.8 # Lower confidence in ranging markets
volatile: 0.6 # Much lower confidence in volatile markets
# Prioritized experience replay
replay_alpha: 0.6 # Priority exponent
replay_beta: 0.4 # Importance sampling exponent
# Real-time RL COB Trader Configuration
realtime_rl:
# Model parameters for 400M parameter network (faster startup)
model:
input_size: 2000 # COB feature dimensions
hidden_size: 2048 # Optimized hidden layer size for 400M params
num_layers: 8 # Efficient transformer layers for faster training
learning_rate: 0.0001 # Higher learning rate for faster convergence
weight_decay: 0.00001 # Balanced L2 regularization
# Inference configuration
inference_interval_ms: 200 # Inference every 200ms
min_confidence_threshold: 0.7 # Minimum confidence for signal accumulation
required_confident_predictions: 3 # Need 3 confident predictions for trade
# Training configuration
training_interval_s: 1.0 # Train every second
batch_size: 32 # Training batch size
replay_buffer_size: 1000 # Store last 1000 predictions for training
# Signal accumulation
signal_buffer_size: 10 # Buffer size for signal accumulation
consensus_threshold: 3 # Need 3 signals in same direction
# Model checkpointing
model_checkpoint_dir: "models/realtime_rl_cob"
save_interval_s: 300 # Save models every 5 minutes
# COB integration
symbols: ["BTC/USDT", "ETH/USDT"] # Symbols to trade
cob_feature_normalization: "robust" # Feature normalization method
# Reward engineering for RL
reward_structure:
correct_direction_base: 1.0 # Base reward for correct prediction
confidence_scaling: true # Scale reward by confidence
magnitude_bonus: 0.5 # Bonus for predicting magnitude accurately
overconfidence_penalty: 1.5 # Penalty multiplier for wrong high-confidence predictions
trade_execution_multiplier: 10.0 # Higher weight for actual trade outcomes
# Performance monitoring
statistics_interval_s: 60 # Print stats every minute
detailed_logging: true # Enable detailed performance logging
# Enhanced Orchestrator Settings
orchestrator:
# Model weights for decision combination
cnn_weight: 0.7 # Weight for CNN predictions
rl_weight: 0.3 # Weight for RL decisions
confidence_threshold: 0.45
confidence_threshold_close: 0.35
decision_frequency: 30
# Multi-symbol coordination
symbol_correlation_matrix:
"ETH/USDT-BTC/USDT": 0.85 # ETH-BTC correlation
# Perfect move marking
perfect_move_threshold: 0.02 # 2% price change to mark as significant
perfect_move_buffer_size: 10000
# RL evaluation settings
evaluation_delay: 3600 # Evaluate actions after 1 hour
reward_calculation:
success_multiplier: 10 # Reward for correct predictions
failure_penalty: 5 # Penalty for wrong predictions
confidence_scaling: true # Scale rewards by confidence
# Entry aggressiveness: 0.0 = very conservative (fewer, higher quality trades), 1.0 = very aggressive (more trades)
entry_aggressiveness: 0.5
# Exit aggressiveness: 0.0 = very conservative (let profits run), 1.0 = very aggressive (quick exits)
exit_aggressiveness: 0.5
# Decision Fusion Configuration
decision_fusion:
enabled: true # Use neural network decision fusion instead of programmatic
mode: "neural" # "neural" or "programmatic"
input_size: 128 # Size of input features for decision fusion network
hidden_size: 256 # Hidden layer size
history_length: 20 # Number of recent decisions to include
training_interval: 10 # Train decision fusion every 10 decisions in programmatic mode
learning_rate: 0.001 # Learning rate for decision fusion network
batch_size: 32 # Training batch size
min_samples_for_training: 20 # Lower threshold for faster training in programmatic mode
# Training Configuration
training:
learning_rate: 0.001
batch_size: 32
epochs: 100
validation_split: 0.2
early_stopping_patience: 10
# CNN specific training
cnn_training_interval: 3600 # Train CNN every hour (was 6 hours)
min_perfect_moves: 50 # Reduced from 200 for faster learning
# RL specific training
rl_training_interval: 300 # Train RL every 5 minutes (was 1 hour)
min_experiences: 50 # Reduced from 100 for faster learning
training_steps_per_cycle: 20 # Increased from 10 for more learning
model_type: "optimized_short_term"
use_realtime: true
use_ticks: true
checkpoint_dir: "NN/models/saved/realtime_ticks_checkpoints"
save_best_model: true
save_final_model: false # We only want to keep the best performing model
# Continuous learning settings
continuous_learning: true
adaptive_learning_rate: true
performance_threshold: 0.6
# Enhanced Training System Configuration
enhanced_training:
enabled: true # Enable enhanced real-time training
auto_start: true # Automatically start training when orchestrator starts
training_intervals:
cob_rl_training_interval: 1 # Train COB RL every 1 second (HIGHEST PRIORITY)
dqn_training_interval: 5 # Train DQN every 5 seconds
cnn_training_interval: 10 # Train CNN every 10 seconds
validation_interval: 60 # Validate every minute
batch_size: 64 # Training batch size
memory_size: 10000 # Experience buffer size
min_training_samples: 100 # Minimum samples before training starts
adaptation_threshold: 0.1 # Performance threshold for adaptation
forward_looking_predictions: true # Enable forward-looking prediction validation
# COB RL Priority Settings (since order book imbalance predicts price moves)
cob_rl_priority: true # Enable COB RL as highest priority model
cob_rl_batch_size: 16 # Smaller batches for faster COB updates
cob_rl_min_samples: 5 # Lower threshold for COB training