diff --git a/NN/models/cnn_model.py b/NN/models/cnn_model.py
index 773da62..4b34bb0 100644
--- a/NN/models/cnn_model.py
+++ b/NN/models/cnn_model.py
@@ -1,201 +1,201 @@
-"""
-Legacy CNN Model Compatibility Layer
+# """
+# Legacy CNN Model Compatibility Layer

-This module provides compatibility redirects to the unified StandardizedCNN model.
-All legacy models (EnhancedCNNModel, CNNModelTrainer, CNNModel) have been retired
-in favor of the StandardizedCNN architecture.
-"""
+# This module provides compatibility redirects to the unified StandardizedCNN model.
+# All legacy models (EnhancedCNNModel, CNNModelTrainer, CNNModel) have been retired
+# in favor of the StandardizedCNN architecture.
+# """

-import logging
-import warnings
-from typing import Tuple, Dict, Any, Optional
-import torch
-import numpy as np
+# import logging
+# import warnings
+# from typing import Tuple, Dict, Any, Optional
+# import torch
+# import numpy as np

-# Import the standardized CNN model
-from .standardized_cnn import StandardizedCNN
+# # Import the standardized CNN model
+# from .standardized_cnn import StandardizedCNN

-logger = logging.getLogger(__name__)
+# logger = logging.getLogger(__name__)

-# Compatibility aliases and wrappers
-class EnhancedCNNModel:
-    """Legacy compatibility wrapper - redirects to StandardizedCNN"""
+# # Compatibility aliases and wrappers
+# class EnhancedCNNModel:
+#     """Legacy compatibility wrapper - redirects to StandardizedCNN"""

-    def __init__(self, *args, **kwargs):
-        warnings.warn(
-            "EnhancedCNNModel is deprecated. Use StandardizedCNN instead.",
-            DeprecationWarning,
-            stacklevel=2
-        )
-        # Create StandardizedCNN with default parameters
-        self.standardized_cnn = StandardizedCNN()
-        logger.warning("EnhancedCNNModel compatibility wrapper created - please migrate to StandardizedCNN")
+#     def __init__(self, *args, **kwargs):
+#         warnings.warn(
+#             "EnhancedCNNModel is deprecated. Use StandardizedCNN instead.",
+#             DeprecationWarning,
+#             stacklevel=2
+#         )
+#         # Create StandardizedCNN with default parameters
+#         self.standardized_cnn = StandardizedCNN()
+#         logger.warning("EnhancedCNNModel compatibility wrapper created - please migrate to StandardizedCNN")

-    def __getattr__(self, name):
-        """Delegate all method calls to StandardizedCNN"""
-        return getattr(self.standardized_cnn, name)
+#     def __getattr__(self, name):
+#         """Delegate all method calls to StandardizedCNN"""
+#         return getattr(self.standardized_cnn, name)

-class CNNModelTrainer:
-    """Legacy compatibility wrapper for CNN training"""
+# class CNNModelTrainer:
+#     """Legacy compatibility wrapper for CNN training"""

-    def __init__(self, model=None, *args, **kwargs):
-        warnings.warn(
-            "CNNModelTrainer is deprecated. Use StandardizedCNN.train_step() instead.",
-            DeprecationWarning,
-            stacklevel=2
-        )
-        if isinstance(model, EnhancedCNNModel):
-            self.model = model.standardized_cnn
-        else:
-            self.model = StandardizedCNN()
-        logger.warning("CNNModelTrainer compatibility wrapper created - please use StandardizedCNN.train_step()")
+#     def __init__(self, model=None, *args, **kwargs):
+#         warnings.warn(
+#             "CNNModelTrainer is deprecated. Use StandardizedCNN.train_step() instead.",
+#             DeprecationWarning,
+#             stacklevel=2
+#         )
+#         if isinstance(model, EnhancedCNNModel):
+#             self.model = model.standardized_cnn
+#         else:
+#             self.model = StandardizedCNN()
+#         logger.warning("CNNModelTrainer compatibility wrapper created - please use StandardizedCNN.train_step()")

-    def train_step(self, x, y, *args, **kwargs):
-        """Legacy train step wrapper"""
-        try:
-            # Convert to BaseDataInput format if needed
-            if hasattr(x, 'get_feature_vector'):
-                # Already BaseDataInput
-                base_input = x
-            else:
-                # Create mock BaseDataInput for legacy compatibility
-                from core.data_models import BaseDataInput
-                base_input = BaseDataInput()
-                # Set mock feature vector
-                if isinstance(x, torch.Tensor):
-                    feature_vector = x.flatten().cpu().numpy()
-                else:
-                    feature_vector = np.array(x).flatten()
+#     def train_step(self, x, y, *args, **kwargs):
+#         """Legacy train step wrapper"""
+#         try:
+#             # Convert to BaseDataInput format if needed
+#             if hasattr(x, 'get_feature_vector'):
+#                 # Already BaseDataInput
+#                 base_input = x
+#             else:
+#                 # Create mock BaseDataInput for legacy compatibility
+#                 from core.data_models import BaseDataInput
+#                 base_input = BaseDataInput()
+#                 # Set mock feature vector
+#                 if isinstance(x, torch.Tensor):
+#                     feature_vector = x.flatten().cpu().numpy()
+#                 else:
+#                     feature_vector = np.array(x).flatten()

-                # Pad or truncate to expected size
-                expected_size = self.model.expected_feature_dim
-                if len(feature_vector) < expected_size:
-                    padding = np.zeros(expected_size - len(feature_vector))
-                    feature_vector = np.concatenate([feature_vector, padding])
-                else:
-                    feature_vector = feature_vector[:expected_size]
+#                 # Pad or truncate to expected size
+#                 expected_size = self.model.expected_feature_dim
+#                 if len(feature_vector) < expected_size:
+#                     padding = np.zeros(expected_size - len(feature_vector))
+#                     feature_vector = np.concatenate([feature_vector, padding])
+#                 else:
+#                     feature_vector = feature_vector[:expected_size]

-                base_input._feature_vector = feature_vector
+#                 base_input._feature_vector = feature_vector

-            # Convert target to string format
-            if isinstance(y, torch.Tensor):
-                y_val = y.item() if y.numel() == 1 else y.argmax().item()
-            else:
-                y_val = int(y) if np.isscalar(y) else int(np.argmax(y))
+#             # Convert target to string format
+#             if isinstance(y, torch.Tensor):
+#                 y_val = y.item() if y.numel() == 1 else y.argmax().item()
+#             else:
+#                 y_val = int(y) if np.isscalar(y) else int(np.argmax(y))

-            target_map = {0: 'BUY', 1: 'SELL', 2: 'HOLD'}
-            target = target_map.get(y_val, 'HOLD')
+#             target_map = {0: 'BUY', 1: 'SELL', 2: 'HOLD'}
+#             target = target_map.get(y_val, 'HOLD')

-            # Use StandardizedCNN training
-            optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)
-            loss = self.model.train_step([base_input], [target], optimizer)
+#             # Use StandardizedCNN training
+#             optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)
+#             loss = self.model.train_step([base_input], [target], optimizer)

-            return {'total_loss': loss, 'main_loss': loss, 'accuracy': 0.5}
+#             return {'total_loss': loss, 'main_loss': loss, 'accuracy': 0.5}

-        except Exception as e:
-            logger.error(f"Legacy train_step error: {e}")
-            return {'total_loss': 0.0, 'main_loss': 0.0, 'accuracy': 0.5}
+#         except Exception as e:
+#             logger.error(f"Legacy train_step error: {e}")
+#             return {'total_loss': 0.0, 'main_loss': 0.0, 'accuracy': 0.5}

-class CNNModel:
-    """Legacy compatibility wrapper for CNN model interface"""
+# # class CNNModel:
+# #     """Legacy compatibility wrapper for CNN model interface"""

-    def __init__(self, input_shape=(900, 50), output_size=3, model_path=None):
-        warnings.warn(
-            "CNNModel is deprecated. Use StandardizedCNN directly.",
-            DeprecationWarning,
-            stacklevel=2
-        )
-        self.input_shape = input_shape
-        self.output_size = output_size
-        self.standardized_cnn = StandardizedCNN()
-        self.trainer = CNNModelTrainer(self.standardized_cnn)
-        logger.warning("CNNModel compatibility wrapper created - please migrate to StandardizedCNN")
+# #     def __init__(self, input_shape=(900, 50), output_size=3, model_path=None):
+# #         warnings.warn(
+# #             "CNNModel is deprecated. Use StandardizedCNN directly.",
+# #             DeprecationWarning,
+# #             stacklevel=2
+# #         )
+# #         self.input_shape = input_shape
+# #         self.output_size = output_size
+# #         self.standardized_cnn = StandardizedCNN()
+# #         self.trainer = CNNModelTrainer(self.standardized_cnn)
+# #         logger.warning("CNNModel compatibility wrapper created - please migrate to StandardizedCNN")

-    def build_model(self, **kwargs):
-        """Legacy build method - no-op for StandardizedCNN"""
-        return self
+# #     def build_model(self, **kwargs):
+# #         """Legacy build method - no-op for StandardizedCNN"""
+# #         return self

-    def predict(self, X):
-        """Legacy predict method"""
-        try:
-            # Convert input to BaseDataInput
-            from core.data_models import BaseDataInput
-            base_input = BaseDataInput()
+# #     def predict(self, X):
+# #         """Legacy predict method"""
+# #         try:
+# #             # Convert input to BaseDataInput
+# #             from core.data_models import BaseDataInput
+# #             base_input = BaseDataInput()

-            if isinstance(X, np.ndarray):
-                feature_vector = X.flatten()
-            else:
-                feature_vector = np.array(X).flatten()
+# #             if isinstance(X, np.ndarray):
+# #                 feature_vector = X.flatten()
+# #             else:
+# #                 feature_vector = np.array(X).flatten()

-            # Pad or truncate to expected size
-            expected_size = self.standardized_cnn.expected_feature_dim
-            if len(feature_vector) < expected_size:
-                padding = np.zeros(expected_size - len(feature_vector))
-                feature_vector = np.concatenate([feature_vector, padding])
-            else:
-                feature_vector = feature_vector[:expected_size]
+# #             # Pad or truncate to expected size
+# #             expected_size = self.standardized_cnn.expected_feature_dim
+# #             if len(feature_vector) < expected_size:
+# #                 padding = np.zeros(expected_size - len(feature_vector))
+# #                 feature_vector = np.concatenate([feature_vector, padding])
+# #             else:
+# #                 feature_vector = feature_vector[:expected_size]

-            base_input._feature_vector = feature_vector
+# #             base_input._feature_vector = feature_vector

-            # Get prediction from StandardizedCNN
-            result = self.standardized_cnn.predict_from_base_input(base_input)
+# #             # Get prediction from StandardizedCNN
+# #             result = self.standardized_cnn.predict_from_base_input(base_input)

-            # Convert to legacy format
-            action_map = {'BUY': 0, 'SELL': 1, 'HOLD': 2}
-            pred_class = np.array([action_map.get(result.predictions['action'], 2)])
-            pred_proba = np.array([result.predictions['action_probabilities']])
+# #             # Convert to legacy format
+# #             action_map = {'BUY': 0, 'SELL': 1, 'HOLD': 2}
+# #             pred_class = np.array([action_map.get(result.predictions['action'], 2)])
+# #             pred_proba = np.array([result.predictions['action_probabilities']])

-            return pred_class, pred_proba
+# #             return pred_class, pred_proba

-        except Exception as e:
-            logger.error(f"Legacy predict error: {e}")
-            # Return safe defaults
-            pred_class = np.array([2])  # HOLD
-            pred_proba = np.array([[0.33, 0.33, 0.34]])
-            return pred_class, pred_proba
+# #         except Exception as e:
+# #             logger.error(f"Legacy predict error: {e}")
+# #             # Return safe defaults
+# #             pred_class = np.array([2])  # HOLD
+# #             pred_proba = np.array([[0.33, 0.33, 0.34]])
+# #             return pred_class, pred_proba

-    def fit(self, X, y, **kwargs):
-        """Legacy fit method"""
-        try:
-            return self.trainer.train_step(X, y)
-        except Exception as e:
-            logger.error(f"Legacy fit error: {e}")
-            return self
+# #     def fit(self, X, y, **kwargs):
+# #         """Legacy fit method"""
+# #         try:
+# #             return self.trainer.train_step(X, y)
+# #         except Exception as e:
+# #             logger.error(f"Legacy fit error: {e}")
+# #             return self

-    def save(self, filepath: str):
-        """Legacy save method"""
-        try:
-            torch.save(self.standardized_cnn.state_dict(), filepath)
-            logger.info(f"StandardizedCNN saved to {filepath}")
-        except Exception as e:
-            logger.error(f"Error saving model: {e}")
+# #     def save(self, filepath: str):
+# #         """Legacy save method"""
+# #         try:
+# #             torch.save(self.standardized_cnn.state_dict(), filepath)
+# #             logger.info(f"StandardizedCNN saved to {filepath}")
+# #         except Exception as e:
+# #             logger.error(f"Error saving model: {e}")

-def create_enhanced_cnn_model(input_size: int = 60,
-                              feature_dim: int = 50,
-                              output_size: int = 3,
-                              base_channels: int = 256,
-                              device: str = 'cuda') -> Tuple[StandardizedCNN, CNNModelTrainer]:
-    """Legacy compatibility function - returns StandardizedCNN"""
-    warnings.warn(
-        "create_enhanced_cnn_model is deprecated. Use StandardizedCNN() directly.",
-        DeprecationWarning,
-        stacklevel=2
-    )
+# def create_enhanced_cnn_model(input_size: int = 60,
+#                               feature_dim: int = 50,
+#                               output_size: int = 3,
+#                               base_channels: int = 256,
+#                               device: str = 'cuda') -> Tuple[StandardizedCNN, CNNModelTrainer]:
+#     """Legacy compatibility function - returns StandardizedCNN"""
+#     warnings.warn(
+#         "create_enhanced_cnn_model is deprecated. Use StandardizedCNN() directly.",
+#         DeprecationWarning,
+#         stacklevel=2
+#     )
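Note on the comment-out above: once every line of NN/models/cnn_model.py is commented, the module no longer defines EnhancedCNNModel, CNNModelTrainer, CNNModel, or __all__, so any code still importing those names now fails at import time instead of emitting a DeprecationWarning. A minimal check of that consequence (hypothetical snippet, not part of the patch):

    # The module body is now all comments, so the legacy names are gone:
    try:
        from NN.models.cnn_model import CNNModelTrainer
    except ImportError as exc:
        print(f"legacy CNN wrapper no longer importable: {exc}")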
-    model = StandardizedCNN()
-    trainer = CNNModelTrainer(model)
+#     model = StandardizedCNN()
+#     trainer = CNNModelTrainer(model)

-    logger.warning("Legacy create_enhanced_cnn_model called - please use StandardizedCNN directly")
-    return model, trainer
+#     logger.warning("Legacy create_enhanced_cnn_model called - please use StandardizedCNN directly")
+#     return model, trainer

-# Export compatibility symbols
-__all__ = [
-    'EnhancedCNNModel',
-    'CNNModelTrainer',
-    'CNNModel',
-    'create_enhanced_cnn_model'
-]
+# # Export compatibility symbols
+# __all__ = [
+#     'EnhancedCNNModel',
+#     'CNNModelTrainer',
+#     # 'CNNModel',
+#     'create_enhanced_cnn_model'
+# ]
diff --git a/NN/models/dqn_agent.py b/NN/models/dqn_agent.py
index 8b019e2..815858e 100644
--- a/NN/models/dqn_agent.py
+++ b/NN/models/dqn_agent.py
@@ -23,11 +23,11 @@ logger = logging.getLogger(__name__)

 class DQNNetwork(nn.Module):
     """
-    Massive Deep Q-Network specifically designed for RL trading with unified BaseDataInput features
+    Configurable Deep Q-Network specifically designed for RL trading with unified BaseDataInput features
     Handles 7850 input features from multi-timeframe, multi-asset data
-    TARGET: 50M parameters for enhanced learning capacity
+    Architecture is configurable via models.yml
     """
-    def __init__(self, input_dim: int, n_actions: int):
+    def __init__(self, input_dim: int, n_actions: int, config: dict = None):
         super(DQNNetwork, self).__init__()

         # Handle different input dimension formats
@@ -41,59 +41,65 @@ class DQNNetwork(nn.Module):

         self.n_actions = n_actions

-        # MASSIVE network architecture optimized for trading features
-        # Target: ~50M parameters
-        self.feature_extractor = nn.Sequential(
-            # Initial feature extraction with massive width
-            nn.Linear(self.input_size, 8192),  # 7850 -> 8192 = ~64M weights
-            nn.LayerNorm(8192),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-
-            # Deep feature processing layers
-            nn.Linear(8192, 6144),  # 8192 -> 6144 = ~50M weights
-            nn.LayerNorm(6144),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-
-            nn.Linear(6144, 4096),  # 6144 -> 4096 = ~25M weights
-            nn.LayerNorm(4096),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-
-            nn.Linear(4096, 3072),  # 4096 -> 3072 = ~12M weights
-            nn.LayerNorm(3072),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-
-            nn.Linear(3072, 2048),  # 3072 -> 2048 = ~6M weights
-            nn.LayerNorm(2048),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-        )
+        # Get network architecture from config or use defaults
+        if config and 'network_architecture' in config:
+            arch_config = config['network_architecture']
+            feature_layers = arch_config.get('feature_layers', [4096, 3072, 2048, 1536, 1024])
+            regime_head = arch_config.get('regime_head', [512, 256])
+            price_direction_head = arch_config.get('price_direction_head', [512, 256])
+            volatility_head = arch_config.get('volatility_head', [512, 128])
+            value_head = arch_config.get('value_head', [512, 256])
+            advantage_head = arch_config.get('advantage_head', [512, 256])
+            dropout_rate = arch_config.get('dropout_rate', 0.1)
+            use_layer_norm = arch_config.get('use_layer_norm', True)
+        else:
+            # Default reduced architecture (half the original size)
+            feature_layers = [4096, 3072, 2048, 1536, 1024]
+            regime_head = [512, 256]
+            price_direction_head = [512, 256]
+            volatility_head = [512, 128]
+            value_head = [512, 256]
+            advantage_head = [512, 256]
+            dropout_rate = 0.1
+            use_layer_norm = True
+
+        # Build configurable feature extractor
+        feature_layers_list = []
+        prev_size = self.input_size
+
+        for layer_size in feature_layers:
+            feature_layers_list.append(nn.Linear(prev_size, layer_size))
+            if use_layer_norm:
+                feature_layers_list.append(nn.LayerNorm(layer_size))
+            feature_layers_list.append(nn.ReLU(inplace=True))
+            feature_layers_list.append(nn.Dropout(dropout_rate))
+            prev_size = layer_size
+
+        self.feature_extractor = nn.Sequential(*feature_layers_list)
+        self.feature_size = feature_layers[-1]  # Final feature size
+
+        # Build configurable network heads
+        def build_head_layers(input_size, layer_sizes, output_size):
+            layers = []
+            prev_size = input_size
+            for layer_size in layer_sizes:
+                layers.append(nn.Linear(prev_size, layer_size))
+                if use_layer_norm:
+                    layers.append(nn.LayerNorm(layer_size))
+                layers.append(nn.ReLU(inplace=True))
+                layers.append(nn.Dropout(dropout_rate))
+                prev_size = layer_size
+            layers.append(nn.Linear(prev_size, output_size))
+            return nn.Sequential(*layers)

         # Market regime detection head
-        self.regime_head = nn.Sequential(
-            nn.Linear(2048, 1024),
-            nn.LayerNorm(1024),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-            nn.Linear(1024, 512),
-            nn.LayerNorm(512),
-            nn.ReLU(inplace=True),
-            nn.Linear(512, 4)  # trending, ranging, volatile, mixed
+        self.regime_head = build_head_layers(
+            self.feature_size, regime_head, 4  # trending, ranging, volatile, mixed
         )

         # Price direction prediction head - outputs direction and confidence
-        self.price_direction_head = nn.Sequential(
-            nn.Linear(2048, 1024),
-            nn.LayerNorm(1024),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-            nn.Linear(1024, 512),
-            nn.LayerNorm(512),
-            nn.ReLU(inplace=True),
-            nn.Linear(512, 2)  # [direction, confidence]
+        self.price_direction_head = build_head_layers(
+            self.feature_size, price_direction_head, 2  # [direction, confidence]
         )

         # Direction activation (tanh for -1 to 1)
@@ -102,38 +108,18 @@ class DQNNetwork(nn.Module):
         self.confidence_activation = nn.Sigmoid()

         # Volatility prediction head
-        self.volatility_head = nn.Sequential(
-            nn.Linear(2048, 1024),
-            nn.LayerNorm(1024),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-            nn.Linear(1024, 256),
-            nn.LayerNorm(256),
-            nn.ReLU(inplace=True),
-            nn.Linear(256, 4)  # predicted volatility for 4 timeframes
+        self.volatility_head = build_head_layers(
+            self.feature_size, volatility_head, 4  # predicted volatility for 4 timeframes
         )

         # Main Q-value head (dueling architecture)
-        self.value_head = nn.Sequential(
-            nn.Linear(2048, 1024),
-            nn.LayerNorm(1024),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-            nn.Linear(1024, 512),
-            nn.LayerNorm(512),
-            nn.ReLU(inplace=True),
-            nn.Linear(512, 1)  # State value
+        self.value_head = build_head_layers(
+            self.feature_size, value_head, 1  # Single value for dueling architecture
         )

-        self.advantage_head = nn.Sequential(
-            nn.Linear(2048, 1024),
-            nn.LayerNorm(1024),
-            nn.ReLU(inplace=True),
-            nn.Dropout(0.1),
-            nn.Linear(1024, 512),
-            nn.LayerNorm(512),
-            nn.ReLU(inplace=True),
-            nn.Linear(512, n_actions)  # Action advantages
+        # Advantage head (dueling architecture)
+        self.advantage_head = build_head_layers(
+            self.feature_size, advantage_head, n_actions  # Action advantages
         )

         # Initialize weights
@@ -248,7 +234,8 @@ class DQNAgent:
                  priority_memory: bool = True,
                  device=None,
                  model_name: str = "dqn_agent",
-                 enable_checkpoints: bool = True):
+                 enable_checkpoints: bool = True,
+                 config: dict = None):

         # Checkpoint management
         self.model_name = model_name
@@ -292,8 +279,8 @@ class DQNAgent:
         logger.info(f"DQN Agent using device: {self.device}")

         # Initialize models with RL-specific network architecture
-        self.policy_net = DQNNetwork(self.state_dim, self.n_actions).to(self.device)
-        self.target_net = DQNNetwork(self.state_dim, self.n_actions).to(self.device)
+        self.policy_net = DQNNetwork(self.state_dim, self.n_actions, config).to(self.device)
+        self.target_net = DQNNetwork(self.state_dim, self.n_actions, config).to(self.device)

         # Ensure models are on the correct device
         self.policy_net = self.policy_net.to(self.device)
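Note: the new DQNNetwork reads its layer sizes from config['network_architecture'] and falls back to the reduced defaults when no config is supplied. A minimal construction sketch (assumes the NN.models.dqn_agent import path matches the repo layout and that an int input_dim is accepted, per the constructor above; the dict mirrors the rl.network_architecture block added to models.yml, with deliberately smaller hypothetical sizes):

    from NN.models.dqn_agent import DQNNetwork

    rl_config = {
        "network_architecture": {
            "feature_layers": [2048, 1024, 512],   # hypothetical smaller stack
            "regime_head": [256, 128],
            "price_direction_head": [256, 128],
            "volatility_head": [256, 64],
            "value_head": [256, 128],
            "advantage_head": [256, 128],
            "dropout_rate": 0.1,
            "use_layer_norm": True,
        }
    }

    net = DQNNetwork(input_dim=7850, n_actions=3, config=rl_config)
    print(sum(p.numel() for p in net.parameters()))  # quick parameter-count check

Any key omitted from network_architecture falls back to its .get() default, and passing config=None selects the half-size default architecture outright.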
diff --git a/config.yaml b/config.yaml
index 91697d6..3b1c61f 100644
--- a/config.yaml
+++ b/config.yaml
@@ -88,119 +88,14 @@ data:
   market_regime_detection: true
   volatility_analysis: true

-# Enhanced CNN Configuration
-cnn:
-  window_size: 20
-  features: ["open", "high", "low", "close", "volume"]
-  timeframes: ["1m", "5m", "15m", "1h", "4h", "1d"]
-  hidden_layers: [64, 128, 256]
-  dropout: 0.2
-  learning_rate: 0.001
-  batch_size: 32
-  epochs: 100
-  confidence_threshold: 0.6
-  early_stopping_patience: 10
-  model_dir: "models/enhanced_cnn"  # Ultra-fast scalping weights (500x leverage)
-  timeframe_importance:
-    "1s": 0.60  # Primary scalping signal
-    "1m": 0.20  # Short-term confirmation
-    "1h": 0.15  # Medium-term trend
-    "1d": 0.05  # Long-term direction (minimal)
-
-# Enhanced RL Agent Configuration
-rl:
-  state_size: 100  # Will be calculated dynamically based on features
-  action_space: 3  # BUY, HOLD, SELL
-  hidden_size: 256
-  epsilon: 1.0
-  epsilon_decay: 0.995
-  epsilon_min: 0.01
-  learning_rate: 0.0001
-  gamma: 0.99
-  memory_size: 10000
-  batch_size: 64
-  target_update_freq: 1000
-  buffer_size: 10000
-  model_dir: "models/enhanced_rl"
-  # Market regime adaptation
-  market_regime_weights:
-    trending: 1.2  # Higher confidence in trending markets
-    ranging: 0.8  # Lower confidence in ranging markets
-    volatile: 0.6  # Much lower confidence in volatile markets
-  # Prioritized experience replay
-  replay_alpha: 0.6  # Priority exponent
-  replay_beta: 0.4  # Importance sampling exponent
-
-# Enhanced Orchestrator Settings
-orchestrator:
-  # Model weights for decision combination
-  cnn_weight: 0.7  # Weight for CNN predictions
-  rl_weight: 0.3  # Weight for RL decisions
-  confidence_threshold: 0.45
-  confidence_threshold_close: 0.35
-  decision_frequency: 30
-
-  # Multi-symbol coordination
-  symbol_correlation_matrix:
-    "ETH/USDT-BTC/USDT": 0.85  # ETH-BTC correlation
-
-  # Perfect move marking
-  perfect_move_threshold: 0.02  # 2% price change to mark as significant
-  perfect_move_buffer_size: 10000
-
-  # RL evaluation settings
-  evaluation_delay: 3600  # Evaluate actions after 1 hour
-  reward_calculation:
-    success_multiplier: 10  # Reward for correct predictions
-    failure_penalty: 5  # Penalty for wrong predictions
-    confidence_scaling: true  # Scale rewards by confidence
-
-  # Entry aggressiveness: 0.0 = very conservative (fewer, higher quality trades), 1.0 = very aggressive (more trades)
-  entry_aggressiveness: 0.5
-  # Exit aggressiveness: 0.0 = very conservative (let profits run), 1.0 = very aggressive (quick exits)
-  exit_aggressiveness: 0.5
-
-  # Decision Fusion Configuration
-  decision_fusion:
-    enabled: true  # Use neural network decision fusion instead of programmatic
-    mode: "neural"  # "neural" or "programmatic"
-    input_size: 128  # Size of input features for decision fusion network
-    hidden_size: 256  # Hidden layer size
-    history_length: 20  # Number of recent decisions to include
-    training_interval: 10  # Train decision fusion every 10 decisions in programmatic mode
-    learning_rate: 0.001  # Learning rate for decision fusion network
-    batch_size: 32  # Training batch size
-    min_samples_for_training: 20  # Lower threshold for faster training in programmatic mode
-
-# Training Configuration
-training:
-  learning_rate: 0.001
-  batch_size: 32
-  epochs: 100
-  validation_split: 0.2
-  early_stopping_patience: 10
-
-  # CNN specific training
-  cnn_training_interval: 3600  # Train CNN every hour (was 6 hours)
-  min_perfect_moves: 50  # Reduced from 200 for faster learning
-
-  # RL specific training
-  rl_training_interval: 300  # Train RL every 5 minutes (was 1 hour)
-  min_experiences: 50  # Reduced from 100 for faster learning
-  training_steps_per_cycle: 20  # Increased from 10 for more learning
-
-  model_type: "optimized_short_term"
-  use_realtime: true
-  use_ticks: true
-  checkpoint_dir: "NN/models/saved/realtime_ticks_checkpoints"
-  save_best_model: true
-  save_final_model: false  # We only want to keep the best performing model
-
-  # Continuous learning settings
-  continuous_learning: true
-  learning_from_trades: true
-  pattern_recognition: true
-  retrospective_learning: true
+# Model configurations have been moved to models.yml for better organization
+# See models.yml for all model-specific settings including:
+# - CNN configuration
+# - RL/DQN configuration
+# - Orchestrator settings
+# - Training configuration
+# - Enhanced training system
+# - Real-time RL COB trader

 # Universal Trading Configuration (applies to all exchanges)
 trading:
@@ -227,69 +122,7 @@ memory:
   model_limit_gb: 4.0  # Per-model memory limit
   cleanup_interval: 1800  # Memory cleanup every 30 minutes

-# Enhanced Training System Configuration
-enhanced_training:
-  enabled: true  # Enable enhanced real-time training
-  auto_start: true  # Automatically start training when orchestrator starts
-  training_intervals:
-    cob_rl_training_interval: 1  # Train COB RL every 1 second (HIGHEST PRIORITY)
-    dqn_training_interval: 5  # Train DQN every 5 seconds
-    cnn_training_interval: 10  # Train CNN every 10 seconds
-    validation_interval: 60  # Validate every minute
-  batch_size: 64  # Training batch size
-  memory_size: 10000  # Experience buffer size
-  min_training_samples: 100  # Minimum samples before training starts
-  adaptation_threshold: 0.1  # Performance threshold for adaptation
-  forward_looking_predictions: true  # Enable forward-looking prediction validation
-
-  # COB RL Priority Settings (since order book imbalance predicts price moves)
-  cob_rl_priority: true  # Enable COB RL as highest priority model
-  cob_rl_batch_size: 16  # Smaller batches for faster COB updates
-  cob_rl_min_samples: 5  # Lower threshold for COB training
-
-# Real-time RL COB Trader Configuration
-realtime_rl:
-  # Model parameters for 400M parameter network (faster startup)
-  model:
-    input_size: 2000  # COB feature dimensions
-    hidden_size: 2048  # Optimized hidden layer size for 400M params
-    num_layers: 8  # Efficient transformer layers for faster training
-    learning_rate: 0.0001  # Higher learning rate for faster convergence
-    weight_decay: 0.00001  # Balanced L2 regularization
-
-  # Inference configuration
-  inference_interval_ms: 200  # Inference every 200ms
-  min_confidence_threshold: 0.7  # Minimum confidence for signal accumulation
-  required_confident_predictions: 3  # Need 3 confident predictions for trade
-
-  # Training configuration
-  training_interval_s: 1.0  # Train every second
-  batch_size: 32  # Training batch size
-  replay_buffer_size: 1000  # Store last 1000 predictions for training
-
-  # Signal accumulation
-  signal_buffer_size: 10  # Buffer size for signal accumulation
-  consensus_threshold: 3  # Need 3 signals in same direction
-
-  # Model checkpointing
-  model_checkpoint_dir: "models/realtime_rl_cob"
-  save_interval_s: 300  # Save models every 5 minutes
-
-  # COB integration
-  symbols: ["BTC/USDT", "ETH/USDT"]  # Symbols to trade
-  cob_feature_normalization: "robust"  # Feature normalization method
-
-  # Reward engineering for RL
-  reward_structure:
-    correct_direction_base: 1.0  # Base reward for correct prediction
-    confidence_scaling: true  # Scale reward by confidence
-    magnitude_bonus: 0.5  # Bonus for predicting magnitude accurately
-    overconfidence_penalty: 1.5  # Penalty multiplier for wrong high-confidence predictions
-    trade_execution_multiplier: 10.0  # Higher weight for actual trade outcomes
-
-  # Performance monitoring
-  statistics_interval_s: 60  # Print stats every minute
-  detailed_logging: true  # Enable detailed performance logging
+# Enhanced training and real-time RL configurations moved to models.yml

 # Web Dashboard
 web:
diff --git a/core/config.py b/core/config.py
index 2158236..6a0f027 100644
--- a/core/config.py
+++ b/core/config.py
@@ -24,16 +24,31 @@ class Config:
         self._setup_directories()

     def _load_config(self) -> Dict[str, Any]:
-        """Load configuration from YAML file"""
+        """Load configuration from YAML files (config.yaml + models.yml)"""
         try:
+            # Load main config
             if not self.config_path.exists():
                 logger.warning(f"Config file {self.config_path} not found, using defaults")
-                return self._get_default_config()
-
-            with open(self.config_path, 'r') as f:
-                config = yaml.safe_load(f)
-
-            logger.info(f"Loaded configuration from {self.config_path}")
+                config = self._get_default_config()
+            else:
+                with open(self.config_path, 'r') as f:
+                    config = yaml.safe_load(f)
+                logger.info(f"Loaded main configuration from {self.config_path}")
+
+            # Load models config
+            models_config_path = Path("models.yml")
+            if models_config_path.exists():
+                try:
+                    with open(models_config_path, 'r') as f:
+                        models_config = yaml.safe_load(f)
+                    # Merge models config into main config
+                    config.update(models_config)
+                    logger.info(f"Loaded models configuration from {models_config_path}")
+                except Exception as e:
+                    logger.warning(f"Error loading models.yml: {e}, using main config only")
+            else:
+                logger.info("models.yml not found, using main config only")
+
             return config

         except Exception as e:
diff --git a/core/orchestrator.py b/core/orchestrator.py
index 27ff60c..d7c5be7 100644
--- a/core/orchestrator.py
+++ b/core/orchestrator.py
@@ -605,7 +605,9 @@ class TradingOrchestrator:
                 action_size = self.config.rl.get("action_space", 3)

                 self.rl_agent = DQNAgent(
-                    state_shape=actual_state_size, n_actions=action_size
+                    state_shape=actual_state_size,
+                    n_actions=action_size,
+                    config=self.config.rl
                 )
                 self.rl_agent.to(self.device)  # Move DQN agent to the determined device
diff --git a/data/ui_state.json b/data/ui_state.json
index 3a20b14..26a1419 100644
--- a/data/ui_state.json
+++ b/data/ui_state.json
@@ -14,7 +14,7 @@
     },
     "decision_fusion": {
      "inference_enabled": false,
-      "training_enabled": true
+      "training_enabled": false
     },
     "transformer": {
       "inference_enabled": false,
@@ -25,5 +25,5 @@
       "training_enabled": true
     }
   },
-  "timestamp": "2025-07-29T18:37:29.759605"
+  "timestamp": "2025-07-29T19:17:32.971226"
 }
\ No newline at end of file
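Note on the core/config.py change above: dict.update() performs a shallow, top-level merge, so a section present in both files is replaced wholesale by the models.yml version rather than merged key-by-key. A standalone illustration of the Python semantics (not repository code):

    config = {"rl": {"gamma": 0.99, "hidden_size": 256}, "web": {"port": 8050}}
    models_config = {"rl": {"learning_rate": 0.0001}}
    config.update(models_config)
    print(config["rl"])   # {'learning_rate': 0.0001} - gamma and hidden_size are gone
    print(config["web"])  # {'port': 8050} - keys defined only in config.yaml survive

This patch keeps that safe by deleting the duplicated sections from config.yaml in the same change; it only becomes a hazard if a section is ever reintroduced in both files.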
diff --git a/models.yml b/models.yml
new file mode 100644
index 0000000..958f10d
--- /dev/null
+++ b/models.yml
@@ -0,0 +1,198 @@
+# Model Configurations
+# This file contains all model-specific configurations to keep the main config.yaml clean
+
+# Enhanced CNN Configuration (not currently loaded from this yml file)
+# cnn:
+#   window_size: 20
+#   features: ["open", "high", "low", "close", "volume"]
+#   timeframes: ["1s", "1m", "1h", "1d"]
+#   hidden_layers: [64, 128, 256]
+#   dropout: 0.2
+#   learning_rate: 0.001
+#   batch_size: 32
+#   epochs: 100
+#   confidence_threshold: 0.6
+#   early_stopping_patience: 10
+#   model_dir: "models/enhanced_cnn"  # Ultra-fast scalping weights (500x leverage)
+#   timeframe_importance:
+#     "1s": 0.60  # Primary scalping signal
+#     "1m": 0.20  # Short-term confirmation
+#     "1h": 0.15  # Medium-term trend
+#     "1d": 0.05  # Long-term direction (minimal)
+
+# Enhanced RL Agent Configuration
+rl:
+  state_size: 100  # Will be calculated dynamically based on features
+  action_space: 3  # BUY, HOLD, SELL
+  hidden_size: 256
+  epsilon: 1.0
+  epsilon_decay: 0.995
+  epsilon_min: 0.01
+  learning_rate: 0.0001
+  gamma: 0.99
+  memory_size: 10000
+  batch_size: 64
+  target_update_freq: 1000
+  buffer_size: 10000
+  model_dir: "models/enhanced_rl"
+
+  # DQN Network Architecture Configuration
+  network_architecture:
+    # Feature extractor layers (reduced by half from original)
+    feature_layers: [4096, 3072, 2048, 1536, 1024]  # Reduced from [8192, 6144, 4096, 3072, 2048]
+    # Market regime detection head
+    regime_head: [512, 256]  # Reduced from [1024, 512]
+    # Price direction prediction head
+    price_direction_head: [512, 256]  # Reduced from [1024, 512]
+    # Volatility prediction head
+    volatility_head: [512, 128]  # Reduced from [1024, 256]
+    # Main Q-value head (dueling architecture)
+    value_head: [512, 256]  # Reduced from [1024, 512]
+    advantage_head: [512, 256]  # Reduced from [1024, 512]
+    # Dropout rate
+    dropout_rate: 0.1
+    # Layer normalization
+    use_layer_norm: true
+
+  # Market regime adaptation
+  market_regime_weights:
+    trending: 1.2  # Higher confidence in trending markets
+    ranging: 0.8  # Lower confidence in ranging markets
+    volatile: 0.6  # Much lower confidence in volatile markets
+  # Prioritized experience replay
+  replay_alpha: 0.6  # Priority exponent
+  replay_beta: 0.4  # Importance sampling exponent
+
+# Real-time RL COB Trader Configuration
+realtime_rl:
+  # Model parameters for 400M parameter network (faster startup)
+  model:
+    input_size: 2000  # COB feature dimensions
+    hidden_size: 2048  # Optimized hidden layer size for 400M params
+    num_layers: 8  # Efficient transformer layers for faster training
+    learning_rate: 0.0001  # Higher learning rate for faster convergence
+    weight_decay: 0.00001  # Balanced L2 regularization
+
+  # Inference configuration
+  inference_interval_ms: 200  # Inference every 200ms
+  min_confidence_threshold: 0.7  # Minimum confidence for signal accumulation
+  required_confident_predictions: 3  # Need 3 confident predictions for trade
+
+  # Training configuration
+  training_interval_s: 1.0  # Train every second
+  batch_size: 32  # Training batch size
+  replay_buffer_size: 1000  # Store last 1000 predictions for training
+
+  # Signal accumulation
+  signal_buffer_size: 10  # Buffer size for signal accumulation
+  consensus_threshold: 3  # Need 3 signals in same direction
+
+  # Model checkpointing
+  model_checkpoint_dir: "models/realtime_rl_cob"
+  save_interval_s: 300  # Save models every 5 minutes
+
+  # COB integration
+  symbols: ["BTC/USDT", "ETH/USDT"]  # Symbols to trade
+  cob_feature_normalization: "robust"  # Feature normalization method
+
+  # Reward engineering for RL
+  reward_structure:
+    correct_direction_base: 1.0  # Base reward for correct prediction
+    confidence_scaling: true  # Scale reward by confidence
+    magnitude_bonus: 0.5  # Bonus for predicting magnitude accurately
+    overconfidence_penalty: 1.5  # Penalty multiplier for wrong high-confidence predictions
+    trade_execution_multiplier: 10.0  # Higher weight for actual trade outcomes
+
+  # Performance monitoring
+  statistics_interval_s: 60  # Print stats every minute
+  detailed_logging: true  # Enable detailed performance logging
+
+# Enhanced Orchestrator Settings
+orchestrator:
+  # Model weights for decision combination
+  cnn_weight: 0.7  # Weight for CNN predictions
+  rl_weight: 0.3  # Weight for RL decisions
+  confidence_threshold: 0.45
+  confidence_threshold_close: 0.35
+  decision_frequency: 30
+
+  # Multi-symbol coordination
+  symbol_correlation_matrix:
+    "ETH/USDT-BTC/USDT": 0.85  # ETH-BTC correlation
+
+  # Perfect move marking
+  perfect_move_threshold: 0.02  # 2% price change to mark as significant
+  perfect_move_buffer_size: 10000
+
+  # RL evaluation settings
+  evaluation_delay: 3600  # Evaluate actions after 1 hour
+  reward_calculation:
+    success_multiplier: 10  # Reward for correct predictions
+    failure_penalty: 5  # Penalty for wrong predictions
+    confidence_scaling: true  # Scale rewards by confidence
+
+  # Entry aggressiveness: 0.0 = very conservative (fewer, higher quality trades), 1.0 = very aggressive (more trades)
+  entry_aggressiveness: 0.5
+  # Exit aggressiveness: 0.0 = very conservative (let profits run), 1.0 = very aggressive (quick exits)
+  exit_aggressiveness: 0.5
+
+  # Decision Fusion Configuration
+  decision_fusion:
+    enabled: true  # Use neural network decision fusion instead of programmatic
+    mode: "neural"  # "neural" or "programmatic"
+    input_size: 128  # Size of input features for decision fusion network
+    hidden_size: 256  # Hidden layer size
+    history_length: 20  # Number of recent decisions to include
+    training_interval: 10  # Train decision fusion every 10 decisions in programmatic mode
+    learning_rate: 0.001  # Learning rate for decision fusion network
+    batch_size: 32  # Training batch size
+    min_samples_for_training: 20  # Lower threshold for faster training in programmatic mode
+
+# Training Configuration
+training:
+  learning_rate: 0.001
+  batch_size: 32
+  epochs: 100
+  validation_split: 0.2
+  early_stopping_patience: 10
+
+  # CNN specific training
+  cnn_training_interval: 3600  # Train CNN every hour (was 6 hours)
+  min_perfect_moves: 50  # Reduced from 200 for faster learning
+
+  # RL specific training
+  rl_training_interval: 300  # Train RL every 5 minutes (was 1 hour)
+  min_experiences: 50  # Reduced from 100 for faster learning
+  training_steps_per_cycle: 20  # Increased from 10 for more learning
+
+  model_type: "optimized_short_term"
+  use_realtime: true
+  use_ticks: true
+  checkpoint_dir: "NN/models/saved/realtime_ticks_checkpoints"
+  save_best_model: true
+  save_final_model: false  # We only want to keep the best performing model
+
+  # Continuous learning settings
+  continuous_learning: true
+  adaptive_learning_rate: true
+  performance_threshold: 0.6
+
+# Enhanced Training System Configuration
+enhanced_training:
+  enabled: true  # Enable enhanced real-time training
+  auto_start: true  # Automatically start training when orchestrator starts
+  training_intervals:
+    cob_rl_training_interval: 1  # Train COB RL every 1 second (HIGHEST PRIORITY)
+    dqn_training_interval: 5  # Train DQN every 5 seconds
+    cnn_training_interval: 10  # Train CNN every 10 seconds
+    validation_interval: 60  # Validate every minute
+  batch_size: 64  # Training batch size
+  memory_size: 10000  # Experience buffer size
+  min_training_samples: 100  # Minimum samples before training starts
+  adaptation_threshold: 0.1  # Performance threshold for adaptation
+  forward_looking_predictions: true  # Enable forward-looking prediction validation
+
+  # COB RL Priority Settings (since order book imbalance predicts price moves)
+  cob_rl_priority: true  # Enable COB RL as highest priority model
+  cob_rl_batch_size: 16  # Smaller batches for faster COB updates
+  cob_rl_min_samples: 5  # Lower threshold for COB training
\ No newline at end of file
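Taken together, the intended flow is: Config._load_config() overlays models.yml onto config.yaml, the orchestrator passes the merged rl section to DQNAgent, and DQNAgent forwards it to both DQNNetwork instances. A sketch of that flow (assumes Config exposes the merged rl section attribute-style, as the self.config.rl access in core/orchestrator.py suggests; the state size is illustrative, since the orchestrator computes actual_state_size at runtime):

    from core.config import Config
    from NN.models.dqn_agent import DQNAgent

    cfg = Config()    # loads config.yaml, then merges models.yml on top
    rl_cfg = cfg.rl   # now carries the network_architecture block from models.yml

    agent = DQNAgent(
        state_shape=7850,                         # illustrative feature count
        n_actions=rl_cfg.get("action_space", 3),
        config=rl_cfg,                            # reaches policy_net and target_net
    )

Since DQNNetwork applies the same reduced defaults when config is None, existing call sites that do not pass config keep working, just without models.yml control over the layer sizes.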