---
# Model Configurations
# This file contains all model-specific configurations to keep the main
# config.yaml clean.

# Enhanced CNN Configuration (disabled: the CNN does not use this yml file now)
# cnn:
#   window_size: 20
#   features: ["open", "high", "low", "close", "volume"]
#   timeframes: ["1s", "1m", "1h", "1d"]
#   hidden_layers: [64, 128, 256]
#   dropout: 0.2
#   learning_rate: 0.001
#   batch_size: 32
#   epochs: 100
#   confidence_threshold: 0.6
#   early_stopping_patience: 10
#   model_dir: "models/enhanced_cnn"
#   # Ultra-fast scalping weights (500x leverage)
#   timeframe_importance:
#     "1s": 0.60  # Primary scalping signal
#     "1m": 0.20  # Short-term confirmation
#     "1h": 0.15  # Medium-term trend
#     "1d": 0.05  # Long-term direction (minimal)

# Enhanced RL Agent Configuration
rl:
  state_size: 100  # Will be calculated dynamically based on features
  action_space: 3  # BUY, HOLD, SELL
  hidden_size: 256
  epsilon: 1.0
  epsilon_decay: 0.995
  epsilon_min: 0.01
  learning_rate: 0.0001
  gamma: 0.99
  # NOTE(review): memory_size and buffer_size carry the same value; confirm
  # which of the two keys the agent actually reads before changing either.
  memory_size: 10000
  batch_size: 64
  target_update_freq: 1000
  buffer_size: 10000
  model_dir: "models/enhanced_rl"

  # DQN Network Architecture Configuration
  network_architecture:
    # Feature extractor layers (reduced by half from original)
    # Reduced from [8192, 6144, 4096, 3072, 2048]
    feature_layers: [4096, 3072, 2048, 1536, 1024]
    # Market regime detection head
    regime_head: [512, 256]  # Reduced from [1024, 512]
    # Price direction prediction head
    price_direction_head: [512, 256]  # Reduced from [1024, 512]
    # Volatility prediction head
    volatility_head: [512, 128]  # Reduced from [1024, 256]
    # Main Q-value head (dueling architecture)
    value_head: [512, 256]  # Reduced from [1024, 512]
    advantage_head: [512, 256]  # Reduced from [1024, 512]
    # Dropout rate
    dropout_rate: 0.1
    # Layer normalization
    use_layer_norm: true

  # Market regime adaptation
  market_regime_weights:
    trending: 1.2  # Higher confidence in trending markets
    ranging: 0.8  # Lower confidence in ranging markets
    volatile: 0.6  # Much lower confidence in volatile markets

  # Prioritized experience replay
  replay_alpha: 0.6  # Priority exponent
  replay_beta: 0.4  # Importance sampling exponent

# Real-time RL COB Trader Configuration
realtime_rl:
  # Model parameters for 400M parameter network (faster startup)
  model:
    input_size: 2000  # COB feature dimensions
    hidden_size: 2048  # Optimized hidden layer size for 400M params
    num_layers: 8  # Efficient transformer layers for faster training
    learning_rate: 0.0001  # Higher learning rate for faster convergence
    weight_decay: 0.00001  # Balanced L2 regularization

  # Inference configuration
  inference_interval_ms: 200  # Inference every 200ms
  min_confidence_threshold: 0.7  # Minimum confidence for signal accumulation
  required_confident_predictions: 3  # Need 3 confident predictions for trade

  # Training configuration
  training_interval_s: 1.0  # Train every second
  batch_size: 32  # Training batch size
  replay_buffer_size: 1000  # Store last 1000 predictions for training

  # Signal accumulation
  signal_buffer_size: 10  # Buffer size for signal accumulation
  consensus_threshold: 3  # Need 3 signals in same direction

  # Model checkpointing
  model_checkpoint_dir: "models/realtime_rl_cob"
  save_interval_s: 300  # Save models every 5 minutes

  # COB integration
  symbols: ["BTC/USDT", "ETH/USDT"]  # Symbols to trade
  cob_feature_normalization: "robust"  # Feature normalization method

  # Reward engineering for RL
  reward_structure:
    correct_direction_base: 1.0  # Base reward for correct prediction
    confidence_scaling: true  # Scale reward by confidence
    magnitude_bonus: 0.5  # Bonus for predicting magnitude accurately
    # Penalty multiplier for wrong high-confidence predictions
    overconfidence_penalty: 1.5
    # Higher weight for actual trade outcomes
    trade_execution_multiplier: 10.0

  # Performance monitoring
  statistics_interval_s: 60  # Print stats every minute
  detailed_logging: true  # Enable detailed performance logging

# Enhanced Orchestrator Settings
orchestrator:
  # Model weights for decision combination (the two weights sum to 1.0)
  cnn_weight: 0.7  # Weight for CNN predictions
  rl_weight: 0.3  # Weight for RL decisions
  confidence_threshold: 0.45
  confidence_threshold_close: 0.35
  decision_frequency: 30

  # Multi-symbol coordination
  symbol_correlation_matrix:
    "ETH/USDT-BTC/USDT": 0.85  # ETH-BTC correlation

  # Perfect move marking
  perfect_move_threshold: 0.02  # 2% price change to mark as significant
  perfect_move_buffer_size: 10000

  # RL evaluation settings
  evaluation_delay: 3600  # Evaluate actions after 1 hour
  reward_calculation:
    success_multiplier: 10  # Reward for correct predictions
    failure_penalty: 5  # Penalty for wrong predictions
    confidence_scaling: true  # Scale rewards by confidence

  # Entry aggressiveness: 0.0 = very conservative (fewer, higher quality
  # trades), 1.0 = very aggressive (more trades)
  entry_aggressiveness: 0.5
  # Exit aggressiveness: 0.0 = very conservative (let profits run),
  # 1.0 = very aggressive (quick exits)
  exit_aggressiveness: 0.5

  # Decision Fusion Configuration
  # NOTE(review): placed under `orchestrator` on the assumption that the
  # orchestrator reads it from its own section; if the loader expects a
  # top-level `decision_fusion` key, dedent this mapping - TODO confirm.
  decision_fusion:
    # Use neural network decision fusion instead of programmatic
    enabled: true
    mode: "neural"  # "neural" or "programmatic"
    input_size: 128  # Size of input features for decision fusion network
    hidden_size: 256  # Hidden layer size
    history_length: 20  # Number of recent decisions to include
    # Train decision fusion every 10 decisions in programmatic mode
    training_interval: 10
    learning_rate: 0.001  # Learning rate for decision fusion network
    batch_size: 32  # Training batch size
    # Lower threshold for faster training in programmatic mode
    min_samples_for_training: 20

# Training Configuration
training:
  learning_rate: 0.001
  batch_size: 32
  epochs: 100
  validation_split: 0.2
  early_stopping_patience: 10

  # CNN specific training
  cnn_training_interval: 3600  # Train CNN every hour (was 6 hours)
  min_perfect_moves: 50  # Reduced from 200 for faster learning

  # RL specific training
  rl_training_interval: 300  # Train RL every 5 minutes (was 1 hour)
  min_experiences: 50  # Reduced from 100 for faster learning
  training_steps_per_cycle: 20  # Increased from 10 for more learning

  model_type: "optimized_short_term"
  use_realtime: true
  use_ticks: true
  checkpoint_dir: "NN/models/saved/realtime_ticks_checkpoints"
  save_best_model: true
  save_final_model: false  # We only want to keep the best performing model

  # Continuous learning settings
  continuous_learning: true
  adaptive_learning_rate: true
  performance_threshold: 0.6

# Enhanced Training System Configuration
enhanced_training:
  enabled: true  # Enable enhanced real-time training
  auto_start: true  # Automatically start training when orchestrator starts
  training_intervals:
    # Train COB RL every 1 second (HIGHEST PRIORITY)
    cob_rl_training_interval: 1
    dqn_training_interval: 5  # Train DQN every 5 seconds
    cnn_training_interval: 10  # Train CNN every 10 seconds
    validation_interval: 60  # Validate every minute
  batch_size: 64  # Training batch size
  memory_size: 10000  # Experience buffer size
  min_training_samples: 100  # Minimum samples before training starts
  adaptation_threshold: 0.1  # Performance threshold for adaptation
  # Enable forward-looking prediction validation
  forward_looking_predictions: true

  # COB RL Priority Settings (since order book imbalance predicts price moves)
  cob_rl_priority: true  # Enable COB RL as highest priority model
  cob_rl_batch_size: 16  # Smaller batches for faster COB updates
  cob_rl_min_samples: 5  # Lower threshold for COB training