models overhaul

2025-07-29 19:22:04 +03:00
parent 0b5fa07498
commit b1ae557843
7 changed files with 465 additions and 430 deletions
--- a/config.yaml
+++ b/config.yaml
@@ -88,119 +88,14 @@ data:
    market_regime_detection: true
    volatility_analysis: true

-# Enhanced CNN Configuration
-cnn:
-  window_size: 20
-  features: ["open", "high", "low", "close", "volume"]
-  timeframes: ["1m", "5m", "15m", "1h", "4h", "1d"]
-  hidden_layers: [64, 128, 256]
-  dropout: 0.2
-  learning_rate: 0.001
-  batch_size: 32
-  epochs: 100
-  confidence_threshold: 0.6
-  early_stopping_patience: 10
-  model_dir: "models/enhanced_cnn"  # Ultra-fast scalping weights (500x leverage)
-  timeframe_importance:
-    "1s": 0.60   # Primary scalping signal
-    "1m": 0.20   # Short-term confirmation
-    "1h": 0.15   # Medium-term trend
-    "1d": 0.05   # Long-term direction (minimal)
-
-# Enhanced RL Agent Configuration
-rl:
-  state_size: 100  # Will be calculated dynamically based on features
-  action_space: 3  # BUY, HOLD, SELL
-  hidden_size: 256
-  epsilon: 1.0
-  epsilon_decay: 0.995
-  epsilon_min: 0.01
-  learning_rate: 0.0001
-  gamma: 0.99
-  memory_size: 10000
-  batch_size: 64
-  target_update_freq: 1000
-  buffer_size: 10000
-  model_dir: "models/enhanced_rl"
-  # Market regime adaptation
-  market_regime_weights:
-    trending: 1.2    # Higher confidence in trending markets
-    ranging: 0.8     # Lower confidence in ranging markets
-    volatile: 0.6    # Much lower confidence in volatile markets
-  # Prioritized experience replay
-  replay_alpha: 0.6  # Priority exponent
-  replay_beta: 0.4   # Importance sampling exponent
-
-# Enhanced Orchestrator Settings
-orchestrator:
-  # Model weights for decision combination
-  cnn_weight: 0.7      # Weight for CNN predictions
-  rl_weight: 0.3       # Weight for RL decisions
-  confidence_threshold: 0.45
-  confidence_threshold_close: 0.35
-  decision_frequency: 30
-  
-  # Multi-symbol coordination
-  symbol_correlation_matrix:
-    "ETH/USDT-BTC/USDT": 0.85  # ETH-BTC correlation
-    
-  # Perfect move marking
-  perfect_move_threshold: 0.02  # 2% price change to mark as significant
-  perfect_move_buffer_size: 10000
-  
-  # RL evaluation settings
-  evaluation_delay: 3600  # Evaluate actions after 1 hour
-  reward_calculation:
-    success_multiplier: 10    # Reward for correct predictions
-    failure_penalty: 5        # Penalty for wrong predictions
-    confidence_scaling: true   # Scale rewards by confidence
-
-  # Entry aggressiveness: 0.0 = very conservative (fewer, higher quality trades), 1.0 = very aggressive (more trades)
-  entry_aggressiveness: 0.5
-  # Exit aggressiveness: 0.0 = very conservative (let profits run), 1.0 = very aggressive (quick exits)
-  exit_aggressiveness: 0.5
-  
-  # Decision Fusion Configuration
-  decision_fusion:
-    enabled: true                    # Use neural network decision fusion instead of programmatic
-    mode: "neural"                   # "neural" or "programmatic"
-    input_size: 128                  # Size of input features for decision fusion network
-    hidden_size: 256                 # Hidden layer size
-    history_length: 20               # Number of recent decisions to include
-    training_interval: 10            # Train decision fusion every 10 decisions in programmatic mode
-    learning_rate: 0.001             # Learning rate for decision fusion network
-    batch_size: 32                   # Training batch size
-    min_samples_for_training: 20     # Lower threshold for faster training in programmatic mode
-
-# Training Configuration
-training:
-  learning_rate: 0.001
-  batch_size: 32
-  epochs: 100
-  validation_split: 0.2
-  early_stopping_patience: 10
-  
-  # CNN specific training
-  cnn_training_interval: 3600    # Train CNN every hour (was 6 hours)
-  min_perfect_moves: 50          # Reduced from 200 for faster learning
-  
-  # RL specific training  
-  rl_training_interval: 300      # Train RL every 5 minutes (was 1 hour)
-  min_experiences: 50            # Reduced from 100 for faster learning
-  training_steps_per_cycle: 20   # Increased from 10 for more learning
-
-  model_type: "optimized_short_term"
-  use_realtime: true
-  use_ticks: true
-  checkpoint_dir: "NN/models/saved/realtime_ticks_checkpoints"
-  save_best_model: true
-  save_final_model: false  # We only want to keep the best performing model
-  
-  # Continuous learning settings
-  continuous_learning: true
-  learning_from_trades: true
-  pattern_recognition: true
-  retrospective_learning: true
+# Model configurations have been moved to models.yml for better organization
+# See models.yml for all model-specific settings including:
+# - CNN configuration
+# - RL/DQN configuration
+# - Orchestrator settings
+# - Training configuration
+# - Enhanced training system
+# - Real-time RL COB trader

 # Universal Trading Configuration (applies to all exchanges)
 trading:
@@ -227,69 +122,7 @@ memory:
  model_limit_gb: 4.0       # Per-model memory limit
  cleanup_interval: 1800    # Memory cleanup every 30 minutes
  
-# Enhanced Training System Configuration
-enhanced_training:
-  enabled: true                    # Enable enhanced real-time training
-  auto_start: true                 # Automatically start training when orchestrator starts
-  training_intervals:
-    cob_rl_training_interval: 1    # Train COB RL every 1 second (HIGHEST PRIORITY)
-    dqn_training_interval: 5       # Train DQN every 5 seconds
-    cnn_training_interval: 10      # Train CNN every 10 seconds
-    validation_interval: 60        # Validate every minute
-  batch_size: 64                   # Training batch size
-  memory_size: 10000              # Experience buffer size
-  min_training_samples: 100       # Minimum samples before training starts
-  adaptation_threshold: 0.1        # Performance threshold for adaptation
-  forward_looking_predictions: true # Enable forward-looking prediction validation
-  
-  # COB RL Priority Settings (since order book imbalance predicts price moves)
-  cob_rl_priority: true            # Enable COB RL as highest priority model
-  cob_rl_batch_size: 16           # Smaller batches for faster COB updates
-  cob_rl_min_samples: 5           # Lower threshold for COB training
-  
-# Real-time RL COB Trader Configuration
-realtime_rl:
-  # Model parameters for 400M parameter network (faster startup)
-  model:
-    input_size: 2000          # COB feature dimensions
-    hidden_size: 2048         # Optimized hidden layer size for 400M params
-    num_layers: 8             # Efficient transformer layers for faster training
-    learning_rate: 0.0001     # Higher learning rate for faster convergence
-    weight_decay: 0.00001     # Balanced L2 regularization
-    
-  # Inference configuration  
-  inference_interval_ms: 200  # Inference every 200ms
-  min_confidence_threshold: 0.7  # Minimum confidence for signal accumulation
-  required_confident_predictions: 3  # Need 3 confident predictions for trade
-  
-  # Training configuration
-  training_interval_s: 1.0    # Train every second
-  batch_size: 32              # Training batch size
-  replay_buffer_size: 1000    # Store last 1000 predictions for training
-  
-  # Signal accumulation
-  signal_buffer_size: 10      # Buffer size for signal accumulation
-  consensus_threshold: 3      # Need 3 signals in same direction
-  
-  # Model checkpointing
-  model_checkpoint_dir: "models/realtime_rl_cob"
-  save_interval_s: 300        # Save models every 5 minutes
-  
-  # COB integration
-  symbols: ["BTC/USDT", "ETH/USDT"]  # Symbols to trade
-  cob_feature_normalization: "robust"  # Feature normalization method
-  
-  # Reward engineering for RL
-  reward_structure:
-    correct_direction_base: 1.0     # Base reward for correct prediction
-    confidence_scaling: true        # Scale reward by confidence
-    magnitude_bonus: 0.5           # Bonus for predicting magnitude accurately
-    overconfidence_penalty: 1.5    # Penalty multiplier for wrong high-confidence predictions
-    trade_execution_multiplier: 10.0  # Higher weight for actual trade outcomes
-    
-  # Performance monitoring
-  statistics_interval_s: 60   # Print stats every minute
-  detailed_logging: true      # Enable detailed performance logging
+# Enhanced training and real-time RL configurations have been moved to models.yml

 # Web Dashboard
 web: