# Model Configurations
# This file contains all model-specific configurations to keep the main config.yaml clean

# Enhanced CNN Configuration (the CNN does not use this YAML file now)
# cnn:
#   window_size: 20
#   features: ["open", "high", "low", "close", "volume"]
#   timeframes: ["1s", "1m", "1h", "1d"]
#   hidden_layers: [64, 128, 256]
#   dropout: 0.2
#   learning_rate: 0.001
#   batch_size: 32
#   epochs: 100
#   confidence_threshold: 0.6
#   early_stopping_patience: 10
#   model_dir: "models/enhanced_cnn"  # Ultra-fast scalping weights (500x leverage)
#   timeframe_importance:
#     "1s": 0.60   # Primary scalping signal
#     "1m": 0.20   # Short-term confirmation
#     "1h": 0.15   # Medium-term trend
#     "1d": 0.05   # Long-term direction (minimal)

# Enhanced RL Agent Configuration
rl:
  # Will be calculated dynamically based on features
  state_size: 100
  # Actions: BUY, HOLD, SELL
  action_space: 3
  hidden_size: 256
  epsilon: 1.0
  epsilon_decay: 0.995
  epsilon_min: 0.01
  learning_rate: 0.0001
  gamma: 0.99
  # NOTE(review): memory_size and buffer_size (below) hold the same value;
  # confirm which key the agent reads before changing only one of them.
  memory_size: 10000
  batch_size: 64
  target_update_freq: 1000
  buffer_size: 10000
  model_dir: "models/enhanced_rl"

  # DQN network architecture
  network_architecture:
    # Feature extractor layers, reduced by half from the original
    # [8192, 6144, 4096, 3072, 2048]
    feature_layers: [4096, 3072, 2048, 1536, 1024]
    # Market regime detection head (reduced from [1024, 512])
    regime_head: [512, 256]
    # Price direction prediction head (reduced from [1024, 512])
    price_direction_head: [512, 256]
    # Volatility prediction head (reduced from [1024, 256])
    volatility_head: [512, 128]
    # Main Q-value head (dueling architecture);
    # value and advantage heads are each reduced from [1024, 512]
    value_head: [512, 256]
    advantage_head: [512, 256]
    # Dropout rate
    dropout_rate: 0.1
    # Layer normalization
    use_layer_norm: true

  # Market regime adaptation
  market_regime_weights:
    # Higher confidence in trending markets
    trending: 1.2
    # Lower confidence in ranging markets
    ranging: 0.8
    # Much lower confidence in volatile markets
    volatile: 0.6

  # Prioritized experience replay
  # Priority exponent
  replay_alpha: 0.6
  # Importance sampling exponent
  replay_beta: 0.4

# Real-time RL COB Trader Configuration
realtime_rl:
  # Model parameters for the 400M parameter network (faster startup)
  model:
    # COB feature dimensions
    input_size: 2000
    # Optimized hidden layer size for 400M params
    hidden_size: 2048
    # Efficient transformer layers for faster training
    num_layers: 8
    # Higher learning rate for faster convergence
    learning_rate: 0.0001
    # Balanced L2 regularization
    weight_decay: 0.00001

  # Inference configuration
  # Inference every 200ms
  inference_interval_ms: 200
  # Minimum confidence for signal accumulation
  min_confidence_threshold: 0.7
  # Need 3 confident predictions for a trade
  required_confident_predictions: 3

  # Training configuration
  # Train every second
  training_interval_s: 1.0
  # Training batch size
  batch_size: 32
  # Store the last 1000 predictions for training
  replay_buffer_size: 1000

  # Signal accumulation
  # Buffer size for signal accumulation
  signal_buffer_size: 10
  # Need 3 signals in the same direction
  consensus_threshold: 3

  # Model checkpointing
  model_checkpoint_dir: "models/realtime_rl_cob"
  # Save models every 5 minutes
  save_interval_s: 300

  # COB integration
  # Symbols to trade
  symbols: ["BTC/USDT", "ETH/USDT"]
  # Feature normalization method
  cob_feature_normalization: "robust"

  # Reward engineering for RL
  reward_structure:
    # Base reward for a correct prediction
    correct_direction_base: 1.0
    # Scale reward by confidence
    confidence_scaling: true
    # Bonus for predicting magnitude accurately
    magnitude_bonus: 0.5
    # Penalty multiplier for wrong high-confidence predictions
    overconfidence_penalty: 1.5
    # Higher weight for actual trade outcomes
    trade_execution_multiplier: 10.0

  # Performance monitoring
  # Print stats every minute
  statistics_interval_s: 60
  # Enable detailed performance logging
  detailed_logging: true

# Enhanced Orchestrator Settings
orchestrator:
  # Model weights for decision combination
  # Weight for CNN predictions
  cnn_weight: 0.7
  # Weight for RL decisions
  rl_weight: 0.3
  confidence_threshold: 0.45
  confidence_threshold_close: 0.35
  decision_frequency: 30

  # Multi-symbol coordination
  symbol_correlation_matrix:
    # ETH-BTC correlation
    "ETH/USDT-BTC/USDT": 0.85

  # Perfect move marking
  # 2% price change to mark a move as significant
  perfect_move_threshold: 0.02
  perfect_move_buffer_size: 10000

  # RL evaluation settings
  # Evaluate actions after 1 hour
  evaluation_delay: 3600
  reward_calculation:
    # Reward for correct predictions
    success_multiplier: 10
    # Penalty for wrong predictions
    failure_penalty: 5
    # Scale rewards by confidence
    confidence_scaling: true

  # Entry aggressiveness:
  #   0.0 = very conservative (fewer, higher quality trades)
  #   1.0 = very aggressive (more trades)
  entry_aggressiveness: 0.5
  # Exit aggressiveness:
  #   0.0 = very conservative (let profits run)
  #   1.0 = very aggressive (quick exits)
  exit_aggressiveness: 0.5

  # Decision fusion configuration
  decision_fusion:
    # Use neural network decision fusion instead of programmatic
    enabled: true
    # "neural" or "programmatic"
    mode: "neural"
    # Size of input features for the decision fusion network
    input_size: 128
    # Hidden layer size
    hidden_size: 256
    # Number of recent decisions to include
    history_length: 20
    # Train decision fusion every 10 decisions in programmatic mode
    training_interval: 10
    # Learning rate for the decision fusion network
    learning_rate: 0.001
    # Training batch size
    batch_size: 32
    # Lower threshold for faster training in programmatic mode
    min_samples_for_training: 20

# Training Configuration
training:
  learning_rate: 0.001
  batch_size: 32
  epochs: 100
  validation_split: 0.2
  early_stopping_patience: 10

  # CNN-specific training
  # Train CNN every hour (was 6 hours)
  cnn_training_interval: 3600
  # Reduced from 200 for faster learning
  min_perfect_moves: 50

  # RL-specific training
  # Train RL every 5 minutes (was 1 hour)
  rl_training_interval: 300
  # Reduced from 100 for faster learning
  min_experiences: 50
  # Increased from 10 for more learning
  training_steps_per_cycle: 20

  model_type: "optimized_short_term"
  use_realtime: true
  use_ticks: true
  checkpoint_dir: "NN/models/saved/realtime_ticks_checkpoints"
  save_best_model: true
  # We only want to keep the best performing model
  save_final_model: false

  # Continuous learning settings
  continuous_learning: true
  adaptive_learning_rate: true
  performance_threshold: 0.6

# Enhanced Training System Configuration
enhanced_training:
  # Enable enhanced real-time training
  enabled: true
  # Automatically start training when the orchestrator starts
  auto_start: true
  training_intervals:
    # Train COB RL every 1 second (HIGHEST PRIORITY)
    cob_rl_training_interval: 1
    # Train DQN every 5 seconds
    dqn_training_interval: 5
    # Train CNN every 10 seconds
    cnn_training_interval: 10
    # Validate every minute
    validation_interval: 60
  # Training batch size
  batch_size: 64
  # Experience buffer size
  memory_size: 10000
  # Minimum samples before training starts
  min_training_samples: 100
  # Performance threshold for adaptation
  adaptation_threshold: 0.1
  # Enable forward-looking prediction validation
  forward_looking_predictions: true

  # COB RL priority settings
  # (since order book imbalance predicts price moves)
  # Enable COB RL as the highest priority model
  cob_rl_priority: true
  # Smaller batches for faster COB updates
  cob_rl_batch_size: 16
  # Lower threshold for COB training
  cob_rl_min_samples: 5