# gogo2/config.yaml

# Enhanced Multi-Modal Trading System Configuration

# System Settings
# Process-wide settings: timestamp timezone, log verbosity, session lifetime.
system:
  timezone: "Europe/Sofia"     # Configurable timezone for all timestamps
  # NOTE(review): logging.level further down in this file also sets a log
  # level; confirm which of the two the application actually reads.
  log_level: "INFO"            # DEBUG, INFO, WARNING, ERROR
  session_timeout: 3600        # Session timeout in seconds (3600 s = 1 hour)

# Cold Start Mode Configuration
# Switches and intervals that apply while the system is in cold start mode.
cold_start:
  enabled: true                # Enable cold start mode logic
  inference_interval: 0.5      # Inference interval (seconds) during cold start
  training_interval: 2         # Training interval (seconds) during cold start
  heavy_adjustments: true      # Allow more aggressive parameter/training adjustments
  log_cold_start: true         # Log when in cold start mode

# Exchange Configuration
# One sub-section per exchange; `primary` names the section used as the
# primary exchange. Each section carries its own mode flags, symbol list,
# position sizing, leverage and fee schedule.
exchanges:
  # NOTE(review): "binance" is listed as an option, but this file has no
  # exchanges.binance section; confirm it is actually selectable.
  primary: "bybit"  # Primary exchange: mexc, deribit, binance, bybit

  # Deribit Configuration
  deribit:
    enabled: true
    # NOTE(review): test_mode is true (testnet) while trading_mode is "live".
    # These look contradictory; confirm which flag the exchange client honors
    # before enabling real orders.
    test_mode: true  # Use testnet for testing
    trading_mode: "live"  # simulation, testnet, live
    supported_symbols: ["BTC-PERPETUAL", "ETH-PERPETUAL"]
    # Position sizing; presumably percent of account balance, mirroring the
    # trading.* keys of the same name (confirm).
    base_position_percent: 5.0
    max_position_percent: 20.0
    leverage: 10.0  # Lower leverage for safer testing
    # Fees are expressed as fractions (0.0005 = 0.05%).
    trading_fees:
      maker_fee: 0.0000    # 0.00% maker fee
      taker_fee: 0.0005    # 0.05% taker fee
      default_fee: 0.0005

  # MEXC Configuration (secondary/backup)
  mexc:
    enabled: false  # Disabled as secondary
    test_mode: true
    trading_mode: "simulation"
    supported_symbols: ["ETH/USDT"]  # MEXC-specific symbol format
    base_position_percent: 5.0
    max_position_percent: 20.0
    leverage: 50.0
    # Fees are expressed as fractions (0.0002 = 0.02%, 0.0006 = 0.06%).
    trading_fees:
      maker_fee: 0.0002
      taker_fee: 0.0006
      default_fee: 0.0006

  # Bybit Configuration
  bybit:
    enabled: true
    test_mode: false  # Use mainnet (credentials are for live trading)
    # NOTE(review): the previous comment on trading_mode claimed it had been
    # "switched to simulation for training", but the value is "live" and
    # test_mode is false, so this section targets real trading as written.
    # Confirm this is intended.
    trading_mode: "live"  # simulation, testnet, live
    supported_symbols: ["BTCUSDT", "ETHUSDT"]  # Bybit perpetual format
    base_position_percent: 5.0
    max_position_percent: 20.0
    leverage: 10.0  # Conservative leverage for safety
    leverage_applied_by_exchange: true  # Broker already applies leverage to P&L
    trading_fees:
      maker_fee: 0.0001    # 0.01% maker fee
      taker_fee: 0.0006    # 0.06% taker fee
      default_fee: 0.0006

# Trading Symbols Configuration
# Primary trading pair: ETH/USDT (signals are generated for this pair)
# Reference pair: BTC/USDT (correlation analysis only, no trading signals)
# NOTE(review): symbols here use the "BASE/QUOTE" form, while the
# exchanges.*.supported_symbols lists use exchange-specific formats;
# presumably a mapping exists in code (confirm).
symbols:
  - "ETH/USDT"  # MAIN TRADING PAIR - Generate signals and execute trades
  - "BTC/USDT"  # REFERENCE ONLY - For correlation analysis, no direct trading

# Timeframes for ultra-fast scalping, listed from shortest to longest
# NOTE(review): this header previously cited "500x leverage", but the exchange
# sections in this file configure leverage of 10.0, 50.0 and 10.0; confirm the
# intended leverage before relying on that figure.
timeframes:
  - "1s"  # Primary scalping timeframe
  - "1m"  # Short-term confirmation
  - "1h"  # Medium-term trend
  - "1d"  # Long-term direction

# Data Provider Settings
data:
  # NOTE(review): the data provider is "binance" while exchanges.primary is
  # "bybit"; confirm that sourcing data and executing trades on different
  # venues is intended.
  provider: "binance"
  cache_enabled: true
  # NOTE(review): a top-level cache_dir key with the same value also exists in
  # this file; confirm which one is read.
  cache_dir: "cache"
  historical_limit: 1000  # presumably max historical records per request - confirm
  real_time_enabled: true
  websocket_reconnect: true
  # Feature-engineering switches
  feature_engineering:
    technical_indicators: true
    market_regime_detection: true
    volatility_analysis: true

# Enhanced CNN Configuration
# NOTE(review): learning_rate, batch_size, epochs and early_stopping_patience
# carry the same values as the keys of the same name under `training`;
# confirm which section takes precedence.
cnn:
  window_size: 20
  features: ["open", "high", "low", "close", "volume"]
  # NOTE(review): this list does not contain "1s", yet timeframe_importance
  # below gives "1s" the largest weight, and "5m"/"15m"/"4h" here have no
  # weight at all. Confirm which set of timeframes the model actually uses.
  timeframes: ["1m", "5m", "15m", "1h", "4h", "1d"]
  hidden_layers: [64, 128, 256]
  dropout: 0.2
  learning_rate: 0.001
  batch_size: 32
  epochs: 100
  confidence_threshold: 0.6
  early_stopping_patience: 10
  model_dir: "models/enhanced_cnn"
  # Ultra-fast scalping weights per timeframe; the four weights sum to 1.0.
  # NOTE(review): the original comment cited "500x leverage", which no
  # exchange section in this file configures.
  timeframe_importance:
    "1s": 0.60   # Primary scalping signal
    "1m": 0.20   # Short-term confirmation
    "1h": 0.15   # Medium-term trend
    "1d": 0.05   # Long-term direction (minimal)

# Enhanced RL Agent Configuration
rl:
  state_size: 100  # Will be calculated dynamically based on features
  action_space: 3  # BUY, HOLD, SELL
  hidden_size: 256
  # Exploration schedule (presumably epsilon-greedy: start value, per-step
  # decay factor, floor - confirm against the agent implementation)
  epsilon: 1.0
  epsilon_decay: 0.995
  epsilon_min: 0.01
  learning_rate: 0.0001
  gamma: 0.99
  # NOTE(review): memory_size and buffer_size hold the same value and look
  # like two names for the same replay capacity; confirm whether both are read.
  memory_size: 10000
  batch_size: 64
  target_update_freq: 1000
  buffer_size: 10000
  model_dir: "models/enhanced_rl"
  # Market regime adaptation
  market_regime_weights:
    trending: 1.2    # Higher confidence in trending markets
    ranging: 0.8     # Lower confidence in ranging markets
    volatile: 0.6    # Much lower confidence in volatile markets
  # Prioritized experience replay
  replay_alpha: 0.6  # Priority exponent
  replay_beta: 0.4   # Importance sampling exponent

# Enhanced Orchestrator Settings
orchestrator:
  # Model weights for decision combination (the two weights sum to 1.0)
  cnn_weight: 0.7      # Weight for CNN predictions
  rl_weight: 0.3       # Weight for RL decisions
  # Confidence thresholds; the *_close variant is lower (0.35 vs 0.45) and
  # presumably applies to closing positions - confirm.
  confidence_threshold: 0.45
  confidence_threshold_close: 0.35
  # NOTE(review): unit not stated here (seconds?) - confirm.
  decision_frequency: 30

  # Multi-symbol coordination
  symbol_correlation_matrix:
    "ETH/USDT-BTC/USDT": 0.85  # ETH-BTC correlation

  # Perfect move marking
  perfect_move_threshold: 0.02  # 2% price change to mark as significant
  perfect_move_buffer_size: 10000

  # RL evaluation settings
  evaluation_delay: 3600  # Evaluate actions after 1 hour
  reward_calculation:
    success_multiplier: 10    # Reward for correct predictions
    failure_penalty: 5        # Penalty for wrong predictions
    confidence_scaling: true   # Scale rewards by confidence

  # Entry aggressiveness: 0.0 = very conservative (fewer, higher quality trades), 1.0 = very aggressive (more trades)
  entry_aggressiveness: 0.5
  # Exit aggressiveness: 0.0 = very conservative (let profits run), 1.0 = very aggressive (quick exits)
  exit_aggressiveness: 0.5

  # Decision Fusion Configuration
  decision_fusion:
    # NOTE(review): the previous comment on `enabled` said "use neural network
    # decision fusion instead of programmatic", but `mode` below is
    # "programmatic". Presumably `enabled` only toggles fusion and `mode`
    # selects the strategy - confirm.
    enabled: true                    # Enable decision fusion
    mode: "programmatic"                   # "neural" or "programmatic"
    input_size: 128                  # Size of input features for decision fusion network
    hidden_size: 256                 # Hidden layer size
    history_length: 20               # Number of recent decisions to include
    training_interval: 10            # Train decision fusion every 10 decisions in programmatic mode
    learning_rate: 0.001             # Learning rate for decision fusion network
    batch_size: 32                   # Training batch size
    min_samples_for_training: 20     # Lower threshold for faster training in programmatic mode

# Training Configuration
training:
  # General hyperparameters
  learning_rate: 0.001
  batch_size: 32
  epochs: 100
  validation_split: 0.2
  early_stopping_patience: 10

  # CNN specific training
  # NOTE(review): enhanced_training.training_intervals.cnn_training_interval
  # in this file is 10 (seconds) while this key is 3600; confirm which one
  # governs CNN training.
  cnn_training_interval: 3600    # Train CNN every hour (was 6 hours)
  min_perfect_moves: 50          # Reduced from 200 for faster learning

  # RL specific training
  rl_training_interval: 300      # Train RL every 5 minutes (was 1 hour)
  min_experiences: 50            # Reduced from 100 for faster learning
  training_steps_per_cycle: 20   # Increased from 10 for more learning

  # Model selection and checkpointing
  model_type: "optimized_short_term"
  use_realtime: true
  use_ticks: true
  checkpoint_dir: "NN/models/saved/realtime_ticks_checkpoints"
  save_best_model: true
  save_final_model: false  # We only want to keep the best performing model

  # Continuous learning settings
  continuous_learning: true
  learning_from_trades: true
  pattern_recognition: true
  retrospective_learning: true

# Universal Trading Configuration (applies to all exchanges)
# NOTE(review): base/max position percentages are repeated with the same
# values in every exchanges.* section of this file; confirm precedence.
trading:
  enabled: true

  # Position sizing as percentage of account balance
  base_position_percent: 5.0     # 5% base position of account
  max_position_percent: 20.0     # 20% max position of account
  min_position_percent: 2.0      # 2% min position of account
  simulation_account_usd: 100.0  # $100 simulation account balance

  # Risk management
  # NOTE(review): max_daily_loss_usd (200) is larger than
  # simulation_account_usd (100), so in simulation this cap can never be
  # reached before the account is exhausted - confirm the intended value.
  max_daily_loss_usd: 200.0
  max_concurrent_positions: 3
  min_trade_interval_seconds: 5  # Minimum time between trades
  consecutive_loss_reduction_factor: 0.8 # Reduce position size by 20% after each consecutive loss

  # Order configuration (can be overridden by exchange-specific settings)
  order_type: market  # market or limit

# Memory Management
# Limits are in gigabytes; the cleanup interval is in seconds.
memory:
  total_limit_gb: 28.0       # Total system memory limit
  model_limit_gb: 4.0       # Per-model memory limit
  cleanup_interval: 1800    # Memory cleanup every 30 minutes

# Enhanced Training System Configuration
enhanced_training:
  enabled: true                    # Enable enhanced real-time training
  auto_start: true                 # Automatically start training when orchestrator starts
  # All intervals below are in seconds.
  # NOTE(review): training.cnn_training_interval in this file is 3600 while
  # cnn_training_interval here is 10; confirm which one is in effect.
  training_intervals:
    cob_rl_training_interval: 1    # Train COB RL every 1 second (HIGHEST PRIORITY)
    dqn_training_interval: 5       # Train DQN every 5 seconds
    cnn_training_interval: 10      # Train CNN every 10 seconds
    validation_interval: 60        # Validate every minute
  batch_size: 64                   # Training batch size
  memory_size: 10000              # Experience buffer size
  min_training_samples: 100       # Minimum samples before training starts
  adaptation_threshold: 0.1        # Performance threshold for adaptation
  forward_looking_predictions: true # Enable forward-looking prediction validation

  # COB RL Priority Settings (since order book imbalance predicts price moves)
  cob_rl_priority: true            # Enable COB RL as highest priority model
  cob_rl_batch_size: 16           # Smaller batches for faster COB updates
  cob_rl_min_samples: 5           # Lower threshold for COB training

# Real-time RL COB Trader Configuration
realtime_rl:
  # Model parameters for 400M parameter network (faster startup)
  model:
    input_size: 2000          # COB feature dimensions
    hidden_size: 2048         # Optimized hidden layer size for 400M params
    num_layers: 8             # Efficient transformer layers for faster training
    # NOTE(review): "higher" has no visible baseline here; rl.learning_rate in
    # this file is also 0.0001 - confirm the comment is still accurate.
    learning_rate: 0.0001     # Higher learning rate for faster convergence
    weight_decay: 0.00001     # Balanced L2 regularization

  # Inference configuration
  inference_interval_ms: 200  # Inference every 200ms
  min_confidence_threshold: 0.7  # Minimum confidence for signal accumulation
  required_confident_predictions: 3  # Need 3 confident predictions for trade

  # Training configuration
  training_interval_s: 1.0    # Train every second
  batch_size: 32              # Training batch size
  replay_buffer_size: 1000    # Store last 1000 predictions for training

  # Signal accumulation
  signal_buffer_size: 10      # Buffer size for signal accumulation
  consensus_threshold: 3      # Need 3 signals in same direction

  # Model checkpointing
  model_checkpoint_dir: "models/realtime_rl_cob"
  save_interval_s: 300        # Save models every 5 minutes

  # COB integration
  # NOTE(review): BTC/USDT is listed here as a symbol to trade, whereas the
  # top-level `symbols` section of this file marks it as reference-only;
  # confirm whether this trader may open BTC positions.
  symbols: ["BTC/USDT", "ETH/USDT"]  # Symbols to trade
  cob_feature_normalization: "robust"  # Feature normalization method

  # Reward engineering for RL
  reward_structure:
    correct_direction_base: 1.0     # Base reward for correct prediction
    confidence_scaling: true        # Scale reward by confidence
    magnitude_bonus: 0.5           # Bonus for predicting magnitude accurately
    overconfidence_penalty: 1.5    # Penalty multiplier for wrong high-confidence predictions
    trade_execution_multiplier: 10.0  # Higher weight for actual trade outcomes

  # Performance monitoring
  statistics_interval_s: 60   # Print stats every minute
  detailed_logging: true      # Enable detailed performance logging

# Web Dashboard
web:
  host: "127.0.0.1"  # Loopback address: dashboard is bound to the local machine only
  port: 8050
  debug: false
  update_interval: 500  # Milliseconds
  chart_history: 200     # Number of candles to show

  # Enhanced dashboard features
  show_timeframe_analysis: true
  show_confidence_scores: true
  show_perfect_moves: true
  show_rl_metrics: true

# Logging
# NOTE(review): system.log_level in this file also sets a log level; confirm
# how it interacts with `level` below.
logging:
  level: "INFO"
  # %-style placeholders; looks like a Python `logging` format string - confirm.
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  file: "logs/enhanced_trading.log"
  max_size: 10485760  # 10 MiB (10 * 1024 * 1024 bytes)
  backup_count: 5     # presumably number of rotated log files kept - confirm

  # Component-specific logging
  orchestrator_level: "INFO"
  cnn_level: "INFO"
  rl_level: "INFO"
  training_level: "INFO"

# Base Directories (models, data, cache, logs)
# All paths are relative; presumably resolved against the working directory
# or project root - confirm.
model_dir: "models"
data_dir: "data"
# NOTE(review): data.cache_dir in this file holds the same value; confirm
# which key is read.
cache_dir: "cache"
logs_dir: "logs"

# GPU/Performance
gpu:
  enabled: true
  memory_fraction: 0.8  # Use 80% of GPU memory
  allow_growth: true    # Allow dynamic memory allocation

# Monitoring and Alerting
monitoring:
  tensorboard_enabled: true
  tensorboard_log_dir: "logs/tensorboard"
  metrics_interval: 300     # Log metrics every 5 minutes
  performance_alerts: true

  # Performance thresholds
  # NOTE(review): presumably crossing one of these triggers an alert when
  # performance_alerts is true - confirm.
  min_confidence_threshold: 0.3
  max_memory_usage: 0.9     # 90% of available memory
  max_decision_latency: 10  # 10 seconds max per decision

# Backtesting (for future implementation)
backtesting:
  # Dates are quoted so they load as strings rather than YAML date values.
  start_date: "2024-01-01"
  end_date: "2024-12-31"
  initial_balance: 10000
  # presumably fractions of trade value (0.0002 = 0.02%, 0.0001 = 0.01%) - confirm
  commission: 0.0002
  slippage: 0.0001

# Saved model checkpoint paths
# backup_model lives under the directory configured as training.checkpoint_dir
# ("NN/models/saved/realtime_ticks_checkpoints").
model_paths:
  realtime_model: "NN/models/saved/optimized_short_term_model_realtime_best.pt"
  ticks_model: "NN/models/saved/optimized_short_term_model_ticks_best.pt"
  backup_model: "NN/models/saved/realtime_ticks_checkpoints/checkpoint_epoch_50449_backup/model.pt"