rl cob agent

2025-06-24 15:39:22 +03:00
parent 1f3166e1e5
commit ec420c2a5f
5 changed files with 1974 additions and 0 deletions
--- a/config.yaml
+++ b/config.yaml
@@ -196,6 +196,50 @@ memory:
  model_limit_gb: 4.0       # Per-model memory limit
  cleanup_interval: 1800    # Memory cleanup every 30 minutes
  
+# Real-time RL COB Trader Configuration
+realtime_rl:
+  # Model hyperparameters (sized for a 1B parameter network)
+  model:
+    input_size: 2000          # COB feature dimensions
+    hidden_size: 4096         # Massive hidden layer size
+    num_layers: 12            # Deep transformer layers
+    learning_rate: 0.00001    # 1e-5; kept very low for stability
+    weight_decay: 0.000001    # 1e-6; L2 regularization
+    
+  # Inference configuration
+  inference_interval_ms: 200  # Milliseconds between inference runs (200 ms = 5 per second)
+  min_confidence_threshold: 0.7  # Minimum confidence for a prediction to count toward signal accumulation
+  required_confident_predictions: 3  # Confident predictions needed before a trade
+  
+  # Training configuration
+  training_interval_s: 1.0    # Seconds between training steps
+  batch_size: 32              # Training batch size
+  replay_buffer_size: 1000    # Keep the last 1000 predictions for training
+  
+  # Signal accumulation (NOTE(review): consensus_threshold equals required_confident_predictions above; confirm both are read)
+  signal_buffer_size: 10      # Buffer size for signal accumulation
+  consensus_threshold: 3      # Signals in the same direction needed for consensus
+  
+  # Model checkpointing
+  model_checkpoint_dir: "models/realtime_rl_cob"  # Relative path; presumably resolved against the working directory (verify)
+  save_interval_s: 300        # Seconds between model saves (300 s = 5 minutes)
+  
+  # COB integration
+  symbols: ["BTC/USDT", "ETH/USDT"]  # Symbols to trade
+  cob_feature_normalization: "robust"  # Feature normalization method
+  
+  # Reward engineering for RL
+  reward_structure:
+    correct_direction_base: 1.0     # Base reward for a correct direction prediction
+    confidence_scaling: true        # Scale reward by prediction confidence
+    magnitude_bonus: 0.5           # Bonus for predicting magnitude accurately
+    overconfidence_penalty: 1.5    # Penalty multiplier for wrong high-confidence predictions
+    trade_execution_multiplier: 10.0  # Higher weight for actual trade outcomes
+    
+  # Performance monitoring
+  statistics_interval_s: 60   # Seconds between stats printouts (60 s = 1 minute)
+  detailed_logging: true      # Enable detailed performance logging
+
 # Web Dashboard
 web:
  host: "127.0.0.1"