rl cob agent

This commit is contained in:
Dobromir Popov
2025-06-24 15:39:22 +03:00
parent 1f3166e1e5
commit ec420c2a5f
5 changed files with 1974 additions and 0 deletions

View File

@ -196,6 +196,50 @@ memory:
model_limit_gb: 4.0 # Per-model memory limit
cleanup_interval: 1800 # Memory cleanup every 30 minutes
# Real-time RL COB Trader Configuration
realtime_rl:
# Model parameters for 1B parameter network
model:
input_size: 2000 # COB feature dimensions
hidden_size: 4096 # Massive hidden layer size
num_layers: 12 # Deep transformer layers
learning_rate: 0.00001 # Very low for stability
weight_decay: 0.000001 # L2 regularization
# Inference configuration
inference_interval_ms: 200 # Inference every 200ms
min_confidence_threshold: 0.7 # Minimum confidence for signal accumulation
required_confident_predictions: 3 # Need 3 confident predictions for trade
# Training configuration
training_interval_s: 1.0 # Train every second
batch_size: 32 # Training batch size
replay_buffer_size: 1000 # Store last 1000 predictions for training
# Signal accumulation
signal_buffer_size: 10 # Buffer size for signal accumulation
consensus_threshold: 3 # Need 3 signals in same direction
# Model checkpointing
model_checkpoint_dir: "models/realtime_rl_cob"
save_interval_s: 300 # Save models every 5 minutes
# COB integration
symbols: ["BTC/USDT", "ETH/USDT"] # Symbols to trade
cob_feature_normalization: "robust" # Feature normalization method
# Reward engineering for RL
reward_structure:
correct_direction_base: 1.0 # Base reward for correct prediction
confidence_scaling: true # Scale reward by confidence
magnitude_bonus: 0.5 # Bonus for predicting magnitude accurately
overconfidence_penalty: 1.5 # Penalty multiplier for wrong high-confidence predictions
trade_execution_multiplier: 10.0 # Higher weight for actual trade outcomes
# Performance monitoring
statistics_interval_s: 60 # Print stats every minute
detailed_logging: true # Enable detailed performance logging
# Web Dashboard
web:
host: "127.0.0.1"