# Model Configurations
# This file contains all model-specific configurations to keep the main
# config.yaml clean.

# Enhanced CNN Configuration (the CNN does not currently use this YAML file)
# cnn:
#   window_size: 20
#   features: ["open", "high", "low", "close", "volume"]
#   timeframes: ["1s", "1m", "1h", "1d"]
#   hidden_layers: [64, 128, 256]
#   dropout: 0.2
#   learning_rate: 0.001
#   batch_size: 32
#   epochs: 100
#   confidence_threshold: 0.6
#   early_stopping_patience: 10
#   model_dir: "models/enhanced_cnn"
#   # Ultra-fast scalping weights (500x leverage)
#   timeframe_importance:
#     "1s": 0.60  # Primary scalping signal
#     "1m": 0.20  # Short-term confirmation
#     "1h": 0.15  # Medium-term trend
#     "1d": 0.05  # Long-term direction (minimal)

# Enhanced RL Agent Configuration
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file - confirm the sub-mapping boundaries against the code that loads it.
rl:
  state_size: 100  # Will be calculated dynamically based on features
  action_space: 3  # BUY, HOLD, SELL
  hidden_size: 256
  epsilon: 1.0
  epsilon_decay: 0.995
  epsilon_min: 0.01
  learning_rate: 0.0001
  gamma: 0.99
  memory_size: 10000
  batch_size: 64
  target_update_freq: 1000
  buffer_size: 10000
  model_dir: "models/enhanced_rl"

  # DQN Network Architecture Configuration
  network_architecture:
    # Feature extractor layers (reduced by half from the original
    # [8192, 6144, 4096, 3072, 2048])
    feature_layers: [4096, 3072, 2048, 1536, 1024]
    # Market regime detection head
    regime_head: [512, 256]  # Reduced from [1024, 512]
    # Price direction prediction head
    price_direction_head: [512, 256]  # Reduced from [1024, 512]
    # Volatility prediction head
    volatility_head: [512, 128]  # Reduced from [1024, 256]
    # Main Q-value head (dueling architecture)
    value_head: [512, 256]  # Reduced from [1024, 512]
    advantage_head: [512, 256]  # Reduced from [1024, 512]
    # Dropout rate
    dropout_rate: 0.1
    # Layer normalization
    use_layer_norm: true

  # Market regime adaptation
  market_regime_weights:
    trending: 1.2  # Higher confidence in trending markets
    ranging: 0.8  # Lower confidence in ranging markets
    volatile: 0.6  # Much lower confidence in volatile markets

  # Prioritized experience replay
  replay_alpha: 0.6  # Priority exponent
  replay_beta: 0.4  # Importance sampling exponent

# Real-time RL COB Trader Configuration
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file - confirm the sub-mapping boundaries against the code that loads it.
realtime_rl:
  # Model parameters for 400M parameter network (faster startup)
  model:
    input_size: 2000  # COB feature dimensions
    hidden_size: 2048  # Optimized hidden layer size for 400M params
    num_layers: 8  # Efficient transformer layers for faster training
    learning_rate: 0.0001  # Higher learning rate for faster convergence
    weight_decay: 0.00001  # Balanced L2 regularization

  # Inference configuration
  inference_interval_ms: 200  # Inference every 200ms
  # Minimum confidence for signal accumulation
  min_confidence_threshold: 0.7
  # Need 3 confident predictions for trade
  required_confident_predictions: 3

  # Training configuration
  training_interval_s: 1.0  # Train every second
  batch_size: 32  # Training batch size
  replay_buffer_size: 1000  # Store last 1000 predictions for training

  # Signal accumulation
  signal_buffer_size: 10  # Buffer size for signal accumulation
  consensus_threshold: 3  # Need 3 signals in same direction

  # Model checkpointing
  model_checkpoint_dir: "models/realtime_rl_cob"
  save_interval_s: 300  # Save models every 5 minutes

  # COB integration
  symbols: ["BTC/USDT", "ETH/USDT"]  # Symbols to trade
  cob_feature_normalization: "robust"  # Feature normalization method

  # Reward engineering for RL
  reward_structure:
    correct_direction_base: 1.0  # Base reward for correct prediction
    confidence_scaling: true  # Scale reward by confidence
    magnitude_bonus: 0.5  # Bonus for predicting magnitude accurately
    # Penalty multiplier for wrong high-confidence predictions
    overconfidence_penalty: 1.5
    # Higher weight for actual trade outcomes
    trade_execution_multiplier: 10.0

  # Performance monitoring
  statistics_interval_s: 60  # Print stats every minute
  detailed_logging: true  # Enable detailed performance logging

# Enhanced Orchestrator Settings
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file - confirm the sub-mapping boundaries against the code that loads it.
orchestrator:
  # Model weights for decision combination
  cnn_weight: 0.7  # Weight for CNN predictions
  rl_weight: 0.3  # Weight for RL decisions
  confidence_threshold: 0.45
  confidence_threshold_close: 0.35
  decision_frequency: 30

  # Multi-symbol coordination
  symbol_correlation_matrix:
    "ETH/USDT-BTC/USDT": 0.85  # ETH-BTC correlation

  # Perfect move marking
  perfect_move_threshold: 0.02  # 2% price change to mark as significant
  perfect_move_buffer_size: 10000

  # RL evaluation settings
  evaluation_delay: 3600  # Evaluate actions after 1 hour
  reward_calculation:
    success_multiplier: 10  # Reward for correct predictions
    failure_penalty: 5  # Penalty for wrong predictions
    confidence_scaling: true  # Scale rewards by confidence

  # Entry aggressiveness:
  #   0.0 = very conservative (fewer, higher quality trades)
  #   1.0 = very aggressive (more trades)
  entry_aggressiveness: 0.5
  # Exit aggressiveness:
  #   0.0 = very conservative (let profits run)
  #   1.0 = very aggressive (quick exits)
  exit_aggressiveness: 0.5

# Decision Fusion Configuration
decision_fusion:
  # Use neural network decision fusion instead of programmatic
  enabled: true
  mode: "neural"  # "neural" or "programmatic"
  input_size: 128  # Size of input features for decision fusion network
  hidden_size: 256  # Hidden layer size
  history_length: 20  # Number of recent decisions to include
  # Train decision fusion every 10 decisions in programmatic mode
  training_interval: 10
  learning_rate: 0.001  # Learning rate for decision fusion network
  batch_size: 32  # Training batch size
  # Lower threshold for faster training in programmatic mode
  min_samples_for_training: 20

# Training Configuration
training:
  learning_rate: 0.001
  batch_size: 32
  epochs: 100
  validation_split: 0.2
  early_stopping_patience: 10

  # CNN specific training
  cnn_training_interval: 3600  # Train CNN every hour (was 6 hours)
  min_perfect_moves: 50  # Reduced from 200 for faster learning

  # RL specific training
  rl_training_interval: 300  # Train RL every 5 minutes (was 1 hour)
  min_experiences: 50  # Reduced from 100 for faster learning
  training_steps_per_cycle: 20  # Increased from 10 for more learning

  model_type: "optimized_short_term"
  use_realtime: true
  use_ticks: true
  checkpoint_dir: "NN/models/saved/realtime_ticks_checkpoints"
  save_best_model: true
  # We only want to keep the best performing model
  save_final_model: false

  # Continuous learning settings
  continuous_learning: true
  adaptive_learning_rate: true
  performance_threshold: 0.6

# Enhanced Training System Configuration
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file. Which keys belong inside training_intervals was inferred from
# their *_interval names - confirm against the code that loads this section.
enhanced_training:
  enabled: true  # Enable enhanced real-time training
  # Automatically start training when orchestrator starts
  auto_start: true
  training_intervals:
    # Train COB RL every 1 second (HIGHEST PRIORITY)
    cob_rl_training_interval: 1
    dqn_training_interval: 5  # Train DQN every 5 seconds
    cnn_training_interval: 10  # Train CNN every 10 seconds
    validation_interval: 60  # Validate every minute
  batch_size: 64  # Training batch size
  memory_size: 10000  # Experience buffer size
  min_training_samples: 100  # Minimum samples before training starts
  adaptation_threshold: 0.1  # Performance threshold for adaptation
  # Enable forward-looking prediction validation
  forward_looking_predictions: true

  # COB RL Priority Settings (since order book imbalance predicts price moves)
  cob_rl_priority: true  # Enable COB RL as highest priority model
  cob_rl_batch_size: 16  # Smaller batches for faster COB updates
  cob_rl_min_samples: 5  # Lower threshold for COB training