RL trainer

Dobromir Popov
2025-05-28 13:20:15 +03:00
parent d6a71c2b1a
commit a6eaa01735
8 changed files with 1476 additions and 132 deletions


@@ -9,7 +9,7 @@ import logging
import time
import os
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass
from threading import Lock
import sys
@@ -20,6 +20,7 @@ sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'NN'))
from NN.exchanges import MEXCInterface
from .config import get_config
from .config_sync import ConfigSynchronizer
from .realtime_rl_trainer import RealTimeRLTrainer
logger = logging.getLogger(__name__)
@@ -119,6 +120,29 @@ class TradingExecutor:
mexc_interface=self.exchange if self.trading_enabled else None
)
# Initialize real-time RL trainer for continuous learning
rl_config = {
'state_size': 100,
'learning_rate': 0.0001,
'gamma': 0.95,
'epsilon': 0.1, # Low exploration for live trading
'buffer_size': 10000,
'batch_size': 32,
'training_enabled': self.mexc_config.get('rl_learning_enabled', True),
'min_experiences': 10,
'training_frequency': 3, # Train every 3 trades
'save_frequency': 50, # Save every 50 trades
'model_save_path': 'models/realtime_rl'
}
self.rl_trainer = RealTimeRLTrainer(rl_config)
# Try to load existing RL model
if self.rl_trainer.load_model():
logger.info("TRADING EXECUTOR: Loaded existing RL model for continuous learning")
else:
logger.info("TRADING EXECUTOR: Starting with fresh RL model")
# Perform initial fee sync on startup if trading is enabled
if self.trading_enabled and self.exchange:
try:
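The executor only touches RealTimeRLTrainer through a handful of calls; the trainer itself lives in core/realtime_rl_trainer.py, which is not part of this file's diff. Below is a minimal sketch of the interface the executor assumes, with every signature inferred from the call sites in this commit; it is an illustration, not the actual module.

from typing import Any, Dict, Optional, Tuple

class RealTimeRLTrainerInterface:
    """Hypothetical surface inferred from TradingExecutor's call sites."""

    def __init__(self, config: Dict[str, Any]) -> None:
        self.config = config

    def load_model(self) -> bool:
        """Return True if a checkpoint was restored from config['model_save_path']."""
        return False

    def update_market_data(self, symbol: str, price: float, volume: float) -> None:
        """Feed the latest tick into the state builder."""

    def record_trade_signal(self, symbol: str, action: str, confidence: float,
                            current_price: float,
                            position_info: Optional[Dict[str, Any]] = None) -> None:
        """Open a pending experience for a BUY/SELL/HOLD decision."""

    def record_position_closure(self, symbol: str, exit_price: float,
                                pnl: float, fees: float) -> None:
        """Close the pending experience and score it with the realized result."""

    def get_action_prediction(self, symbol: str, current_position: str,
                              position_pnl: float,
                              account_balance: float) -> Tuple[str, float]:
        """Return (action, confidence) for the current market state."""
        return 'HOLD', 0.5

    def enable_training(self, enabled: bool) -> None:
        """Toggle learning without tearing down the replay buffer."""

    def get_training_stats(self) -> Dict[str, Any]:
        """Expose counters such as total_experiences, win_rate, epsilon."""
        return {}

    def _save_model(self) -> None:
        """Persist the current weights to config['model_save_path']."""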
@@ -189,6 +213,29 @@ class TradingExecutor:
return False
current_price = ticker['last']
# Update RL trainer with market data (real volume is unavailable here, so use a rough estimate)
estimated_volume = abs(current_price) * 1000 # crude proxy scaled from the price level
self.rl_trainer.update_market_data(symbol, current_price, estimated_volume)
# Get position info for RL trainer
position_info = None
if symbol in self.positions:
position = self.positions[symbol]
position_info = {
'side': position.side,
'unrealized_pnl': position.unrealized_pnl,
'account_balance': 1000.0 # Could get from exchange
}
# Record trade signal with RL trainer for learning
self.rl_trainer.record_trade_signal(
symbol=symbol,
action=action,
confidence=confidence,
current_price=current_price,
position_info=position_info
)
with self.lock:
try:
if action == 'BUY':
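record_trade_signal is called before the order is attempted, so the trainer presumably keeps the signal as a pending trade until record_position_closure arrives with the realized outcome. A rough sketch of that bookkeeping follows, under the assumption that this is how entries are paired with exits; the names are illustrative, not taken from realtime_rl_trainer.py.

from typing import Any, Dict, Optional

# Illustrative pending-trade store; the real pairing logic may differ.
pending_trades: Dict[str, Dict[str, Any]] = {}

def record_trade_signal(symbol: str, action: str, confidence: float,
                        current_price: float,
                        position_info: Optional[Dict[str, Any]] = None) -> None:
    if action in ('BUY', 'SELL'):
        # Remember the decision so the eventual closure can be scored against it.
        pending_trades[symbol] = {
            'action': action,
            'confidence': confidence,
            'entry_price': current_price,
            'position_info': position_info or {},
        }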
@@ -348,6 +395,14 @@ class TradingExecutor:
self.trade_history.append(trade_record)
self.daily_loss += max(0, -pnl) # Add to daily loss if negative
# Record position closure with RL trainer for learning
self.rl_trainer.record_position_closure(
symbol=symbol,
exit_price=current_price,
pnl=pnl,
fees=0.0 # No fees in simulation
)
# Remove position
del self.positions[symbol]
self.last_trade_time[symbol] = datetime.now()
@@ -397,6 +452,14 @@ class TradingExecutor:
self.trade_history.append(trade_record)
self.daily_loss += max(0, -(pnl - fees)) # Add to daily loss if negative
# Record position closure with RL trainer for learning
self.rl_trainer.record_position_closure(
symbol=symbol,
exit_price=current_price,
pnl=pnl,
fees=fees
)
# Remove position
del self.positions[symbol]
self.last_trade_time[symbol] = datetime.now()
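Both closure paths report the realized result to the trainer: the simulated branch passes fees=0.0 while the live branch passes the actual exchange fees, so the learning signal stays comparable across modes. One plausible reward shaping, assuming the trainer scores net profit relative to the entry price; the actual formula is defined in realtime_rl_trainer.py and may differ.

# Hypothetical reward shaping for a closed position; not taken from the trainer.
def closure_reward(entry_price: float, pnl: float, fees: float) -> float:
    net = pnl - fees              # profit after exchange fees
    if entry_price <= 0:
        return 0.0
    return net / entry_price      # scale to a return-like magnitude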
@@ -464,6 +527,9 @@ class TradingExecutor:
effective_fee_rate = (total_fees / max(0.01, total_volume)) if total_volume > 0 else 0
fee_impact_on_pnl = (total_fees / max(0.01, abs(gross_pnl))) * 100 if gross_pnl != 0 else 0
# Get RL training statistics
rl_stats = self.rl_trainer.get_training_stats() if hasattr(self, 'rl_trainer') else {}
return {
'daily_trades': self.daily_trades,
'daily_loss': self.daily_loss,
@@ -490,6 +556,15 @@ class TradingExecutor:
'fee_impact_percent': fee_impact_on_pnl,
'is_fee_efficient': fee_impact_on_pnl < 5.0, # Less than 5% impact is good
'fee_savings_vs_market': (0.001 - effective_fee_rate) * total_volume if effective_fee_rate < 0.001 else 0
},
'rl_learning': {
'enabled': rl_stats.get('training_enabled', False),
'total_experiences': rl_stats.get('total_experiences', 0),
'rl_win_rate': rl_stats.get('win_rate', 0),
'avg_reward': rl_stats.get('avg_reward', 0),
'memory_size': rl_stats.get('memory_size', 0),
'epsilon': rl_stats.get('epsilon', 0),
'pending_trades': rl_stats.get('pending_trades', 0)
}
}
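The new 'rl_learning' block rides along with the existing fee and P&L statistics, so any consumer of this dict (dashboard, logging, tests) can read learning progress without a separate call. A hypothetical caller is sketched below; 'get_daily_stats' is a placeholder for whichever method returns the dict above, since its signature is outside this hunk.

# 'get_daily_stats' is a stand-in name; the enclosing method is not shown in this diff.
stats = executor.get_daily_stats()
rl = stats['rl_learning']
if rl['enabled']:
    logger.info("RL learning: %s experiences, win rate %s, avg reward %s, epsilon %s",
                rl['total_experiences'], rl['rl_win_rate'],
                rl['avg_reward'], rl['epsilon'])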
@@ -803,3 +878,71 @@ class TradingExecutor:
'sync_available': False,
'error': str(e)
}
def get_rl_prediction(self, symbol: str) -> Tuple[str, float]:
"""Get RL agent prediction for the current market state
Args:
symbol: Trading symbol
Returns:
tuple: (action, confidence) where action is BUY/SELL/HOLD
"""
if not hasattr(self, 'rl_trainer'):
return 'HOLD', 0.5
try:
# Get current position info
current_position = 'NONE'
position_pnl = 0.0
account_balance = 1000.0
if symbol in self.positions:
position = self.positions[symbol]
current_position = position.side
position_pnl = position.unrealized_pnl
# Get RL prediction
action, confidence = self.rl_trainer.get_action_prediction(
symbol=symbol,
current_position=current_position,
position_pnl=position_pnl,
account_balance=account_balance
)
return action, confidence
except Exception as e:
logger.error(f"TRADING EXECUTOR: Error getting RL prediction: {e}")
return 'HOLD', 0.5
def enable_rl_training(self, enabled: bool = True):
"""Enable or disable real-time RL training
Args:
enabled: Whether to enable RL training
"""
if hasattr(self, 'rl_trainer'):
self.rl_trainer.enable_training(enabled)
logger.info(f"TRADING EXECUTOR: RL training {'enabled' if enabled else 'disabled'}")
else:
logger.warning("TRADING EXECUTOR: RL trainer not initialized")
def get_rl_training_stats(self) -> Dict[str, Any]:
"""Get comprehensive RL training statistics
Returns:
dict: RL training statistics and performance metrics
"""
if hasattr(self, 'rl_trainer'):
return self.rl_trainer.get_training_stats()
else:
return {'error': 'RL trainer not initialized'}
def save_rl_model(self):
"""Manually save the current RL model"""
if hasattr(self, 'rl_trainer'):
self.rl_trainer._save_model()
logger.info("TRADING EXECUTOR: RL model saved manually")
else:
logger.warning("TRADING EXECUTOR: RL trainer not initialized")