RL trainer

Dobromir Popov
2025-05-28 13:20:15 +03:00
parent d6a71c2b1a
commit a6eaa01735
8 changed files with 1476 additions and 132 deletions


@@ -9,7 +9,7 @@ import logging
import time
import os
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass
from threading import Lock
import sys
@@ -20,6 +20,7 @@ sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'NN'))
from NN.exchanges import MEXCInterface
from .config import get_config
from .config_sync import ConfigSynchronizer
from .realtime_rl_trainer import RealTimeRLTrainer
logger = logging.getLogger(__name__)
@@ -119,6 +120,29 @@ class TradingExecutor:
mexc_interface=self.exchange if self.trading_enabled else None
)
# Initialize real-time RL trainer for continuous learning
rl_config = {
'state_size': 100,
'learning_rate': 0.0001,
'gamma': 0.95,
'epsilon': 0.1, # Low exploration for live trading
'buffer_size': 10000,
'batch_size': 32,
'training_enabled': self.mexc_config.get('rl_learning_enabled', True),
'min_experiences': 10,
'training_frequency': 3, # Train every 3 trades
'save_frequency': 50, # Save every 50 trades
'model_save_path': 'models/realtime_rl'
}
self.rl_trainer = RealTimeRLTrainer(rl_config)
# Try to load existing RL model
if self.rl_trainer.load_model():
logger.info("TRADING EXECUTOR: Loaded existing RL model for continuous learning")
else:
logger.info("TRADING EXECUTOR: Starting with fresh RL model")
# Perform initial fee sync on startup if trading is enabled
if self.trading_enabled and self.exchange:
try:
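The executor only touches RealTimeRLTrainer through a handful of calls; the trainer itself lives in core/realtime_rl_trainer.py, which is not part of this file's diff. Below is a minimal sketch of the interface the executor assumes, with every signature inferred from the call sites in this commit; it is an illustration, not the actual module.

from typing import Any, Dict, Optional, Tuple

class RealTimeRLTrainerInterface:
    """Hypothetical surface inferred from TradingExecutor's call sites."""

    def __init__(self, config: Dict[str, Any]) -> None:
        self.config = config

    def load_model(self) -> bool:
        """Return True if a checkpoint was restored from config['model_save_path']."""
        return False

    def update_market_data(self, symbol: str, price: float, volume: float) -> None:
        """Feed the latest tick into the state builder."""

    def record_trade_signal(self, symbol: str, action: str, confidence: float,
                            current_price: float,
                            position_info: Optional[Dict[str, Any]] = None) -> None:
        """Open a pending experience for a BUY/SELL/HOLD decision."""

    def record_position_closure(self, symbol: str, exit_price: float,
                                pnl: float, fees: float) -> None:
        """Close the pending experience and score it with the realized result."""

    def get_action_prediction(self, symbol: str, current_position: str,
                              position_pnl: float,
                              account_balance: float) -> Tuple[str, float]:
        """Return (action, confidence) for the current market state."""
        return 'HOLD', 0.5

    def enable_training(self, enabled: bool) -> None:
        """Toggle learning without tearing down the replay buffer."""

    def get_training_stats(self) -> Dict[str, Any]:
        """Expose counters such as total_experiences, win_rate, epsilon."""
        return {}

    def _save_model(self) -> None:
        """Persist the current weights to config['model_save_path']."""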
@@ -189,6 +213,29 @@ class TradingExecutor:
return False
current_price = ticker['last']
# Update RL trainer with market data (real volume is unavailable here, so use a rough estimate)
estimated_volume = abs(current_price) * 1000 # crude proxy scaled from the price level
self.rl_trainer.update_market_data(symbol, current_price, estimated_volume)
# Get position info for RL trainer
position_info = None
if symbol in self.positions:
position = self.positions[symbol]
position_info = {
'side': position.side,
'unrealized_pnl': position.unrealized_pnl,
'account_balance': 1000.0 # Could get from exchange
}
# Record trade signal with RL trainer for learning
self.rl_trainer.record_trade_signal(
symbol=symbol,
action=action,
confidence=confidence,
current_price=current_price,
position_info=position_info
)
with self.lock:
try:
if action == 'BUY':
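record_trade_signal is called before the order is attempted, so the trainer presumably keeps the signal as a pending trade until record_position_closure arrives with the realized outcome. A rough sketch of that bookkeeping follows, under the assumption that this is how entries are paired with exits; the names are illustrative, not taken from realtime_rl_trainer.py.

from typing import Any, Dict, Optional

# Illustrative pending-trade store; the real pairing logic may differ.
pending_trades: Dict[str, Dict[str, Any]] = {}

def record_trade_signal(symbol: str, action: str, confidence: float,
                        current_price: float,
                        position_info: Optional[Dict[str, Any]] = None) -> None:
    if action in ('BUY', 'SELL'):
        # Remember the decision so the eventual closure can be scored against it.
        pending_trades[symbol] = {
            'action': action,
            'confidence': confidence,
            'entry_price': current_price,
            'position_info': position_info or {},
        }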
@@ -348,6 +395,14 @@ class TradingExecutor:
self.trade_history.append(trade_record)
self.daily_loss += max(0, -pnl) # Add to daily loss if negative
# Record position closure with RL trainer for learning
self.rl_trainer.record_position_closure(
symbol=symbol,
exit_price=current_price,
pnl=pnl,
fees=0.0 # No fees in simulation
)
# Remove position
del self.positions[symbol]
self.last_trade_time[symbol] = datetime.now()
@@ -397,6 +452,14 @@ class TradingExecutor:
self.trade_history.append(trade_record)
self.daily_loss += max(0, -(pnl - fees)) # Add to daily loss if negative
# Record position closure with RL trainer for learning
self.rl_trainer.record_position_closure(
symbol=symbol,
exit_price=current_price,
pnl=pnl,
fees=fees
)
# Remove position
del self.positions[symbol]
self.last_trade_time[symbol] = datetime.now()
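Both closure paths report the realized result to the trainer: the simulated branch passes fees=0.0 while the live branch passes the actual exchange fees, so the learning signal stays comparable across modes. One plausible reward shaping, assuming the trainer scores net profit relative to the entry price; the actual formula is defined in realtime_rl_trainer.py and may differ.

# Hypothetical reward shaping for a closed position; not taken from the trainer.
def closure_reward(entry_price: float, pnl: float, fees: float) -> float:
    net = pnl - fees              # profit after exchange fees
    if entry_price <= 0:
        return 0.0
    return net / entry_price      # scale to a return-like magnitude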
@@ -464,6 +527,9 @@ class TradingExecutor:
effective_fee_rate = (total_fees / max(0.01, total_volume)) if total_volume > 0 else 0
fee_impact_on_pnl = (total_fees / max(0.01, abs(gross_pnl))) * 100 if gross_pnl != 0 else 0
# Get RL training statistics
rl_stats = self.rl_trainer.get_training_stats() if hasattr(self, 'rl_trainer') else {}
return {
'daily_trades': self.daily_trades,
'daily_loss': self.daily_loss,
@@ -490,6 +556,15 @@ class TradingExecutor:
'fee_impact_percent': fee_impact_on_pnl,
'is_fee_efficient': fee_impact_on_pnl < 5.0, # Less than 5% impact is good
'fee_savings_vs_market': (0.001 - effective_fee_rate) * total_volume if effective_fee_rate < 0.001 else 0
},
'rl_learning': {
'enabled': rl_stats.get('training_enabled', False),
'total_experiences': rl_stats.get('total_experiences', 0),
'rl_win_rate': rl_stats.get('win_rate', 0),
'avg_reward': rl_stats.get('avg_reward', 0),
'memory_size': rl_stats.get('memory_size', 0),
'epsilon': rl_stats.get('epsilon', 0),
'pending_trades': rl_stats.get('pending_trades', 0)
}
}
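The new 'rl_learning' block rides along with the existing fee and P&L statistics, so any consumer of this dict (dashboard, logging, tests) can read learning progress without a separate call. A hypothetical caller is sketched below; 'get_daily_stats' is a placeholder for whichever method returns the dict above, since its signature is outside this hunk.

# 'get_daily_stats' is a stand-in name; the enclosing method is not shown in this diff.
stats = executor.get_daily_stats()
rl = stats['rl_learning']
if rl['enabled']:
    logger.info("RL learning: %s experiences, win rate %s, avg reward %s, epsilon %s",
                rl['total_experiences'], rl['rl_win_rate'],
                rl['avg_reward'], rl['epsilon'])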
@@ -803,3 +878,71 @@ class TradingExecutor:
'sync_available': False,
'error': str(e)
}
def get_rl_prediction(self, symbol: str) -> Tuple[str, float]:
"""Get RL agent prediction for the current market state
Args:
symbol: Trading symbol
Returns:
tuple: (action, confidence) where action is BUY/SELL/HOLD
"""
if not hasattr(self, 'rl_trainer'):
return 'HOLD', 0.5
try:
# Get current position info
current_position = 'NONE'
position_pnl = 0.0
account_balance = 1000.0
if symbol in self.positions:
position = self.positions[symbol]
current_position = position.side
position_pnl = position.unrealized_pnl
# Get RL prediction
action, confidence = self.rl_trainer.get_action_prediction(
symbol=symbol,
current_position=current_position,
position_pnl=position_pnl,
account_balance=account_balance
)
return action, confidence
except Exception as e:
logger.error(f"TRADING EXECUTOR: Error getting RL prediction: {e}")
return 'HOLD', 0.5
def enable_rl_training(self, enabled: bool = True):
"""Enable or disable real-time RL training
Args:
enabled: Whether to enable RL training
"""
if hasattr(self, 'rl_trainer'):
self.rl_trainer.enable_training(enabled)
logger.info(f"TRADING EXECUTOR: RL training {'enabled' if enabled else 'disabled'}")
else:
logger.warning("TRADING EXECUTOR: RL trainer not initialized")
def get_rl_training_stats(self) -> Dict[str, Any]:
"""Get comprehensive RL training statistics
Returns:
dict: RL training statistics and performance metrics
"""
if hasattr(self, 'rl_trainer'):
return self.rl_trainer.get_training_stats()
else:
return {'error': 'RL trainer not initialized'}
def save_rl_model(self):
"""Manually save the current RL model"""
if hasattr(self, 'rl_trainer'):
self.rl_trainer._save_model()
logger.info("TRADING EXECUTOR: RL model saved manually")
else:
logger.warning("TRADING EXECUTOR: RL trainer not initialized")