commit 06fbbeb81e
parent 36d4c543c3
Author: Dobromir Popov
Date:   2025-06-24 20:07:44 +03:00

3 changed files with 1054 additions and 335 deletions

(Diff for one file suppressed because it is too large.)


@@ -23,7 +23,11 @@ import asyncio
 import json
 import logging
 import time
-import websockets
+try:
+    import websockets
+except ImportError:
+    # Fallback for environments where websockets is not available
+    websockets = None
 import numpy as np
 import pandas as pd
 from datetime import datetime, timedelta
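
The guarded import keeps the module importable when the optional websockets package is absent; the streaming code paths below then raise an explicit ImportError instead of failing on `None.connect`. A minimal sketch of the pattern in isolation (the `_require_websockets` helper is illustrative, not part of this commit):

    try:
        import websockets  # optional: only needed for live exchange streams
    except ImportError:
        # Module still imports; streaming entry points check for None and fail fast
        websockets = None

    def _require_websockets() -> None:
        # Hypothetical guard mirroring the inline checks added in this commit
        if websockets is None:
            raise ImportError("websockets module not available")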
@@ -106,7 +110,7 @@ class MultiExchangeCOBProvider:
     to create a consolidated view of market liquidity and pricing.
     """
 
-    def __init__(self, symbols: List[str] = None, bucket_size_bps: float = 1.0):
+    def __init__(self, symbols: Optional[List[str]] = None, bucket_size_bps: float = 1.0):
         """
         Initialize Multi-Exchange COB Provider
 
@@ -461,6 +465,8 @@ class MultiExchangeCOBProvider:
             ws_url = f"{config.websocket_url}{config.symbols_mapping[symbol].lower()}@depth20@100ms"
             logger.info(f"Connecting to Binance WebSocket: {ws_url}")
 
+            if websockets is None:
+                raise ImportError("websockets module not available")
             async with websockets.connect(ws_url) as websocket:
                 self.exchange_order_books[symbol]['binance']['connected'] = True
                 logger.info(f"Connected to Binance order book stream for {symbol}")
@@ -537,7 +543,7 @@ class MultiExchangeCOBProvider:
         except Exception as e:
             logger.error(f"Error adding trade to SVP: {e}")
 
-    def get_session_volume_profile(self, symbol: str, bucket_size: float = None) -> Dict:
+    def get_session_volume_profile(self, symbol: str, bucket_size: Optional[float] = None) -> Dict:
         """Get session volume profile for a symbol"""
         try:
             if bucket_size is None:
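
Both signature changes in this file are annotation fixes rather than behavior changes: under PEP 484, a parameter whose default is None must be typed Optional[...], or strict type checkers reject the default. A short self-contained illustration (the fallback value of 1.0 is hypothetical):

    from typing import Dict, Optional

    def get_session_volume_profile(symbol: str, bucket_size: Optional[float] = None) -> Dict:
        # None means "use the provider's configured bucket size"
        if bucket_size is None:
            bucket_size = 1.0  # illustrative default only
        return {"symbol": symbol, "bucket_size": bucket_size}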
@@ -690,6 +696,8 @@ class MultiExchangeCOBProvider:
             ws_url = f"{config.websocket_url}{config.symbols_mapping[symbol].lower()}@trade"
             logger.info(f"Connecting to Binance trade stream: {ws_url}")
 
+            if websockets is None:
+                raise ImportError("websockets module not available")
             async with websockets.connect(ws_url) as websocket:
                 logger.info(f"Connected to Binance trade stream for {symbol}")
 
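
Both Binance stream coroutines now run the same preflight check before connecting. A hedged sketch of the resulting flow, reusing the guarded module-level import and the standard websockets client API (the surrounding reconnect loop is omitted):

    async def stream_orderbook(ws_url: str) -> None:
        # Preflight added by this commit: fail with a clear error when the
        # optional dependency was stubbed out as None at import time
        if websockets is None:
            raise ImportError("websockets module not available")
        async with websockets.connect(ws_url) as websocket:
            async for message in websocket:
                ...  # parse depth updates or trades here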


@@ -301,6 +301,13 @@ class RealtimeRLCOBTrader:
             'last_inference_time': None
         }
 
+        # PnL tracking for loss cutting optimization
+        self.pnl_history: Dict[str, deque] = {
+            symbol: deque(maxlen=1000) for symbol in self.symbols
+        }
+        self.position_peak_pnl: Dict[str, float] = {symbol: 0.0 for symbol in self.symbols}
+        self.trade_history: Dict[str, List] = {symbol: [] for symbol in self.symbols}
+
         # Threading
         self.running = False
         self.inference_lock = Lock()
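
The deque with maxlen=1000 bounds memory per symbol while keeping recent history for the reward shaping below. A hedged sketch of how these structures might be fed on each mark-to-market tick (`update_position_pnl` is a hypothetical helper, not part of this commit):

    def update_position_pnl(self, symbol: str, current_pnl: float) -> None:
        # Bounded history: deque(maxlen=1000) evicts the oldest sample automatically
        self.pnl_history[symbol].append(current_pnl)
        # Track the best PnL seen on the open position, for drawdown measurement
        if current_pnl > self.position_peak_pnl[symbol]:
            self.position_peak_pnl[symbol] = current_pnl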
@@ -961,8 +968,10 @@ class RealtimeRLCOBTrader:
                           actual_direction: int,
                           confidence: float,
                           predicted_change: float,
-                          actual_change: float) -> float:
-        """Calculate reward for a prediction"""
+                          actual_change: float,
+                          current_pnl: float = 0.0,
+                          position_duration: float = 0.0) -> float:
+        """Calculate reward for a prediction with PnL-aware loss cutting optimization"""
         try:
             # Base reward for correct direction
             if predicted_direction == actual_direction:
@@ -983,7 +992,42 @@ class RealtimeRLCOBTrader:
             if base_reward < 0 and confidence > 0.8:
                 confidence_scaled_reward *= 1.5  # Increase penalty
 
-            return float(confidence_scaled_reward)
+            # === PnL-AWARE LOSS CUTTING REWARDS ===
+            pnl_reward = 0.0
+
+            # Reward cutting losses early (SIDEWAYS when losing)
+            if current_pnl < -10.0:  # In significant loss
+                if predicted_direction == 1:  # SIDEWAYS (exit signal)
+                    # Reward cutting losses before they get worse
+                    loss_cutting_bonus = min(1.0, abs(current_pnl) / 100.0) * confidence
+                    pnl_reward += loss_cutting_bonus
+                else:  # Continuing to trade while in loss
+                    # Penalty for not cutting losses
+                    pnl_reward -= 0.5 * confidence
+
+            # Reward protecting profits (SIDEWAYS when in profit and market turning)
+            elif current_pnl > 10.0:  # In profit
+                if predicted_direction == 1 and base_reward > 0:  # Correct SIDEWAYS prediction
+                    # Reward protecting profits from reversal
+                    profit_protection_bonus = min(0.5, current_pnl / 200.0) * confidence
+                    pnl_reward += profit_protection_bonus
+
+            # Duration penalty for holding losing positions
+            if current_pnl < 0 and position_duration > 3600:  # Losing for > 1 hour
+                duration_penalty = min(1.0, position_duration / 7200.0) * 0.3  # Up to 30% penalty
+                confidence_scaled_reward -= duration_penalty
+
+            # Severe penalty for letting small losses become big losses
+            if current_pnl < -50.0:  # Large loss
+                drawdown_penalty = min(2.0, abs(current_pnl) / 100.0) * confidence
+                confidence_scaled_reward -= drawdown_penalty
+
+            # Total reward
+            total_reward = confidence_scaled_reward + pnl_reward
+
+            # Clamp final reward
+            return max(-5.0, min(5.0, float(total_reward)))
+
         except Exception as e:
             logger.error(f"Error calculating reward: {e}")
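
To make the new terms concrete: with current_pnl = -40.0 and confidence = 0.7, a correct SIDEWAYS (exit) prediction earns a loss-cutting bonus of min(1.0, 40/100) * 0.7 = 0.28, while holding that losing position for two hours costs the full duration penalty of min(1.0, 7200/7200) * 0.3 = 0.3. A hedged usage sketch (it assumes predicted_direction is the method's first parameter, as the body implies; all values are illustrative):

    reward = trader._calculate_reward(
        predicted_direction=1,     # SIDEWAYS / exit signal
        actual_direction=1,
        confidence=0.7,
        predicted_change=0.0,
        actual_change=0.0002,
        current_pnl=-40.0,         # significant loss: loss-cutting bonus applies
        position_duration=7200.0,  # two hours: full duration penalty applies
    )
    assert -5.0 <= reward <= 5.0   # the commit clamps rewards to [-5, 5]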