fixes
@@ -301,6 +301,13 @@ class RealtimeRLCOBTrader:
             'last_inference_time': None
         }
 
+        # PnL tracking for loss cutting optimization
+        self.pnl_history: Dict[str, deque] = {
+            symbol: deque(maxlen=1000) for symbol in self.symbols
+        }
+        self.position_peak_pnl: Dict[str, float] = {symbol: 0.0 for symbol in self.symbols}
+        self.trade_history: Dict[str, List] = {symbol: [] for symbol in self.symbols}
+
         # Threading
         self.running = False
         self.inference_lock = Lock()
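The new per-symbol state gives the trainer a rolling PnL window, the peak PnL reached by the open position, and a per-symbol trade log. As a minimal standalone sketch of how this state might be maintained (the PnLTracker class and update_pnl hook below are illustrative assumptions, not part of this commit):

from collections import deque
from typing import Dict, List

class PnLTracker:
    """Illustrative stand-in for the per-symbol PnL state added above."""

    def __init__(self, symbols: List[str]):
        self.pnl_history: Dict[str, deque] = {
            symbol: deque(maxlen=1000) for symbol in symbols
        }
        self.position_peak_pnl: Dict[str, float] = {symbol: 0.0 for symbol in symbols}
        self.trade_history: Dict[str, List] = {symbol: [] for symbol in symbols}

    def update_pnl(self, symbol: str, pnl: float) -> None:
        # Append the latest mark-to-market PnL and remember the peak
        # reached by the current position (basis for drawdown logic).
        self.pnl_history[symbol].append(pnl)
        self.position_peak_pnl[symbol] = max(self.position_peak_pnl[symbol], pnl)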
@@ -961,8 +968,10 @@ class RealtimeRLCOBTrader:
                           actual_direction: int,
                           confidence: float,
                           predicted_change: float,
-                          actual_change: float) -> float:
-        """Calculate reward for a prediction"""
+                          actual_change: float,
+                          current_pnl: float = 0.0,
+                          position_duration: float = 0.0) -> float:
+        """Calculate reward for a prediction with PnL-aware loss cutting optimization"""
         try:
             # Base reward for correct direction
             if predicted_direction == actual_direction:
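Because the two new parameters default to 0.0, existing call sites keep working unchanged and retain the old behavior; only callers that pass live position state activate the new shaping terms. A hypothetical pair of calls (the method name _calculate_reward is assumed from the docstring; this excerpt does not show it):

# Old-style call: defaults preserve the original reward behavior
reward = trader._calculate_reward(
    predicted_direction=0, actual_direction=0,
    confidence=0.9, predicted_change=0.004, actual_change=0.003)

# New-style call: live position state enables PnL-aware shaping
reward = trader._calculate_reward(
    predicted_direction=1, actual_direction=1,
    confidence=0.7, predicted_change=0.0, actual_change=0.0,
    current_pnl=-25.0, position_duration=5400.0)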
@@ -983,7 +992,42 @@ class RealtimeRLCOBTrader:
             if base_reward < 0 and confidence > 0.8:
                 confidence_scaled_reward *= 1.5  # Increase penalty
 
-            return float(confidence_scaled_reward)
+            # === PnL-AWARE LOSS CUTTING REWARDS ===
+
+            pnl_reward = 0.0
+
+            # Reward cutting losses early (SIDEWAYS when losing)
+            if current_pnl < -10.0:  # In significant loss
+                if predicted_direction == 1:  # SIDEWAYS (exit signal)
+                    # Reward cutting losses before they get worse
+                    loss_cutting_bonus = min(1.0, abs(current_pnl) / 100.0) * confidence
+                    pnl_reward += loss_cutting_bonus
+                elif predicted_direction != 1:  # Continuing to trade while in loss
+                    # Penalty for not cutting losses
+                    pnl_reward -= 0.5 * confidence
+
+            # Reward protecting profits (SIDEWAYS when in profit and market turning)
+            elif current_pnl > 10.0:  # In profit
+                if predicted_direction == 1 and base_reward > 0:  # Correct SIDEWAYS prediction
+                    # Reward protecting profits from reversal
+                    profit_protection_bonus = min(0.5, current_pnl / 200.0) * confidence
+                    pnl_reward += profit_protection_bonus
+
+            # Duration penalty for holding losing positions
+            if current_pnl < 0 and position_duration > 3600:  # Losing for > 1 hour
+                duration_penalty = min(1.0, position_duration / 7200.0) * 0.3  # Up to 30% penalty
+                confidence_scaled_reward -= duration_penalty
+
+            # Severe penalty for letting small losses become big losses
+            if current_pnl < -50.0:  # Large loss
+                drawdown_penalty = min(2.0, abs(current_pnl) / 100.0) * confidence
+                confidence_scaled_reward -= drawdown_penalty
+
+            # Total reward
+            total_reward = confidence_scaled_reward + pnl_reward
+
+            # Clamp final reward
+            return max(-5.0, min(5.0, float(total_reward)))
+
         except Exception as e:
             logger.error(f"Error calculating reward: {e}")
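To make the new shaping concrete, here is the arithmetic of each term computed standalone with the same formulas as above (the input values are examples only):

# SIDEWAYS while $25 underwater at confidence 0.8: bonus = 0.20
loss_cutting_bonus = min(1.0, abs(-25.0) / 100.0) * 0.8

# Staying directional in the same loss: penalty = -0.40
hold_penalty = -0.5 * 0.8

# Two hours (7200 s) in a losing position: penalty caps at 0.30
duration_penalty = min(1.0, 7200.0 / 7200.0) * 0.3

# Loss past -$50 (here -$80) at confidence 0.8: penalty = 0.64
drawdown_penalty = min(2.0, abs(-80.0) / 100.0) * 0.8

# The terms are summed with the base reward and clamped to [-5.0, 5.0]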