This commit is contained in:
Dobromir Popov
2025-06-24 20:07:44 +03:00
parent 36d4c543c3
commit 06fbbeb81e
3 changed files with 1054 additions and 335 deletions

View File

@@ -301,6 +301,13 @@ class RealtimeRLCOBTrader:
'last_inference_time': None
}
# PnL tracking for loss cutting optimization
self.pnl_history: Dict[str, deque] = {
symbol: deque(maxlen=1000) for symbol in self.symbols
}
self.position_peak_pnl: Dict[str, float] = {symbol: 0.0 for symbol in self.symbols}
self.trade_history: Dict[str, List] = {symbol: [] for symbol in self.symbols}
# Threading
self.running = False
self.inference_lock = Lock()
@@ -961,8 +968,10 @@ class RealtimeRLCOBTrader:
actual_direction: int,
confidence: float,
predicted_change: float,
actual_change: float) -> float:
"""Calculate reward for a prediction"""
actual_change: float,
current_pnl: float = 0.0,
position_duration: float = 0.0) -> float:
"""Calculate reward for a prediction with PnL-aware loss cutting optimization"""
try:
# Base reward for correct direction
if predicted_direction == actual_direction:
@@ -983,7 +992,42 @@
if base_reward < 0 and confidence > 0.8:
confidence_scaled_reward *= 1.5 # Increase penalty
return float(confidence_scaled_reward)
# === PnL-AWARE LOSS CUTTING REWARDS ===
pnl_reward = 0.0
# Reward cutting losses early (SIDEWAYS when losing)
if current_pnl < -10.0: # In significant loss
if predicted_direction == 1: # SIDEWAYS (exit signal)
# Reward cutting losses before they get worse
loss_cutting_bonus = min(1.0, abs(current_pnl) / 100.0) * confidence
pnl_reward += loss_cutting_bonus
elif predicted_direction != 1: # Continuing to trade while in loss
# Penalty for not cutting losses
pnl_reward -= 0.5 * confidence
# Reward protecting profits (SIDEWAYS when in profit and market turning)
elif current_pnl > 10.0: # In profit
if predicted_direction == 1 and base_reward > 0: # Correct SIDEWAYS prediction
# Reward protecting profits from reversal
profit_protection_bonus = min(0.5, current_pnl / 200.0) * confidence
pnl_reward += profit_protection_bonus
# Duration penalty for holding losing positions
if current_pnl < 0 and position_duration > 3600: # Losing for > 1 hour
duration_penalty = min(1.0, position_duration / 7200.0) * 0.3 # Up to 30% penalty
confidence_scaled_reward -= duration_penalty
# Severe penalty for letting small losses become big losses
if current_pnl < -50.0: # Large loss
drawdown_penalty = min(2.0, abs(current_pnl) / 100.0) * confidence
confidence_scaled_reward -= drawdown_penalty
# Total reward
total_reward = confidence_scaled_reward + pnl_reward
# Clamp final reward
return max(-5.0, min(5.0, float(total_reward)))
except Exception as e:
logger.error(f"Error calculating reward: {e}")