fixes
@@ -301,6 +301,13 @@ class RealtimeRLCOBTrader:
             'last_inference_time': None
         }
 
+        # PnL tracking for loss cutting optimization
+        self.pnl_history: Dict[str, deque] = {
+            symbol: deque(maxlen=1000) for symbol in self.symbols
+        }
+        self.position_peak_pnl: Dict[str, float] = {symbol: 0.0 for symbol in self.symbols}
+        self.trade_history: Dict[str, List] = {symbol: [] for symbol in self.symbols}
+
         # Threading
         self.running = False
         self.inference_lock = Lock()
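The new per-symbol state gives the trainer a rolling PnL window, the peak PnL reached by the open position, and a per-symbol trade log. As a minimal standalone sketch of how this state might be maintained (the PnLTracker class and update_pnl hook below are illustrative assumptions, not part of this commit):

from collections import deque
from typing import Dict, List

class PnLTracker:
    """Illustrative stand-in for the per-symbol PnL state added above."""

    def __init__(self, symbols: List[str]):
        self.pnl_history: Dict[str, deque] = {
            symbol: deque(maxlen=1000) for symbol in symbols
        }
        self.position_peak_pnl: Dict[str, float] = {symbol: 0.0 for symbol in symbols}
        self.trade_history: Dict[str, List] = {symbol: [] for symbol in symbols}

    def update_pnl(self, symbol: str, pnl: float) -> None:
        # Append the latest mark-to-market PnL and remember the peak
        # reached by the current position (basis for drawdown logic).
        self.pnl_history[symbol].append(pnl)
        self.position_peak_pnl[symbol] = max(self.position_peak_pnl[symbol], pnl)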
@@ -961,8 +968,10 @@ class RealtimeRLCOBTrader:
                           actual_direction: int,
                           confidence: float,
                           predicted_change: float,
-                          actual_change: float) -> float:
-        """Calculate reward for a prediction"""
+                          actual_change: float,
+                          current_pnl: float = 0.0,
+                          position_duration: float = 0.0) -> float:
+        """Calculate reward for a prediction with PnL-aware loss cutting optimization"""
         try:
             # Base reward for correct direction
             if predicted_direction == actual_direction:
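Because the two new parameters default to 0.0, existing call sites keep working unchanged and retain the old behavior; only callers that pass live position state activate the new shaping terms. A hypothetical pair of calls (the method name _calculate_reward is assumed from the docstring; this excerpt does not show it):

# Old-style call: defaults preserve the original reward behavior
reward = trader._calculate_reward(
    predicted_direction=0, actual_direction=0,
    confidence=0.9, predicted_change=0.004, actual_change=0.003)

# New-style call: live position state enables PnL-aware shaping
reward = trader._calculate_reward(
    predicted_direction=1, actual_direction=1,
    confidence=0.7, predicted_change=0.0, actual_change=0.0,
    current_pnl=-25.0, position_duration=5400.0)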
@@ -983,7 +992,42 @@ class RealtimeRLCOBTrader:
             if base_reward < 0 and confidence > 0.8:
                 confidence_scaled_reward *= 1.5  # Increase penalty
 
-            return float(confidence_scaled_reward)
+            # === PnL-AWARE LOSS CUTTING REWARDS ===
+
+            pnl_reward = 0.0
+
+            # Reward cutting losses early (SIDEWAYS when losing)
+            if current_pnl < -10.0:  # In significant loss
+                if predicted_direction == 1:  # SIDEWAYS (exit signal)
+                    # Reward cutting losses before they get worse
+                    loss_cutting_bonus = min(1.0, abs(current_pnl) / 100.0) * confidence
+                    pnl_reward += loss_cutting_bonus
+                elif predicted_direction != 1:  # Continuing to trade while in loss
+                    # Penalty for not cutting losses
+                    pnl_reward -= 0.5 * confidence
+
+            # Reward protecting profits (SIDEWAYS when in profit and market turning)
+            elif current_pnl > 10.0:  # In profit
+                if predicted_direction == 1 and base_reward > 0:  # Correct SIDEWAYS prediction
+                    # Reward protecting profits from reversal
+                    profit_protection_bonus = min(0.5, current_pnl / 200.0) * confidence
+                    pnl_reward += profit_protection_bonus
+
+            # Duration penalty for holding losing positions
+            if current_pnl < 0 and position_duration > 3600:  # Losing for > 1 hour
+                duration_penalty = min(1.0, position_duration / 7200.0) * 0.3  # Up to 30% penalty
+                confidence_scaled_reward -= duration_penalty
+
+            # Severe penalty for letting small losses become big losses
+            if current_pnl < -50.0:  # Large loss
+                drawdown_penalty = min(2.0, abs(current_pnl) / 100.0) * confidence
+                confidence_scaled_reward -= drawdown_penalty
+
+            # Total reward
+            total_reward = confidence_scaled_reward + pnl_reward
+
+            # Clamp final reward
+            return max(-5.0, min(5.0, float(total_reward)))
+
         except Exception as e:
             logger.error(f"Error calculating reward: {e}")
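To make the new shaping concrete, here is the arithmetic of each term computed standalone with the same formulas as above (the input values are examples only):

# SIDEWAYS while $25 underwater at confidence 0.8: bonus = 0.20
loss_cutting_bonus = min(1.0, abs(-25.0) / 100.0) * 0.8

# Staying directional in the same loss: penalty = -0.40
hold_penalty = -0.5 * 0.8

# Two hours (7200 s) in a losing position: penalty caps at 0.30
duration_penalty = min(1.0, 7200.0 / 7200.0) * 0.3

# Loss past -$50 (here -$80) at confidence 0.8: penalty = 0.64
drawdown_penalty = min(2.0, abs(-80.0) / 100.0) * 0.8

# The terms are summed with the base reward and clamped to [-5.0, 5.0]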