diff --git a/core/enhanced_reward_calculator.py b/core/enhanced_reward_calculator.py
index 75044bc..a2401ea 100644
--- a/core/enhanced_reward_calculator.py
+++ b/core/enhanced_reward_calculator.py
@@ -302,12 +302,13 @@ class EnhancedRewardCalculator:
         normalized_mse = min(mse / max_mse, 1.0)
         mse_reward = np.exp(-5 * normalized_mse)  # Exponential decay, range [exp(-5), 1]

-        # Direction accuracy bonus/penalty
+        # Direction accuracy bonus/penalty (stronger punishment for wrong direction)
         direction_correct = (prediction.predicted_direction == actual_direction)
-        direction_bonus = 0.5 if direction_correct else -0.5
+        # Increase wrong-direction penalty; reduce correct-direction bonus slightly
+        direction_bonus = 0.25 if direction_correct else -1.0

-        # Confidence scaling
-        confidence_weight = prediction.confidence
+        # Confidence scaling (apply floor to avoid near-zero scaling)
+        confidence_weight = max(prediction.confidence, 0.2)

         # Final reward calculation
         base_reward = mse_reward + direction_bonus
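For context, a rough before/after check of what the new constants do to base_reward for a wrong-direction prediction. This is a standalone sketch, not code from the module; the normalized_mse value is illustrative only.

import numpy as np

normalized_mse = 0.2                       # illustrative value, not from the test suite
mse_reward = np.exp(-5 * normalized_mse)   # ~0.368, same formula as in the hunk above

old_wrong = mse_reward - 0.5   # old -0.5 penalty  -> ~-0.13
new_wrong = mse_reward - 1.0   # new -1.0 penalty  -> ~-0.63

print(old_wrong, new_wrong)

Since mse_reward tops out at 1.0, a wrong-direction prediction can now reach at best base_reward = 0 (previously up to +0.5), which matches the stated intent of punishing direction misses harder.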