Edit prediction penalties: stronger wrong-direction penalty, smaller correct-direction bonus, confidence floor of 0.2

2025-08-26 22:26:42 +03:00
parent 300cf3eb2c
commit fc1ac2061f
1 changed file with 5 additions and 4 deletions
--- a/core/enhanced_reward_calculator.py
+++ b/core/enhanced_reward_calculator.py
@@ -302,12 +302,13 @@ class EnhancedRewardCalculator:
        normalized_mse = min(mse / max_mse, 1.0)
        mse_reward = np.exp(-5 * normalized_mse)  # Exponential decay, range [exp(-5), 1]
        
-        # Direction accuracy bonus/penalty
+        # Direction accuracy bonus/penalty (stronger punishment for wrong direction)
        direction_correct = (prediction.predicted_direction == actual_direction)
-        direction_bonus = 0.5 if direction_correct else -0.5
+        # Double the wrong-direction penalty (-0.5 -> -1.0); halve the correct-direction bonus (0.5 -> 0.25)
+        direction_bonus = 0.25 if direction_correct else -1.0
        
-        # Confidence scaling
-        confidence_weight = prediction.confidence
+        # Confidence scaling (apply floor to avoid near-zero scaling)
+        confidence_weight = max(prediction.confidence, 0.2)
        
        # Final reward calculation
        base_reward = mse_reward + direction_bonus