Adjust prediction reward penalties (stronger wrong-direction penalty, confidence floor)

This commit is contained in:
Dobromir Popov
2025-08-26 22:26:42 +03:00
parent 300cf3eb2c
commit fc1ac2061f

View File

@@ -302,12 +302,13 @@ class EnhancedRewardCalculator:
normalized_mse = min(mse / max_mse, 1.0)
mse_reward = np.exp(-5 * normalized_mse) # Exponential decay, range [exp(-5), 1]
# Direction accuracy bonus/penalty
# Direction accuracy bonus/penalty (stronger punishment for wrong direction)
direction_correct = (prediction.predicted_direction == actual_direction)
direction_bonus = 0.5 if direction_correct else -0.5
# Asymmetric by design: a wrong direction costs -1.0, a correct direction earns only +0.25
direction_bonus = 0.25 if direction_correct else -1.0
# Confidence scaling
confidence_weight = prediction.confidence
# Confidence scaling (apply floor to avoid near-zero scaling)
confidence_weight = max(prediction.confidence, 0.2)
# Final reward calculation
base_reward = mse_reward + direction_bonus