edit prediction penalties

This commit is contained in:
Dobromir Popov
2025-08-26 22:26:42 +03:00
parent 300cf3eb2c
commit fc1ac2061f

View File

@@ -302,12 +302,13 @@ class EnhancedRewardCalculator:
  normalized_mse = min(mse / max_mse, 1.0)
  mse_reward = np.exp(-5 * normalized_mse) # Exponential decay, range [exp(-5), 1]
- # Direction accuracy bonus/penalty
+ # Direction accuracy bonus/penalty (stronger punishment for wrong direction)
  direction_correct = (prediction.predicted_direction == actual_direction)
- direction_bonus = 0.5 if direction_correct else -0.5
+ # Increase wrong-direction penalty; reduce correct-direction bonus slightly
+ direction_bonus = 0.25 if direction_correct else -1.0
- # Confidence scaling
+ # Confidence scaling (apply floor to avoid near-zero scaling)
- confidence_weight = prediction.confidence
+ confidence_weight = max(prediction.confidence, 0.2)
  # Final reward calculation
  base_reward = mse_reward + direction_bonus