edit prediction penalties
This commit is contained in:
@@ -302,12 +302,13 @@ class EnhancedRewardCalculator:
|
||||
normalized_mse = min(mse / max_mse, 1.0)
|
||||
mse_reward = np.exp(-5 * normalized_mse) # Exponential decay, range [exp(-5), 1]
|
||||
|
||||
# Direction accuracy bonus/penalty
|
||||
# Direction accuracy bonus/penalty (stronger punishment for wrong direction)
|
||||
direction_correct = (prediction.predicted_direction == actual_direction)
|
||||
direction_bonus = 0.5 if direction_correct else -0.5
|
||||
# Double the wrong-direction penalty (-0.5 -> -1.0); halve the correct-direction bonus (0.5 -> 0.25)
|
||||
direction_bonus = 0.25 if direction_correct else -1.0
|
||||
|
||||
# Confidence scaling
|
||||
confidence_weight = prediction.confidence
|
||||
# Confidence scaling (apply floor to avoid near-zero scaling)
|
||||
confidence_weight = max(prediction.confidence, 0.2)
|
||||
|
||||
# Final reward calculation
|
||||
base_reward = mse_reward + direction_bonus
|
||||
|
Reference in New Issue
Block a user