normalize by unified price range

This commit is contained in:
Dobromir Popov
2025-07-29 22:05:28 +03:00
parent aa2a1bf7ee
commit ab5784b890
2 changed files with 89 additions and 71 deletions

View File

@ -3541,6 +3541,7 @@ class TradingOrchestrator:
"""
Calculate sophisticated reward based on prediction accuracy, confidence, and price movement magnitude
Now considers position status and current P&L when evaluating decisions
NOISE REDUCTION: Treats neutral/low-confidence signals as HOLD to reduce training noise
Args:
predicted_action: The predicted action ('BUY', 'SELL', 'HOLD')
@ -3556,8 +3557,15 @@ class TradingOrchestrator:
tuple: (reward, was_correct)
"""
try:
# NOISE REDUCTION: Treat low-confidence signals as HOLD
confidence_threshold = 0.6 # Only consider BUY/SELL if confidence >= 60%
if prediction_confidence < confidence_threshold:
predicted_action = "HOLD"
logger.debug(f"Low confidence ({prediction_confidence:.2f}) - treating as HOLD for noise reduction")
# Base thresholds for determining correctness
movement_threshold = 0.1 # 0.1% minimum movement to consider significant
movement_threshold = 0.15 # Increased from 0.1% to 0.15% for stronger signals
strong_movement_threshold = 0.5 # 0.5% for strong movements
# Determine current position status if not provided
if has_position is None and symbol:
@ -3573,58 +3581,62 @@ class TradingOrchestrator:
directional_accuracy = 0.0
if predicted_action == "BUY":
# BUY signals need stronger confirmation for higher rewards
was_correct = price_change_pct > movement_threshold
directional_accuracy = max(
0, price_change_pct
) # Positive for upward movement
if price_change_pct > strong_movement_threshold:
directional_accuracy = price_change_pct * 2.0 # Bonus for strong moves
else:
directional_accuracy = max(0, price_change_pct) # Standard reward
elif predicted_action == "SELL":
# SELL signals need stronger confirmation for higher rewards
was_correct = price_change_pct < -movement_threshold
directional_accuracy = max(
0, -price_change_pct
) # Positive for downward movement
if price_change_pct < -strong_movement_threshold:
directional_accuracy = abs(price_change_pct) * 2.0 # Bonus for strong moves
else:
directional_accuracy = max(0, -price_change_pct) # Standard reward
elif predicted_action == "HOLD":
# HOLD evaluation now considers position status AND current P&L
# HOLD evaluation with noise reduction - smaller rewards to reduce training noise
if has_position:
# If we have a position, HOLD evaluation depends on P&L and price movement
if current_position_pnl > 0: # Currently profitable position
# Holding a profitable position is good if price continues favorably
if price_change_pct > 0: # Price went up while holding profitable position - excellent
was_correct = True
directional_accuracy = price_change_pct * 1.5 # Bonus for holding winners
directional_accuracy = price_change_pct * 0.8 # Reduced from 1.5 to reduce noise
elif abs(price_change_pct) < movement_threshold: # Price stable - good
was_correct = True
directional_accuracy = movement_threshold + (current_position_pnl / 100.0) # Reward based on existing profit
directional_accuracy = movement_threshold * 0.5 # Reduced reward to reduce noise
else: # Price dropped while holding profitable position - still okay but less reward
was_correct = True
directional_accuracy = max(0, (current_position_pnl / 100.0) - abs(price_change_pct) * 0.5)
directional_accuracy = max(0, (current_position_pnl / 100.0) - abs(price_change_pct) * 0.3)
elif current_position_pnl < 0: # Currently losing position
# Holding a losing position is generally bad - should consider closing
if price_change_pct > movement_threshold: # Price recovered - good hold
was_correct = True
directional_accuracy = price_change_pct * 0.8 # Reduced reward for recovery
directional_accuracy = price_change_pct * 0.6 # Reduced reward
else: # Price continued down or stayed flat - bad hold
was_correct = False
# Penalty proportional to loss magnitude
directional_accuracy = abs(current_position_pnl / 100.0) * 0.5 # Penalty for holding losers
directional_accuracy = abs(current_position_pnl / 100.0) * 0.3 # Reduced penalty
else: # Breakeven position
# Standard HOLD evaluation for breakeven positions
if abs(price_change_pct) < movement_threshold: # Price stable - good
was_correct = True
directional_accuracy = movement_threshold - abs(price_change_pct)
directional_accuracy = movement_threshold * 0.4 # Reduced reward
else: # Price moved significantly - missed opportunity
was_correct = False
directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.7
directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.5
else:
# If we don't have a position, HOLD is correct if price stayed relatively stable
was_correct = abs(price_change_pct) < movement_threshold
directional_accuracy = max(
0, movement_threshold - abs(price_change_pct)
) # Positive for stability
directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.4 # Reduced reward
# Calculate magnitude-based multiplier (higher rewards for larger correct movements)
magnitude_multiplier = min(
abs(price_change_pct) / 2.0, 3.0
) # Cap at 3x for 6% moves
abs(price_change_pct) / 2.0, 2.5 # Reduced from 3.0 to 2.5 to reduce noise
) # Cap at 2.5x for 5% moves
# Calculate confidence-based reward adjustment
if was_correct: