normalize by unified price range

2025-07-29 22:05:28 +03:00
parent aa2a1bf7ee
commit ab5784b890
2 changed files with 89 additions and 71 deletions
--- a/core/orchestrator.py
+++ b/core/orchestrator.py
@@ -3541,6 +3541,7 @@ class TradingOrchestrator:
        """
        Calculate sophisticated reward based on prediction accuracy, confidence, and price movement magnitude
        Now considers position status and current P&L when evaluating decisions
+        NOISE REDUCTION: Treats neutral/low-confidence signals as HOLD to reduce training noise

        Args:
            predicted_action: The predicted action ('BUY', 'SELL', 'HOLD')
@@ -3556,8 +3557,15 @@ class TradingOrchestrator:
            tuple: (reward, was_correct)
        """
        try:
+            # NOISE REDUCTION: Treat low-confidence signals as HOLD
+            confidence_threshold = 0.6  # Only consider BUY/SELL if confidence >= 60%
+            if prediction_confidence < confidence_threshold:
+                predicted_action = "HOLD"
+                logger.debug(f"Low confidence ({prediction_confidence:.2f}) - treating as HOLD for noise reduction")
+            
            # Base thresholds for determining correctness
-            movement_threshold = 0.1  # 0.1% minimum movement to consider significant
+            movement_threshold = 0.15  # Increased from 0.1% to 0.15% for stronger signals
+            strong_movement_threshold = 0.5  # 0.5% for strong movements

            # Determine current position status if not provided
            if has_position is None and symbol:
@@ -3573,58 +3581,62 @@ class TradingOrchestrator:
            directional_accuracy = 0.0

            if predicted_action == "BUY":
+                # BUY signals need stronger confirmation for higher rewards
                was_correct = price_change_pct > movement_threshold
-                directional_accuracy = max(
-                    0, price_change_pct
-                )  # Positive for upward movement
+                if price_change_pct > strong_movement_threshold:
+                    directional_accuracy = price_change_pct * 2.0  # Bonus for strong moves
+                else:
+                    directional_accuracy = max(0, price_change_pct)  # Standard reward
+                    
            elif predicted_action == "SELL":
+                # SELL signals need stronger confirmation for higher rewards
                was_correct = price_change_pct < -movement_threshold
-                directional_accuracy = max(
-                    0, -price_change_pct
-                )  # Positive for downward movement
+                if price_change_pct < -strong_movement_threshold:
+                    directional_accuracy = abs(price_change_pct) * 2.0  # Bonus for strong moves
+                else:
+                    directional_accuracy = max(0, -price_change_pct)  # Standard reward
+                    
            elif predicted_action == "HOLD":
-                # HOLD evaluation now considers position status AND current P&L
+                # HOLD evaluation with noise reduction - smaller rewards to reduce training noise
                if has_position:
                    # If we have a position, HOLD evaluation depends on P&L and price movement
                    if current_position_pnl > 0:  # Currently profitable position
                        # Holding a profitable position is good if price continues favorably
                        if price_change_pct > 0:  # Price went up while holding profitable position - excellent
                            was_correct = True
-                            directional_accuracy = price_change_pct * 1.5  # Bonus for holding winners
+                            directional_accuracy = price_change_pct * 0.8  # Reduced from 1.5 to reduce noise
                        elif abs(price_change_pct) < movement_threshold:  # Price stable - good
                            was_correct = True
-                            directional_accuracy = movement_threshold + (current_position_pnl / 100.0)  # Reward based on existing profit
+                            directional_accuracy = movement_threshold * 0.5  # Reduced reward to reduce noise
                        else:  # Price dropped while holding profitable position - still okay but less reward
                            was_correct = True
-                            directional_accuracy = max(0, (current_position_pnl / 100.0) - abs(price_change_pct) * 0.5)
+                            directional_accuracy = max(0, (current_position_pnl / 100.0) - abs(price_change_pct) * 0.3)
                    elif current_position_pnl < 0:  # Currently losing position
                        # Holding a losing position is generally bad - should consider closing
                        if price_change_pct > movement_threshold:  # Price recovered - good hold
                            was_correct = True
-                            directional_accuracy = price_change_pct * 0.8  # Reduced reward for recovery
+                            directional_accuracy = price_change_pct * 0.6  # Reduced reward
                        else:  # Price continued down or stayed flat - bad hold
                            was_correct = False
                            # Penalty proportional to loss magnitude
-                            directional_accuracy = abs(current_position_pnl / 100.0) * 0.5  # Penalty for holding losers
+                            directional_accuracy = abs(current_position_pnl / 100.0) * 0.3  # Reduced penalty
                    else:  # Breakeven position
                        # Standard HOLD evaluation for breakeven positions
                        if abs(price_change_pct) < movement_threshold:  # Price stable - good
                            was_correct = True
-                            directional_accuracy = movement_threshold - abs(price_change_pct)
+                            directional_accuracy = movement_threshold * 0.4  # Reduced reward
                        else:  # Price moved significantly - missed opportunity
                            was_correct = False
-                            directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.7
+                            directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.5
                else:
                    # If we don't have a position, HOLD is correct if price stayed relatively stable
                    was_correct = abs(price_change_pct) < movement_threshold
-                    directional_accuracy = max(
-                        0, movement_threshold - abs(price_change_pct)
-                    )  # Positive for stability
+                    directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.4  # Reduced reward

            # Calculate magnitude-based multiplier (higher rewards for larger correct movements)
            magnitude_multiplier = min(
-                abs(price_change_pct) / 2.0, 3.0
-            )  # Cap at 3x for 6% moves
+                abs(price_change_pct) / 2.0, 2.5  # Reduced from 3.0 to 2.5 to reduce noise
+            )  # Cap at 2.5x for 5% moves

            # Calculate confidence-based reward adjustment
            if was_correct: