normalize by unified price range

2025-07-29 22:05:28 +03:00
parent aa2a1bf7ee
commit ab5784b890
2 changed files with 89 additions and 71 deletions
--- a/core/data_provider.py
+++ b/core/data_provider.py
@@ -3117,87 +3117,86 @@ class DataProvider:
            return basic_cols  # Fallback to basic OHLCV
    def _normalize_features(self, df: pd.DataFrame, symbol: str = None) -> Optional[pd.DataFrame]:
-        """Normalize features for CNN training using pivot-based bounds when available"""
+        """Normalize features for CNN training using unified normalization across all timeframes"""
        try:
            df_norm = df.copy()
-            # Try to use pivot-based normalization if available
+            # Get unified normalization bounds for all timeframes
            if symbol and symbol in self.pivot_bounds:
                bounds = self.pivot_bounds[symbol]
                price_range = bounds.get_price_range()
                volume_range = bounds.volume_max - bounds.volume_min
-                # Normalize price-based features using pivot bounds
+                logger.debug(f"Using unified pivot-based normalization for {symbol} (price_range: {price_range:.2f})")
            else:
                # Fallback: calculate unified bounds from available data
                price_range = self._get_price_range_for_symbol(symbol) if symbol else 1000.0
                volume_range = 1000000.0  # Default volume range
                logger.debug(f"Using fallback unified normalization for {symbol} (price_range: {price_range:.2f})")
            # UNIFIED NORMALIZATION: All timeframes use the same normalization range
            # This preserves relationships between different timeframes
            # Price-based features (OHLCV + indicators)
            price_cols = ['open', 'high', 'low', 'close', 'sma_10', 'sma_20', 'sma_50', 
                         'ema_12', 'ema_26', 'ema_50', 'bb_upper', 'bb_lower', 'bb_middle',
                         'keltner_upper', 'keltner_lower', 'keltner_middle', 'psar', 'vwap']
            for col in price_cols:
                if col in df_norm.columns:
-                        # Use pivot bounds for normalization
+                    if symbol and symbol in self.pivot_bounds:
                        # Use pivot bounds for unified normalization
                        df_norm[col] = (df_norm[col] - bounds.price_min) / price_range
                # Normalize volume using pivot bounds
                if 'volume' in df_norm.columns:
                    volume_range = bounds.volume_max - bounds.volume_min
                    if volume_range > 0:
                        df_norm['volume'] = (df_norm['volume'] - bounds.volume_min) / volume_range
                    else:
-                        df_norm['volume'] = 0.5  # Default to middle if no volume range
+                        # Fallback: normalize by current price range
                logger.debug(f"Applied pivot-based normalization for {symbol}")
            else:
                # Fallback to traditional normalization when pivot bounds not available
                logger.debug("Using traditional normalization (no pivot bounds available)")
                for col in df_norm.columns:
                    if col in ['open', 'high', 'low', 'close', 'sma_10', 'sma_20', 'sma_50', 
                              'ema_12', 'ema_26', 'ema_50', 'bb_upper', 'bb_lower', 'bb_middle',
                              'keltner_upper', 'keltner_lower', 'keltner_middle', 'psar', 'vwap']:
                        # Price-based indicators: normalize by close price
                        if 'close' in df_norm.columns:
-                            base_price = df_norm['close'].iloc[-1]  # Use latest close as reference
+                            base_price = df_norm['close'].iloc[-1]
                            if base_price > 0:
                                df_norm[col] = df_norm[col] / base_price
-                    elif col == 'volume':
+            # Volume normalization (unified across timeframes)
-                        # Volume: normalize by its own rolling mean
+            if 'volume' in df_norm.columns:
-                        volume_mean = df_norm[col].rolling(window=min(20, len(df_norm))).mean().iloc[-1]
+                if symbol and symbol in self.pivot_bounds and volume_range > 0:
                    df_norm['volume'] = (df_norm['volume'] - bounds.volume_min) / volume_range
                else:
                    # Fallback: normalize by rolling mean
                    volume_mean = df_norm['volume'].rolling(window=min(20, len(df_norm))).mean().iloc[-1]
                    if volume_mean > 0:
-                            df_norm[col] = df_norm[col] / volume_mean
+                        df_norm['volume'] = df_norm['volume'] / volume_mean
                    else:
                        df_norm['volume'] = 0.5
-            # Normalize indicators that have standard ranges (regardless of pivot bounds)
+            # Standard range indicators (already 0-1 or 0-100)
            for col in df_norm.columns:
                if col in ['rsi_14', 'rsi_7', 'rsi_21']:
-                    # RSI: already 0-100, normalize to 0-1
+                    # RSI: 0-100 -> 0-1
                    df_norm[col] = df_norm[col] / 100.0
                elif col in ['stoch_k', 'stoch_d']:
-                    # Stochastic: already 0-100, normalize to 0-1
+                    # Stochastic: 0-100 -> 0-1
                    df_norm[col] = df_norm[col] / 100.0
                elif col == 'williams_r':
-                    # Williams %R: -100 to 0, normalize to 0-1
+                    # Williams %R: -100 to 0 -> 0-1
                    df_norm[col] = (df_norm[col] + 100) / 100.0
                elif col in ['macd', 'macd_signal', 'macd_histogram']:
-                    # MACD: normalize by ATR or close price
+                    # MACD: normalize by unified price range
-                    if 'atr' in df_norm.columns and df_norm['atr'].iloc[-1] > 0:
+                    if symbol and symbol in self.pivot_bounds:
-                        df_norm[col] = df_norm[col] / df_norm['atr'].iloc[-1]
+                        df_norm[col] = df_norm[col] / price_range
                    elif 'close' in df_norm.columns and df_norm['close'].iloc[-1] > 0:
                        df_norm[col] = df_norm[col] / df_norm['close'].iloc[-1]
                elif col in ['bb_width', 'bb_percent', 'price_position', 'trend_strength', 
                            'momentum_composite', 'volatility_regime', 'pivot_price_position',
                            'pivot_support_distance', 'pivot_resistance_distance']:
-                    # Already normalized indicators: ensure 0-1 range
+                    # Already normalized: ensure 0-1 range
                    df_norm[col] = np.clip(df_norm[col], 0, 1)
                elif col in ['atr', 'true_range']:
-                    # Volatility indicators: normalize by close price or pivot range
+                    # Volatility: normalize by unified price range
                    if symbol and symbol in self.pivot_bounds:
-                        bounds = self.pivot_bounds[symbol]
+                        df_norm[col] = df_norm[col] / price_range
                        df_norm[col] = df_norm[col] / bounds.get_price_range()
                    elif 'close' in df_norm.columns and df_norm['close'].iloc[-1] > 0:
                        df_norm[col] = df_norm[col] / df_norm['close'].iloc[-1]
@@ -3210,12 +3209,19 @@ class DataProvider:
                    else:
                        df_norm[col] = 0
-            # Replace inf/-inf with 0
+            # Clean up any invalid values
            df_norm = df_norm.replace([np.inf, -np.inf], 0)
            # Fill any remaining NaN values
            df_norm = df_norm.fillna(0)
            # Ensure all values are in reasonable range for neural networks
            df_norm = np.clip(df_norm, -10, 10)
            return df_norm
        except Exception as e:
            logger.error(f"Error in unified feature normalization: {e}")
            return None
            return df_norm
        except Exception as e:
--- a/core/orchestrator.py
+++ b/core/orchestrator.py
@@ -3541,6 +3541,7 @@ class TradingOrchestrator:
        """
        Calculate sophisticated reward based on prediction accuracy, confidence, and price movement magnitude
        Now considers position status and current P&L when evaluating decisions
        NOISE REDUCTION: Treats neutral/low-confidence signals as HOLD to reduce training noise
        Args:
            predicted_action: The predicted action ('BUY', 'SELL', 'HOLD')
@@ -3556,8 +3557,15 @@ class TradingOrchestrator:
            tuple: (reward, was_correct)
        """
        try:
            # NOISE REDUCTION: Treat low-confidence signals as HOLD
            confidence_threshold = 0.6  # Only consider BUY/SELL if confidence > 60%
            if prediction_confidence < confidence_threshold:
                predicted_action = "HOLD"
                logger.debug(f"Low confidence ({prediction_confidence:.2f}) - treating as HOLD for noise reduction")
            # Base thresholds for determining correctness
-            movement_threshold = 0.1  # 0.1% minimum movement to consider significant
+            movement_threshold = 0.15  # Increased from 0.1% to 0.15% for stronger signals
            strong_movement_threshold = 0.5  # 0.5% for strong movements
            # Determine current position status if not provided
            if has_position is None and symbol:
@@ -3573,58 +3581,62 @@ class TradingOrchestrator:
            directional_accuracy = 0.0
            if predicted_action == "BUY":
                # BUY signals need stronger confirmation for higher rewards
                was_correct = price_change_pct > movement_threshold
-                directional_accuracy = max(
+                if price_change_pct > strong_movement_threshold:
-                    0, price_change_pct
+                    directional_accuracy = price_change_pct * 2.0  # Bonus for strong moves
-                )  # Positive for upward movement
+                else:
                    directional_accuracy = max(0, price_change_pct)  # Standard reward
            elif predicted_action == "SELL":
                # SELL signals need stronger confirmation for higher rewards
                was_correct = price_change_pct < -movement_threshold
-                directional_accuracy = max(
+                if price_change_pct < -strong_movement_threshold:
-                    0, -price_change_pct
+                    directional_accuracy = abs(price_change_pct) * 2.0  # Bonus for strong moves
-                )  # Positive for downward movement
+                else:
                    directional_accuracy = max(0, -price_change_pct)  # Standard reward
            elif predicted_action == "HOLD":
-                # HOLD evaluation now considers position status AND current P&L
+                # HOLD evaluation with noise reduction - smaller rewards to reduce training noise
                if has_position:
                    # If we have a position, HOLD evaluation depends on P&L and price movement
                    if current_position_pnl > 0:  # Currently profitable position
                        # Holding a profitable position is good if price continues favorably
                        if price_change_pct > 0:  # Price went up while holding profitable position - excellent
                            was_correct = True
-                            directional_accuracy = price_change_pct * 1.5  # Bonus for holding winners
+                            directional_accuracy = price_change_pct * 0.8  # Reduced from 1.5 to reduce noise
                        elif abs(price_change_pct) < movement_threshold:  # Price stable - good
                            was_correct = True
-                            directional_accuracy = movement_threshold + (current_position_pnl / 100.0)  # Reward based on existing profit
+                            directional_accuracy = movement_threshold * 0.5  # Reduced reward to reduce noise
                        else:  # Price dropped while holding profitable position - still okay but less reward
                            was_correct = True
-                            directional_accuracy = max(0, (current_position_pnl / 100.0) - abs(price_change_pct) * 0.5)
+                            directional_accuracy = max(0, (current_position_pnl / 100.0) - abs(price_change_pct) * 0.3)
                    elif current_position_pnl < 0:  # Currently losing position
                        # Holding a losing position is generally bad - should consider closing
                        if price_change_pct > movement_threshold:  # Price recovered - good hold
                            was_correct = True
-                            directional_accuracy = price_change_pct * 0.8  # Reduced reward for recovery
+                            directional_accuracy = price_change_pct * 0.6  # Reduced reward
                        else:  # Price continued down or stayed flat - bad hold
                            was_correct = False
                            # Penalty proportional to loss magnitude
-                            directional_accuracy = abs(current_position_pnl / 100.0) * 0.5  # Penalty for holding losers
+                            directional_accuracy = abs(current_position_pnl / 100.0) * 0.3  # Reduced penalty
                    else:  # Breakeven position
                        # Standard HOLD evaluation for breakeven positions
                        if abs(price_change_pct) < movement_threshold:  # Price stable - good
                            was_correct = True
-                            directional_accuracy = movement_threshold - abs(price_change_pct)
+                            directional_accuracy = movement_threshold * 0.4  # Reduced reward
                        else:  # Price moved significantly - missed opportunity
                            was_correct = False
-                            directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.7
+                            directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.5
                else:
                    # If we don't have a position, HOLD is correct if price stayed relatively stable
                    was_correct = abs(price_change_pct) < movement_threshold
-                    directional_accuracy = max(
+                    directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.4  # Reduced reward
                        0, movement_threshold - abs(price_change_pct)
                    )  # Positive for stability
            # Calculate magnitude-based multiplier (higher rewards for larger correct movements)
            magnitude_multiplier = min(
-                abs(price_change_pct) / 2.0, 3.0
+                abs(price_change_pct) / 2.0, 2.5  # Reduced from 3.0 to 2.5 to reduce noise
-            )  # Cap at 3x for 6% moves
+            )  # Cap at 2.5x for 5% moves
            # Calculate confidence-based reward adjustment
            if was_correct: