price vector predictions
@@ -2230,6 +2230,13 @@ class TradingOrchestrator:
             # Add training samples for CNN predictions using sophisticated reward system
             for prediction in predictions:
                 if "cnn" in prediction.model_name.lower():
+                    # Extract price vector information if available
+                    predicted_price_vector = None
+                    if hasattr(prediction, 'price_direction') and prediction.price_direction:
+                        predicted_price_vector = prediction.price_direction
+                    elif hasattr(prediction, 'metadata') and prediction.metadata and 'price_direction' in prediction.metadata:
+                        predicted_price_vector = prediction.metadata['price_direction']
+
                     # Calculate sophisticated reward using the new PnL penalty/reward system
                     sophisticated_reward, was_correct = self._calculate_sophisticated_reward(
                         predicted_action=prediction.action,
@@ -2239,7 +2246,8 @@ class TradingOrchestrator:
                         has_price_prediction=False,
                         symbol=symbol,
                         has_position=has_position,
-                        current_position_pnl=current_position_pnl
+                        current_position_pnl=current_position_pnl,
+                        predicted_price_vector=predicted_price_vector
                     )

                     # Create training record for the new training system
@@ -3323,6 +3331,12 @@ class TradingOrchestrator:

             # Calculate reward for logging
             current_pnl = self._get_current_position_pnl(self.symbol)
+
+            # Extract price vector from prediction metadata if available
+            predicted_price_vector = None
+            if "price_direction" in prediction and prediction["price_direction"]:
+                predicted_price_vector = prediction["price_direction"]
+
             reward, _ = self._calculate_sophisticated_reward(
                 predicted_action,
                 predicted_confidence,
@@ -3331,6 +3345,7 @@ class TradingOrchestrator:
                 has_price_prediction=predicted_price is not None,
                 symbol=self.symbol,
                 current_position_pnl=current_pnl,
+                predicted_price_vector=predicted_price_vector,
             )

             # Enhanced logging with detailed information
@@ -3420,6 +3435,12 @@ class TradingOrchestrator:

             # Calculate sophisticated reward based on multiple factors
             current_pnl = self._get_current_position_pnl(symbol)
+
+            # Extract price vector from prediction metadata if available
+            predicted_price_vector = None
+            if "price_direction" in prediction and prediction["price_direction"]:
+                predicted_price_vector = prediction["price_direction"]
+
             reward, was_correct = self._calculate_sophisticated_reward(
                 predicted_action,
                 prediction_confidence,
@@ -3429,6 +3450,7 @@ class TradingOrchestrator:
                 symbol,  # Pass symbol for position lookup
                 None,  # Let method determine position status
                 current_position_pnl=current_pnl,
+                predicted_price_vector=predicted_price_vector,
             )

             # Update model performance tracking
@@ -3537,11 +3559,13 @@ class TradingOrchestrator:
         symbol: str = None,
         has_position: bool = None,
         current_position_pnl: float = 0.0,
+        predicted_price_vector: dict = None,
     ) -> tuple[float, bool]:
         """
         Calculate sophisticated reward based on prediction accuracy, confidence, and price movement magnitude
         Now considers position status and current P&L when evaluating decisions
         NOISE REDUCTION: Treats neutral/low-confidence signals as HOLD to reduce training noise
+        PRICE VECTOR BONUS: Rewards accurate price direction and magnitude predictions

         Args:
             predicted_action: The predicted action ('BUY', 'SELL', 'HOLD')
@@ -3552,6 +3576,7 @@ class TradingOrchestrator:
             symbol: Trading symbol (for position lookup)
             has_position: Whether we currently have a position (if None, will be looked up)
             current_position_pnl: Current unrealized P&L of open position (0.0 if no position)
+            predicted_price_vector: Dict with 'direction' (-1 to 1) and 'confidence' (0 to 1)

         Returns:
             tuple: (reward, was_correct)
@@ -3563,9 +3588,12 @@ class TradingOrchestrator:
                 predicted_action = "HOLD"
                 logger.debug(f"Low confidence ({prediction_confidence:.2f}) - treating as HOLD for noise reduction")

-            # Base thresholds for determining correctness
-            movement_threshold = 0.15  # Increased from 0.1% to 0.15% for stronger signals
-            strong_movement_threshold = 0.5  # 0.5% for strong movements
+            # FEE-AWARE THRESHOLDS: Account for trading fees (0.05-0.06% per trade, ~0.12% round trip)
+            fee_cost = 0.12  # 0.12% round trip fee cost
+            movement_threshold = 0.15  # Minimum movement to be profitable after fees
+            strong_movement_threshold = 0.5  # Strong movements - good profit potential
+            rapid_movement_threshold = 1.0  # Rapid movements - excellent profit potential
+            massive_movement_threshold = 2.0  # Massive movements - extraordinary profit potential

             # Determine current position status if not provided
             if has_position is None and symbol:
@@ -3581,20 +3609,49 @@ class TradingOrchestrator:
             directional_accuracy = 0.0

             if predicted_action == "BUY":
-                # BUY signals need stronger confirmation for higher rewards
+                # BUY signals need to overcome fee costs for profitability
                 was_correct = price_change_pct > movement_threshold
-                if price_change_pct > strong_movement_threshold:
-                    directional_accuracy = price_change_pct * 2.0  # Bonus for strong moves
+
+                # ENHANCED FEE-AWARE REWARD STRUCTURE
+                if price_change_pct > massive_movement_threshold:
+                    # Massive movements (2%+) - EXTRAORDINARY rewards for high confidence
+                    directional_accuracy = price_change_pct * 5.0  # 5x multiplier for massive moves
+                    if prediction_confidence > 0.8:
+                        directional_accuracy *= 2.0  # Additional 2x for high confidence (10x total)
+                elif price_change_pct > rapid_movement_threshold:
+                    # Rapid movements (1%+) - EXCELLENT rewards for high confidence
+                    directional_accuracy = price_change_pct * 3.0  # 3x multiplier for rapid moves
+                    if prediction_confidence > 0.7:
+                        directional_accuracy *= 1.5  # Additional 1.5x for good confidence (4.5x total)
+                elif price_change_pct > strong_movement_threshold:
+                    # Strong movements (0.5%+) - GOOD rewards
+                    directional_accuracy = price_change_pct * 2.0  # 2x multiplier for strong moves
                 else:
-                    directional_accuracy = max(0, price_change_pct)  # Standard reward
+                    # Small movements - minimal rewards (fees eat most profit)
+                    directional_accuracy = max(0, (price_change_pct - fee_cost)) * 0.5  # Penalty for fee cost

             elif predicted_action == "SELL":
-                # SELL signals need stronger confirmation for higher rewards
+                # SELL signals need to overcome fee costs for profitability
                 was_correct = price_change_pct < -movement_threshold
-                if price_change_pct < -strong_movement_threshold:
-                    directional_accuracy = abs(price_change_pct) * 2.0  # Bonus for strong moves
+
+                # ENHANCED FEE-AWARE REWARD STRUCTURE (symmetric to BUY)
+                abs_change = abs(price_change_pct)
+                if abs_change > massive_movement_threshold:
+                    # Massive movements (2%+) - EXTRAORDINARY rewards for high confidence
+                    directional_accuracy = abs_change * 5.0  # 5x multiplier for massive moves
+                    if prediction_confidence > 0.8:
+                        directional_accuracy *= 2.0  # Additional 2x for high confidence (10x total)
+                elif abs_change > rapid_movement_threshold:
+                    # Rapid movements (1%+) - EXCELLENT rewards for high confidence
+                    directional_accuracy = abs_change * 3.0  # 3x multiplier for rapid moves
+                    if prediction_confidence > 0.7:
+                        directional_accuracy *= 1.5  # Additional 1.5x for good confidence (4.5x total)
+                elif abs_change > strong_movement_threshold:
+                    # Strong movements (0.5%+) - GOOD rewards
+                    directional_accuracy = abs_change * 2.0  # 2x multiplier for strong moves
                 else:
-                    directional_accuracy = max(0, -price_change_pct)  # Standard reward
+                    # Small movements - minimal rewards (fees eat most profit)
+                    directional_accuracy = max(0, (abs_change - fee_cost)) * 0.5  # Penalty for fee cost

             elif predicted_action == "HOLD":
                 # HOLD evaluation with noise reduction - smaller rewards to reduce training noise
@@ -3633,10 +3690,17 @@ class TradingOrchestrator:
                 was_correct = abs(price_change_pct) < movement_threshold
                 directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.4  # Reduced reward

-            # Calculate magnitude-based multiplier (higher rewards for larger correct movements)
-            magnitude_multiplier = min(
-                abs(price_change_pct) / 2.0, 2.5  # Reduced from 3.0 to 2.5 to reduce noise
-            )  # Cap at 2.5x for 5% moves
+            # Calculate FEE-AWARE magnitude-based multiplier (aggressive rewards for profitable movements)
+            abs_movement = abs(price_change_pct)
+            if abs_movement > massive_movement_threshold:
+                magnitude_multiplier = min(abs_movement / 1.0, 8.0)  # Up to 8x for massive moves (8% = 8x)
+            elif abs_movement > rapid_movement_threshold:
+                magnitude_multiplier = min(abs_movement / 1.5, 4.0)  # Up to 4x for rapid moves (6% = 4x)
+            elif abs_movement > strong_movement_threshold:
+                magnitude_multiplier = min(abs_movement / 2.0, 2.0)  # Up to 2x for strong moves (4% = 2x)
+            else:
+                # Small movements get minimal multiplier due to fees
+                magnitude_multiplier = max(0.1, (abs_movement - fee_cost) / 2.0)  # Penalty for fee cost

             # Calculate confidence-based reward adjustment
             if was_correct:
@@ -3648,22 +3712,61 @@ class TradingOrchestrator:
                     directional_accuracy * magnitude_multiplier * confidence_multiplier
                 )

-                # Bonus for high-confidence correct predictions with large movements
-                if prediction_confidence > 0.8 and abs(price_change_pct) > 1.0:
-                    base_reward *= 1.5  # 50% bonus for very confident + large movement
+                # ENHANCED HIGH-CONFIDENCE BONUSES for profitable movements
+                abs_movement = abs(price_change_pct)
+
+                # Extraordinary confidence bonus for massive movements
+                if prediction_confidence > 0.9 and abs_movement > massive_movement_threshold:
+                    base_reward *= 3.0  # 300% bonus for ultra-confident massive moves
+                    logger.info(f"ULTRA CONFIDENCE BONUS: {prediction_confidence:.2f} confidence + {abs_movement:.2f}% movement = 3x reward")
+
+                # Excellent confidence bonus for rapid movements
+                elif prediction_confidence > 0.8 and abs_movement > rapid_movement_threshold:
+                    base_reward *= 2.0  # 200% bonus for very confident rapid moves
+                    logger.info(f"HIGH CONFIDENCE BONUS: {prediction_confidence:.2f} confidence + {abs_movement:.2f}% movement = 2x reward")
+
+                # Good confidence bonus for strong movements
+                elif prediction_confidence > 0.7 and abs_movement > strong_movement_threshold:
+                    base_reward *= 1.5  # 150% bonus for confident strong moves
+                    logger.info(f"CONFIDENCE BONUS: {prediction_confidence:.2f} confidence + {abs_movement:.2f}% movement = 1.5x reward")
+
+                # Rapid movement detection bonus (speed matters for fees)
+                if time_diff_minutes < 5.0 and abs_movement > rapid_movement_threshold:
+                    base_reward *= 1.3  # 30% bonus for rapid detection of big moves
+                    logger.info(f"RAPID DETECTION BONUS: {abs_movement:.2f}% movement in {time_diff_minutes:.1f}m = 1.3x reward")
+
+                # PRICE VECTOR ACCURACY BONUS - Reward models for accurate price direction/magnitude predictions
+                if predicted_price_vector and isinstance(predicted_price_vector, dict):
+                    vector_bonus = self._calculate_price_vector_bonus(
+                        predicted_price_vector, price_change_pct, abs_movement, prediction_confidence
+                    )
+                    if vector_bonus > 0:
+                        base_reward += vector_bonus
+                        logger.info(f"PRICE VECTOR BONUS: +{vector_bonus:.3f} for accurate direction/magnitude prediction")

             else:
-                # Penalize incorrect predictions more severely if they were confident
-                confidence_penalty = 0.5 + (
-                    prediction_confidence * 1.5
-                )  # Higher confidence = higher penalty
-                base_penalty = abs(price_change_pct) * confidence_penalty
+                # ENHANCED PENALTY SYSTEM: Discourage fee-losing trades
+                abs_movement = abs(price_change_pct)

-                # Extra penalty for very confident wrong predictions
-                if prediction_confidence > 0.8:
-                    base_penalty *= (
-                        2.0  # Double penalty for overconfident wrong predictions
-                    )
+                # Penalize incorrect predictions more severely if they were confident
+                confidence_penalty = 0.5 + (prediction_confidence * 1.5)  # Higher confidence = higher penalty
+                base_penalty = abs_movement * confidence_penalty
+
+                # SEVERE penalties for confident wrong predictions on big moves
+                if prediction_confidence > 0.8 and abs_movement > rapid_movement_threshold:
+                    base_penalty *= 5.0  # 5x penalty for very confident wrong on big moves
+                    logger.warning(f"SEVERE PENALTY: {prediction_confidence:.2f} confidence wrong on {abs_movement:.2f}% movement = 5x penalty")
+                elif prediction_confidence > 0.7 and abs_movement > strong_movement_threshold:
+                    base_penalty *= 3.0  # 3x penalty for confident wrong on strong moves
+                    logger.warning(f"HIGH PENALTY: {prediction_confidence:.2f} confidence wrong on {abs_movement:.2f}% movement = 3x penalty")
+                elif prediction_confidence > 0.8:
+                    base_penalty *= 2.0  # 2x penalty for overconfident wrong predictions
+
+                # ADDITIONAL penalty for predictions that would lose money to fees
+                if abs_movement < fee_cost and prediction_confidence > 0.5:
+                    fee_loss_penalty = (fee_cost - abs_movement) * 2.0  # Penalty for fee-losing trades
+                    base_penalty += fee_loss_penalty
+                    logger.warning(f"FEE LOSS PENALTY: {abs_movement:.2f}% movement < {fee_cost:.2f}% fees = +{fee_loss_penalty:.3f} penalty")

                 base_reward = -base_penalty

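For a correct call, the pieces above combine as directional_accuracy * magnitude_multiplier * confidence_multiplier. The following standalone sketch mirrors only the BUY branch and the magnitude tiers from this hunk to show the arithmetic; the constants are copied from the diff, the confidence multiplier is simplified to the raw confidence value, and the penalty path and the later bonuses are left out, so it is an illustration rather than the orchestrator's actual method.

# Illustrative sketch of the fee-aware scoring above (not the production code).
FEE_COST = 0.12                       # ~0.12% round-trip fee, as in the diff
MOVEMENT, STRONG, RAPID, MASSIVE = 0.15, 0.5, 1.0, 2.0

def sketch_buy_reward(price_change_pct: float, confidence: float) -> float:
    """Correct-BUY path only: tiered directional accuracy x magnitude multiplier x confidence."""
    if price_change_pct <= MOVEMENT:          # not profitable after fees; penalty path omitted here
        return 0.0

    if price_change_pct > MASSIVE:            # 5x tier, doubled above 0.8 confidence
        directional = price_change_pct * 5.0 * (2.0 if confidence > 0.8 else 1.0)
        magnitude = min(price_change_pct / 1.0, 8.0)
    elif price_change_pct > RAPID:            # 3x tier, x1.5 above 0.7 confidence
        directional = price_change_pct * 3.0 * (1.5 if confidence > 0.7 else 1.0)
        magnitude = min(price_change_pct / 1.5, 4.0)
    elif price_change_pct > STRONG:           # 2x tier
        directional = price_change_pct * 2.0
        magnitude = min(price_change_pct / 2.0, 2.0)
    else:                                     # small moves: fees eat most of the profit
        directional = max(0.0, price_change_pct - FEE_COST) * 0.5
        magnitude = max(0.1, (price_change_pct - FEE_COST) / 2.0)

    return directional * magnitude * confidence

# A 2.5% rally called with 0.95 confidence:
# directional = 2.5 * 5.0 * 2.0 = 25.0, magnitude = 2.5, so 25.0 * 2.5 * 0.95 = 59.375,
# before the additional high-confidence, rapid-detection and price-vector bonuses above.
print(sketch_buy_reward(2.5, 0.95))   # 59.375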
@@ -3706,6 +3809,78 @@ class TradingOrchestrator:
             )
             return (1.0 if simple_correct else -0.5, simple_correct)

+    def _calculate_price_vector_bonus(
+        self,
+        predicted_vector: dict,
+        actual_price_change_pct: float,
+        abs_movement: float,
+        prediction_confidence: float
+    ) -> float:
+        """
+        Calculate bonus reward for accurate price direction and magnitude predictions
+
+        Args:
+            predicted_vector: Dict with 'direction' (-1 to 1) and 'confidence' (0 to 1)
+            actual_price_change_pct: Actual price change percentage
+            abs_movement: Absolute value of price movement
+            prediction_confidence: Overall model confidence
+
+        Returns:
+            Bonus reward value (0 or positive)
+        """
+        try:
+            predicted_direction = predicted_vector.get('direction', 0.0)
+            vector_confidence = predicted_vector.get('confidence', 0.0)
+
+            # Skip if vector prediction is too weak
+            if abs(predicted_direction) < 0.1 or vector_confidence < 0.3:
+                return 0.0
+
+            # Calculate direction accuracy
+            actual_direction = 1.0 if actual_price_change_pct > 0 else -1.0 if actual_price_change_pct < 0 else 0.0
+            direction_accuracy = 0.0
+
+            if actual_direction != 0.0:  # Only if there was actual movement
+                # Check if predicted direction matches actual direction
+                if (predicted_direction > 0 and actual_direction > 0) or (predicted_direction < 0 and actual_direction < 0):
+                    direction_accuracy = min(abs(predicted_direction), 1.0)  # Stronger prediction = higher bonus
+
+                    # MAGNITUDE ACCURACY BONUS
+                    # Convert predicted direction to expected magnitude (scaled by confidence)
+                    predicted_magnitude = abs(predicted_direction) * vector_confidence * 2.0  # Scale to ~2% max
+                    magnitude_error = abs(predicted_magnitude - abs_movement)
+
+                    # Bonus for accurate magnitude prediction (lower error = higher bonus)
+                    if magnitude_error < 1.0:  # Within 1% error
+                        magnitude_accuracy = max(0, 1.0 - magnitude_error)  # 0 to 1.0
+
+                        # COMBINED BONUS CALCULATION
+                        base_vector_bonus = direction_accuracy * magnitude_accuracy * vector_confidence
+
+                        # Scale bonus based on movement size (bigger movements get bigger bonuses)
+                        if abs_movement > 2.0:  # Massive movements
+                            scale_factor = 3.0
+                        elif abs_movement > 1.0:  # Rapid movements
+                            scale_factor = 2.0
+                        elif abs_movement > 0.5:  # Strong movements
+                            scale_factor = 1.5
+                        else:
+                            scale_factor = 1.0
+
+                        final_bonus = base_vector_bonus * scale_factor * prediction_confidence
+
+                        logger.debug(f"VECTOR ANALYSIS: pred_dir={predicted_direction:.3f}, actual_dir={actual_direction:.3f}, "
+                                     f"pred_mag={predicted_magnitude:.3f}, actual_mag={abs_movement:.3f}, "
+                                     f"dir_acc={direction_accuracy:.3f}, mag_acc={magnitude_accuracy:.3f}, bonus={final_bonus:.3f}")
+
+                        return min(final_bonus, 2.0)  # Cap bonus at 2.0
+
+            return 0.0
+
+        except Exception as e:
+            logger.error(f"Error calculating price vector bonus: {e}")
+            return 0.0
+
     async def _train_model_on_outcome(
         self,
         record: Dict,
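To make the bonus concrete, here is the arithmetic of _calculate_price_vector_bonus for one hypothetical prediction; the input numbers are illustrative, the steps follow the method above.

# Suppose the model emitted {'direction': 0.8, 'confidence': 0.75}, the price then
# moved +1.2%, and the overall prediction confidence was 0.8.
predicted_direction, vector_confidence = 0.8, 0.75
abs_movement, prediction_confidence = 1.2, 0.8

direction_accuracy = min(abs(predicted_direction), 1.0)                          # 0.8 (signs match)
predicted_magnitude = abs(predicted_direction) * vector_confidence * 2.0         # 1.2
magnitude_accuracy = max(0, 1.0 - abs(predicted_magnitude - abs_movement))       # 1.0 (zero error)
base_vector_bonus = direction_accuracy * magnitude_accuracy * vector_confidence  # 0.6
scale_factor = 2.0                                                               # 1.2% falls in the 1-2% tier
final_bonus = min(base_vector_bonus * scale_factor * prediction_confidence, 2.0) # 0.96, under the 2.0 cap
print(final_bonus)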
@@ -3724,6 +3899,10 @@ class TradingOrchestrator:
         if sophisticated_reward is None:
             symbol = record.get("symbol", self.symbol)
             current_pnl = self._get_current_position_pnl(symbol)
+
+            # Extract price vector from record if available
+            predicted_price_vector = record.get("price_direction") or record.get("predicted_price_vector")
+
             sophisticated_reward, _ = self._calculate_sophisticated_reward(
                 record.get("action", "HOLD"),
                 record.get("confidence", 0.5),
@@ -3732,6 +3911,7 @@ class TradingOrchestrator:
                 record.get("has_price_prediction", False),
                 symbol=symbol,
                 current_position_pnl=current_pnl,
+                predicted_price_vector=predicted_price_vector,
             )

             # Train decision fusion model if it's the model being evaluated
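For reference, the dict that ends up in predicted_price_vector is read straight off the prediction: attribute/metadata style for CNN prediction objects in the first hunk, key lookup for dict-style records here. A small illustrative sketch of the expected shape (values are made up):

# 'direction' is in [-1, 1] (sign = predicted move direction, magnitude = strength),
# 'confidence' is in [0, 1], per the docstring added above. Values are illustrative.
prediction = {
    "action": "SELL",
    "confidence": 0.72,
    "price_direction": {"direction": -0.6, "confidence": 0.7},
}

predicted_price_vector = None
if "price_direction" in prediction and prediction["price_direction"]:
    predicted_price_vector = prediction["price_direction"]
# ...and it is then passed as predicted_price_vector=predicted_price_vector into
# _calculate_sophisticated_reward, exactly as in the hunks above.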
@@ -25,5 +25,5 @@
       "training_enabled": true
     }
   },
-  "timestamp": "2025-07-29T23:22:58.380697"
+  "timestamp": "2025-07-29T23:33:51.882579"
 }