diff --git a/core/orchestrator.py b/core/orchestrator.py
index 48a90fc..719e8b8 100644
--- a/core/orchestrator.py
+++ b/core/orchestrator.py
@@ -2230,6 +2230,13 @@ class TradingOrchestrator:
             # Add training samples for CNN predictions using sophisticated reward system
             for prediction in predictions:
                 if "cnn" in prediction.model_name.lower():
+                    # Extract price vector information if available
+                    predicted_price_vector = None
+                    if hasattr(prediction, 'price_direction') and prediction.price_direction:
+                        predicted_price_vector = prediction.price_direction
+                    elif hasattr(prediction, 'metadata') and prediction.metadata and 'price_direction' in prediction.metadata:
+                        predicted_price_vector = prediction.metadata['price_direction']
+
                     # Calculate sophisticated reward using the new PnL penalty/reward system
                     sophisticated_reward, was_correct = self._calculate_sophisticated_reward(
                         predicted_action=prediction.action,
@@ -2239,7 +2246,8 @@ class TradingOrchestrator:
                         has_price_prediction=False,
                         symbol=symbol,
                         has_position=has_position,
-                        current_position_pnl=current_position_pnl
+                        current_position_pnl=current_position_pnl,
+                        predicted_price_vector=predicted_price_vector
                     )
 
                     # Create training record for the new training system
@@ -3323,6 +3331,12 @@ class TradingOrchestrator:
 
             # Calculate reward for logging
             current_pnl = self._get_current_position_pnl(self.symbol)
+
+            # Extract price vector from prediction metadata if available
+            predicted_price_vector = None
+            if "price_direction" in prediction and prediction["price_direction"]:
+                predicted_price_vector = prediction["price_direction"]
+
             reward, _ = self._calculate_sophisticated_reward(
                 predicted_action,
                 predicted_confidence,
@@ -3331,6 +3345,7 @@ class TradingOrchestrator:
                 has_price_prediction=predicted_price is not None,
                 symbol=self.symbol,
                 current_position_pnl=current_pnl,
+                predicted_price_vector=predicted_price_vector,
             )
 
             # Enhanced logging with detailed information
@@ -3420,6 +3435,12 @@ class TradingOrchestrator:
 
             # Calculate sophisticated reward based on multiple factors
             current_pnl = self._get_current_position_pnl(symbol)
+
+            # Extract price vector from prediction metadata if available
+            predicted_price_vector = None
+            if "price_direction" in prediction and prediction["price_direction"]:
+                predicted_price_vector = prediction["price_direction"]
+
             reward, was_correct = self._calculate_sophisticated_reward(
                 predicted_action,
                 prediction_confidence,
@@ -3429,6 +3450,7 @@ class TradingOrchestrator:
                 symbol,  # Pass symbol for position lookup
                 None,  # Let method determine position status
                 current_position_pnl=current_pnl,
+                predicted_price_vector=predicted_price_vector,
             )
 
             # Update model performance tracking
@@ -3537,11 +3559,13 @@ class TradingOrchestrator:
         symbol: str = None,
         has_position: bool = None,
         current_position_pnl: float = 0.0,
+        predicted_price_vector: dict = None,
     ) -> tuple[float, bool]:
         """
        Calculate sophisticated reward based on prediction accuracy, confidence, and price movement magnitude
        Now considers position status and current P&L when evaluating decisions
        NOISE REDUCTION: Treats neutral/low-confidence signals as HOLD to reduce training noise
+        PRICE VECTOR BONUS: Rewards accurate price direction and magnitude predictions
 
        Args:
            predicted_action: The predicted action ('BUY', 'SELL', 'HOLD')
@@ -3552,6 +3576,7 @@ class TradingOrchestrator:
            symbol: Trading symbol (for position lookup)
            has_position: Whether we currently have a position (if None, will be looked up)
            current_position_pnl: Current unrealized P&L of open position (0.0 if no position)
+            predicted_price_vector: Dict with 'direction' (-1 to 1) and 'confidence' (0 to 1)
 
        Returns:
            tuple: (reward, was_correct)
@@ -3563,9 +3588,12 @@ class TradingOrchestrator:
                 predicted_action = "HOLD"
                 logger.debug(f"Low confidence ({prediction_confidence:.2f}) - treating as HOLD for noise reduction")
 
-            # Base thresholds for determining correctness
-            movement_threshold = 0.15  # Increased from 0.1% to 0.15% for stronger signals
-            strong_movement_threshold = 0.5  # 0.5% for strong movements
+            # FEE-AWARE THRESHOLDS: Account for trading fees (0.05-0.06% per trade, ~0.12% round trip)
+            fee_cost = 0.12  # 0.12% round-trip fee cost
+            movement_threshold = 0.15  # Minimum movement to be profitable after fees
+            strong_movement_threshold = 0.5  # Strong movements - good profit potential
+            rapid_movement_threshold = 1.0  # Rapid movements - excellent profit potential
+            massive_movement_threshold = 2.0  # Massive movements - extraordinary profit potential
 
             # Determine current position status if not provided
             if has_position is None and symbol:
@@ -3581,20 +3609,49 @@ class TradingOrchestrator:
             directional_accuracy = 0.0
 
             if predicted_action == "BUY":
-                # BUY signals need stronger confirmation for higher rewards
+                # BUY signals need to overcome fee costs for profitability
                 was_correct = price_change_pct > movement_threshold
-                if price_change_pct > strong_movement_threshold:
-                    directional_accuracy = price_change_pct * 2.0  # Bonus for strong moves
+
+                # ENHANCED FEE-AWARE REWARD STRUCTURE
+                if price_change_pct > massive_movement_threshold:
+                    # Massive movements (2%+) - EXTRAORDINARY rewards for high confidence
+                    directional_accuracy = price_change_pct * 5.0  # 5x multiplier for massive moves
+                    if prediction_confidence > 0.8:
+                        directional_accuracy *= 2.0  # Additional 2x for high confidence (10x total)
+                elif price_change_pct > rapid_movement_threshold:
+                    # Rapid movements (1%+) - EXCELLENT rewards for high confidence
+                    directional_accuracy = price_change_pct * 3.0  # 3x multiplier for rapid moves
+                    if prediction_confidence > 0.7:
+                        directional_accuracy *= 1.5  # Additional 1.5x for good confidence (4.5x total)
+                elif price_change_pct > strong_movement_threshold:
+                    # Strong movements (0.5%+) - GOOD rewards
+                    directional_accuracy = price_change_pct * 2.0  # 2x multiplier for strong moves
                 else:
-                    directional_accuracy = max(0, price_change_pct)  # Standard reward
+                    # Small movements - minimal rewards (fees eat most profit)
+                    directional_accuracy = max(0, price_change_pct - fee_cost) * 0.5  # Fee-adjusted reward
 
             elif predicted_action == "SELL":
-                # SELL signals need stronger confirmation for higher rewards
+                # SELL signals need to overcome fee costs for profitability
                 was_correct = price_change_pct < -movement_threshold
-                if price_change_pct < -strong_movement_threshold:
-                    directional_accuracy = abs(price_change_pct) * 2.0  # Bonus for strong moves
+
+                # ENHANCED FEE-AWARE REWARD STRUCTURE (symmetric to BUY)
+                abs_change = abs(price_change_pct)
+                if abs_change > massive_movement_threshold:
+                    # Massive movements (2%+) - EXTRAORDINARY rewards for high confidence
+                    directional_accuracy = abs_change * 5.0  # 5x multiplier for massive moves
+                    if prediction_confidence > 0.8:
+                        directional_accuracy *= 2.0  # Additional 2x for high confidence (10x total)
+                elif abs_change > rapid_movement_threshold:
+                    # Rapid movements (1%+) - EXCELLENT rewards for high confidence
+                    directional_accuracy = abs_change * 3.0  # 3x multiplier for rapid moves
+                    if prediction_confidence > 0.7:
+                        directional_accuracy *= 1.5  # Additional 1.5x for good confidence (4.5x total)
+                elif abs_change > strong_movement_threshold:
+                    # Strong movements (0.5%+) - GOOD rewards
+                    directional_accuracy = abs_change * 2.0  # 2x multiplier for strong moves
                 else:
-                    directional_accuracy = max(0, -price_change_pct)  # Standard reward
+                    # Small movements - minimal rewards (fees eat most profit)
+                    directional_accuracy = max(0, abs_change - fee_cost) * 0.5  # Fee-adjusted reward
 
             elif predicted_action == "HOLD":
                 # HOLD evaluation with noise reduction - smaller rewards to reduce training noise
@@ -3633,10 +3690,17 @@ class TradingOrchestrator:
                 was_correct = abs(price_change_pct) < movement_threshold
                 directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.4  # Reduced reward
 
-            # Calculate magnitude-based multiplier (higher rewards for larger correct movements)
-            magnitude_multiplier = min(
-                abs(price_change_pct) / 2.0, 2.5  # Reduced from 3.0 to 2.5 to reduce noise
-            )  # Cap at 2.5x for 5% moves
+            # Calculate FEE-AWARE magnitude-based multiplier (aggressive rewards for profitable movements)
+            abs_movement = abs(price_change_pct)
+            if abs_movement > massive_movement_threshold:
+                magnitude_multiplier = min(abs_movement / 1.0, 8.0)  # Up to 8x for massive moves (8% = 8x)
+            elif abs_movement > rapid_movement_threshold:
+                magnitude_multiplier = min(abs_movement / 1.5, 4.0)  # Up to 4x for rapid moves (6% = 4x)
+            elif abs_movement > strong_movement_threshold:
+                magnitude_multiplier = min(abs_movement / 2.0, 2.0)  # Up to 2x for strong moves (4% = 2x)
+            else:
+                # Small movements get a minimal multiplier due to fees
+                magnitude_multiplier = max(0.1, (abs_movement - fee_cost) / 2.0)  # Fee-adjusted, floored at 0.1x
 
             # Calculate confidence-based reward adjustment
             if was_correct:
@@ -3648,22 +3712,61 @@ class TradingOrchestrator:
                 base_reward = (
                     directional_accuracy * magnitude_multiplier * confidence_multiplier
                 )
 
-                # Bonus for high-confidence correct predictions with large movements
-                if prediction_confidence > 0.8 and abs(price_change_pct) > 1.0:
-                    base_reward *= 1.5  # 50% bonus for very confident + large movement
+                # ENHANCED HIGH-CONFIDENCE BONUSES for profitable movements
+                abs_movement = abs(price_change_pct)
+
+                # Extraordinary confidence bonus for massive movements
+                if prediction_confidence > 0.9 and abs_movement > massive_movement_threshold:
+                    base_reward *= 3.0  # 3x reward for ultra-confident massive moves
+                    logger.info(f"ULTRA CONFIDENCE BONUS: {prediction_confidence:.2f} confidence + {abs_movement:.2f}% movement = 3x reward")
+
+                # Excellent confidence bonus for rapid movements
+                elif prediction_confidence > 0.8 and abs_movement > rapid_movement_threshold:
+                    base_reward *= 2.0  # 2x reward for very confident rapid moves
+                    logger.info(f"HIGH CONFIDENCE BONUS: {prediction_confidence:.2f} confidence + {abs_movement:.2f}% movement = 2x reward")
+
+                # Good confidence bonus for strong movements
+                elif prediction_confidence > 0.7 and abs_movement > strong_movement_threshold:
+                    base_reward *= 1.5  # 1.5x reward for confident strong moves
+                    logger.info(f"CONFIDENCE BONUS: {prediction_confidence:.2f} confidence + {abs_movement:.2f}% movement = 1.5x reward")
+
+                # Rapid movement detection bonus (speed matters for fees)
+                if time_diff_minutes < 5.0 and abs_movement > rapid_movement_threshold:
+                    base_reward *= 1.3  # 30% bonus for rapid detection of big moves
+                    logger.info(f"RAPID DETECTION BONUS: {abs_movement:.2f}% movement in {time_diff_minutes:.1f}m = 1.3x reward")
+
+                # PRICE VECTOR ACCURACY BONUS - reward models for accurate price direction/magnitude predictions
+                if predicted_price_vector and isinstance(predicted_price_vector, dict):
+                    vector_bonus = self._calculate_price_vector_bonus(
+                        predicted_price_vector, price_change_pct, abs_movement, prediction_confidence
+                    )
+                    if vector_bonus > 0:
+                        base_reward += vector_bonus
+                        logger.info(f"PRICE VECTOR BONUS: +{vector_bonus:.3f} for accurate direction/magnitude prediction")
             else:
+                # ENHANCED PENALTY SYSTEM: Discourage fee-losing trades
+                abs_movement = abs(price_change_pct)
+
                 # Penalize incorrect predictions more severely if they were confident
-                confidence_penalty = 0.5 + (
-                    prediction_confidence * 1.5
-                )  # Higher confidence = higher penalty
-                base_penalty = abs(price_change_pct) * confidence_penalty
+                confidence_penalty = 0.5 + (prediction_confidence * 1.5)  # Higher confidence = higher penalty
+                base_penalty = abs_movement * confidence_penalty
 
-                # Extra penalty for very confident wrong predictions
-                if prediction_confidence > 0.8:
-                    base_penalty *= (
-                        2.0  # Double penalty for overconfident wrong predictions
-                    )
+                # SEVERE penalties for confident wrong predictions on big moves
+                if prediction_confidence > 0.8 and abs_movement > rapid_movement_threshold:
+                    base_penalty *= 5.0  # 5x penalty for very confident wrong calls on big moves
+                    logger.warning(f"SEVERE PENALTY: {prediction_confidence:.2f} confidence wrong on {abs_movement:.2f}% movement = 5x penalty")
+                elif prediction_confidence > 0.7 and abs_movement > strong_movement_threshold:
+                    base_penalty *= 3.0  # 3x penalty for confident wrong calls on strong moves
+                    logger.warning(f"HIGH PENALTY: {prediction_confidence:.2f} confidence wrong on {abs_movement:.2f}% movement = 3x penalty")
+                elif prediction_confidence > 0.8:
+                    base_penalty *= 2.0  # 2x penalty for overconfident wrong predictions
+
+                # ADDITIONAL penalty for predictions that would lose money to fees
+                if abs_movement < fee_cost and prediction_confidence > 0.5:
+                    fee_loss_penalty = (fee_cost - abs_movement) * 2.0  # Penalty for fee-losing trades
+                    base_penalty += fee_loss_penalty
+                    logger.warning(f"FEE LOSS PENALTY: {abs_movement:.2f}% movement < {fee_cost:.2f}% fees = +{fee_loss_penalty:.3f} penalty")
 
                 base_reward = -base_penalty
@@ -3706,6 +3809,78 @@ class TradingOrchestrator:
             )
             return (1.0 if simple_correct else -0.5, simple_correct)
 
+    def _calculate_price_vector_bonus(
+        self,
+        predicted_vector: dict,
+        actual_price_change_pct: float,
+        abs_movement: float,
+        prediction_confidence: float
+    ) -> float:
+        """
+        Calculate bonus reward for accurate price direction and magnitude predictions
+
+        Args:
+            predicted_vector: Dict with 'direction' (-1 to 1) and 'confidence' (0 to 1)
+            actual_price_change_pct: Actual price change percentage
+            abs_movement: Absolute value of price movement
+            prediction_confidence: Overall model confidence
+
+        Returns:
+            Bonus reward value (0.0 if no bonus; capped at 2.0)
+        """
+        try:
+            predicted_direction = predicted_vector.get('direction', 0.0)
+            vector_confidence = predicted_vector.get('confidence', 0.0)
+
+            # Skip if vector prediction is too weak
+            if abs(predicted_direction) < 0.1 or vector_confidence < 0.3:
+                return 0.0
+
+            # Calculate direction accuracy
+            actual_direction = 1.0 if actual_price_change_pct > 0 else -1.0 if actual_price_change_pct < 0 else 0.0
+            direction_accuracy = 0.0
+
+            if actual_direction != 0.0:  # Only if there was actual movement
+                # Check if predicted direction matches actual direction
+                if (predicted_direction > 0 and actual_direction > 0) or (predicted_direction < 0 and actual_direction < 0):
+                    direction_accuracy = min(abs(predicted_direction), 1.0)  # Stronger prediction = higher bonus
+
+                # MAGNITUDE ACCURACY BONUS
+                # Convert predicted direction to expected magnitude (scaled by confidence)
+                predicted_magnitude = abs(predicted_direction) * vector_confidence * 2.0  # Scale to ~2% max
+                magnitude_error = abs(predicted_magnitude - abs_movement)
+
+                # Bonus for accurate magnitude prediction (lower error = higher bonus)
+                if magnitude_error < 1.0:  # Within 1% error
+                    magnitude_accuracy = max(0, 1.0 - magnitude_error)  # 0 to 1.0
+
+                    # COMBINED BONUS CALCULATION
+                    base_vector_bonus = direction_accuracy * magnitude_accuracy * vector_confidence
+
+                    # Scale bonus based on movement size (bigger movements get bigger bonuses)
+                    if abs_movement > 2.0:  # Massive movements
+                        scale_factor = 3.0
+                    elif abs_movement > 1.0:  # Rapid movements
+                        scale_factor = 2.0
+                    elif abs_movement > 0.5:  # Strong movements
+                        scale_factor = 1.5
+                    else:
+                        scale_factor = 1.0
+
+                    final_bonus = base_vector_bonus * scale_factor * prediction_confidence
+
+                    logger.debug(f"VECTOR ANALYSIS: pred_dir={predicted_direction:.3f}, actual_dir={actual_direction:.3f}, "
+                                 f"pred_mag={predicted_magnitude:.3f}, actual_mag={abs_movement:.3f}, "
+                                 f"dir_acc={direction_accuracy:.3f}, mag_acc={magnitude_accuracy:.3f}, bonus={final_bonus:.3f}")
+
+                    return min(final_bonus, 2.0)  # Cap bonus at 2.0
+
+            return 0.0
+
+        except Exception as e:
+            logger.error(f"Error calculating price vector bonus: {e}")
+            return 0.0
+
     async def _train_model_on_outcome(
         self,
         record: Dict,
@@ -3724,6 +3899,10 @@ class TradingOrchestrator:
         if sophisticated_reward is None:
             symbol = record.get("symbol", self.symbol)
             current_pnl = self._get_current_position_pnl(symbol)
+
+            # Extract price vector from record if available
+            predicted_price_vector = record.get("price_direction") or record.get("predicted_price_vector")
+
             sophisticated_reward, _ = self._calculate_sophisticated_reward(
                 record.get("action", "HOLD"),
                 record.get("confidence", 0.5),
@@ -3732,6 +3911,7 @@ class TradingOrchestrator:
                 record.get("has_price_prediction", False),
                 symbol=symbol,
                 current_position_pnl=current_pnl,
+                predicted_price_vector=predicted_price_vector,
             )
 
         # Train decision fusion model if it's the model being evaluated
diff --git a/data/ui_state.json b/data/ui_state.json
index b66f752..3b167b4 100644
--- a/data/ui_state.json
+++ b/data/ui_state.json
@@ -25,5 +25,5 @@
         "training_enabled": true
     }
   },
-  "timestamp": "2025-07-29T23:22:58.380697"
+  "timestamp": "2025-07-29T23:33:51.882579"
}
\ No newline at end of file
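
The fee-aware tier arithmetic introduced above can be sanity-checked in isolation. Below is a minimal Python sketch of the BUY-branch directional_accuracy math only, using the thresholds this patch defines; buy_directional_accuracy and TIERS are illustrative names rather than patch API, and the confidence_multiplier applied later in unchanged code is deliberately omitted.

# Sketch of the BUY-branch directional_accuracy math from the patch above.
# Names here are illustrative; the confidence_multiplier that also feeds
# base_reward lives in unchanged orchestrator code and is not modeled.

FEE_COST = 0.12  # % round trip, as defined in the patch
TIERS = [(2.0, 5.0), (1.0, 3.0), (0.5, 2.0)]  # (threshold %, base multiplier)

def buy_directional_accuracy(change_pct: float, confidence: float) -> float:
    for threshold, mult in TIERS:
        if change_pct > threshold:
            acc = change_pct * mult
            # Confidence kickers on the top two tiers, mirroring the patch
            if threshold == 2.0 and confidence > 0.8:
                acc *= 2.0  # 10x total for confident massive moves
            elif threshold == 1.0 and confidence > 0.7:
                acc *= 1.5  # 4.5x total for confident rapid moves
            return acc
    # Small moves are rewarded only for what survives the round-trip fee
    return max(0.0, change_pct - FEE_COST) * 0.5

for change, conf in [(0.10, 0.9), (0.6, 0.6), (1.4, 0.75), (2.5, 0.85)]:
    print(f"{change:+.2f}% @ conf {conf:.2f} -> {buy_directional_accuracy(change, conf):.3f}")
# -> 0.000, 1.200, 6.300, 25.000: a sub-fee 0.10% move earns nothing even at
#    high confidence, while a confident 2.5% move earns the full 10x tier.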
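
Likewise, the new _calculate_price_vector_bonus can be exercised standalone. The following is a behavior-matching sketch with logging and the orchestrator's self dropped; price_vector_bonus is a hypothetical free function, not part of the patch, and the early return on a wrong direction is equivalent to the patch's zero direction_accuracy.

# Standalone sketch of the _calculate_price_vector_bonus logic for quick
# unit testing; mirrors the patch above, returning 0.0 in the same cases.

def price_vector_bonus(predicted_vector: dict,
                       actual_price_change_pct: float,
                       prediction_confidence: float) -> float:
    direction = predicted_vector.get('direction', 0.0)   # -1 .. 1
    vec_conf = predicted_vector.get('confidence', 0.0)   # 0 .. 1
    if abs(direction) < 0.1 or vec_conf < 0.3:
        return 0.0  # vector prediction too weak to score

    abs_move = abs(actual_price_change_pct)
    actual_dir = (actual_price_change_pct > 0) - (actual_price_change_pct < 0)
    if actual_dir == 0 or direction * actual_dir <= 0:
        return 0.0  # flat market or wrong direction (zero bonus in the patch)

    direction_accuracy = min(abs(direction), 1.0)
    predicted_magnitude = abs(direction) * vec_conf * 2.0  # scaled to ~2% max
    magnitude_error = abs(predicted_magnitude - abs_move)
    if magnitude_error >= 1.0:
        return 0.0  # magnitude off by more than 1%

    magnitude_accuracy = 1.0 - magnitude_error
    base = direction_accuracy * magnitude_accuracy * vec_conf
    scale = 3.0 if abs_move > 2.0 else 2.0 if abs_move > 1.0 else 1.5 if abs_move > 0.5 else 1.0
    return min(base * scale * prediction_confidence, 2.0)  # capped at 2.0

# Worked example: a strong, confident upward vector on a +1.2% move.
vec = {'direction': 0.8, 'confidence': 0.7}
bonus = price_vector_bonus(vec, actual_price_change_pct=1.2, prediction_confidence=0.75)
# predicted_magnitude = 0.8 * 0.7 * 2.0 = 1.12; error = |1.12 - 1.2| = 0.08
# base = 0.8 * 0.92 * 0.7 ~= 0.515; scale = 2.0 (rapid move); bonus ~= 0.773
print(f"vector bonus = {bonus:.3f}")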