behaviour/aggressiveness sliders, fix COB data using provider

Dobromir Popov
2025-07-07 01:37:04 +03:00
parent 9101448e78
commit c2c0e12a4b
4 changed files with 380 additions and 35 deletions


@@ -67,6 +67,10 @@ class TradingDecision:
     timestamp: datetime
     reasoning: Dict[str, Any]  # Why this decision was made
     memory_usage: Dict[str, int]  # Memory usage of models
+    # NEW: Aggressiveness parameters
+    entry_aggressiveness: float = 0.5  # 0.0 = conservative, 1.0 = very aggressive
+    exit_aggressiveness: float = 0.5  # 0.0 = conservative, 1.0 = very aggressive
+    current_position_pnl: float = 0.0  # Current open position P&L for RL feedback

 class TradingOrchestrator:
     """
@@ -90,6 +94,14 @@ class TradingOrchestrator:
         self.decision_frequency = self.config.orchestrator.get('decision_frequency', 30)
         self.symbols = self.config.get('symbols', ['ETH/USDT', 'BTC/USDT'])  # Enhanced to support multiple symbols

+        # NEW: Aggressiveness parameters
+        self.entry_aggressiveness = self.config.orchestrator.get('entry_aggressiveness', 0.5)  # 0.0 = conservative, 1.0 = very aggressive
+        self.exit_aggressiveness = self.config.orchestrator.get('exit_aggressiveness', 0.5)  # 0.0 = conservative, 1.0 = very aggressive
+
+        # Position tracking for P&L feedback
+        self.current_positions: Dict[str, Dict] = {}  # {symbol: {side, size, entry_price, entry_time, pnl}}
+        self.trading_executor = None  # Will be set by dashboard or external system
+
         # Dynamic weights (will be adapted based on performance)
         self.model_weights: Dict[str, float] = {}  # {model_name: weight}
         self._initialize_default_weights()
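
Both knobs above are read through `self.config.orchestrator.get(...)`, so they can be tuned without code changes. A minimal sketch of the expected shape, assuming a plain dict-like `orchestrator` section (the project's actual config loader and schema may differ):

    orchestrator_config = {
        'decision_frequency': 30,
        'entry_aggressiveness': 0.7,  # >0.5: open positions on weaker signals
        'exit_aggressiveness': 0.4,   # <0.5: hold positions through more noise
    }
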
@@ -1483,7 +1495,7 @@ class TradingOrchestrator:
     def _combine_predictions(self, symbol: str, price: float,
                            predictions: List[Prediction],
                            timestamp: datetime) -> TradingDecision:
-        """Combine all predictions into a final decision"""
+        """Combine all predictions into a final decision with aggressiveness and P&L feedback"""
         try:
             reasoning = {
                 'predictions': len(predictions),
@@ -1491,6 +1503,9 @@ class TradingOrchestrator:
                 'models_used': [pred.model_name for pred in predictions]
             }

+            # Get current position P&L for feedback
+            current_position_pnl = self._get_current_position_pnl(symbol, price)
+
             # Initialize action scores
             action_scores = {'BUY': 0.0, 'SELL': 0.0, 'HOLD': 0.0}
             total_weight = 0.0
@@ -1516,10 +1531,35 @@
             best_action = max(action_scores, key=action_scores.get)
             best_confidence = action_scores[best_action]

-            # Apply confidence threshold
-            if best_confidence < self.confidence_threshold:
-                best_action = 'HOLD'
-                reasoning['threshold_applied'] = True
+            # Calculate aggressiveness-adjusted thresholds
+            entry_threshold, exit_threshold = self._calculate_aggressiveness_thresholds(
+                current_position_pnl, symbol
+            )
+
+            # Apply aggressiveness-based confidence thresholds
+            if best_action in ['BUY', 'SELL']:
+                # For entry signals, use entry aggressiveness
+                if not self._has_open_position(symbol):
+                    if best_confidence < entry_threshold:
+                        best_action = 'HOLD'
+                        reasoning['entry_threshold_applied'] = True
+                        reasoning['entry_threshold'] = entry_threshold
+                # For exit signals, use exit aggressiveness
+                else:
+                    if best_confidence < exit_threshold:
+                        best_action = 'HOLD'
+                        reasoning['exit_threshold_applied'] = True
+                        reasoning['exit_threshold'] = exit_threshold
+            else:
+                # Standard threshold for HOLD
+                if best_confidence < self.confidence_threshold:
+                    best_action = 'HOLD'
+                    reasoning['threshold_applied'] = True
+
+            # Add P&L-based decision adjustment
+            best_action, best_confidence = self._apply_pnl_feedback(
+                best_action, best_confidence, current_position_pnl, symbol, reasoning
+            )

             # Get memory usage stats
             try:
@@ -1527,6 +1567,10 @@
             except Exception:
                 memory_usage = {}

+            # Calculate dynamic aggressiveness based on recent performance
+            entry_aggressiveness = self._calculate_dynamic_entry_aggressiveness(symbol)
+            exit_aggressiveness = self._calculate_dynamic_exit_aggressiveness(symbol, current_position_pnl)
+
             # Create final decision
             decision = TradingDecision(
                 action=best_action,
@@ -1535,12 +1579,15 @@
                 price=price,
                 timestamp=timestamp,
                 reasoning=reasoning,
-                memory_usage=memory_usage.get('models', {}) if memory_usage else {}
+                memory_usage=memory_usage.get('models', {}) if memory_usage else {},
+                entry_aggressiveness=entry_aggressiveness,
+                exit_aggressiveness=exit_aggressiveness,
+                current_position_pnl=current_position_pnl
             )

-            logger.info(f"Decision for {symbol}: {best_action} (confidence: {best_confidence:.3f})")
-            if memory_usage and 'total_used_mb' in memory_usage:
-                logger.debug(f"Memory usage: {memory_usage['total_used_mb']:.1f}MB / {memory_usage['total_limit_mb']:.1f}MB")
+            logger.info(f"Decision for {symbol}: {best_action} (confidence: {best_confidence:.3f}, "
+                        f"entry_agg: {entry_aggressiveness:.2f}, exit_agg: {exit_aggressiveness:.2f}, "
+                        f"pnl: ${current_position_pnl:.2f})")

             return decision

@@ -1554,7 +1601,10 @@
                 price=price,
                 timestamp=timestamp,
                 reasoning={'error': str(e)},
-                memory_usage={}
+                memory_usage={},
+                entry_aggressiveness=0.5,
+                exit_aggressiveness=0.5,
+                current_position_pnl=0.0
             )

     def _get_timeframe_weight(self, timeframe: str) -> float:
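
For intuition about the gating above, here is a worked example of the `(1.5 - aggressiveness)` scaling that `_calculate_aggressiveness_thresholds` (added further down in this diff) applies, assuming illustrative base thresholds of 0.5 for entry and 0.3 for exit:

    for agg in (0.0, 0.5, 1.0):
        entry = max(0.05, 0.5 * (1.5 - agg))  # base_entry_threshold * (1.5 - entry_agg)
        exit_ = max(0.02, 0.3 * (1.5 - agg))  # base_exit_threshold * (1.5 - exit_agg)
        print(f"agg={agg:.1f} -> entry {entry:.2f}, exit {exit_:.2f}")

    # agg=0.0 -> entry 0.75, exit 0.45  (conservative: far fewer signals pass)
    # agg=0.5 -> entry 0.50, exit 0.30  (neutral: base thresholds unchanged)
    # agg=1.0 -> entry 0.25, exit 0.15  (aggressive: many more signals pass)

So with entry aggressiveness at 0.8, a BUY at confidence 0.40 clears the 0.35 entry bar that a flat 0.5 threshold would have gated to HOLD.
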
@@ -1918,12 +1968,75 @@
             logger.warning(f"Microstructure features fallback: {e}")
             comprehensive_features.extend([0.0] * 100)

-        # Final validation - now includes COB features (13,400 + 400 = 13,800)
+        # === NEW: P&L FEEDBACK AND AGGRESSIVENESS FEATURES (50) ===
+        try:
+            current_price = self._get_current_price(symbol) or 3500.0
+            current_pnl = self._get_current_position_pnl(symbol, current_price)
+
+            # P&L feedback features (6)
+            pnl_features = [
+                current_pnl,  # Current P&L
+                max(-1.0, min(1.0, current_pnl / 100.0)),  # Normalized P&L (-1 to 1)
+                1.0 if current_pnl > 0 else 0.0,  # Is profitable
+                1.0 if current_pnl < -10.0 else 0.0,  # Is losing significantly
+                1.0 if current_pnl > 20.0 else 0.0,  # Is winning significantly
+                1.0 if self._has_open_position(symbol) else 0.0,  # Has open position
+            ]
+
+            # Recent performance features (4)
+            recent_decisions = self.get_recent_decisions(symbol, limit=10)
+            if recent_decisions:
+                win_rate = sum(1 for d in recent_decisions if d.reasoning.get('was_profitable', False)) / len(recent_decisions)
+                avg_confidence = sum(d.confidence for d in recent_decisions) / len(recent_decisions)
+                recent_pnl_changes = [d.current_position_pnl for d in recent_decisions if hasattr(d, 'current_position_pnl')]
+                avg_recent_pnl = sum(recent_pnl_changes) / len(recent_pnl_changes) if recent_pnl_changes else 0.0
+            else:
+                win_rate = 0.5
+                avg_confidence = 0.5
+                avg_recent_pnl = 0.0
+
+            pnl_features.extend([
+                win_rate,
+                avg_confidence,
+                max(-1.0, min(1.0, avg_recent_pnl / 50.0)),  # Normalized recent P&L
+                len(recent_decisions) / 10.0,  # Decision frequency
+            ])
+
+            # Aggressiveness features (10)
+            entry_agg = getattr(self, 'entry_aggressiveness', 0.5)
+            exit_agg = getattr(self, 'exit_aggressiveness', 0.5)
+
+            aggressiveness_features = [
+                entry_agg,
+                exit_agg,
+                entry_agg * 2.0 - 1.0,  # Scaled entry aggressiveness (-1 to 1)
+                exit_agg * 2.0 - 1.0,  # Scaled exit aggressiveness (-1 to 1)
+                entry_agg * exit_agg,  # Combined aggressiveness
+                abs(entry_agg - exit_agg),  # Aggressiveness difference
+                1.0 if entry_agg > 0.7 else 0.0,  # Is very aggressive entry
+                1.0 if exit_agg > 0.7 else 0.0,  # Is very aggressive exit
+                1.0 if entry_agg < 0.3 else 0.0,  # Is very conservative entry
+                1.0 if exit_agg < 0.3 else 0.0,  # Is very conservative exit
+            ]
+
+            # Pad the 20 real features with zeros to 50 total
+            all_feedback_features = pnl_features + aggressiveness_features
+            while len(all_feedback_features) < 50:
+                all_feedback_features.append(0.0)
+
+            comprehensive_features.extend(all_feedback_features[:50])
+            logger.debug("P&L feedback and aggressiveness features: 50 added")
+
+        except Exception as e:
+            logger.warning(f"P&L feedback features fallback: {e}")
+            comprehensive_features.extend([0.0] * 50)
+
+        # Final validation - now includes P&L feedback (13,400 + 400 + 50 = 13,850)
         total_features = len(comprehensive_features)
-        expected_features = 13800  # Updated to include 400 COB features
+        expected_features = 13850  # Updated to include P&L feedback features

         if total_features >= expected_features - 100:  # Allow small tolerance
-            # logger.info(f"TRAINING: Comprehensive RL state built successfully: {total_features} features (including COB)")
+            # logger.info(f"TRAINING: Comprehensive RL state built successfully: {total_features} features (including P&L feedback)")
            return comprehensive_features
         else:
             logger.warning(f"⚠️ Comprehensive RL state incomplete: {total_features} features (expected {expected_features}+)")
@@ -2651,4 +2764,145 @@
             return None
         except Exception as e:
             logger.error(f"Error getting universal data for {model_type}: {e}")
-            return None
+            return None
+
+    def _get_current_position_pnl(self, symbol: str, current_price: float) -> float:
+        """Get current position P&L for the symbol"""
+        try:
+            if self.trading_executor and hasattr(self.trading_executor, 'get_current_position'):
+                position = self.trading_executor.get_current_position(symbol)
+                if position:
+                    entry_price = position.get('price', 0)
+                    size = position.get('size', 0)
+                    side = position.get('side', 'LONG')
+
+                    if entry_price and size > 0:
+                        if side.upper() == 'LONG':
+                            pnl = (current_price - entry_price) * size
+                        else:  # SHORT
+                            pnl = (entry_price - current_price) * size
+                        return pnl
+            return 0.0
+        except Exception as e:
+            logger.debug(f"Error getting position P&L for {symbol}: {e}")
+            return 0.0
+
+    def _has_open_position(self, symbol: str) -> bool:
+        """Check if there's an open position for the symbol"""
+        try:
+            if self.trading_executor and hasattr(self.trading_executor, 'get_current_position'):
+                position = self.trading_executor.get_current_position(symbol)
+                return position is not None and position.get('size', 0) > 0
+            return False
+        except Exception:
+            return False
+
+    def _calculate_aggressiveness_thresholds(self, current_pnl: float, symbol: str) -> tuple:
+        """Calculate confidence thresholds based on aggressiveness settings"""
+        # Base thresholds
+        base_entry_threshold = self.confidence_threshold
+        base_exit_threshold = self.confidence_threshold_close
+
+        # Get aggressiveness settings (could be from config or adaptive)
+        entry_agg = getattr(self, 'entry_aggressiveness', 0.5)
+        exit_agg = getattr(self, 'exit_aggressiveness', 0.5)
+
+        # Adjust thresholds based on aggressiveness:
+        # more aggressive = lower threshold (more trades),
+        # less aggressive = higher threshold (fewer, higher-quality trades)
+        entry_threshold = base_entry_threshold * (1.5 - entry_agg)  # 0.5 agg = 1.0x, 1.0 agg = 0.5x
+        exit_threshold = base_exit_threshold * (1.5 - exit_agg)
+
+        # Ensure minimum thresholds
+        entry_threshold = max(0.05, entry_threshold)
+        exit_threshold = max(0.02, exit_threshold)
+
+        return entry_threshold, exit_threshold
+
+    def _apply_pnl_feedback(self, action: str, confidence: float, current_pnl: float,
+                           symbol: str, reasoning: dict) -> tuple:
+        """Apply P&L-based feedback to decision making"""
+        try:
+            # If we have a losing position, be more aggressive about cutting losses
+            if current_pnl < -10.0:  # Losing more than $10
+                if action == 'SELL' and self._has_open_position(symbol):
+                    # Boost confidence for exit signals when losing
+                    confidence = min(1.0, confidence * 1.2)
+                    reasoning['pnl_loss_cut_boost'] = True
+                elif action == 'BUY':
+                    # Reduce confidence for new entries when losing
+                    confidence *= 0.8
+                    reasoning['pnl_loss_entry_reduction'] = True
+
+            # If we have a winning position, be more conservative about exits
+            elif current_pnl > 5.0:  # Winning more than $5
+                if action == 'SELL' and self._has_open_position(symbol):
+                    # Reduce confidence for exit signals when winning (let profits run)
+                    confidence *= 0.9
+                    reasoning['pnl_profit_hold'] = True
+                elif action == 'BUY':
+                    # Slightly boost confidence for entries when on a winning streak
+                    confidence = min(1.0, confidence * 1.05)
+                    reasoning['pnl_winning_streak_boost'] = True
+
+            reasoning['current_pnl'] = current_pnl
+            return action, confidence
+        except Exception as e:
+            logger.debug(f"Error applying P&L feedback: {e}")
+            return action, confidence
+
+    def _calculate_dynamic_entry_aggressiveness(self, symbol: str) -> float:
+        """Calculate dynamic entry aggressiveness based on recent performance"""
+        try:
+            # Start with base aggressiveness
+            base_agg = getattr(self, 'entry_aggressiveness', 0.5)
+
+            # Get recent decisions for this symbol
+            recent_decisions = self.get_recent_decisions(symbol, limit=10)
+            if len(recent_decisions) < 3:
+                return base_agg
+
+            # Calculate win rate
+            winning_decisions = sum(1 for d in recent_decisions
+                                  if d.reasoning.get('was_profitable', False))
+            win_rate = winning_decisions / len(recent_decisions)
+
+            # Adjust aggressiveness based on performance
+            if win_rate > 0.7:  # High win rate - be more aggressive
+                return min(1.0, base_agg + 0.2)
+            elif win_rate < 0.3:  # Low win rate - be more conservative
+                return max(0.1, base_agg - 0.2)
+            else:
+                return base_agg
+        except Exception as e:
+            logger.debug(f"Error calculating dynamic entry aggressiveness: {e}")
+            return 0.5
+
+    def _calculate_dynamic_exit_aggressiveness(self, symbol: str, current_pnl: float) -> float:
+        """Calculate dynamic exit aggressiveness based on P&L and market conditions"""
+        try:
+            # Start with base aggressiveness
+            base_agg = getattr(self, 'exit_aggressiveness', 0.5)
+
+            # Adjust based on current P&L
+            if current_pnl < -20.0:  # Large loss - be very aggressive about cutting
+                return min(1.0, base_agg + 0.3)
+            elif current_pnl < -5.0:  # Small loss - be more aggressive
+                return min(1.0, base_agg + 0.1)
+            elif current_pnl > 20.0:  # Large profit - be less aggressive (let it run)
+                return max(0.1, base_agg - 0.2)
+            elif current_pnl > 5.0:  # Small profit - slightly less aggressive
+                return max(0.2, base_agg - 0.1)
+            else:
+                return base_agg
+        except Exception as e:
+            logger.debug(f"Error calculating dynamic exit aggressiveness: {e}")
+            return 0.5
+
+    def set_trading_executor(self, trading_executor):
+        """Set the trading executor for position tracking"""
+        self.trading_executor = trading_executor
+        logger.info("Trading executor set for position tracking and P&L feedback")