COB WS fix
@@ -1369,15 +1369,31 @@ class TradingOrchestrator:
             reasoning['models_aggregated'] = [pred.model_name for pred in predictions]
             reasoning['aggregated_confidence'] = best_confidence
 
-            # Apply confidence thresholds for signal confirmation
+            # Calculate dynamic aggressiveness based on recent performance
+            entry_aggressiveness = self._calculate_dynamic_entry_aggressiveness(symbol)
+
+            # Adjust confidence threshold based on entry aggressiveness
+            # Higher aggressiveness = lower threshold (more trades)
+            # entry_aggressiveness: 0.0 = very conservative, 1.0 = very aggressive
+            base_threshold = self.confidence_threshold
+            aggressiveness_factor = 1.0 - entry_aggressiveness  # Invert: high agg = low factor
+            dynamic_threshold = base_threshold * aggressiveness_factor
+
+            # Ensure minimum threshold for safety (don't go below 1% confidence)
+            dynamic_threshold = max(0.01, dynamic_threshold)
+
+            # Apply dynamic confidence threshold for signal confirmation
             if best_action != 'HOLD':
-                if best_confidence < self.confidence_threshold:
-                    logger.debug(f"Signal below confidence threshold: {best_action} {symbol} "
-                                f"(confidence: {best_confidence:.3f} < {self.confidence_threshold})")
+                if best_confidence < dynamic_threshold:
+                    logger.debug(f"Signal below dynamic confidence threshold: {best_action} {symbol} "
+                                f"(confidence: {best_confidence:.3f} < {dynamic_threshold:.3f}, "
+                                f"base: {base_threshold:.3f}, aggressiveness: {entry_aggressiveness:.2f})")
                     best_action = 'HOLD'
                     best_confidence = 0.0
                     reasoning['rejected_reason'] = 'low_confidence'
+                else:
+                    logger.info(f"SIGNAL ACCEPTED: {best_action} {symbol} "
+                               f"(confidence: {best_confidence:.3f} >= {dynamic_threshold:.3f}, "
+                               f"aggressiveness: {entry_aggressiveness:.2f})")
 
                 # Add signal to accumulator for trend confirmation
                 signal_data = {
                     'action': best_action,
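To make the new threshold rule concrete, here is a small standalone sketch of the same arithmetic (names mirror the diff; the numeric values are illustrative, not from the repo):

    # Minimal sketch of the dynamic threshold rule introduced above.
    base_threshold = 0.25                 # assumed value of self.confidence_threshold
    entry_aggressiveness = 0.6            # 0.0 = very conservative, 1.0 = very aggressive
    aggressiveness_factor = 1.0 - entry_aggressiveness            # 0.4
    dynamic_threshold = max(0.01, base_threshold * aggressiveness_factor)  # 0.10
    # A signal with confidence 0.12 now passes (0.12 >= 0.10); under the old
    # fixed check it would have been rejected (0.12 < 0.25).
    # At entry_aggressiveness = 1.0 the product is 0.0, so the max() floor
    # keeps the threshold at 0.01 rather than letting every signal through.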
@@ -1418,8 +1434,7 @@ class TradingOrchestrator:
             except Exception:
                 memory_usage = {}
 
-            # Calculate dynamic aggressiveness based on recent performance
-            entry_aggressiveness = self._calculate_dynamic_entry_aggressiveness(symbol)
+            # Get exit aggressiveness (entry aggressiveness already calculated above)
             exit_aggressiveness = self._calculate_dynamic_exit_aggressiveness(symbol, current_position_pnl)
 
             # Create final decision
@@ -1440,6 +1455,9 @@ class TradingOrchestrator:
                        f"entry_agg: {entry_aggressiveness:.2f}, exit_agg: {exit_aggressiveness:.2f}, "
                        f"pnl: ${current_position_pnl:.2f})")
 
+            # Trigger training on each decision (especially for executed trades)
+            self._trigger_training_on_decision(decision, price)
+
             return decision
 
         except Exception as e:
@@ -2032,6 +2050,253 @@ class TradingOrchestrator:
             logger.error(f"Error calculating enhanced reward: {e}")
             return base_pnl
 
+    def _trigger_training_on_decision(self, decision: TradingDecision, current_price: float):
+        """Trigger training on each decision, especially executed trades
+
+        This ensures models learn from every signal outcome, giving more weight
+        to executed trades as they have real market feedback.
+        """
+        try:
+            # Only train if training is enabled and we have the enhanced training system
+            if not self.training_enabled or not self.enhanced_training_system:
+                return
+
+            symbol = decision.symbol
+            action = decision.action
+            confidence = decision.confidence
+
+            # Create training data from the decision
+            training_data = {
+                'symbol': symbol,
+                'action': action,
+                'confidence': confidence,
+                'price': current_price,
+                'timestamp': decision.timestamp,
+                'executed': action != 'HOLD',  # Assume non-HOLD actions are executed
+                'entry_aggressiveness': decision.entry_aggressiveness,
+                'exit_aggressiveness': decision.exit_aggressiveness,
+                'reasoning': decision.reasoning
+            }
+
+            # Add to enhanced training system for immediate learning
+            if hasattr(self.enhanced_training_system, 'add_decision_for_training'):
+                self.enhanced_training_system.add_decision_for_training(training_data)
+                logger.debug(f"🎓 Added decision to training queue: {action} {symbol} (conf: {confidence:.3f})")
+
+            # Trigger immediate training for executed trades (higher priority)
+            if action != 'HOLD':
+                if hasattr(self.enhanced_training_system, 'trigger_immediate_training'):
+                    self.enhanced_training_system.trigger_immediate_training(
+                        symbol=symbol,
+                        priority='high' if confidence > 0.7 else 'medium'
+                    )
+                    logger.info(f"🚀 Triggered immediate training for executed trade: {action} {symbol}")
+
+            # Train all models on the decision outcome
+            self._train_models_on_decision(decision, current_price)
+
+        except Exception as e:
+            logger.error(f"Error triggering training on decision: {e}")
+
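Note that both calls into the training system are guarded by hasattr, so the orchestrator only duck-types it: any object exposing these two methods can be plugged in. A minimal sketch of a compatible stub (the class name and the internal queue are hypothetical; only the two method names and the priority convention come from the diff):

    from collections import deque

    class MinimalTrainingSystem:
        """Hypothetical stand-in satisfying the hasattr checks above."""
        def __init__(self):
            self.queue = deque(maxlen=1000)

        def add_decision_for_training(self, training_data: dict) -> None:
            # Receives the dict built in _trigger_training_on_decision
            self.queue.append(training_data)

        def trigger_immediate_training(self, symbol: str, priority: str = 'medium') -> None:
            # 'high' is used for executed trades with confidence > 0.7
            print(f"training {symbol} at priority {priority}")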
+    def _train_models_on_decision(self, decision: TradingDecision, current_price: float):
+        """Train all models on the decision outcome
+
+        This provides immediate feedback to models about their predictions,
+        allowing them to learn from each signal they generate.
+        """
+        try:
+            symbol = decision.symbol
+            action = decision.action
+            confidence = decision.confidence
+
+            # Get current market data for training context
+            market_data = self._get_current_market_data(symbol)
+            if not market_data:
+                return
+
+            # Train DQN agent if available
+            if self.rl_agent and hasattr(self.rl_agent, 'add_experience'):
+                try:
+                    # Create state representation
+                    state = self._create_state_for_training(symbol, market_data)
+
+                    # Map action to DQN action space
+                    action_mapping = {'BUY': 0, 'SELL': 1, 'HOLD': 2}
+                    dqn_action = action_mapping.get(action, 2)
+
+                    # Calculate immediate reward based on confidence and execution
+                    immediate_reward = confidence if action != 'HOLD' else 0.0
+
+                    # Add experience to DQN
+                    self.rl_agent.add_experience(
+                        state=state,
+                        action=dqn_action,
+                        reward=immediate_reward,
+                        next_state=state,  # Will be updated with actual outcome later
+                        done=False
+                    )
+
+                    logger.debug(f"🧠 Added DQN experience: {action} {symbol} (reward: {immediate_reward:.3f})")
+
+                except Exception as e:
+                    logger.debug(f"Error training DQN on decision: {e}")
+
+            # Train CNN model if available
+            if self.cnn_model and hasattr(self.cnn_model, 'add_training_sample'):
+                try:
+                    # Create CNN input features
+                    cnn_features = self._create_cnn_features_for_training(symbol, market_data)
+
+                    # Create target based on action
+                    target_mapping = {'BUY': [1, 0, 0], 'SELL': [0, 1, 0], 'HOLD': [0, 0, 1]}
+                    target = target_mapping.get(action, [0, 0, 1])
+
+                    # Add training sample
+                    self.cnn_model.add_training_sample(cnn_features, target, weight=confidence)
+
+                    logger.debug(f"🔍 Added CNN training sample: {action} {symbol}")
+
+                except Exception as e:
+                    logger.debug(f"Error training CNN on decision: {e}")
+
+            # Train COB RL model if available and we have COB data
+            if self.cob_rl_agent and symbol in self.latest_cob_data:
+                try:
+                    cob_data = self.latest_cob_data[symbol]
+                    if hasattr(self.cob_rl_agent, 'add_experience'):
+                        # Create COB state representation
+                        cob_state = self._create_cob_state_for_training(symbol, cob_data)
+
+                        # Add COB experience
+                        self.cob_rl_agent.add_experience(
+                            state=cob_state,
+                            action=action,
+                            reward=confidence,
+                            symbol=symbol
+                        )
+
+                        logger.debug(f"📊 Added COB RL experience: {action} {symbol}")
+
+                except Exception as e:
+                    logger.debug(f"Error training COB RL on decision: {e}")
+
+        except Exception as e:
+            logger.error(f"Error training models on decision: {e}")
+
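The three per-model encodings above reduce to small pure mappings; a sketch isolating them (values taken from the diff, function names are mine):

    def dqn_action(action: str) -> int:
        # BUY/SELL/HOLD -> discrete DQN action space
        return {'BUY': 0, 'SELL': 1, 'HOLD': 2}.get(action, 2)

    def cnn_target(action: str) -> list:
        # One-hot class target for the CNN head
        return {'BUY': [1, 0, 0], 'SELL': [0, 1, 0], 'HOLD': [0, 0, 1]}.get(action, [0, 0, 1])

    def immediate_reward(action: str, confidence: float) -> float:
        # Confidence acts as a provisional reward until the real outcome is known
        return confidence if action != 'HOLD' else 0.0

    assert dqn_action('BUY') == 0
    assert cnn_target('SELL') == [0, 1, 0]
    assert immediate_reward('HOLD', 0.9) == 0.0

Rewarding a trade with its own confidence is a bootstrap placeholder rather than market feedback; the diff's own comment flags this, noting the next_state "will be updated with actual outcome later".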
+    def _get_current_market_data(self, symbol: str) -> Optional[Dict]:
+        """Get current market data for training context"""
+        try:
+            if self.data_provider:
+                # Get recent data for training
+                df = self.data_provider.get_historical_data(symbol, '1m', limit=100)
+                if df is not None and not df.empty:
+                    return {
+                        'ohlcv': df.tail(50).to_dict('records'),  # Last 50 candles
+                        'current_price': float(df['close'].iloc[-1]),
+                        'volume': float(df['volume'].iloc[-1]),
+                        'timestamp': df.index[-1]
+                    }
+            return None
+        except Exception as e:
+            logger.debug(f"Error getting market data for training: {e}")
+            return None
+
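The dict returned here is the contract the two feature builders below rely on. A quick sketch of what a caller sees, using a synthetic DataFrame in place of data_provider.get_historical_data (values are placeholders):

    import pandas as pd

    # Synthetic stand-in for data_provider.get_historical_data(symbol, '1m', limit=100)
    idx = pd.date_range('2024-01-01', periods=100, freq='1min')
    df = pd.DataFrame({'open': 1.0, 'high': 1.0, 'low': 1.0,
                       'close': 1.0, 'volume': 2.0}, index=idx)

    market_data = {
        'ohlcv': df.tail(50).to_dict('records'),   # list of 50 dicts keyed by column
        'current_price': float(df['close'].iloc[-1]),
        'volume': float(df['volume'].iloc[-1]),
        'timestamp': df.index[-1],
    }
    assert len(market_data['ohlcv']) == 50
    assert set(market_data['ohlcv'][0]) == {'open', 'high', 'low', 'close', 'volume'}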
+    def _create_state_for_training(self, symbol: str, market_data: Dict) -> np.ndarray:
+        """Create state representation for DQN training"""
+        try:
+            # Create a basic state representation
+            ohlcv_data = market_data.get('ohlcv', [])
+            if not ohlcv_data:
+                return np.zeros(100)  # Default state size
+
+            # Extract features from recent candles
+            features = []
+            for candle in ohlcv_data[-20:]:  # Last 20 candles
+                features.extend([
+                    candle.get('open', 0),
+                    candle.get('high', 0),
+                    candle.get('low', 0),
+                    candle.get('close', 0),
+                    candle.get('volume', 0)
+                ])
+
+            # Pad or truncate to expected size
+            state = np.array(features[:100])
+            if len(state) < 100:
+                state = np.pad(state, (0, 100 - len(state)), 'constant')
+
+            return state
+
+        except Exception as e:
+            logger.debug(f"Error creating state for training: {e}")
+            return np.zeros(100)
+
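The state size works out exactly: 20 candles × 5 features = 100, so the padding branch only fires when fewer than 20 candles are available. A quick check of that invariant, replicating the method's logic without the class plumbing:

    import numpy as np

    def build_state(ohlcv_data: list) -> np.ndarray:
        # Same logic as _create_state_for_training above
        features = []
        for candle in ohlcv_data[-20:]:
            features.extend([candle.get(k, 0) for k in ('open', 'high', 'low', 'close', 'volume')])
        state = np.array(features[:100], dtype=float)
        if len(state) < 100:
            state = np.pad(state, (0, 100 - len(state)), 'constant')
        return state

    candle = {'open': 1, 'high': 2, 'low': 0.5, 'close': 1.5, 'volume': 10}
    assert build_state([candle] * 50).shape == (100,)   # truncated to the last 20 candles
    assert build_state([candle] * 3).shape == (100,)    # 15 real values, 85 zeros of padding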
+    def _create_cnn_features_for_training(self, symbol: str, market_data: Dict) -> np.ndarray:
+        """Create CNN features for training"""
+        try:
+            # Similar to state creation but formatted for CNN
+            ohlcv_data = market_data.get('ohlcv', [])
+            if not ohlcv_data:
+                return np.zeros((1, 100))
+
+            # Create feature matrix
+            features = []
+            for candle in ohlcv_data[-20:]:
+                features.extend([
+                    candle.get('open', 0),
+                    candle.get('high', 0),
+                    candle.get('low', 0),
+                    candle.get('close', 0),
+                    candle.get('volume', 0)
+                ])
+
+            # Reshape for CNN input
+            cnn_features = np.array(features[:100]).reshape(1, -1)
+            if cnn_features.shape[1] < 100:
+                cnn_features = np.pad(cnn_features, ((0, 0), (0, 100 - cnn_features.shape[1])), 'constant')
+
+            return cnn_features
+
+        except Exception as e:
+            logger.debug(f"Error creating CNN features for training: {e}")
+            return np.zeros((1, 100))
+
+    def _create_cob_state_for_training(self, symbol: str, cob_data: Dict) -> np.ndarray:
+        """Create COB state representation for training"""
+        try:
+            # Extract COB features for training
+            features = []
+
+            # Add bid/ask data
+            bids = cob_data.get('bids', [])[:10]  # Top 10 bids
+            asks = cob_data.get('asks', [])[:10]  # Top 10 asks
+
+            for bid in bids:
+                features.extend([bid.get('price', 0), bid.get('size', 0)])
+            for ask in asks:
+                features.extend([ask.get('price', 0), ask.get('size', 0)])
+
+            # Add market stats
+            stats = cob_data.get('stats', {})
+            features.extend([
+                stats.get('spread', 0),
+                stats.get('mid_price', 0),
+                stats.get('bid_volume', 0),
+                stats.get('ask_volume', 0),
+                stats.get('imbalance', 0)
+            ])
+
+            # Pad to expected COB state size (2000 features)
+            cob_state = np.array(features[:2000])
+            if len(cob_state) < 2000:
+                cob_state = np.pad(cob_state, (0, 2000 - len(cob_state)), 'constant')
+
+            return cob_state
+
+        except Exception as e:
+            logger.debug(f"Error creating COB state for training: {e}")
+            return np.zeros(2000)
+
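Worth noting how sparse this state is: even at full book depth only 45 of the 2000 slots are populated, so roughly 98% of the COB state is zero padding. The arithmetic, following the feature layout above:

    # Feature budget of _create_cob_state_for_training at full depth
    bid_features = 10 * 2      # top 10 bids, (price, size) each
    ask_features = 10 * 2      # top 10 asks, (price, size) each
    stat_features = 5          # spread, mid_price, bid_volume, ask_volume, imbalance
    populated = bid_features + ask_features + stat_features
    assert populated == 45
    padding = 2000 - populated  # 1955 zeros appended by np.pad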
     def _check_signal_confirmation(self, symbol: str, signal_data: Dict) -> Optional[str]:
         """Check if we have enough signal confirmations for trend confirmation with rate limiting"""
         try: