immediate training impl

2025-09-09 02:57:03 +03:00
parent 729e0bccb1
commit 8c17082643
2 changed files with 272 additions and 14 deletions
--- a/web/clean_dashboard.py
+++ b/web/clean_dashboard.py
@@ -175,6 +175,17 @@ class CleanTradingDashboard:
            'ETH/USDT': {},
            'BTC/USDT': {}
        }
        # Confidence calibration tracking
        self.confidence_calibration: Dict[str, Dict] = {
            'cob_liquidity_imbalance': {
                'total_predictions': 0,
                'correct_predictions': 0,
                'accuracy_by_confidence': {},  # Track accuracy by confidence ranges
                'confidence_adjustment': 1.0,  # Multiplier for future confidence levels
                'last_calibration': None
            }
        }
        # Initialize timezone
        timezone_name = self.config.get('system', {}).get('timezone', 'Europe/Sofia')
@@ -3821,7 +3832,10 @@ class CleanTradingDashboard:
            # Train ALL models on the signal (if executed)
            if signal['executed']:
                self._train_all_models_on_signal(signal)
-            
+
            # Immediate price feedback training (always runs if enabled, regardless of execution)
            self._immediate_price_feedback_training(signal)
            # Log signal processing
            status = "EXECUTED" if signal['executed'] else ("BLOCKED" if signal['blocked'] else "PENDING")
            logger.info(f"[{status}] {signal['action']} signal for {signal['symbol']} "
@@ -3829,7 +3843,249 @@ class CleanTradingDashboard:
        except Exception as e:
            logger.error(f"Error processing dashboard signal: {e}")
-    
+
    # Immediate price-feedback training
    # TODO: review/revise
    def _immediate_price_feedback_training(self, signal: Dict):
        """Immediate training fine-tuning based on current price feedback - rewards profitable predictions"""
        try:
            # Check if any model training is enabled - immediate training is part of core training
            training_enabled = (
                getattr(self, 'dqn_training_enabled', True) or
                getattr(self, 'cnn_training_enabled', True) or
                (hasattr(self.orchestrator, 'cob_rl_agent') and self.orchestrator.cob_rl_agent is not None) or
                (hasattr(self.orchestrator, 'model_manager') and self.orchestrator.model_manager is not None)
            )
            if not training_enabled:
                return
            symbol = signal.get('symbol', 'ETH/USDT')
            signal_price = signal.get('price', 0)
            predicted_action = signal.get('action', 'HOLD')
            signal_confidence = signal.get('confidence', 0.5)
            signal_timestamp = signal.get('timestamp')
            if signal_price == 0 or predicted_action == 'HOLD':
                return
            # Get current price for immediate feedback
            current_price = self._get_current_price(symbol)
            if current_price == 0:
                return
            # Calculate immediate price movement since signal generation
            price_change_pct = (current_price - signal_price) / signal_price
            price_change_abs = abs(price_change_pct)
            # Determine if prediction was correct
            predicted_direction = 1 if predicted_action == 'BUY' else -1
            actual_direction = 1 if price_change_pct > 0 else -1
            prediction_correct = predicted_direction == actual_direction
            # Calculate reward based on prediction accuracy and price movement
            base_reward = price_change_abs * 1000  # Scale by price movement
            if prediction_correct:
                # Reward correct predictions
                reward = base_reward
                confidence_bonus = signal_confidence * base_reward * 0.5  # Bonus for high confidence correct predictions
                reward += confidence_bonus
            else:
                # Punish incorrect predictions
                reward = -base_reward
                confidence_penalty = (1 - signal_confidence) * base_reward * 0.3  # Less penalty for low confidence wrong predictions
                reward -= confidence_penalty
            # Scale reward by time elapsed (more recent = higher weight)
            time_elapsed = (datetime.now() - signal_timestamp).total_seconds() if signal_timestamp else 0
            time_weight = max(0.1, 1.0 - (time_elapsed / 300))  # Decay over 5 minutes
            final_reward = reward * time_weight
            # Create immediate training data
            training_data = {
                'symbol': symbol,
                'signal_price': signal_price,
                'current_price': current_price,
                'price_change_pct': price_change_pct,
                'predicted_action': predicted_action,
                'actual_direction': 'UP' if actual_direction > 0 else 'DOWN',
                'prediction_correct': prediction_correct,
                'signal_confidence': signal_confidence,
                'reward': final_reward,
                'time_elapsed': time_elapsed,
                'timestamp': datetime.now()
            }
            # Train models immediately with price feedback
            self._train_models_on_immediate_feedback(signal, training_data, final_reward)
            # Update confidence calibration
            self._update_confidence_calibration(signal, prediction_correct, price_change_abs)
            logger.debug(f"💰 IMMEDIATE TRAINING: {symbol} {predicted_action} signal - "
                        f"Price: {signal_price:.2f} → {current_price:.2f} ({price_change_pct:+.2%}) - "
                        f"{'✅' if prediction_correct else '❌'} Correct - Reward: {final_reward:.2f}")
        except Exception as e:
            logger.debug(f"Error in immediate price feedback training: {e}")
    def _train_models_on_immediate_feedback(self, signal: Dict, training_data: Dict, reward: float):
        """Train models immediately on price feedback"""
        try:
            symbol = signal.get('symbol', 'ETH/USDT')
            action = 0 if signal.get('action') == 'BUY' else 1
            # Train COB RL model immediately if COB RL training is enabled
            if (self.orchestrator and hasattr(self.orchestrator, 'cob_rl_agent') and
                self.orchestrator.cob_rl_agent and hasattr(self.orchestrator, 'model_manager')):
                try:
                    # Get COB features for immediate training
                    cob_features = self._get_cob_features_for_training(symbol, signal.get('price', 0))
                    if cob_features:
                        # Store immediate experience
                        if hasattr(self.orchestrator.cob_rl_agent, 'remember'):
                            self.orchestrator.cob_rl_agent.remember(
                                cob_features, action, reward, cob_features, done=False  # Not done for immediate feedback
                            )
                        # Immediate training if enough samples
                        if hasattr(self.orchestrator.cob_rl_agent, 'memory') and len(self.orchestrator.cob_rl_agent.memory) > 16:
                            if hasattr(self.orchestrator.cob_rl_agent, 'replay'):
                                loss = self.orchestrator.cob_rl_agent.replay(batch_size=8)  # Smaller batch for immediate training
                                if loss is not None:
                                    logger.debug(f"COB RL immediate training - loss: {loss:.4f}, reward: {reward:.2f}")
                except Exception as e:
                    logger.debug(f"Error training COB RL on immediate feedback: {e}")
            # Train DQN model immediately if DQN training is enabled
            if (self.orchestrator and hasattr(self.orchestrator, 'rl_agent') and
                self.orchestrator.rl_agent and getattr(self, 'dqn_training_enabled', True)):
                try:
                    # Create immediate DQN experience
                    state = self._get_rl_state_for_training(symbol, signal.get('price', 0))
                    if state:
                        if hasattr(self.orchestrator.rl_agent, 'remember'):
                            self.orchestrator.rl_agent.remember(state, action, reward, state, done=False)
                        # Immediate training
                        if hasattr(self.orchestrator.rl_agent, 'replay') and hasattr(self.orchestrator.rl_agent, 'memory'):
                            if len(self.orchestrator.rl_agent.memory) > 16:
                                loss = self.orchestrator.rl_agent.replay(batch_size=8)
                                if loss is not None:
                                    logger.debug(f"DQN immediate training - loss: {loss:.4f}, reward: {reward:.2f}")
                except Exception as e:
                    logger.debug(f"Error training DQN on immediate feedback: {e}")
            # Train CNN model immediately if CNN training is enabled
            if (self.orchestrator and hasattr(self.orchestrator, 'cnn_model') and
                self.orchestrator.cnn_model and getattr(self, 'cnn_training_enabled', True)):
                try:
                    # Create immediate CNN training data
                    cnn_features = self._create_cnn_cob_features(symbol, {
                        'current_snapshot': {'price': signal.get('price', 0), 'imbalance': 0},
                        'history': self.cob_data_history.get(symbol, [])[-10:],
                        'timestamp': datetime.now()
                    })
                    if cnn_features:
                        # For CNN, we can update internal training data or use model-specific training
                        if hasattr(self.orchestrator.cnn_model, 'update_training_data'):
                            self.orchestrator.cnn_model.update_training_data(cnn_features, action, reward)
                        logger.debug(f"CNN immediate training data updated - action: {action}, reward: {reward:.2f}")
                except Exception as e:
                    logger.debug(f"Error training CNN on immediate feedback: {e}")
        except Exception as e:
            logger.debug(f"Error in immediate model training: {e}")
    def _update_confidence_calibration(self, signal: Dict, prediction_correct: bool, price_change_abs: float):
        """Update confidence calibration based on prediction accuracy"""
        try:
            signal_type = signal.get('type', 'unknown')
            signal_confidence = signal.get('confidence', 0.5)
            if signal_type not in self.confidence_calibration:
                return
            calibration = self.confidence_calibration[signal_type]
            # Track total predictions and accuracy
            calibration['total_predictions'] += 1
            if prediction_correct:
                calibration['correct_predictions'] += 1
            # Track accuracy by confidence ranges
            confidence_range = f"{int(signal_confidence * 10) / 10:.1f}"  # 0.0-1.0 in 0.1 increments
            if confidence_range not in calibration['accuracy_by_confidence']:
                calibration['accuracy_by_confidence'][confidence_range] = {
                    'total': 0,
                    'correct': 0,
                    'avg_price_change': 0.0
                }
            range_stats = calibration['accuracy_by_confidence'][confidence_range]
            range_stats['total'] += 1
            if prediction_correct:
                range_stats['correct'] += 1
            range_stats['avg_price_change'] = (
                (range_stats['avg_price_change'] * (range_stats['total'] - 1)) + price_change_abs
            ) / range_stats['total']
            # Update confidence adjustment every 50 predictions
            if calibration['total_predictions'] % 50 == 0:
                self._recalibrate_confidence_levels(signal_type)
        except Exception as e:
            logger.debug(f"Error updating confidence calibration: {e}")
    def _recalibrate_confidence_levels(self, signal_type: str):
        """Recalibrate confidence levels based on historical performance"""
        try:
            calibration = self.confidence_calibration[signal_type]
            accuracy_by_confidence = calibration['accuracy_by_confidence']
            # Calculate expected vs actual accuracy for each confidence range
            total_adjustment = 0.0
            valid_ranges = 0
            for conf_range, stats in accuracy_by_confidence.items():
                if stats['total'] >= 5:  # Need at least 5 predictions for reliable calibration
                    expected_accuracy = float(conf_range)  # Confidence should match accuracy
                    actual_accuracy = stats['correct'] / stats['total']
                    adjustment = actual_accuracy / expected_accuracy if expected_accuracy > 0 else 1.0
                    total_adjustment += adjustment
                    valid_ranges += 1
            if valid_ranges > 0:
                calibration['confidence_adjustment'] = total_adjustment / valid_ranges
                calibration['last_calibration'] = datetime.now()
                logger.info(f"🔧 CONFIDENCE CALIBRATION: {signal_type} adjustment = {calibration['confidence_adjustment']:.3f} "
                           f"(based on {valid_ranges} confidence ranges)")
        except Exception as e:
            logger.debug(f"Error recalibrating confidence levels: {e}")
    def _get_calibrated_confidence(self, signal_type: str, raw_confidence: float) -> float:
        """Get calibrated confidence level based on historical performance"""
        try:
            if signal_type in self.confidence_calibration:
                adjustment = self.confidence_calibration[signal_type]['confidence_adjustment']
                calibrated = raw_confidence * adjustment
                return max(0.0, min(1.0, calibrated))  # Clamp to [0,1]
            return raw_confidence
        except Exception as e:
            logger.debug(f"Error getting calibrated confidence: {e}")
            return raw_confidence
    # This function is used to train all models on a signal
    # ToDo: review this function and make sure it is correct
    def _train_all_models_on_signal(self, signal: Dict):
        """Train ALL models on executed trade signal - Comprehensive training system"""
        try:
@@ -5311,7 +5567,10 @@ class CleanTradingDashboard:
            # Generate signal if imbalance exceeds threshold
            if abs_imbalance > threshold:
                # Calculate more realistic confidence (never exactly 1.0)
-                final_confidence = min(0.95, base_confidence + confidence_boost)
+                raw_confidence = min(0.95, base_confidence + confidence_boost)
                # Apply confidence calibration based on historical performance
                final_confidence = self._get_calibrated_confidence('cob_liquidity_imbalance', raw_confidence)
                signal = {
                    'timestamp': datetime.now(),
@@ -5354,6 +5613,7 @@ class CleanTradingDashboard:
                'history': self.cob_data_history[symbol][-15:],  # Last 15 seconds
                'bucketed_data': self.cob_bucketed_data[symbol],
                'cumulative_imbalance': cumulative_imbalance,
                'cob_imbalance_ma': self.cob_imbalance_ma.get(symbol, {}),  # ✅ ADD MOVING AVERAGES
                'timestamp': cob_snapshot['timestamp'],
                'stats': cob_snapshot.get('stats', {}),
                'bids': cob_snapshot.get('bids', []),
--- a/web/component_manager.py
+++ b/web/component_manager.py
@@ -374,18 +374,16 @@ class DashboardComponentManager:
            html.Div(imbalance_stats_display),
            # COB Imbalance Moving Averages
-            ma_display = []
+            html.Div([
-            if imbalance_ma_data:
+                html.H6("Imbalance MAs", className="mt-3 mb-2 small text-muted text-uppercase"),
-                ma_display.append(html.H6("Imbalance MAs", className="mt-3 mb-2 small text-muted text-uppercase"))
+                *[
-                for timeframe, ma_value in imbalance_ma_data.items():
+                    html.Div([
                    ma_color = "text-success" if ma_value > 0 else "text-danger"
                    ma_text = f"MA {timeframe}: {ma_value:.3f}"
                    ma_display.append(html.Div([
                        html.Strong(f"{timeframe}: ", className="small"),
-                        html.Span(ma_text, className=f"small {ma_color}")
+                        html.Span(f"MA {timeframe}: {ma_value:.3f}", className=f"small {'text-success' if ma_value > 0 else 'text-danger'}")
-                    ], className="mb-1"))
+                    ], className="mb-1")
-
+                    for timeframe, ma_value in (imbalance_ma_data or {}).items()
-            html.Div(ma_display),
+                ]
            ]) if imbalance_ma_data else html.Div(),
            html.Hr(className="my-2"),