Dobromir Popov
2025-07-30 11:40:30 +03:00
parent 6ca19f4536
commit 36f429a0e2
6 changed files with 528 additions and 211 deletions


@ -58,6 +58,7 @@ from core.extrema_trainer import (
from utils.inference_logger import get_inference_logger, log_model_inference
from utils.database_manager import get_database_manager
from utils.checkpoint_manager import load_best_checkpoint
from safe_logging import setup_training_logger
# Import COB integration for real-time market microstructure data
try:
@ -300,6 +301,9 @@ class TradingOrchestrator:
logger.info(f"Using device: {self.device}")
# Initialize training logger
self.training_logger = setup_training_logger()
# Configuration - AGGRESSIVE for more training data
self.confidence_threshold = self.config.orchestrator.get(
"confidence_threshold", 0.15
@ -1412,13 +1416,20 @@ class TradingOrchestrator:
ui_state = json.load(f)
if "model_toggle_states" in ui_state:
self.model_toggle_states.update(ui_state["model_toggle_states"])
# Validate and clean the loaded states
self._validate_model_toggle_states()
logger.info(f"UI state loaded from {self.ui_state_file}")
except Exception as e:
logger.error(f"Error loading UI state: {e}")
# If loading fails, ensure we have valid default states
self._validate_model_toggle_states()
def _save_ui_state(self):
"""Save UI state to file"""
try:
# Validate and clean model toggle states before saving
self._validate_model_toggle_states()
os.makedirs(os.path.dirname(self.ui_state_file), exist_ok=True)
ui_state = {
"model_toggle_states": self.model_toggle_states,
@ -1429,6 +1440,36 @@ class TradingOrchestrator:
logger.debug(f"UI state saved to {self.ui_state_file}")
except Exception as e:
logger.error(f"Error saving UI state: {e}")
def _validate_model_toggle_states(self):
"""Validate and clean model toggle states to ensure proper boolean values"""
try:
for model_name, toggle_state in self.model_toggle_states.items():
if not isinstance(toggle_state, dict):
logger.warning(f"Invalid toggle state for {model_name}, resetting to defaults")
self.model_toggle_states[model_name] = {"inference_enabled": True, "training_enabled": True}
continue
# Ensure inference_enabled is boolean
if "inference_enabled" in toggle_state:
if not isinstance(toggle_state["inference_enabled"], bool):
logger.warning(f"Invalid inference_enabled value for {model_name}: {toggle_state['inference_enabled']}, setting to True")
toggle_state["inference_enabled"] = True
# Ensure training_enabled is boolean
if "training_enabled" in toggle_state:
if not isinstance(toggle_state["training_enabled"], bool):
logger.warning(f"Invalid training_enabled value for {model_name}: {toggle_state['training_enabled']}, setting to True")
toggle_state["training_enabled"] = True
# Ensure both keys exist
if "inference_enabled" not in toggle_state:
toggle_state["inference_enabled"] = True
if "training_enabled" not in toggle_state:
toggle_state["training_enabled"] = True
except Exception as e:
logger.error(f"Error validating model toggle states: {e}")
def get_model_toggle_state(self, model_name: str) -> Dict[str, bool]:
"""Get toggle state for a model"""
@ -2248,7 +2289,7 @@ class TradingOrchestrator:
predicted_price_vector = prediction.metadata['price_direction']
# Calculate sophisticated reward using the new PnL penalty/reward system
sophisticated_reward, was_correct = self._calculate_sophisticated_reward(
sophisticated_reward, was_correct, should_skip = self._calculate_sophisticated_reward(
predicted_action=prediction.action,
prediction_confidence=prediction.confidence,
price_change_pct=price_change_pct,
@ -2260,6 +2301,11 @@ class TradingOrchestrator:
predicted_price_vector=predicted_price_vector
)
# Skip training if this is a neutral action (no position + HOLD)
if should_skip:
logger.debug(f"Skipping training for neutral action: {prediction.action} (no position)")
continue
# Create training record for the new training system
training_record = {
"symbol": symbol,
@ -2668,6 +2714,59 @@ class TradingOrchestrator:
return {}
def log_model_statistics(self, detailed: bool = False):
"""Log comprehensive model statistics and performance metrics"""
try:
self.training_logger.info("=" * 80)
self.training_logger.info("COMPREHENSIVE MODEL PERFORMANCE SUMMARY")
self.training_logger.info("=" * 80)
# Log overall system performance
if hasattr(self, 'model_performance'):
self.training_logger.info("OVERALL MODEL PERFORMANCE:")
for model_name, perf in self.model_performance.items():
accuracy = perf.get('accuracy', 0)
total = perf.get('total', 0)
correct = perf.get('correct', 0)
self.training_logger.info(f" {model_name.upper()}: {accuracy:.1%} ({correct}/{total})")
# Log detailed model statistics
if hasattr(self, 'model_statistics'):
self.training_logger.info("\nDETAILED MODEL STATISTICS:")
for model_name, stats in self.model_statistics.items():
self.training_logger.info(f" {model_name.upper()}:")
self.training_logger.info(f" Inferences: {stats.total_inferences}")
self.training_logger.info(f" Trainings: {stats.total_trainings}")
self.training_logger.info(f" Current loss: {stats.current_loss:.4f}" if stats.current_loss else " Current loss: N/A")
self.training_logger.info(f" Best loss: {stats.best_loss:.4f}" if stats.best_loss else " Best loss: N/A")
self.training_logger.info(f" Average loss: {stats.average_loss:.4f}" if stats.average_loss else " Average loss: N/A")
self.training_logger.info(f" Inference rate: {stats.inference_rate_per_minute:.1f}/min")
self.training_logger.info(f" Training rate: {stats.training_rate_per_minute:.1f}/min")
self.training_logger.info(f" Avg inference time: {stats.average_inference_time_ms:.1f}ms")
self.training_logger.info(f" Avg training time: {stats.average_training_time_ms:.1f}ms")
# Log decision fusion performance
if hasattr(self, 'decision_fusion_enabled') and self.decision_fusion_enabled:
self.training_logger.info("\nDECISION FUSION PERFORMANCE:")
self.training_logger.info(f" Mode: {getattr(self, 'decision_fusion_mode', 'unknown')}")
self.training_logger.info(f" Decisions made: {getattr(self, 'decision_fusion_decisions_count', 0)}")
self.training_logger.info(f" Training samples: {len(getattr(self, 'decision_fusion_training_data', []))}")
# Log enhanced training system status
if hasattr(self, 'enhanced_training_system'):
self.training_logger.info("\nENHANCED TRAINING SYSTEM:")
if self.enhanced_training_system:
stats = self.enhanced_training_system.get_training_statistics()
self.training_logger.info(f" Status: {'ACTIVE' if stats.get('is_training', False) else 'INACTIVE'}")
self.training_logger.info(f" Status: {'ACTIVE' if stats.get('is_training', False) else 'INACTIVE'}")
self.training_logger.info(f" Iteration: {stats.get('training_iteration', 0)}")
self.training_logger.info(f" Experience buffer: {stats.get('experience_buffer_size', 0)}")
else:
self.training_logger.info(" Status: NOT INITIALIZED")
self.training_logger.info("=" * 80)
except Exception as e:
logger.error(f"Error logging comprehensive statistics: {e}")
"""Log current model statistics for monitoring"""
try:
if not self.model_statistics:
@ -3283,7 +3382,7 @@ class TradingOrchestrator:
price_outcome = f"Inference: ${inference_price:.2f} ({time_diff_seconds:.1f}s ago) -> Current: ${current_price:.2f} ({actual_price_change_pct:+.2f}%)"
else:
# For older predictions, use a more conservative approach
price_outcome = f"Inference: ${inference_price:.2f} ({time_diff_seconds:.1f}s ago) -> Current: ${current_price:.2f} ({actual_price_change_pct:+.2f}%)"
price_outcome = f"Inference: ${inference_price:.2f} ({time_diff_seconds/60:.1f}m ago) -> Current: ${current_price:.2f} ({actual_price_change_pct:+.2f}%)"
else:
# Fall back to historical price comparison if no inference price
try:
@ -3367,7 +3466,7 @@ class TradingOrchestrator:
if "price_direction" in prediction and prediction["price_direction"]:
predicted_price_vector = prediction["price_direction"]
reward, _ = self._calculate_sophisticated_reward(
reward, _, should_skip = self._calculate_sophisticated_reward(
predicted_action,
predicted_confidence,
actual_price_change_pct,
@ -3528,7 +3627,7 @@ class TradingOrchestrator:
if "price_direction" in prediction and prediction["price_direction"]:
predicted_price_vector = prediction["price_direction"]
reward, was_correct = self._calculate_sophisticated_reward(
reward, was_correct, should_skip = self._calculate_sophisticated_reward(
predicted_action,
prediction_confidence,
price_change_pct,
@ -3540,6 +3639,11 @@ class TradingOrchestrator:
predicted_price_vector=predicted_price_vector,
)
# Skip training and accuracy tracking if this is a neutral action (no position + HOLD)
if should_skip:
logger.debug(f"Skipping training and accuracy tracking for neutral action: {predicted_action} (no position)")
return
# Update model performance tracking
if model_name not in self.model_performance:
self.model_performance[model_name] = {
@ -3592,17 +3696,29 @@ class TradingOrchestrator:
)
# Enhanced logging for training evaluation
logger.info(f"Training evaluation for {model_name}:")
logger.info(
self.training_logger.info(f"TRAINING EVALUATION for {model_name.upper()}:")
self.training_logger.info(
f" Action: {predicted_action} | Confidence: {prediction_confidence:.3f}"
)
logger.info(
self.training_logger.info(
f" Price change: {price_change_pct:+.3f}% | Time: {time_diff_seconds:.1f}s"
)
logger.info(f" Reward: {reward:.4f} | Correct: {was_correct}")
logger.info(
self.training_logger.info(f" Reward: {reward:.4f} | Correct: {was_correct}")
self.training_logger.info(
f" Accuracy: {self.model_performance[model_name]['accuracy']:.1%} ({self.model_performance[model_name]['correct']}/{self.model_performance[model_name]['total']})"
)
# Add detailed performance metrics logging
if hasattr(self, 'model_statistics') and model_name in self.model_statistics:
stats = self.model_statistics[model_name]
self.training_logger.info(f" Model Statistics:")
self.training_logger.info(f" Total inferences: {stats.total_inferences}")
self.training_logger.info(f" Total trainings: {stats.total_trainings}")
self.training_logger.info(f" Current loss: {stats.current_loss:.4f}" if stats.current_loss else " Current loss: N/A")
self.training_logger.info(f" Best loss: {stats.best_loss:.4f}" if stats.best_loss else " Best loss: N/A")
self.training_logger.info(f" Average loss: {stats.average_loss:.4f}" if stats.average_loss else " Average loss: N/A")
self.training_logger.info(f" Inference rate: {stats.inference_rate_per_minute:.1f}/min")
self.training_logger.info(f" Training rate: {stats.training_rate_per_minute:.1f}/min")
# Train the specific model based on sophisticated outcome
await self._train_model_on_outcome(
@ -3647,7 +3763,7 @@ class TradingOrchestrator:
has_position: bool = None,
current_position_pnl: float = 0.0,
predicted_price_vector: dict = None,
) -> tuple[float, bool]:
) -> tuple[float, Optional[bool], bool]:
"""
Calculate sophisticated reward based on prediction accuracy, confidence, and price movement magnitude
Now considers position status and current P&L when evaluating decisions
@ -3666,7 +3782,10 @@ class TradingOrchestrator:
predicted_price_vector: Dict with 'direction' (-1 to 1) and 'confidence' (0 to 1)
Returns:
tuple: (reward, was_correct)
tuple: (reward, was_correct, should_skip)
- reward: The calculated reward value
- was_correct: Whether the prediction was correct (True/False, or None when the outcome is excluded from accuracy tracking)
- should_skip: Whether this should be skipped from accuracy calculations and training (True/False)
"""
try:
# NOISE REDUCTION: Treat low-confidence signals as HOLD
@ -3677,10 +3796,11 @@ class TradingOrchestrator:
# FEE-AWARE THRESHOLDS: Account for trading fees (0.05-0.06% per trade, ~0.12% round trip)
fee_cost = 0.12 # 0.12% round trip fee cost
movement_threshold = 0.15 # Minimum movement to be profitable after fees
strong_movement_threshold = 0.5 # Strong movements - good profit potential
rapid_movement_threshold = 1.0 # Rapid movements - excellent profit potential
massive_movement_threshold = 2.0 # Massive movements - extraordinary profit potential
pnl_threshold = 0.02 # 0.02% - minimum movement to include in PnL/accuracy calculations
movement_threshold = 0.20 # Minimum movement to be profitable after fees (increased from 0.15%)
strong_movement_threshold = 0.8 # Strong movements - good profit potential (increased from 0.5%)
rapid_movement_threshold = 1.5 # Rapid movements - excellent profit potential (increased from 1.0%)
massive_movement_threshold = 3.0 # Massive movements - extraordinary profit potential (increased from 2.0%)
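# Worked example (illustrative): with the ~0.12% round-trip fee assumed above, a move
# at the 0.20% minimum threshold nets only ~0.08% before slippage, while a 0.8%
# "strong" move nets ~0.68%; the raised thresholds bias rewards toward moves that
# stay profitable after costs.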
# Determine current position status if not provided
if has_position is None and symbol:
@ -3693,29 +3813,48 @@ class TradingOrchestrator:
# Determine if prediction was directionally correct
was_correct = False
should_skip = False # Whether to skip from accuracy calculations and training
directional_accuracy = 0.0
# Check if price movement is significant enough for PnL/accuracy calculation
abs_price_change = abs(price_change_pct)
include_in_accuracy = abs_price_change >= pnl_threshold
# Always check directional correctness for learning, but only include significant moves in accuracy
direction_correct = False
if predicted_action == "BUY":
# BUY signals need to overcome fee costs for profitability
was_correct = price_change_pct > movement_threshold
# Check directional correctness (always for learning)
direction_correct = price_change_pct > 0
# ENHANCED FEE-AWARE REWARD STRUCTURE
if price_change_pct > massive_movement_threshold:
# Massive movements (2%+) - EXTRAORDINARY rewards for high confidence
directional_accuracy = price_change_pct * 5.0 # 5x multiplier for massive moves
if prediction_confidence > 0.8:
directional_accuracy *= 2.0 # Additional 2x for high confidence (10x total)
elif price_change_pct > rapid_movement_threshold:
# Rapid movements (1%+) - EXCELLENT rewards for high confidence
directional_accuracy = price_change_pct * 3.0 # 3x multiplier for rapid moves
if prediction_confidence > 0.7:
directional_accuracy *= 1.5 # Additional 1.5x for good confidence (4.5x total)
elif price_change_pct > strong_movement_threshold:
# Strong movements (0.5%+) - GOOD rewards
directional_accuracy = price_change_pct * 2.0 # 2x multiplier for strong moves
# Only consider "correct" for accuracy if movement is significant AND profitable
if include_in_accuracy:
was_correct = price_change_pct > movement_threshold
else:
# Small movements - minimal rewards (fees eat most profit)
directional_accuracy = max(0, (price_change_pct - fee_cost)) * 0.5 # Penalty for fee cost
# Small movement - learn direction but don't include in accuracy
was_correct = None # Exclude from accuracy calculation
# ENHANCED FEE-AWARE REWARD STRUCTURE (only for significant movements)
if include_in_accuracy:
if price_change_pct > massive_movement_threshold:
# Massive movements (2%+) - EXTRAORDINARY rewards for high confidence
directional_accuracy = price_change_pct * 5.0 # 5x multiplier for massive moves
if prediction_confidence > 0.8:
directional_accuracy *= 2.0 # Additional 2x for high confidence (10x total)
elif price_change_pct > rapid_movement_threshold:
# Rapid movements (1%+) - EXCELLENT rewards for high confidence
directional_accuracy = price_change_pct * 3.0 # 3x multiplier for rapid moves
if prediction_confidence > 0.7:
directional_accuracy *= 1.5 # Additional 1.5x for good confidence (4.5x total)
elif price_change_pct > strong_movement_threshold:
# Strong movements (0.5%+) - GOOD rewards
directional_accuracy = price_change_pct * 2.0 # 2x multiplier for strong moves
else:
# Small but significant movements - minimal rewards (fees eat most profit)
directional_accuracy = max(0, (price_change_pct - fee_cost)) * 0.5 # Penalty for fee cost
else:
# Very small movement - learn direction but minimal reward
directional_accuracy = price_change_pct * 0.1 if direction_correct else -abs(price_change_pct) * 0.1
elif predicted_action == "SELL":
# SELL signals need to overcome fee costs for profitability
@ -3741,41 +3880,86 @@ class TradingOrchestrator:
directional_accuracy = max(0, (abs_change - fee_cost)) * 0.5 # Penalty for fee cost
elif predicted_action == "HOLD":
# HOLD evaluation with noise reduction - smaller rewards to reduce training noise
# HOLD evaluation with position side awareness - considers LONG vs SHORT positions
if has_position:
# If we have a position, HOLD evaluation depends on P&L and price movement
# Get position side to properly evaluate HOLD decisions
position_side = self._get_position_side(symbol) if symbol else "LONG"
if current_position_pnl > 0: # Currently profitable position
# Holding a profitable position is good if price continues favorably
if price_change_pct > 0: # Price went up while holding profitable position - excellent
was_correct = True
directional_accuracy = price_change_pct * 0.8 # Reduced from 1.5 to reduce noise
elif abs(price_change_pct) < movement_threshold: # Price stable - good
was_correct = True
directional_accuracy = movement_threshold * 0.5 # Reduced reward to reduce noise
else: # Price dropped while holding profitable position - still okay but less reward
was_correct = True
directional_accuracy = max(0, (current_position_pnl / 100.0) - abs(price_change_pct) * 0.3)
if position_side == "LONG":
# For LONG positions: HOLD is good if price goes up or stays stable
if price_change_pct > 0: # Price went up - excellent hold
was_correct = True
directional_accuracy = price_change_pct * 0.8
elif abs(price_change_pct) < movement_threshold: # Price stable - good hold
was_correct = True
directional_accuracy = movement_threshold * 0.5
else: # Price dropped - still okay but less reward
was_correct = True
directional_accuracy = max(0, (current_position_pnl / 100.0) - abs(price_change_pct) * 0.3)
elif position_side == "SHORT":
# For SHORT positions: HOLD is good if price goes down or stays stable
if price_change_pct < 0: # Price went down - excellent hold
was_correct = True
directional_accuracy = abs(price_change_pct) * 0.8
elif abs(price_change_pct) < movement_threshold: # Price stable - good hold
was_correct = True
directional_accuracy = movement_threshold * 0.5
else: # Price went up - still okay but less reward
was_correct = True
directional_accuracy = max(0, (current_position_pnl / 100.0) - abs(price_change_pct) * 0.3)
else:
# Unknown position side - fallback to general logic
if abs(price_change_pct) < movement_threshold:
was_correct = True
directional_accuracy = movement_threshold * 0.4
else:
was_correct = False
directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.5
elif current_position_pnl < 0: # Currently losing position
# Holding a losing position is generally bad - should consider closing
if price_change_pct > movement_threshold: # Price recovered - good hold
was_correct = True
directional_accuracy = price_change_pct * 0.6 # Reduced reward
else: # Price continued down or stayed flat - bad hold
was_correct = False
# Penalty proportional to loss magnitude
directional_accuracy = abs(current_position_pnl / 100.0) * 0.3 # Reduced penalty
if position_side == "LONG":
# For LONG positions: HOLD is good if price recovers (goes up)
if price_change_pct > movement_threshold: # Price recovered - good hold
was_correct = True
directional_accuracy = price_change_pct * 0.6
else: # Price continued down or stayed flat - bad hold
was_correct = False
directional_accuracy = abs(current_position_pnl / 100.0) * 0.3
elif position_side == "SHORT":
# For SHORT positions: HOLD is good if price recovers (goes down)
if price_change_pct < -movement_threshold: # Price recovered - good hold
was_correct = True
directional_accuracy = abs(price_change_pct) * 0.6
else: # Price continued up or stayed flat - bad hold
was_correct = False
directional_accuracy = abs(current_position_pnl / 100.0) * 0.3
else:
# Unknown position side - fallback to general logic
if abs(price_change_pct) > movement_threshold:
was_correct = True
directional_accuracy = abs(price_change_pct) * 0.6
else:
was_correct = False
directional_accuracy = abs(current_position_pnl / 100.0) * 0.3
else: # Breakeven position
# Standard HOLD evaluation for breakeven positions
if abs(price_change_pct) < movement_threshold: # Price stable - good
was_correct = True
directional_accuracy = movement_threshold * 0.4 # Reduced reward
directional_accuracy = movement_threshold * 0.4
else: # Price moved significantly - missed opportunity
was_correct = False
directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.5
else:
# If we don't have a position, HOLD is correct if price stayed relatively stable
was_correct = abs(price_change_pct) < movement_threshold
directional_accuracy = max(0, movement_threshold - abs(price_change_pct)) * 0.4 # Reduced reward
# If we don't have a position, HOLD should be skipped from accuracy calculations and training
# No position + HOLD = NEUTRAL (no action taken, no profit/loss)
was_correct = None # Not applicable
should_skip = True # Skip from accuracy calculations and training
directional_accuracy = 0.0 # No reward/penalty for neutral action
# Force reward to 0.0 for NEUTRAL actions
final_reward = 0.0
return final_reward, was_correct, should_skip
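# Summary example (illustrative) of the HOLD branch above: holding a profitable SHORT
# while price drops another 0.5% is scored correct (directional_accuracy = 0.5 * 0.8 = 0.4
# before later multipliers), holding a losing LONG through a further drop is scored
# incorrect, and HOLD with no open position is treated as neutral and returns
# (0.0, None, True) immediately above.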
# Calculate FEE-AWARE magnitude-based multiplier (aggressive rewards for profitable movements)
abs_movement = abs(price_change_pct)
@ -3877,7 +4061,7 @@ class TradingOrchestrator:
# Clamp reward to reasonable range
final_reward = max(-5.0, min(5.0, final_reward))
return final_reward, was_correct
return final_reward, was_correct, should_skip
except Exception as e:
logger.error(f"Error calculating sophisticated reward: {e}")
@ -3887,14 +4071,23 @@ class TradingOrchestrator:
if predicted_action == "HOLD" and has_position:
# If holding a position, HOLD is correct if price didn't drop significantly
simple_correct = price_change_pct > -0.2 # Allow small losses while holding
should_skip = False
elif predicted_action == "HOLD" and not has_position:
# No position + HOLD = NEUTRAL, should be skipped
simple_correct = None
should_skip = True
# Force reward to 0.0 for NEUTRAL actions
return 0.0, simple_correct, should_skip
else:
# Standard evaluation for other cases
simple_correct = (
(predicted_action == "BUY" and price_change_pct > 0.1)
or (predicted_action == "SELL" and price_change_pct < -0.1)
or (predicted_action == "HOLD" and abs(price_change_pct) < 0.1)
)
return (1.0 if simple_correct else -0.5, simple_correct)
should_skip = False
simple_reward = 1.0 if simple_correct else (-0.5 if simple_correct is not None else 0.0)
return simple_reward, simple_correct, should_skip
def _calculate_price_vector_loss(
self,
@ -4138,7 +4331,7 @@ class TradingOrchestrator:
# Extract price vector from record if available
predicted_price_vector = record.get("price_direction") or record.get("predicted_price_vector")
sophisticated_reward, _ = self._calculate_sophisticated_reward(
sophisticated_reward, _, should_skip = self._calculate_sophisticated_reward(
record.get("action", "HOLD"),
record.get("confidence", 0.5),
price_change_pct,
@ -4149,6 +4342,11 @@ class TradingOrchestrator:
predicted_price_vector=predicted_price_vector,
)
# Skip training if this is a neutral action (no position + HOLD)
if should_skip:
logger.debug(f"Skipping training for neutral action: {record.get('action', 'HOLD')} (no position)")
return
# Calculate price vector training loss if we have vector predictions
if predicted_price_vector:
vector_loss = self._calculate_price_vector_loss(
@ -4346,17 +4544,21 @@ class TradingOrchestrator:
next_state=state, # Simplified - using same state
done=True,
)
logger.debug(
f"Added experience to {model_name}: action={prediction['action']}, reward={reward:.3f}"
)
logger.info(f"RL EXPERIENCE ADDED to {model_name.upper()}:")
logger.info(f" Action: {prediction['action']} (index: {action_idx})")
logger.info(f" Reward: {reward:.3f}")
logger.info(f" State shape: {state.shape}")
logger.info(f" Memory size: {memory_size}")
# Trigger training if enough experiences
memory_size = len(getattr(model, "memory", []))
batch_size = getattr(model, "batch_size", 32)
if memory_size >= batch_size:
logger.debug(
f"Training {model_name} with {memory_size} experiences"
)
self.training_logger.info(f"RL TRAINING STARTED for {model_name.upper()}:")
self.training_logger.info(f" Experiences: {memory_size}")
self.training_logger.info(f" Batch size: {batch_size}")
self.training_logger.info(f" Action: {prediction['action']}")
self.training_logger.info(f" Reward: {reward:.3f}")
# Ensure model is in training mode
if hasattr(model, "policy_net"):
@ -4371,9 +4573,13 @@ class TradingOrchestrator:
self._update_model_training_statistics(
model_name, training_loss, training_duration_ms
)
logger.debug(
f"RL training completed for {model_name}: loss={training_loss:.4f}, time={training_duration_ms:.1f}ms"
)
self.training_logger.info(f"RL TRAINING COMPLETED for {model_name.upper()}:")
self.training_logger.info(f" Loss: {training_loss:.4f}")
self.training_logger.info(f" Training time: {training_duration_ms:.1f}ms")
self.training_logger.info(f" Experiences used: {memory_size}")
self.training_logger.info(f" Action: {prediction['action']}")
self.training_logger.info(f" Reward: {reward:.3f}")
self.training_logger.info(f" State shape: {state.shape}")
return True
elif training_loss == 0.0:
logger.warning(
@ -4675,9 +4881,22 @@ class TradingOrchestrator:
model_name, current_loss, training_duration_ms
)
logger.debug(
f"CNN direct training completed: loss={current_loss:.4f}, time={training_duration_ms:.1f}ms"
self.training_logger.info(
f"CNN DIRECT TRAINING COMPLETED:"
)
self.training_logger.info(f" Model: {model_name}")
self.training_logger.info(f" Loss: {current_loss:.4f}")
self.training_logger.info(f" Training time: {training_duration_ms:.1f}ms")
self.training_logger.info(f" Action: {actual_action}")
self.training_logger.info(f" Reward: {reward:.4f}")
self.training_logger.info(f" Symbol: {symbol}")
# Log detailed loss breakdown
if 'price_direction_loss' in locals():
self.training_logger.info(f" Price direction loss: {price_direction_loss:.4f}")
if 'extrema_loss' in locals():
logger.info(f" Extrema loss: {extrema_loss:.4f}")
logger.info(f" Total loss: {total_loss:.4f}")
# Trigger long-term training on stored inference records
if hasattr(self.cnn_model, "train_on_stored_records") and hasattr(self, "cnn_optimizer"):
@ -4708,7 +4927,12 @@ class TradingOrchestrator:
# Train on stored records
long_term_loss = self.cnn_model.train_on_stored_records(self.cnn_optimizer, min_records=5)
if long_term_loss > 0:
logger.debug(f"CNN long-term training completed: loss={long_term_loss:.4f}")
self.training_logger.info(f"CNN LONG-TERM TRAINING COMPLETED:")
self.training_logger.info(f" Long-term loss: {long_term_loss:.4f}")
self.training_logger.info(f" Records processed: {len(self.cnn_model.inference_records)}")
self.training_logger.info(f" Price change: {price_change_pct:+.3f}%")
self.training_logger.info(f" Current price: ${current_price:.2f}")
self.training_logger.info(f" Inference price: ${inference_price:.2f}")
except Exception as e:
logger.debug(f"Error in CNN long-term training: {e}")
@ -4722,9 +4946,10 @@ class TradingOrchestrator:
symbol = record.get("symbol", "ETH/USDT")
actual_action = prediction["action"]
model.add_training_sample(symbol, actual_action, reward)
logger.debug(
f"Added training sample to {model_name}: action={actual_action}, reward={reward:.3f}"
)
logger.info(f"TRAINING SAMPLE ADDED to {model_name.upper()}:")
logger.info(f" Action: {actual_action}")
logger.info(f" Reward: {reward:.3f}")
logger.info(f" Symbol: {symbol}")
# If model has train method, trigger training
if hasattr(model, "train") and callable(getattr(model, "train")):
@ -4741,9 +4966,30 @@ class TradingOrchestrator:
self._update_model_training_statistics(
model_name, current_loss, training_duration_ms
)
logger.debug(
f"Model {model_name} training completed: loss={current_loss:.4f}"
)
self.training_logger.info(f"MODEL TRAINING COMPLETED for {model_name.upper()}:")
self.training_logger.info(f" Loss: {current_loss:.4f}")
self.training_logger.info(f" Training time: {training_duration_ms:.1f}ms")
self.training_logger.info(f" Action: {actual_action}")
self.training_logger.info(f" Reward: {reward:.3f}")
self.training_logger.info(f" Symbol: {symbol}")
# Log additional training metrics if available
if "accuracy" in training_results:
self.training_logger.info(f" Accuracy: {training_results['accuracy']:.4f}")
if "epochs" in training_results:
self.training_logger.info(f" Epochs: {training_results['epochs']}")
if "samples" in training_results:
self.training_logger.info(f" Samples: {training_results['samples']}")
# Periodic comprehensive logging (every 10th training)
if hasattr(self, '_training_count'):
self._training_count += 1
else:
self._training_count = 1
if self._training_count % 10 == 0:
self.training_logger.info("PERIODIC COMPREHENSIVE TRAINING LOG:")
self.log_model_statistics(detailed=True)
else:
self._update_model_training_statistics(
model_name, training_duration_ms=training_duration_ms
@ -7169,6 +7415,19 @@ class TradingOrchestrator:
except Exception:
return False
def _get_position_side(self, symbol: str) -> Optional[str]:
"""Get the side of the current position (LONG/SHORT) or None if no position"""
try:
if self.trading_executor and hasattr(
self.trading_executor, "get_current_position"
):
position = self.trading_executor.get_current_position(symbol)
if position and position.get("size", 0) > 0:
return position.get("side", "LONG").upper()
return None
except Exception:
return None
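# Illustrative usage (assumes the executor returns a dict like {"size": 1.5, "side": "short"}):
# such a position yields "SHORT", while a missing or zero-size position yields None,
# which the HOLD evaluation above falls back to treating as an unknown side.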
def _calculate_position_enhanced_reward_for_dqn(self, base_reward, action, position_pnl, has_position):