"""
Trading Orchestrator - Main Decision Making Module

CRITICAL POLICY: NO SYNTHETIC DATA ALLOWED
This module MUST ONLY use real market data from exchanges.
NEVER use np.random.*, mock/fake/synthetic data, or placeholder values.
If data is unavailable: return None/0/empty, log errors, raise exceptions.
See: reports/REAL_MARKET_DATA_POLICY.md

This is the core orchestrator that:
1. Coordinates CNN and RL modules via model registry
2. Combines their outputs with confidence weighting
3. Makes final trading decisions (BUY/SELL/HOLD)
4. Manages the learning loop between components
5. Ensures memory efficiency (8GB constraint)
6. Provides real-time COB (Change of Bid) data for models
7. Integrates EnhancedRealtimeTrainingSystem for continuous learning
"""

import asyncio
import logging
import time
import threading
<<<<<<< HEAD
=======
import numpy as np
import pandas as pd
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Tuple, Union, Deque
from dataclasses import dataclass, field
from collections import deque
import json

# Try to import optional dependencies
try:
    import numpy as np
    HAS_NUMPY = True
except ImportError:
    np = None
    HAS_NUMPY = False

try:
    import pandas as pd
    HAS_PANDAS = True
except ImportError:
    pd = None
    HAS_PANDAS = False

import os
import shutil

# Try to import PyTorch
try:
    import torch
    import torch.nn as nn
    import torch.optim as optim
    HAS_TORCH = True
except ImportError:
    torch = None
    nn = None
    optim = None
    HAS_TORCH = False

# Text export integration
from .text_export_integration import TextExportManager
from .llm_proxy import LLMProxy, LLMConfig
import pandas as pd
from pathlib import Path

from .config import get_config
from .data_provider import DataProvider
from .universal_data_adapter import UniversalDataAdapter, UniversalDataStream
<<<<<<< HEAD
from NN.training.model_manager import create_model_manager, ModelManager, ModelMetrics, CheckpointMetadata
from NN.models.model_interfaces import ModelInterface, CNNModelInterface, RLAgentInterface, ExtremaTrainerInterface # Import from new file
from NN.models.cob_rl_model import COBRLModelInterface # Specific import for COB RL Interface
from core.extrema_trainer import ExtremaTrainer # Import ExtremaTrainer for its interface
=======
from models import (
    get_model_registry,
    ModelInterface,
    CNNModelInterface,
    RLAgentInterface,
    ModelRegistry,
)
from NN.models.cob_rl_model import (
    COBRLModelInterface,
)  # Specific import for COB RL Interface
from NN.models.model_interfaces import (
    ModelInterface as NNModelInterface,
    CNNModelInterface as NNCNNModelInterface,
    RLAgentInterface as NNRLAgentInterface,
    ExtremaTrainerInterface as NNExtremaTrainerInterface,
)  # Import from new file
from core.extrema_trainer import (
    ExtremaTrainer,
)  # Import ExtremaTrainer for its interface

# Import new logging and database systems
from utils.inference_logger import get_inference_logger, log_model_inference
from utils.database_manager import get_database_manager
from utils.checkpoint_manager import load_best_checkpoint
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b

# Import COB integration for real-time market microstructure data
try:
    from .cob_integration import COBIntegration
    from .multi_exchange_cob_provider import COBSnapshot

    COB_INTEGRATION_AVAILABLE = True
except ImportError:
    COB_INTEGRATION_AVAILABLE = False
    COBIntegration = None
    COBSnapshot = None

# Import EnhancedRealtimeTrainingSystem (support multiple locations)
try:
    # Preferred location under NN/training
    from NN.training.enhanced_realtime_training import EnhancedRealtimeTrainingSystem  # type: ignore
    ENHANCED_TRAINING_AVAILABLE = True
except Exception:
    try:
        # Fallback flat import
        from enhanced_realtime_training import EnhancedRealtimeTrainingSystem  # type: ignore
        ENHANCED_TRAINING_AVAILABLE = True
    except Exception:
        # Dynamic sys.path injection as last resort
        try:
            import sys, os
            current_dir = os.path.dirname(os.path.abspath(__file__))
            nn_training_dir = os.path.normpath(os.path.join(current_dir, "..", "NN", "training"))
            if nn_training_dir not in sys.path:
                sys.path.insert(0, nn_training_dir)
            from enhanced_realtime_training import EnhancedRealtimeTrainingSystem  # type: ignore
            ENHANCED_TRAINING_AVAILABLE = True
        except Exception:
            EnhancedRealtimeTrainingSystem = None  # type: ignore
            ENHANCED_TRAINING_AVAILABLE = False
            logging.warning(
                "EnhancedRealtimeTrainingSystem not found. Real-time training features will be disabled."
            )

logger = logging.getLogger(__name__)


@dataclass
class Prediction:
    """A single model's output for one symbol/timeframe.

    Attributes:
        action: Suggested action, one of 'BUY', 'SELL' or 'HOLD'.
        confidence: Confidence in ``action``, from 0.0 to 1.0.
        probabilities: Probability assigned to each action.
        timeframe: Timeframe the prediction applies to.
        timestamp: When the prediction was produced.
        model_name: Name of the model that produced the prediction.
        metadata: Optional additional model-specific data.
    """

    action: str
    confidence: float
    probabilities: Dict[str, float]
    timeframe: str
    timestamp: datetime
    model_name: str
    metadata: Optional[Dict[str, Any]] = field(default=None)


@dataclass
class ModelStatistics:
    """Rolling inference/training statistics for a single model.

    Timestamps, durations and losses are kept in bounded deques (the most
    recent 100 entries; 50 for predictions). Averages and rates are computed
    over those windows, while ``best_loss`` / ``worst_loss`` keep the extremes
    seen since the first recorded loss. Inference and training losses share
    the same ``losses`` window and the same loss summary fields.
    """

    model_name: str
    last_inference_time: Optional[datetime] = None
    last_training_time: Optional[datetime] = None
    total_inferences: int = 0
    total_trainings: int = 0
    inference_rate_per_minute: float = 0.0
    inference_rate_per_second: float = 0.0
    training_rate_per_minute: float = 0.0
    training_rate_per_second: float = 0.0
    average_inference_time_ms: float = 0.0
    average_training_time_ms: float = 0.0
    current_loss: Optional[float] = None
    average_loss: Optional[float] = None
    best_loss: Optional[float] = None
    worst_loss: Optional[float] = None
    accuracy: Optional[float] = None
    last_prediction: Optional[str] = None
    last_confidence: Optional[float] = None
    # Wall-clock timestamps of the last 100 inference calls
    inference_times: deque = field(default_factory=lambda: deque(maxlen=100))
    # Wall-clock timestamps of the last 100 training calls
    training_times: deque = field(default_factory=lambda: deque(maxlen=100))
    # Durations (ms) of the last 100 inferences
    inference_durations_ms: deque = field(default_factory=lambda: deque(maxlen=100))
    # Durations (ms) of the last 100 training steps
    training_durations_ms: deque = field(default_factory=lambda: deque(maxlen=100))
    # Last 100 losses (inference and training combined)
    losses: deque = field(default_factory=lambda: deque(maxlen=100))
    # Last 50 predictions as {"action", "confidence", "timestamp"} dicts
    predictions_history: deque = field(default_factory=lambda: deque(maxlen=50))

    def update_inference_stats(
        self,
        prediction: Optional["Prediction"] = None,
        loss: Optional[float] = None,
        inference_duration_ms: Optional[float] = None,
    ):
        """Record one inference event and refresh the derived statistics.

        Args:
            prediction: The prediction that was produced, if any. Its
                ``action``, ``confidence`` and ``timestamp`` are stored.
            loss: Loss associated with this inference, if known.
            inference_duration_ms: How long the inference took, in milliseconds.
        """
        now = datetime.now()

        self.last_inference_time = now
        self.total_inferences += 1
        self.inference_times.append(now)

        if inference_duration_ms is not None:
            self.inference_durations_ms.append(inference_duration_ms)
            self.average_inference_time_ms = sum(self.inference_durations_ms) / len(
                self.inference_durations_ms
            )

        # Keep the previous rate when the window is too small to measure.
        rate = self._events_per_second(self.inference_times)
        if rate is not None:
            self.inference_rate_per_second = rate
            self.inference_rate_per_minute = rate * 60

        if prediction:
            self.last_prediction = prediction.action
            self.last_confidence = prediction.confidence
            self.predictions_history.append(
                {
                    "action": prediction.action,
                    "confidence": prediction.confidence,
                    "timestamp": prediction.timestamp,
                }
            )

        if loss is not None:
            self._record_loss(loss)

    def update_training_stats(
        self, loss: Optional[float] = None, training_duration_ms: Optional[float] = None
    ):
        """Record one training event and refresh the derived statistics.

        Args:
            loss: Training loss for this step, if known.
            training_duration_ms: How long the step took, in milliseconds.
        """
        now = datetime.now()

        self.last_training_time = now
        self.total_trainings += 1
        self.training_times.append(now)

        if training_duration_ms is not None:
            self.training_durations_ms.append(training_duration_ms)
            self.average_training_time_ms = sum(self.training_durations_ms) / len(
                self.training_durations_ms
            )

        # Keep the previous rate when the window is too small to measure.
        rate = self._events_per_second(self.training_times)
        if rate is not None:
            self.training_rate_per_second = rate
            self.training_rate_per_minute = rate * 60

        if loss is not None:
            self._record_loss(loss)

    @staticmethod
    def _events_per_second(timestamps) -> Optional[float]:
        """Return the event rate over the span covered by *timestamps*.

        ``n`` timestamps delimit ``n - 1`` intervals, so the rate is
        ``(n - 1) / (last - first)``; dividing ``n`` by the span would
        overstate it (two events 10 s apart are 0.1/s, not 0.2/s).

        Returns:
            Events per second, or ``None`` when there are fewer than two
            timestamps or they span no positive amount of time.
        """
        if len(timestamps) < 2:
            return None
        window_seconds = (timestamps[-1] - timestamps[0]).total_seconds()
        if window_seconds <= 0:
            return None
        return (len(timestamps) - 1) / window_seconds

    def _record_loss(self, loss: float) -> None:
        """Append *loss* to the window and update current/average/best/worst."""
        self.current_loss = loss
        self.losses.append(loss)

        # Average is over the bounded window; best/worst are running extremes.
        self.average_loss = sum(self.losses) / len(self.losses)
        self.best_loss = (
            min(self.losses) if self.best_loss is None else min(self.best_loss, loss)
        )
        self.worst_loss = (
            max(self.losses) if self.worst_loss is None else max(self.worst_loss, loss)
        )


@dataclass
class TradingDecision:
    """The orchestrator's final trading verdict for one symbol.

    Attributes:
        action: 'BUY', 'SELL' or 'HOLD'.
        confidence: Combined confidence behind the action.
        symbol: Symbol the decision applies to.
        price: Price attached to the decision.
        timestamp: When the decision was made.
        reasoning: Why this decision was made.
        memory_usage: Memory usage of the models.
        source: Origin of the decision (a model name or the system itself).
        entry_aggressiveness: 0.0 = conservative, 1.0 = very aggressive.
        exit_aggressiveness: 0.0 = conservative, 1.0 = very aggressive.
        current_position_pnl: P&L of the current open position, kept for RL
            feedback.
    """

    action: str
    confidence: float
    symbol: str
    price: float
    timestamp: datetime
    reasoning: Dict[str, Any]
    memory_usage: Dict[str, int]
    source: str = field(default="orchestrator")
    # Aggressiveness parameters
    entry_aggressiveness: float = field(default=0.5)
    exit_aggressiveness: float = field(default=0.5)
    current_position_pnl: float = field(default=0.0)


class TradingOrchestrator:
    """
    Enhanced Trading Orchestrator with full ML and COB integration
    Coordinates CNN, DQN, and COB models for advanced trading decisions
    Features real-time COB (Change of Bid) data for market microstructure data
    Includes EnhancedRealtimeTrainingSystem for continuous learning
    """
<<<<<<< HEAD
    
    def __init__(self, data_provider: Optional[DataProvider] = None, enhanced_rl_training: bool = True, model_manager: Optional[ModelManager] = None):
=======

    def __init__(
        self,
        data_provider: Optional[DataProvider] = None,
        enhanced_rl_training: bool = True,
        model_registry: Optional[ModelRegistry] = None,
    ):
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
        """Initialize the enhanced orchestrator with full ML capabilities.

        NOTE(review): this method still contains unresolved git merge-conflict
        markers (``<<<<<<< HEAD`` / ``=======`` / ``>>>>>>>``), including on its
        own signature, so it cannot be parsed until they are resolved. The
        comments in the body describe both sides as they currently stand.

        Args:
            data_provider: Market data source; ``DataProvider()`` is created
                when omitted.
            enhanced_rl_training: Stored as ``self.enhanced_rl_training`` and
                used as the initial value of ``self.training_enabled``.
            model_manager: (HEAD signature) Falls back to
                ``create_model_manager()`` when omitted. The body reads this
                name.
            model_registry: (incoming signature) Not referenced by the code
                visible in this method.

        Side effects:
            Starts data collection on the data provider when it exposes
            ``start_centralized_data_collection`` or
            ``start_training_data_collection``, then runs the
            ``_initialize_*`` / ``_start_*`` helpers called at the end of the
            method.
        """
        self.config = get_config()
        self.data_provider = data_provider or DataProvider()
        self.universal_adapter = UniversalDataAdapter(self.data_provider)
        self.model_manager = model_manager or create_model_manager()
        self.enhanced_rl_training = enhanced_rl_training

        # Select the torch device: use CUDA only if a probe allocation succeeds,
        # otherwise fall back to CPU.
        # NOTE(review): the module-level import guard sets ``torch = None`` when
        # PyTorch is not installed, in which case the next line raises
        # AttributeError - confirm PyTorch is a hard requirement here.
        if torch.cuda.is_available():
            try:
                # Probe CUDA with a tiny allocation; the tensor is not used afterwards
                test_tensor = torch.tensor([1.0]).cuda()
                self.device = torch.device("cuda")
                logger.info("CUDA device initialized successfully")
            except Exception as e:
                logger.warning(f"CUDA initialization failed: {e}, falling back to CPU")
                self.device = torch.device("cpu")
        else:
            self.device = torch.device("cpu")
        
        logger.info(f"Using device: {self.device}")

        # Canonical model name aliases to eliminate ambiguity across UI/DB/FS
        # Canonical → accepted aliases (internal/legacy)
        self.model_name_aliases: Dict[str, list] = {
            "DQN": ["dqn_agent", "dqn"],
            "CNN": ["enhanced_cnn", "cnn", "cnn_model", "standardized_cnn"],
            "EXTREMA": ["extrema_trainer", "extrema"],
            "COB": ["cob_rl_model", "cob_rl"],
            "DECISION": ["decision_fusion", "decision"],
        }

        # Recent inference buffer for vector supervision (configurable length)
        self.recent_inference_maxlen: int = self.config.orchestrator.get(
            "recent_inference_buffer", 10
        )
        # Model name -> deque of recent inference records
        self.recent_inferences: Dict[str, Deque[Dict]] = {}

        # Configuration - AGGRESSIVE for more training data
        # NOTE(review): unresolved conflict below. Code later in this method reads
        # self.symbol and self.ref_symbols, which only the incoming side defines;
        # the two sides also disagree on the decision_frequency default (5 vs 30).
<<<<<<< HEAD
        self.confidence_threshold = self.config.orchestrator.get('confidence_threshold', 0.15)  # Lowered from 0.20
        self.confidence_threshold_close = self.config.orchestrator.get('confidence_threshold_close', 0.08)  # Lowered from 0.10
        self.decision_frequency = self.config.orchestrator.get('decision_frequency', 5)
        self.symbols = self.config.get('symbols', ['ETH/USDT'])  # Enhanced to support multiple symbols
        
=======
        self.confidence_threshold = self.config.orchestrator.get(
            "confidence_threshold", 0.15
        )  # Lowered from 0.20
        self.confidence_threshold_close = self.config.orchestrator.get(
            "confidence_threshold_close", 0.08
        )  # Lowered from 0.10
        # Decision frequency limit to prevent excessive trading
        self.decision_frequency = self.config.orchestrator.get("decision_frequency", 30)

        self.symbol = self.config.get(
            "symbol", "ETH/USDT"
        )  # main symbol we are trading and making predictions on. only one!
        self.ref_symbols = self.config.get(
            "ref_symbols", ["BTC/USDT"]
        )  # Enhanced to support multiple reference symbols. ToDo: we can add 'SOL/USDT' later

>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
        # NEW: Aggressiveness parameters
        self.entry_aggressiveness = self.config.orchestrator.get(
            "entry_aggressiveness", 0.5
        )  # 0.0 = conservative, 1.0 = very aggressive
        self.exit_aggressiveness = self.config.orchestrator.get(
            "exit_aggressiveness", 0.5
        )  # 0.0 = conservative, 1.0 = very aggressive

        # Position tracking for P&L feedback
        self.current_positions: Dict[str, Dict] = (
            {}
        )  # {symbol: {side, size, entry_price, entry_time, pnl}}
        self.trading_executor = None  # Will be set by dashboard or external system
        # NOTE(review): unresolved conflict below. Code later in this method
        # writes to self.signal_accumulator, which only the incoming side creates.
<<<<<<< HEAD
        
        # Model management delegated to unified ModelManager
        # self.model_weights and self.model_performance are now handled by self.model_manager

        # State tracking
        self.last_decision_time: Dict[str, datetime] = {}  # {symbol: datetime}
        self.recent_decisions: Dict[str, List[TradingDecision]] = {}    # {symbol: List[TradingDecision]}
        
=======

        # Dashboard reference for callbacks
        self.dashboard = None

        # Real-time processing state
        self.realtime_processing = False
        self.realtime_processing_task = None
        self.running = False
        self.trade_loop_task = None

        # Dynamic weights (will be adapted based on performance)
        self.model_weights: Dict[str, float] = {}  # {model_name: weight}
        self._initialize_default_weights()

        # State tracking
        self.last_decision_time: Dict[str, datetime] = {}  # {symbol: datetime}
        self.recent_decisions: Dict[str, List[TradingDecision]] = (
            {}
        )  # {symbol: List[TradingDecision]}
        self.model_performance: Dict[str, Dict[str, Any]] = (
            {}
        )  # {model_name: {'correct': int, 'total': int, 'accuracy': float}}

        # Model statistics tracking
        self.model_statistics: Dict[str, ModelStatistics] = (
            {}
        )  # {model_name: ModelStatistics}

        # Signal rate limiting to prevent spam
        self.last_signal_time: Dict[str, Dict[str, datetime]] = (
            {}
        )  # {symbol: {action: datetime}}
        self.min_signal_interval = timedelta(
            seconds=30
        )  # Minimum 30 seconds between same signals
        self.last_confirmed_signal: Dict[str, Dict[str, Any]] = (
            {}
        )  # {symbol: {action, timestamp, confidence}}

        # Decision fusion overconfidence tracking
        self.decision_fusion_overconfidence_count = 0
        self.max_overconfidence_threshold = 3  # Disable after 3 overconfidence detections

        # Signal accumulation for trend confirmation
        self.signal_accumulator: Dict[str, List[Dict]] = (
            {}
        )  # {symbol: List[signal_data]}
        self.required_confirmations = 3  # Number of consistent signals needed
        self.signal_timeout_seconds = 30  # Signals expire after 30 seconds

>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
        # Model prediction tracking for dashboard visualization
        self.recent_dqn_predictions: Dict[str, deque] = (
            {}
        )  # {symbol: deque} - Recent DQN predictions
        self.recent_cnn_predictions: Dict[str, deque] = (
            {}
        )  # {symbol: deque} - Recent CNN predictions
        self.prediction_accuracy_history: Dict[str, deque] = (
            {}
        )  # {symbol: deque} - Prediction accuracy tracking

        # Initialize prediction tracking for the primary trading symbol only
        # (bounded deques: 100 DQN, 50 CNN, 200 accuracy records)
        self.recent_dqn_predictions[self.symbol] = deque(maxlen=100)
        self.recent_cnn_predictions[self.symbol] = deque(maxlen=50)
        self.prediction_accuracy_history[self.symbol] = deque(maxlen=200)
        self.signal_accumulator[self.symbol] = []

        # Decision callbacks
        self.decision_callbacks: List[Any] = []

        # ENHANCED: Decision Fusion System - Built into orchestrator (no separate file needed!)
        self.decision_fusion_enabled: bool = True
        self.decision_fusion_network: Any = None
        self.fusion_training_history: List[Any] = []
        self.last_fusion_inputs: Dict[str, Any] = (
            {}
        )

        # Model toggle states - control which models contribute to decisions
        self.model_toggle_states = {
            "dqn": {"inference_enabled": True, "training_enabled": True, "routing_enabled": True},
            "cnn": {"inference_enabled": True, "training_enabled": True, "routing_enabled": True},
            "cob_rl": {"inference_enabled": True, "training_enabled": True, "routing_enabled": True},
            "decision_fusion": {"inference_enabled": True, "training_enabled": True, "routing_enabled": True},
            "transformer": {"inference_enabled": True, "training_enabled": True, "routing_enabled": True},
        }
        
        # UI state persistence
        self.ui_state_file = "data/ui_state.json"
        self._load_ui_state()  # presumably restores persisted UI state from ui_state_file - confirm in _load_ui_state
        self.fusion_checkpoint_frequency: int = 50  # Save every 50 decisions
        self.fusion_decisions_count: int = 0
        self.fusion_training_data: List[Any] = (
            []
        )  # Store training examples for decision model

        # Use data provider directly for BaseDataInput building (optimized)

        # COB Integration - Real-time market microstructure data
        self.cob_integration = (
            None  # Will be set to COBIntegration instance if available
        )
        self.latest_cob_data: Dict[str, Any] = {}  # {symbol: COBSnapshot}
        self.latest_cob_features: Dict[str, Any] = (
            {}
        )  # {symbol: np.ndarray} - CNN features
        self.latest_cob_state: Dict[str, Any] = (
            {}
        )  # {symbol: np.ndarray} - DQN state features
        self.cob_feature_history: Dict[str, List[Any]] = {
            self.symbol: []
        }  # Rolling history for primary trading symbol

        # Enhanced ML Models (all start as None; populated by the initializers below)
        self.rl_agent: Any = None  # DQN Agent
        self.cnn_model: Any = None  # CNN Model for pattern recognition
        self.extrema_trainer: Any = None  # Extrema/pivot trainer
        self.primary_transformer: Any = None  # Transformer model
        self.primary_transformer_trainer: Any = None  # Transformer model trainer
        self.transformer_checkpoint_info: Dict[str, Any] = (
            {}
        )  # Transformer checkpoint info
        self.cob_rl_agent: Any = None  # COB RL Agent
        self.decision_model: Any = None  # Decision Fusion model

        self.latest_cnn_features: Dict[str, Any] = {}  # CNN hidden features
        self.latest_cnn_predictions: Dict[str, Any] = {}  # CNN predictions

        # Enhanced RL features
        self.sensitivity_learning_queue: List[Any] = []  # For outcome-based learning
        self.perfect_move_buffer: List[Any] = []  # Buffer for perfect move analysis
        self.position_status: Dict[str, Any] = {}  # Current positions

        # Real-time processing with error handling
        # NOTE(review): realtime_processing is also assigned on the incoming side
        # of the conflict above; this second assignment is redundant if that side
        # is kept.
        self.realtime_processing: bool = False
        self.realtime_tasks: List[Any] = []
        self.failed_tasks: List[Any] = []  # Track failed tasks for debugging

        # Training tracking
        self.last_trained_symbols: Dict[str, datetime] = {}

        # SIMPLIFIED INFERENCE DATA STORAGE - Single last inference per model
        self.last_inference: Dict[str, Dict] = {}  # {model_name: last_inference_record}

        # Initialize inference logger and database manager
        self.inference_logger = get_inference_logger()
        self.db_manager = get_database_manager()

        # ENHANCED: Real-time Training System Integration
        self.enhanced_training_system = (
            None  # Will be set to EnhancedRealtimeTrainingSystem if available
        )
        # Enable training by default - don't depend on external training system
        self.training_enabled: bool = enhanced_rl_training

        logger.info(
            "Enhanced TradingOrchestrator initialized with full ML capabilities"
        )
        logger.info(f"Enhanced RL training: {enhanced_rl_training}")
        logger.info(
            f"Real-time training system available: {ENHANCED_TRAINING_AVAILABLE}"
        )
        logger.info(f"Training enabled: {self.training_enabled}")
        logger.info(f"Confidence threshold: {self.confidence_threshold}")
        # logger.info(f"Decision frequency: {self.decision_frequency}s")
        logger.info(
            f"Primary symbol: {self.symbol}, Reference symbols: {self.ref_symbols}"
        )
        logger.info("Universal Data Adapter integrated for centralized data flow")

        # Start data collection if available; centralized collection is preferred
        # over training-only collection when the provider offers both
        logger.info("Starting data collection...")
        if hasattr(self.data_provider, "start_centralized_data_collection"):
            self.data_provider.start_centralized_data_collection()
            logger.info(
                "Centralized data collection started - all models and dashboard will receive data"
            )
        elif hasattr(self.data_provider, "start_training_data_collection"):
            self.data_provider.start_training_data_collection()
            logger.info("Training data collection started")
        else:
            logger.info(
                "Data provider does not require explicit data collection startup"
            )

        # Data provider is already initialized and optimized

        # Log initial data status
        logger.info("Simplified data integration initialized")
        self._log_data_status()

        # Initialize database cleanup task
        self._schedule_database_cleanup()

        # CRITICAL: Initialize checkpoint manager for saving training progress
        self.checkpoint_manager = None
        self.training_iterations = 0  # Track training iterations for periodic saves
        self._initialize_checkpoint_manager()

        # Initialize models, COB integration, and training system
        self._initialize_ml_models()
        self._initialize_cob_integration()
        self._start_cob_integration_sync()  # Start COB integration
        self._initialize_decision_fusion()  # Initialize fusion system
        self._initialize_transformer_model()  # Initialize transformer model
        self._initialize_enhanced_training_system()  # Initialize real-time training
<<<<<<< HEAD
        
        # Initialize and start data stream monitor (single source of truth)
        self._initialize_data_stream_monitor()
        
        # Load historical data for models and RL training
        self._load_historical_data_for_models()
    
    # SINGLE-USE FUNCTION - Called only once in codebase
=======
        self._initialize_text_export_manager()  # Initialize text data export
        self._initialize_llm_proxy()  # Initialize LLM proxy for trading signals

    def _normalize_model_name(self, name: str) -> str:
        """Map various registry/UI names to canonical toggle keys."""
        try:
            # Use alias map to unify names to canonical keys
            alias_to_canonical = {
                **{alias: "DQN" for alias in ["dqn_agent", "dqn"]},
                **{alias: "CNN" for alias in ["enhanced_cnn", "cnn", "cnn_model", "standardized_cnn"]},
                **{alias: "EXTREMA" for alias in ["extrema_trainer", "extrema"]},
                **{alias: "COB" for alias in ["cob_rl_model", "cob_rl"]},
                **{alias: "DECISION" for alias in ["decision_fusion", "decision"]},
                "transformer_model": "TRANSFORMER",
            }
            return alias_to_canonical.get(name, name)
        except Exception:
            return name
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
    def _initialize_ml_models(self):
        """Initialize ML models for enhanced trading"""
        try:
            logger.info("Initializing ML models...")
<<<<<<< HEAD
            
            # Initialize model state tracking (SSOT)
            # Note: COB_RL functionality is now integrated into Enhanced CNN
            self.model_states = {
                'dqn': {'initial_loss': None, 'current_loss': None, 'best_loss': None, 'checkpoint_loaded': False},
                'cnn': {'initial_loss': None, 'current_loss': None, 'best_loss': None, 'checkpoint_loaded': False},
                'decision': {'initial_loss': None, 'current_loss': None, 'best_loss': None, 'checkpoint_loaded': False},
                'extrema_trainer': {'initial_loss': None, 'current_loss': None, 'best_loss': None, 'checkpoint_loaded': False},
                'transformer': {'initial_loss': None, 'current_loss': None, 'best_loss': None, 'checkpoint_loaded': False}
=======

            # Initialize model state tracking (SSOT) - Updated with current training progress
            self.model_states = {
                "dqn": {
                    "initial_loss": None,
                    "current_loss": None,
                    "best_loss": None,
                    "checkpoint_loaded": True,
                },
                "cnn": {
                    "initial_loss": None,
                    "current_loss": None,
                    "best_loss": None,
                    "checkpoint_loaded": True,
                },
                "cob_rl": {
                    "initial_loss": None,
                    "current_loss": None,
                    "best_loss": None,
                    "checkpoint_loaded": False,
                },
                "decision": {
                    "initial_loss": None,
                    "current_loss": None,
                    "best_loss": None,
                    "checkpoint_loaded": False,
                },
                "transformer": {
                    "initial_loss": None,
                    "current_loss": None,
                    "best_loss": None,
                    "checkpoint_loaded": False,
                },
                "extrema_trainer": {
                    "initial_loss": None,
                    "current_loss": None,
                    "best_loss": None,
                    "checkpoint_loaded": False,
                },
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
            }

            # Initialize DQN Agent
            try:
                from NN.models.dqn_agent import DQNAgent

                # Determine actual state size from BaseDataInput
                try:
                    base_data = self.data_provider.build_base_data_input(self.symbol)
                    if base_data:
                        actual_state_size = len(base_data.get_feature_vector())
                        logger.info(f"Detected actual state size: {actual_state_size}")
                    else:
                        actual_state_size = 7850  # Fallback based on error message
                        logger.warning(
                            f"Could not determine state size, using fallback: {actual_state_size}"
                        )
                except Exception as e:
                    actual_state_size = 7850  # Fallback based on error message
                    logger.warning(
                        f"Error determining state size: {e}, using fallback: {actual_state_size}"
                    )

                action_size = self.config.rl.get("action_space", 3)
                self.rl_agent = DQNAgent(
                    state_shape=actual_state_size, 
                    n_actions=action_size,
                    config=self.config.rl
                )
                self.rl_agent.to(self.device)  # Move DQN agent to the determined device

                # Load best checkpoint and capture initial state (using database metadata or filesystem fallback)
                checkpoint_loaded = False
                if hasattr(self.rl_agent, "load_best_checkpoint"):
                    # Merge conflict resolved in favour of the ModelManager lookup: it only
                    # needs the function-local import below and does not hard-code an
                    # initial loss value.
                    try:
                        # Restore the agent's own state first; this may raise, e.g. when the
                        # stored checkpoint no longer matches the network dimensions.
                        self.rl_agent.load_best_checkpoint()

                        # Fetch the best "dqn" checkpoint entry to populate the bookkeeping.
                        from NN.training.model_manager import load_best_checkpoint

                        result = load_best_checkpoint("dqn")
                        if result:
                            _, metadata = result
                            dqn_state = self.model_states["dqn"]
                            dqn_state["initial_loss"] = getattr(metadata, "initial_loss", None)
                            dqn_state["current_loss"] = metadata.loss
                            dqn_state["best_loss"] = metadata.loss
                            dqn_state["checkpoint_loaded"] = True
                            dqn_state["checkpoint_filename"] = metadata.checkpoint_id
                            checkpoint_loaded = True
                            loss_str = f"{metadata.loss:.4f}" if metadata.loss is not None else "N/A"
                            logger.info(f"DQN checkpoint loaded: {metadata.checkpoint_id} (loss={loss_str})")
                    except Exception as e:
                        # Best effort: any failure here means the agent trains from scratch.
                        logger.warning(f"Error loading DQN checkpoint (likely dimension mismatch): {e}")
                        logger.info("DQN will start fresh due to checkpoint incompatibility")
                        checkpoint_loaded = False

                if not checkpoint_loaded:
                    # New model - no synthetic data, start fresh
                    self.model_states["dqn"]["initial_loss"] = None
                    self.model_states["dqn"]["current_loss"] = None
                    self.model_states["dqn"]["best_loss"] = None
                    self.model_states["dqn"][
                        "checkpoint_filename"
                    ] = "none (fresh start)"
                    logger.info("DQN starting fresh - no checkpoint found")

                logger.info(
                    f"DQN Agent initialized: {actual_state_size} state features, {action_size} actions"
                )
            except ImportError:
                logger.warning("DQN Agent not available")
                self.rl_agent = None

            # Initialize CNN Model directly (no adapter)
            try:
                from NN.models.enhanced_cnn import EnhancedCNN

                # Initialize CNN model directly
                input_shape = 7850  # Unified feature vector size
                n_actions = 3  # BUY, SELL, HOLD
                self.cnn_model = EnhancedCNN(
                    input_shape=input_shape, n_actions=n_actions
                )
                self.cnn_adapter = None  # No adapter needed
                self.cnn_optimizer = optim.Adam(
                    self.cnn_model.parameters(), lr=0.001
                )  # Initialize optimizer for CNN

                # Load the best CNN checkpoint via the ModelManager lookup and record its state.
                # Merge conflict resolved in favour of HEAD; no placeholder loss values are used.
                checkpoint_loaded = False
                try:
                    from NN.training.model_manager import load_best_checkpoint

                    result = load_best_checkpoint("cnn")
                    if result:
                        file_path, metadata = result
                        # Actually load the model weights from the checkpoint
                        try:
                            # TODO(Guideline: initialize required attributes before use) Define self.device (CUDA/CPU) before loading checkpoints.
                            checkpoint_data = torch.load(file_path, map_location=self.device)
                            if "model_state_dict" in checkpoint_data:
                                self.cnn_model.load_state_dict(checkpoint_data["model_state_dict"])
                                logger.info(f"CNN model weights loaded from: {file_path}")
                            elif "state_dict" in checkpoint_data:
                                self.cnn_model.load_state_dict(checkpoint_data["state_dict"])
                                logger.info(f"CNN model weights loaded from: {file_path}")
                            else:
                                # Try loading directly as state dict
                                self.cnn_model.load_state_dict(checkpoint_data)
                                logger.info(f"CNN model weights loaded directly from: {file_path}")

                            # Record what the checkpoint really contains. Metrics that are
                            # absent stay None instead of being replaced by invented numbers,
                            # and an explicit None test keeps a genuine loss of 0.0.
                            cnn_state = self.model_states["cnn"]
                            stored_loss = metadata.loss
                            cnn_state["initial_loss"] = checkpoint_data.get("initial_loss")
                            cnn_state["current_loss"] = (
                                stored_loss if stored_loss is not None else checkpoint_data.get("loss")
                            )
                            cnn_state["best_loss"] = (
                                stored_loss if stored_loss is not None else checkpoint_data.get("best_loss")
                            )
                            cnn_state["checkpoint_loaded"] = True
                            cnn_state["checkpoint_filename"] = metadata.checkpoint_id
                            checkpoint_loaded = True
                            loss_str = f"{stored_loss:.4f}" if stored_loss is not None else "N/A"
                            logger.info(f"CNN checkpoint loaded: {metadata.checkpoint_id} (loss={loss_str})")
                        except Exception as load_error:
                            # The weights were not applied, so the model is effectively fresh;
                            # leave checkpoint_loaded False so the state below reflects that.
                            logger.warning(f"Failed to load CNN model weights: {load_error}")
                except Exception as e:
                    logger.warning(f"Error loading CNN checkpoint: {e}")
                    checkpoint_loaded = False

                if not checkpoint_loaded:
                    # New model - no synthetic data
                    cnn_state = self.model_states["cnn"]
                    cnn_state["initial_loss"] = None
                    cnn_state["current_loss"] = None
                    cnn_state["best_loss"] = None
                    cnn_state["checkpoint_loaded"] = False
                    logger.info("CNN starting fresh - no checkpoint found")

                logger.info("Enhanced CNN model initialized with integrated COB functionality")
                logger.info("  - CNN handles both price patterns AND market microstructure (COB) analysis")
                logger.info("  - Unified model eliminates redundancy and improves context integration")
            except ImportError:
                try:
                    # Fallback used when the enhanced CNN module cannot be imported.
                    from NN.models.standardized_cnn import StandardizedCNN

                    self.cnn_model = StandardizedCNN()
                    self.cnn_adapter = None  # No adapter available
                    self.cnn_model.to(
                        self.device
                    )  # Move basic CNN model to the determined device
                    self.cnn_optimizer = optim.Adam(
                        self.cnn_model.parameters(), lr=0.001
                    )  # Initialize optimizer for basic CNN

                    # Load checkpoint for basic CNN as well
                    if hasattr(self.cnn_model, "load_best_checkpoint"):
                        checkpoint_data = self.cnn_model.load_best_checkpoint()
                        cnn_state = self.model_states["cnn"]
                        if checkpoint_data:
                            # Metrics missing from the checkpoint are reported as None;
                            # the file-level policy forbids placeholder values.
                            cnn_state["initial_loss"] = checkpoint_data.get("initial_loss")
                            cnn_state["current_loss"] = checkpoint_data.get("loss")
                            cnn_state["best_loss"] = checkpoint_data.get("best_loss")
                            cnn_state["checkpoint_loaded"] = True
                            logger.info(
                                f"CNN checkpoint loaded: loss={checkpoint_data.get('loss', 'N/A')}"
                            )
                        else:
                            cnn_state["initial_loss"] = None
                            cnn_state["current_loss"] = None
                            cnn_state["best_loss"] = None
                            logger.info("CNN starting fresh - no checkpoint found")

                    logger.info("Basic CNN model initialized")
                except ImportError:
                    logger.warning("CNN model not available")
                    self.cnn_model = None
                    self.cnn_adapter = None
                    self.cnn_optimizer = (
                        None  # Ensure optimizer is also None if model is not available
                    )

            # Initialize Extrema Trainer
            try:
                from core.extrema_trainer import ExtremaTrainer

                self.extrema_trainer = ExtremaTrainer(
                    data_provider=self.data_provider,
                    symbols=[self.symbol],  # Only primary trading symbol
                )

                # Load checkpoint and capture initial state
                if hasattr(self.extrema_trainer, "load_best_checkpoint"):
                    checkpoint_data = self.extrema_trainer.load_best_checkpoint()
                    extrema_state = self.model_states["extrema_trainer"]
                    if checkpoint_data:
                        # Metrics missing from the checkpoint are reported as None;
                        # the file-level policy forbids placeholder values.
                        extrema_state["initial_loss"] = checkpoint_data.get("initial_loss")
                        extrema_state["current_loss"] = checkpoint_data.get("loss")
                        extrema_state["best_loss"] = checkpoint_data.get("best_loss")
                        extrema_state["checkpoint_loaded"] = True
                        logger.info(
                            f"Extrema trainer checkpoint loaded: loss={checkpoint_data.get('loss', 'N/A')}"
                        )
                    else:
                        extrema_state["initial_loss"] = None
                        extrema_state["current_loss"] = None
                        extrema_state["best_loss"] = None
                        logger.info(
                            "Extrema trainer starting fresh - no checkpoint found"
                        )

                logger.info("Extrema trainer initialized")
            except ImportError:
                logger.warning("Extrema trainer not available")
                self.extrema_trainer = None

            # Initialize COB RL Model - UNIFIED with ModelManager
            # Merge conflict resolved in favour of HEAD: the other side left its outer
            # "try:" without any handler.
            cob_rl_available = False
            try:
                from NN.models.cob_rl_model import COBRLModelInterface
                cob_rl_available = True
            except ImportError as e:
                logger.warning(f"COB RL dependencies not available: {e}")
                cob_rl_available = False

            if cob_rl_available:
                try:
                    # Initialize COB RL model using unified approach
                    self.cob_rl_agent = COBRLModelInterface(
                        model_checkpoint_dir="@checkpoints/cob_rl",
                        device='cuda' if (HAS_TORCH and torch.cuda.is_available()) else 'cpu'
                    )

                    # Add COB RL to model states tracking
                    self.model_states['cob_rl'] = {
                        'initial_loss': None,
                        'current_loss': None,
                        'best_loss': None,
                        'checkpoint_loaded': False
                    }

                    # Load best checkpoint using unified ModelManager
                    checkpoint_loaded = False
                    try:
                        from NN.training.model_manager import load_best_checkpoint
                        result = load_best_checkpoint("cob_rl")
                        if result:
                            _, metadata = result
                            cob_state = self.model_states['cob_rl']
                            # Only one loss value is read from the metadata, so it is used
                            # for all three loss fields.
                            stored_loss = getattr(metadata, 'loss', None)
                            checkpoint_id = getattr(metadata, 'checkpoint_id', 'unknown')
                            cob_state['initial_loss'] = stored_loss
                            cob_state['current_loss'] = stored_loss
                            cob_state['best_loss'] = stored_loss
                            cob_state['checkpoint_loaded'] = True
                            cob_state['checkpoint_filename'] = checkpoint_id
                            checkpoint_loaded = True
                            loss_str = f"{stored_loss:.4f}" if stored_loss is not None else "N/A"
                            logger.info(f"COB RL checkpoint loaded: {checkpoint_id} (loss={loss_str})")
                    except Exception as e:
                        logger.warning(f"Error loading COB RL checkpoint: {e}")

                    if not checkpoint_loaded:
                        # New model - no synthetic data, start fresh
                        self.model_states['cob_rl']['initial_loss'] = None
                        self.model_states['cob_rl']['current_loss'] = None
                        self.model_states['cob_rl']['best_loss'] = None
                        self.model_states['cob_rl']['checkpoint_filename'] = 'none (fresh start)'
                        logger.info("COB RL starting fresh - no checkpoint found")

                    logger.info("COB RL Agent initialized and integrated with unified ModelManager")

                except Exception as e:
                    # Construction failed: fall through to the metadata-only branch below.
                    logger.error(f"Error initializing COB RL: {e}")
                    self.cob_rl_agent = None
                    cob_rl_available = False

            if not cob_rl_available:
                # COB RL not available due to missing dependencies
                # Still try to load checkpoint metadata for display purposes
                logger.info("COB RL dependencies missing - attempting checkpoint metadata load only")

                self.model_states['cob_rl'] = {
                    'initial_loss': None,
                    'current_loss': None,
                    'best_loss': None,
                    'checkpoint_loaded': False,
                    'checkpoint_filename': 'dependencies missing'
                }

                # Try to load checkpoint metadata even without the model
                try:
                    from NN.training.model_manager import load_best_checkpoint
                    result = load_best_checkpoint("cob_rl")
                    if result:
                        _, metadata = result
                        self.model_states['cob_rl']['checkpoint_loaded'] = True
                        self.model_states['cob_rl']['checkpoint_filename'] = getattr(metadata, 'checkpoint_id', 'found')
                        logger.info(f"COB RL checkpoint metadata loaded (model unavailable): {getattr(metadata, 'checkpoint_id', 'unknown')}")
                    else:
                        logger.info("No COB RL checkpoint found")
                except Exception as e:
                    logger.debug(f"Could not load COB RL checkpoint metadata: {e}")

                self.cob_rl_agent = None

<<<<<<< HEAD
            logger.info("COB RL initialization completed")
            logger.info("  - Uses @checkpoints/ directory structure")
            logger.info("  - Follows same load/save/checkpoint flow as other models")
            logger.info("  - Gracefully handles missing dependencies")
            
            # Initialize TRANSFORMER Model
            try:
                from NN.models.advanced_transformer_trading import create_trading_transformer, TradingTransformerConfig
                
                config = TradingTransformerConfig(
                    d_model=256,         # 15M parameters target
                    n_heads=8,           
                    n_layers=4,          
                    seq_len=50,          
                    n_actions=3,
                    use_multi_scale_attention=True,
                    use_market_regime_detection=True,
                    use_uncertainty_estimation=True
                )
                
                self.transformer_model, self.transformer_trainer = create_trading_transformer(config)
                
                # Load best checkpoint
                checkpoint_loaded = False
                try:
                    from NN.training.model_manager import load_best_checkpoint
                    result = load_best_checkpoint("transformer")
                    if result:
                        file_path, metadata = result
                        self.transformer_trainer.load_model(file_path)
                        self.model_states['transformer']['checkpoint_loaded'] = True
                        self.model_states['transformer']['checkpoint_filename'] = metadata.checkpoint_id
                        checkpoint_loaded = True
                        logger.info(f"Transformer checkpoint loaded: {metadata.checkpoint_id}")
                except Exception as e:
                    logger.debug(f"No transformer checkpoint found: {e}")

                if not checkpoint_loaded:
                    self.model_states['transformer']['checkpoint_loaded'] = False
                    self.model_states['transformer']['checkpoint_filename'] = 'none (fresh start)'
                    logger.info("Transformer starting fresh - no checkpoint found")
                
                logger.info("Transformer model initialized")
                
            except ImportError as e:
                logger.warning(f"Transformer model not available: {e}")
                self.transformer_model = None
                self.transformer_trainer = None
            
            # Initialize Decision Fusion Model
            try:
                from core.nn_decision_fusion import NeuralDecisionFusion
                
                # Initialize decision fusion (training_mode parameter only)
                self.decision_model = NeuralDecisionFusion(training_mode=True)
                
                # Load best checkpoint
                checkpoint_loaded = False
                try:
                    from NN.training.model_manager import load_best_checkpoint
                    result = load_best_checkpoint("decision")
                    if result:
                        file_path, metadata = result
                        import torch
                        checkpoint = torch.load(file_path, map_location='cpu')
                        if 'model_state_dict' in checkpoint:
                            self.decision_model.load_state_dict(checkpoint['model_state_dict'])
                        self.model_states['decision']['checkpoint_loaded'] = True
                        self.model_states['decision']['checkpoint_filename'] = metadata.checkpoint_id
                        checkpoint_loaded = True
                        logger.info(f"Decision model checkpoint loaded: {metadata.checkpoint_id}")
                except Exception as e:
                    logger.debug(f"No decision model checkpoint found: {e}")
                
                if not checkpoint_loaded:
                    self.model_states['decision']['checkpoint_loaded'] = False
                    self.model_states['decision']['checkpoint_filename'] = 'none (fresh start)'
                    logger.info("Decision model starting fresh - no checkpoint found")
                
                logger.info("Decision fusion model initialized")
                
            except ImportError as e:
                logger.warning(f"Decision fusion model not available: {e}")
                self.decision_model = None
            
            # Initialize all model states with defaults for non-loaded models
            for model_name in ['decision', 'transformer']:
                if model_name not in self.model_states:
                    self.model_states[model_name] = {
                        'initial_loss': None, 
                        'current_loss': None, 
                        'best_loss': None, 
                        'checkpoint_loaded': False,
                        'checkpoint_filename': 'none (fresh start)'
                    }
=======
            # Initialize Decision model state - no synthetic data
            self.model_states["decision"]["initial_loss"] = None
            self.model_states["decision"]["current_loss"] = None
            self.model_states["decision"]["best_loss"] = None
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b

            # CRITICAL: Register models with the model registry
            logger.info("Registering models with model registry...")
            logger.info(
                f"Model registry before registration: {len(self.model_registry.models)} models"
            )

            # Import model interfaces
            # These are now imported at the top of the file

            # Register RL Agent
            if self.rl_agent:
                try:
                    # Merge conflict resolved in favour of HEAD: the interface wrapper is
                    # built here, but no register_model() call is made from this method.
                    # NOTE(review): confirm that ModelManager really performs the
                    # registration; the other side called self.register_model(..., weight=0.2).
                    rl_interface = RLAgentInterface(self.rl_agent, name="dqn_agent")
                    # RL model registration handled by ModelManager
                    logger.info("RL Agent registered successfully")
                except Exception as e:
                    logger.error(f"Failed to register RL Agent: {e}")

            # Register CNN Model
            if self.cnn_model:
                try:
                    # Merge conflict resolved in favour of HEAD: the interface wrapper is
                    # built here, but no register_model() call is made from this method.
                    # NOTE(review): confirm that ModelManager really performs the
                    # registration; the other side called self.register_model(..., weight=0.25).
                    cnn_interface = CNNModelInterface(self.cnn_model, name="enhanced_cnn")
                    # CNN model registration handled by ModelManager
                    logger.info("CNN Model registered successfully")
                except Exception as e:
                    logger.error(f"Failed to register CNN Model: {e}")

            # Register Extrema Trainer
            if self.extrema_trainer:
                try:

                    class ExtremaTrainerInterface(ModelInterface):
                        def __init__(self, model: ExtremaTrainer, name: str):
                            """Wrap an ExtremaTrainer behind the ModelInterface API.

                            Args:
                                model: Trainer instance that predict() queries.
                                name: Identifier passed on to the ModelInterface constructor.
                            """
                            super().__init__(name)
                            # Stored after the base initialiser runs; predict() reads it.
                            self.model = model

                        def predict(self, data=None):
                            try:
                                # Handle different data types that might be passed to ExtremaTrainer
                                symbol = None

                                if isinstance(data, str):
                                    # Direct symbol string
                                    symbol = data
                                elif isinstance(data, dict):
                                    # Dictionary with symbol information
                                    symbol = data.get("symbol")
                                elif isinstance(data, np.ndarray):
                                    # Numpy array - extract symbol from metadata or use default
                                    # For now, use the first symbol from the model's symbols list
                                    if (
                                        hasattr(self.model, "symbols")
                                        and self.model.symbols
                                    ):
                                        symbol = self.model.symbols[0]
                                    else:
                                        symbol = "ETH/USDT"  # Default fallback
                                else:
                                    # Unknown data type - use default symbol
                                    if (
                                        hasattr(self.model, "symbols")
                                        and self.model.symbols
                                    ):
                                        symbol = self.model.symbols[0]
                                    else:
                                        symbol = "ETH/USDT"  # Default fallback

                                if not symbol:
                                    logger.warning(
                                        f"ExtremaTrainerInterface.predict could not determine symbol from data: {type(data)}"
                                    )
                                    return None

                                features = self.model.get_context_features_for_model(
                                    symbol=symbol
                                )
                                if features is not None and features.size > 0:
                                    # The presence of features indicates a signal. We'll return a generic HOLD
                                    # with a neutral confidence. This can be refined if ExtremaTrainer provides
                                    # more specific BUY/SELL signals directly.
                                    # Provide next-pivot prediction vector capped at 5 min
                                    pred = self.model.predict_next_pivot(symbol=symbol)
                                    if pred:
                                        return {
                                            "action": "HOLD",
                                            "confidence": pred.confidence,
                                            "prediction": {
                                                "target_type": pred.target_type,
                                                "predicted_time": pred.predicted_time,
                                                "predicted_price": pred.predicted_price,
                                                "horizon_seconds": pred.horizon_seconds,
                                            },
                                        }
                                    # Fallback neutral
                                    return {"action": "HOLD", "confidence": 0.5}
                                return None
                            except Exception as e:
                                logger.error(
                                    f"Error in extrema trainer prediction: {e}"
                                )
                                return None

                        # UNUSED FUNCTION - Not called anywhere in codebase
                        def get_memory_usage(self) -> float:
                            return 30.0  # MB

<<<<<<< HEAD
                    extrema_interface = ExtremaTrainerInterface(self.extrema_trainer, name="extrema_trainer")
                    # Extrema model registration handled by ModelManager
                    logger.info("Extrema Trainer registered successfully")
                except Exception as e:
                    logger.error(f"Failed to register Extrema Trainer: {e}")
            
            # COB RL Model registration removed - model was removed for cleanup
            # See COB_MODEL_ARCHITECTURE_DOCUMENTATION.md for recreation details
            logger.info("COB RL model registration skipped - model removed pending COB data quality improvements")
            
            # Register Transformer Model
            if hasattr(self, 'transformer_model') and self.transformer_model:
                try:
                    class TransformerModelInterface(ModelInterface):
                        """Adapter exposing the transformer model (and its trainer) as a ModelInterface."""

                        def __init__(self, model, trainer, name: str):
                            super().__init__(name)
                            self.model = model
                            self.trainer = trainer

                        def predict(self, data):
                            """Forward ``data`` to the wrapped model's ``predict``.

                            Returns None when the model has no ``predict`` attribute
                            or when the call raises (the error is logged).
                            """
                            try:
                                if not hasattr(self.model, 'predict'):
                                    return None
                                return self.model.predict(data)
                            except Exception as exc:
                                logger.error(f"Error in transformer prediction: {exc}")
                                return None

                        # UNUSED FUNCTION - Not called anywhere in codebase
                        def get_memory_usage(self) -> float:
                            """Return a fixed memory estimate in MB."""
                            return 60.0  # MB estimate for transformer

                    transformer_interface = TransformerModelInterface(self.transformer_model, self.transformer_trainer, name="transformer")
                    # Transformer model registration handled by ModelManager
                    logger.info("Transformer Model registered successfully")
                except Exception as e:
                    logger.error(f"Failed to register Transformer Model: {e}")

            # Register Decision Fusion Model
            if hasattr(self, 'decision_model') and self.decision_model:
                try:
                    class DecisionModelInterface(ModelInterface):
=======
                    extrema_interface = ExtremaTrainerInterface(
                        self.extrema_trainer, name="extrema_trainer"
                    )
                    self.register_model(
                        extrema_interface, weight=0.15
                    )  # Lower weight for extrema signals
                    logger.info("Extrema Trainer registered successfully")
                except Exception as e:
                    logger.error(f"Failed to register Extrema Trainer: {e}")

            # Register COB RL Agent - Create a proper interface wrapper
            if self.cob_rl_agent:
                try:

                    class COBRLModelInterfaceWrapper(ModelInterface):
                        """Adapter that feeds the COB RL agent a fixed-width feature vector."""

                        def __init__(self, model, name: str):
                            super().__init__(name)
                            self.model = model

                        def predict(self, data):
                            """Run the wrapped agent's ``predict`` on ``data``.

                            Numpy arrays are flattened and then zero-padded or
                            truncated to exactly 2000 entries before the call; any
                            other input is passed through unchanged. Returns None
                            when the agent has no ``predict`` attribute or when the
                            call raises (the error is logged).
                            """
                            try:
                                if not hasattr(self.model, "predict"):
                                    return None

                                if not isinstance(data, np.ndarray):
                                    return self.model.predict(data)

                                # COB RL expects 2000 features
                                flat = data.flatten()
                                count = len(flat)
                                if count < 2000:
                                    widened = np.zeros(2000)
                                    widened[:count] = flat
                                    flat = widened
                                elif count > 2000:
                                    flat = flat[:2000]
                                return self.model.predict(flat)
                            except Exception as exc:
                                logger.error(f"Error in COB RL prediction: {exc}")
                                return None

                        def get_memory_usage(self) -> float:
                            """Return a fixed memory estimate in MB."""
                            return 50.0  # MB

                    cob_rl_interface = COBRLModelInterfaceWrapper(
                        self.cob_rl_agent, name="cob_rl_model"
                    )
                    self.register_model(cob_rl_interface, weight=0.4)
                    logger.info("COB RL Agent registered successfully")
                except Exception as e:
                    logger.error(f"Failed to register COB RL Agent: {e}")

            # Register Decision Fusion Model
            if hasattr(self, 'decision_fusion_network') and self.decision_fusion_network:
                try:
                    class DecisionFusionModelInterface(ModelInterface):
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
                        def __init__(self, model, name: str):
                            super().__init__(name)
                            self.model = model

                        def predict(self, data):
                            try:
<<<<<<< HEAD
                                if hasattr(self.model, 'predict'):
                                    return self.model.predict(data)
                                return None
                            except Exception as e:
                                logger.error(f"Error in decision model prediction: {e}")
                                return None

                        # UNUSED FUNCTION - Not called anywhere in codebase
                        def get_memory_usage(self) -> float:
                            return 40.0  # MB estimate for decision model

                    decision_interface = DecisionModelInterface(self.decision_model, name="decision")
                    # Decision model registration handled by ModelManager
=======
                                if hasattr(self.model, "forward"):
                                    # Convert data to tensor if needed
                                    if isinstance(data, np.ndarray):
                                        data = torch.from_numpy(data).float()
                                    elif not isinstance(data, torch.Tensor):
                                        logger.warning(f"Decision fusion received unexpected data type: {type(data)}")
                                        return None
                                    
                                    # Ensure data has correct shape
                                    if data.dim() == 1:
                                        data = data.unsqueeze(0)  # Add batch dimension
                                    
                                    with torch.no_grad():
                                        self.model.eval()
                                        output = self.model(data)
                                        probabilities = output.squeeze().cpu().numpy()
                                        
                                        # Convert to action prediction
                                        action_idx = np.argmax(probabilities)
                                        actions = ["BUY", "SELL", "HOLD"]
                                        action = actions[action_idx]
                                        confidence = float(probabilities[action_idx])
                                        
                                        return {
                                            "action": action,
                                            "confidence": confidence,
                                            "probabilities": {
                                                "BUY": float(probabilities[0]),
                                                "SELL": float(probabilities[1]),
                                                "HOLD": float(probabilities[2])
                                            }
                                        }
                                return None
                            except Exception as e:
                                logger.error(f"Error in Decision Fusion prediction: {e}")
                                return None

                        def get_memory_usage(self) -> float:
                            return 25.0  # MB

                    decision_fusion_interface = DecisionFusionModelInterface(
                        self.decision_fusion_network, name="decision_fusion"
                    )
                    self.register_model(decision_fusion_interface, weight=0.3)
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
                    logger.info("Decision Fusion Model registered successfully")
                except Exception as e:
                    logger.error(f"Failed to register Decision Fusion Model: {e}")

<<<<<<< HEAD
            # Model weight normalization handled by ModelManager
            # Model weights now handled by ModelManager
            logger.info("Model management delegated to unified ModelManager")
            logger.info("COB_RL model removed - cleaner architecture pending COB data quality fixes")
=======
            # Normalize weights after all registrations
            self._normalize_weights()
            logger.info(f"Current model weights: {self.model_weights}")
            logger.info(
                f"Model registry after registration: {len(self.model_registry.models)} models"
            )
            logger.info(f"Registered models: {list(self.model_registry.models.keys())}")
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b

        except Exception as e:
            logger.error(f"Error initializing ML models: {e}")

<<<<<<< HEAD
    # UNUSED FUNCTION - Not called anywhere in codebase
    def update_model_loss(self, model_name: str, current_loss: float, best_loss: float = None):
=======
    def _calculate_cnn_price_direction_loss(
        self,
        price_direction_pred: torch.Tensor,
        rewards: torch.Tensor,
        actions: torch.Tensor,
        target_vector: Optional[Dict[str, float]] = None,
    ) -> Optional[torch.Tensor]:
        """
        Calculate price direction loss for CNN model.

        If target_vector is provided, perform supervised regression towards the
        explicit direction/confidence. Otherwise, derive weak targets from
        rewards and actions.

        Args:
            price_direction_pred: [batch, 2] = [direction, confidence]
            rewards: [batch]
            actions: [batch]
            target_vector: Optional dict {'direction': float, 'confidence': float}

        Returns:
            Loss tensor or None.
        """
        try:
            if price_direction_pred.size(1) != 2:
                return None

            batch_size = price_direction_pred.size(0)
            direction_pred = price_direction_pred[:, 0]
            confidence_pred = price_direction_pred[:, 1]

            # Supervised targets from explicit vector if available
            if target_vector and isinstance(target_vector, dict):
                try:
                    t_dir = float(target_vector.get("direction", 0.0))
                    t_conf = float(target_vector.get("confidence", 0.0))
                    direction_targets = torch.full(
                        (batch_size,), t_dir, device=price_direction_pred.device, dtype=direction_pred.dtype
                    )
                    confidence_targets = torch.full(
                        (batch_size,), t_conf, device=price_direction_pred.device, dtype=confidence_pred.dtype
                    )
                    dir_loss = nn.MSELoss()(direction_pred, direction_targets)
                    conf_loss = nn.MSELoss()(confidence_pred, confidence_targets)
                    return dir_loss + 0.3 * conf_loss
                except Exception:
                    # Fall back to weak supervision below
                    pass

            # Weak supervision from rewards/actions
            with torch.no_grad():
                direction_targets = torch.zeros(batch_size, device=price_direction_pred.device)
                for i in range(batch_size):
                    if rewards[i] > 0.01:
                        if actions[i] == 0:  # BUY
                            direction_targets[i] = 1.0
                        elif actions[i] == 1:  # SELL
                            direction_targets[i] = -1.0
                confidence_targets = torch.abs(rewards).clamp(0, 1)

            dir_loss = nn.MSELoss()(direction_pred, direction_targets)
            conf_loss = nn.MSELoss()(confidence_pred, confidence_targets)
            return dir_loss + 0.3 * conf_loss

        except Exception as e:
            logger.debug(f"Error calculating CNN price direction loss: {e}")
            return None

    def _calculate_cnn_extrema_loss(
        self, extrema_pred: torch.Tensor, rewards: torch.Tensor, actions: torch.Tensor
    ) -> torch.Tensor:
        """
        Calculate extrema loss for CNN model

        Args:
            extrema_pred: Extrema predictions
            rewards: Tensor containing rewards
            actions: Tensor containing actions

        Returns:
            Extrema loss tensor
        """
        try:
            batch_size = extrema_pred.size(0)

            # Create targets based on reward patterns
            with torch.no_grad():
                extrema_targets = (
                    torch.ones(batch_size, dtype=torch.long, device=extrema_pred.device)
                    * 2
                )  # Default to "neither"

                for i in range(batch_size):
                    # High positive reward suggests we're at a good entry point
                    if rewards[i] > 0.05:
                        if actions[i] == 0:  # BUY action
                            extrema_targets[i] = 0  # Bottom
                        elif actions[i] == 1:  # SELL action
                            extrema_targets[i] = 1  # Top

            # Calculate cross-entropy loss
            if extrema_pred.size(1) >= 3:
                extrema_loss = nn.CrossEntropyLoss()(
                    extrema_pred[:, :3], extrema_targets
                )
            else:
                extrema_loss = nn.CrossEntropyLoss()(extrema_pred, extrema_targets)

            return extrema_loss

        except Exception as e:
            logger.debug(f"Error calculating CNN extrema loss: {e}")
            return None

    def update_model_loss(
        self, model_name: str, current_loss: float, best_loss: Optional[float] = None
    ):
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
        """Update model loss and potentially best loss"""
        if model_name in self.model_states:
            self.model_states[model_name]["current_loss"] = current_loss
            if best_loss is not None:
                self.model_states[model_name]["best_loss"] = best_loss
            elif (
                self.model_states[model_name]["best_loss"] is None
                or current_loss < self.model_states[model_name]["best_loss"]
            ):
                self.model_states[model_name]["best_loss"] = current_loss
            logger.debug(
                f"Updated {model_name} loss: current={current_loss:.4f}, best={self.model_states[model_name]['best_loss']:.4f}"
            )

        # Also update model statistics
        self._update_model_statistics(model_name, loss=current_loss)

    def get_model_training_stats(self) -> Dict[str, Dict[str, Any]]:
        """Summarise every tracked model's training state for the dashboard.

        Returns:
            One entry per key of ``self.model_states`` holding the load status
            ("LOADED"/"FRESH"), an estimated parameter count, the current,
            initial and best loss, the percentage improvement from initial to
            current loss (0.0 when it cannot be computed) and the raw
            ``checkpoint_loaded`` flag.
        """
        # Estimated parameter counts; models not listed here report "1.0M".
        estimated_params = {
            "cnn": "50.0M",
            "dqn": "5.0M",
            "cob_rl": "3.0M",
            "decision": "2.0M",
            "extrema_trainer": "1.0M",
        }

        report = {}
        for name, info in self.model_states.items():
            start_loss = info["initial_loss"]

            # Improvement is only defined for a known, positive starting loss.
            gain_pct = 0.0
            if start_loss is not None and info["current_loss"] is not None:
                if start_loss > 0:
                    gain_pct = ((start_loss - info["current_loss"]) / start_loss) * 100

            restored = info["checkpoint_loaded"]
            report[name] = {
                "status": "LOADED" if restored else "FRESH",
                "param_count": estimated_params.get(name, "1.0M"),
                "current_loss": info["current_loss"],
                "initial_loss": start_loss,
                "best_loss": info["best_loss"],
                "improvement_pct": gain_pct,
                "checkpoint_loaded": restored,
            }

        return report
    def clear_session_data(self):
        """Reset per-session runtime state so the orchestrator can start fresh.

        Decision and signal bookkeeping, prediction trackers, position
        tracking, training queues and fusion buffers are emptied; open
        positions are closed first. ``self.model_states`` is not touched.
        Any exception is logged rather than raised.
        """
        try:
            # Decision and signal bookkeeping
            self.recent_decisions = {}
            self.last_decision_time = {}
            self.last_signal_time = {}
            self.last_confirmed_signal = {}
            self.signal_accumulator = {self.symbol: []}

            # Empty each per-symbol prediction tracker in place
            for tracker_name in (
                "recent_dqn_predictions",
                "recent_cnn_predictions",
                "prediction_accuracy_history",
            ):
                tracker = getattr(self, tracker_name)
                for key in tracker:
                    tracker[key].clear()

            # Positions must be closed while their tracking data still exists
            self._close_all_positions()
            self.current_positions = {}
            self.position_status = {}

            # Training buffers are dropped; model states are kept
            self.sensitivity_learning_queue = []
            self.perfect_move_buffer = []

            # Mark every stored inference as not yet evaluated
            for record in self.last_inference.values():
                if record:
                    record["outcome_evaluated"] = False

            # Fusion training data
            self.fusion_training_data = []
            self.last_fusion_inputs = {}

            # Give decision callbacks that support it a chance to reset too
            for listener in self.decision_callbacks:
                if hasattr(listener, "clear_session"):
                    listener.clear_session()

            for message in (
                "Orchestrator session data cleared",
                "🧠 Model states preserved for continued training",
                "📊 Prediction history cleared",
                "💼 Position tracking reset",
            ):
                logger.info(message)

        except Exception as exc:
            logger.error(f"Error clearing orchestrator session data: {exc}")

    def sync_model_states_with_dashboard(
        self, dashboard_stats: Optional[Dict[str, Dict[str, float]]] = None
    ):
        """Copy loss figures reported by the dashboard into ``self.model_states``.

        Earlier revisions wrote hard-coded loss numbers here. That conflicts
        with this module's no-placeholder-data policy and pinned the CNN's
        ``best_loss`` at 0.0, so the figures must now be supplied by the
        caller. Called without them, the method changes nothing.

        Args:
            dashboard_stats: Mapping of model name to a dict carrying
                ``current_loss`` and optionally ``initial_loss`` and
                ``improvement_pct``. Models that are not present in
                ``self.model_states`` are ignored.
        """
        if not dashboard_stats:
            logger.warning(
                "sync_model_states_with_dashboard called without dashboard stats; "
                "model states left unchanged"
            )
            return

        for model_name, stats in dashboard_stats.items():
            state = self.model_states.get(model_name)
            if state is None:
                continue

            current_loss = stats.get("current_loss")
            if current_loss is None:
                logger.warning(
                    f"Dashboard stats for {model_name} carry no current_loss; skipping"
                )
                continue

            state["current_loss"] = current_loss
            if stats.get("initial_loss") is not None:
                state["initial_loss"] = stats["initial_loss"]

            # Track the lowest loss seen so far
            best_loss = state.get("best_loss")
            if best_loss is None or current_loss < best_loss:
                state["best_loss"] = current_loss

            improvement = stats.get("improvement_pct")
            improvement_text = (
                f"{improvement:.1f}%" if improvement is not None else "n/a"
            )
            logger.info(
                f"Synced {model_name} model state: loss={current_loss:.4f}, improvement={improvement_text}"
            )

    # UNUSED FUNCTION - Not called anywhere in codebase
    def checkpoint_saved(self, model_name: str, checkpoint_data: Dict[str, Any]):
        """Record that a checkpoint was written for ``model_name``.

        Does nothing for models missing from ``self.model_states``. Otherwise
        flags the model as checkpoint-backed, stores the checkpoint id, and
        lowers ``best_loss`` when the checkpoint reports a smaller loss.

        Args:
            model_name: Key into ``self.model_states``.
            checkpoint_data: Checkpoint metadata; the "checkpoint_id" and
                "loss" keys are read, both optional.
        """
        if model_name not in self.model_states:
            return

        entry = self.model_states[model_name]
        entry["checkpoint_loaded"] = True
        checkpoint_id = checkpoint_data.get("checkpoint_id")
        entry["checkpoint_filename"] = checkpoint_id
        logger.info(f"Checkpoint saved for {model_name}: {checkpoint_id}")

        # Update best loss if the saved checkpoint represents a new best
        saved_loss = checkpoint_data.get("loss")
        if saved_loss is None:
            return

        previous_best = entry["best_loss"]
        if previous_best is None or saved_loss < previous_best:
            entry["best_loss"] = saved_loss
            logger.info(f"New best loss for {model_name}: {saved_loss:.4f}")

    # UNUSED FUNCTION - Not called anywhere in codebase
    def get_recent_predictions(self, limit: int = 10) -> List[Dict[str, Any]]:
        """Get recent predictions from all models for data streaming.

        Entries come from ``self.prediction_history`` (when that attribute
        exists) and from any ``last_prediction`` stored in
        ``self.model_states``.

        Args:
            limit: Maximum number of entries to return, and the maximum taken
                from each symbol's history. Non-positive values yield ``[]``.

        Returns:
            Up to ``limit`` prediction dicts, newest first. Returns ``[]`` if
            collecting them fails; the error is logged at debug level.
        """
        if limit <= 0:
            return []

        def _sort_key(entry: Dict[str, Any]) -> str:
            # Stored timestamps may be datetime objects while the defaults are
            # ISO strings. Comparing the two raises TypeError, which used to
            # discard every prediction, so order by a common text form.
            stamp = entry['timestamp']
            return stamp.isoformat() if isinstance(stamp, datetime) else str(stamp)

        try:
            predictions = []
            now_iso = datetime.now().isoformat()

            # Collect predictions from prediction history if available
            history = getattr(self, 'prediction_history', None) or {}
            for symbol, preds in history.items():
                for pred in list(preds)[-limit:]:
                    predictions.append({
                        'timestamp': pred.get('timestamp', now_iso),
                        'model_name': pred.get('model_name', 'unknown'),
                        'symbol': symbol,
                        'prediction': pred.get('prediction'),
                        'confidence': pred.get('confidence', 0),
                        'action': pred.get('action')
                    })

            # Also collect from current model states
            for model_name, state in self.model_states.items():
                if 'last_prediction' in state:
                    predictions.append({
                        'timestamp': now_iso,
                        'model_name': model_name,
                        'symbol': 'ETH/USDT',  # Default symbol
                        'prediction': state['last_prediction'],
                        'confidence': state.get('last_confidence', 0),
                        'action': state.get('last_action')
                    })

            # Sort by timestamp and return most recent
            predictions.sort(key=_sort_key, reverse=True)
            return predictions[:limit]

        except Exception as e:
            logger.debug(f"Error getting recent predictions: {e}")
            return []

    # UNUSED FUNCTION - Not called anywhere in codebase
    def _save_orchestrator_state(self):
        """Save the current state of the orchestrator, including model states."""
        state = {
<<<<<<< HEAD
            'model_states': {k: {sk: sv for sk, sv in v.items() if sk != 'checkpoint_loaded'} # Exclude non-serializable
                             for k, v in self.model_states.items()},
            # 'model_weights': self.model_weights,  # Now handled by ModelManager
            'last_trained_symbols': list(self.last_trained_symbols.keys())
=======
            "model_states": {
                k: {
                    sk: sv for sk, sv in v.items() if sk != "checkpoint_loaded"
                }  # Exclude non-serializable
                for k, v in self.model_states.items()
            },
            "model_weights": self.model_weights,
            "last_trained_symbols": list(self.last_trained_symbols.keys()),
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
        }
        save_path = os.path.join(
            self.config.paths.get("checkpoint_dir", "./models/saved"),
            "orchestrator_state.json",
        )
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        with open(save_path, "w") as f:
            json.dump(state, f, indent=4)
        logger.info(f"Orchestrator state saved to {save_path}")

    # UNUSED FUNCTION - Not called anywhere in codebase
    def _load_orchestrator_state(self):
        """Load the orchestrator state from a saved file."""
        save_path = os.path.join(
            self.config.paths.get("checkpoint_dir", "./models/saved"),
            "orchestrator_state.json",
        )
        if os.path.exists(save_path):
            try:
                with open(save_path, "r") as f:
                    state = json.load(f)
<<<<<<< HEAD
                self.model_states.update(state.get('model_states', {}))
                # self.model_weights = state.get('model_weights', {})  # Now handled by ModelManager
                self.last_trained_symbols = {s: datetime.now() for s in state.get('last_trained_symbols', [])} # Restore with current time
=======
                self.model_states.update(state.get("model_states", {}))
                self.model_weights = state.get("model_weights", self.model_weights)
                self.last_trained_symbols = {
                    s: datetime.now() for s in state.get("last_trained_symbols", [])
                }  # Restore with current time
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
                logger.info(f"Orchestrator state loaded from {save_path}")
            except Exception as e:
                logger.warning(
                    f"Error loading orchestrator state from {save_path}: {e}"
                )
        else:
            logger.info("No saved orchestrator state found. Starting fresh.")

    def _load_ui_state(self):
        """Load UI state from file"""
        try:
            if os.path.exists(self.ui_state_file):
                with open(self.ui_state_file, "r") as f:
                    ui_state = json.load(f)
                    if "model_toggle_states" in ui_state:
                        # Normalize and sanitize loaded toggle states
                        loaded = {}
                        for raw_name, raw_state in ui_state["model_toggle_states"].items():
                            key = self._normalize_model_name(raw_name)
                            state = {
                                "inference_enabled": bool(raw_state.get("inference_enabled", True)) if isinstance(raw_state.get("inference_enabled", True), bool) else True,
                                "training_enabled": bool(raw_state.get("training_enabled", True)) if isinstance(raw_state.get("training_enabled", True), bool) else True,
                                "routing_enabled": bool(raw_state.get("routing_enabled", True)) if isinstance(raw_state.get("routing_enabled", True), bool) else True,
                            }
                            loaded[key] = state
                        # Merge into current defaults
                        for k, v in loaded.items():
                            if k not in self.model_toggle_states:
                                self.model_toggle_states[k] = v
                            else:
                                self.model_toggle_states[k].update(v)
                    logger.info(f"UI state loaded from {self.ui_state_file}")
        except Exception as e:
            logger.error(f"Error loading UI state: {e}")

    def _save_ui_state(self):
        """Persist the model toggle states to ``self.ui_state_file`` as JSON.

        The payload contains ``model_toggle_states`` and an ISO-8601
        timestamp. After a successful write, a session snapshot is appended
        via ``_append_session_snapshot``. Failures are logged and swallowed so
        that toggling a model never raises into the caller.
        """
        try:
            # os.makedirs("") raises FileNotFoundError, so only create the
            # parent directory when the configured path actually has one.
            state_dir = os.path.dirname(self.ui_state_file)
            if state_dir:
                os.makedirs(state_dir, exist_ok=True)
            ui_state = {
                "model_toggle_states": self.model_toggle_states,
                "timestamp": datetime.now().isoformat()
            }
            with open(self.ui_state_file, "w") as f:
                json.dump(ui_state, f, indent=4)
            logger.debug(f"UI state saved to {self.ui_state_file}")
            # Also append a session snapshot for persistence across restarts
            self._append_session_snapshot()
        except Exception as e:
            logger.error(f"Error saving UI state: {e}")

    def _append_session_snapshot(self):
        """Append current session metrics to persistent JSON until cleared manually.

        Reads ``data/session_state.json`` when it exists, appends one snapshot
        (executor balance, summed PnL, one entry per item of
        ``trading_executor.trade_history`` and model availability flags) and
        writes the file back. Metric collection is best-effort: a trade or
        model section that cannot be read is skipped. Errors are logged and
        never raised.
        """

        def _json_default(value):
            """Serialize datetime-like and numpy-scalar-like values for json.dump."""
            if hasattr(value, "isoformat"):
                return value.isoformat()
            if hasattr(value, "item"):
                return value.item()
            raise TypeError(
                f"Object of type {type(value).__name__} is not JSON serializable"
            )

        try:
            session_file = os.path.join("data", "session_state.json")
            os.makedirs(os.path.dirname(session_file), exist_ok=True)

            # Load existing history; unreadable content or anything that is
            # not a JSON object is treated as "no history yet".
            existing = {}
            if os.path.exists(session_file):
                try:
                    with open(session_file, "r", encoding="utf-8") as f:
                        loaded = json.load(f)
                    if isinstance(loaded, dict):
                        existing = loaded
                except (OSError, ValueError) as load_error:
                    logger.warning(
                        f"Could not read {session_file}, starting a new session history: {load_error}"
                    )

            # Collect metrics
            balance = 0.0
            pnl_total = 0.0
            closed_trades = []
            try:
                executor = getattr(self, "trading_executor", None)
                if executor:
                    balance = float(getattr(executor, "account_balance", 0.0) or 0.0)
                    for t in getattr(executor, "trade_history", None) or []:
                        try:
                            closed_trades.append({
                                "symbol": t.symbol,
                                "side": t.side,
                                "qty": t.quantity,
                                "entry": t.entry_price,
                                "exit": t.exit_price,
                                "pnl": t.pnl,
                                "timestamp": getattr(t, "timestamp", None)
                            })
                            pnl_total += float(t.pnl or 0.0)
                        except Exception as trade_error:
                            logger.debug(f"Skipping trade in session snapshot: {trade_error}")
                            continue
            except Exception as metrics_error:
                logger.debug(f"Session metrics unavailable: {metrics_error}")

            # Models and performance (best-effort)
            models = {}
            try:
                rl_agent = getattr(self, "rl_agent", None)
                losses = getattr(rl_agent, "losses", None)
                models = {
                    "dqn": {
                        "available": bool(rl_agent),
                        "last_losses": list(losses[-10:]) if losses else []
                    },
                    "cnn": {
                        "available": bool(getattr(self, "cnn_model", None))
                    },
                    "cob_rl": {
                        "available": bool(getattr(self, "cob_rl_agent", None))
                    },
                    "decision_fusion": {
                        "available": bool(getattr(self, "decision_model", None))
                    }
                }
            except Exception as models_error:
                logger.debug(f"Model status unavailable for session snapshot: {models_error}")

            snapshot = {
                "timestamp": datetime.now().isoformat(),
                "balance": balance,
                "session_pnl": pnl_total,
                "closed_trades": closed_trades,
                "models": models
            }

            snapshots = existing.get("snapshots")
            if not isinstance(snapshots, list):
                snapshots = []
                existing["snapshots"] = snapshots
            snapshots.append(snapshot)

            # Write to a temporary file and swap it in, so a serialization
            # failure cannot truncate the history that is already on disk.
            tmp_file = session_file + ".tmp"
            try:
                with open(tmp_file, "w", encoding="utf-8") as f:
                    json.dump(existing, f, indent=2, default=_json_default)
                os.replace(tmp_file, session_file)
            finally:
                if os.path.exists(tmp_file):
                    os.remove(tmp_file)
        except Exception as e:
            logger.error(f"Error appending session snapshot: {e}")

    def get_model_toggle_state(self, model_name: str) -> Dict[str, bool]:
        """Return the stored toggle dict for a model, or an all-enabled default.

        The name is normalized before lookup. Unknown models are not added to
        ``self.model_toggle_states``.
        """
        all_enabled = {
            "inference_enabled": True,
            "training_enabled": True,
            "routing_enabled": True,
        }
        normalized = self._normalize_model_name(model_name)
        return self.model_toggle_states.get(normalized, all_enabled)

    def set_model_toggle_state(self, model_name: str, inference_enabled: bool = None, training_enabled: bool = None, routing_enabled: bool = None):
        """Update the toggle flags of any model, creating its entry on first use.

        Flags passed as None are left untouched. The state is persisted via
        ``_save_ui_state`` and listeners are notified afterwards.
        """
        key = self._normalize_model_name(model_name)

        # First time this model is seen: start with everything enabled
        if key not in self.model_toggle_states:
            self.model_toggle_states[key] = {"inference_enabled": True, "training_enabled": True, "routing_enabled": True}
            logger.info(f"Initialized toggle state for new model: {key}")

        state = self.model_toggle_states[key]
        requested = (
            ("inference_enabled", inference_enabled),
            ("training_enabled", training_enabled),
            ("routing_enabled", routing_enabled),
        )
        for flag_name, flag_value in requested:
            if flag_value is not None:
                state[flag_name] = flag_value

        # Persist, then report and broadcast the resulting state
        self._save_ui_state()
        logger.info(f"Model {key} toggle state updated: inference={state['inference_enabled']}, training={state['training_enabled']}, routing={state.get('routing_enabled', True)}")
        self._notify_model_toggle_change(key, state)
    
    def _notify_model_toggle_change(self, model_name: str, toggle_state: Dict[str, bool]):
        """Hook invoked after a model's toggle state changed.

        Currently this only emits a debug log line; it is the extension point
        for notifying other components.
        """
        try:
            logger.debug(f"Model toggle change notification: {model_name} -> {toggle_state}")
        except Exception as notify_error:
            logger.debug(f"Error notifying model toggle change: {notify_error}")
    
    def register_model_dynamically(self, model_name: str, model_interface):
        """Register a new model dynamically and set up its toggle state.

        Args:
            model_name: Name under which the toggle state is tracked. It is
                normalized the same way the toggle accessors normalize names.
            model_interface: Object handed to ``self.model_registry.register_model``.

        Returns:
            True when the registry accepted the model, False when it refused
            it or an error occurred (errors are logged).
        """
        try:
            if not self.model_registry.register_model(model_interface):
                return False

            # Use the normalized key so the toggle getters and setters find
            # this entry.
            key = self._normalize_model_name(model_name)
            if key not in self.model_toggle_states:
                self.model_toggle_states[key] = {
                    "inference_enabled": True,
                    "training_enabled": True,
                    "routing_enabled": True,
                }
                self._save_ui_state()

            # A pre-existing toggle entry (e.g. restored from the UI state
            # file) must not turn a successful registration into a failure.
            logger.info(f"Registered new model dynamically: {model_name}")
            return True

        except Exception as e:
            logger.error(f"Error registering model {model_name} dynamically: {e}")
            return False
    
    def get_all_registered_models(self):
        """Return registry models merged with toggle-only placeholder entries.

        Models known to ``self.model_registry`` come first; every toggle-state
        key missing from the registry is added as a ``toggle_only`` record.
        Returns an empty dict if anything goes wrong (the error is logged).
        """
        try:
            combined = {}

            registry = getattr(self, "model_registry", None)
            if registry:
                combined.update(registry.get_all_models())

            # Toggle states may exist for models the registry does not know
            for toggle_name in self.model_toggle_states:
                if toggle_name in combined:
                    continue
                combined[toggle_name] = {
                    'name': toggle_name,
                    'type': 'toggle_only',
                    'registered': False
                }

            return combined

        except Exception as lookup_error:
            logger.error(f"Error getting all registered models: {lookup_error}")
            return {}

    def is_model_inference_enabled(self, model_name: str) -> bool:
        """Return the model's ``inference_enabled`` flag (True when unknown)."""
        state = self.model_toggle_states.get(self._normalize_model_name(model_name), {})
        return state.get("inference_enabled", True)

    def is_model_training_enabled(self, model_name: str) -> bool:
        """Return the model's ``training_enabled`` flag (True when unknown)."""
        state = self.model_toggle_states.get(self._normalize_model_name(model_name), {})
        return state.get("training_enabled", True)

    def is_model_routing_enabled(self, model_name: str) -> bool:
        """Return whether the model's output is routed into decision making.

        Unknown models, and entries without the flag, count as enabled.
        """
        state = self.model_toggle_states.get(self._normalize_model_name(model_name), {})
        return state.get("routing_enabled", True)

    def set_model_routing_state(self, model_name: str, routing_enabled: bool):
        """Change only the routing flag of a model via ``set_model_toggle_state``."""
        normalized = self._normalize_model_name(model_name)
        self.set_model_toggle_state(normalized, routing_enabled=routing_enabled)

    def disable_decision_fusion_temporarily(self, reason: str = "overconfidence detected"):
        """Switch off inference and training of the decision fusion model.

        Args:
            reason: Text included in the warning log entry.
        """
        fusion_off = {"inference_enabled": False, "training_enabled": False}
        logger.warning(f"Disabling decision fusion model: {reason}")
        self.set_model_toggle_state("decision_fusion", **fusion_off)
        logger.info("Decision fusion model disabled. Will use programmatic decision combination.")

    def enable_decision_fusion(self):
        """Turn decision fusion inference and training back on.

        Also resets ``decision_fusion_overconfidence_count`` to zero.
        """
        fusion_on = {"inference_enabled": True, "training_enabled": True}
        logger.info("Re-enabling decision fusion model")
        self.set_model_toggle_state("decision_fusion", **fusion_on)
        # Start counting overconfidence events from scratch
        self.decision_fusion_overconfidence_count = 0

    def get_decision_fusion_status(self) -> Dict[str, Any]:
        """Summarize the decision fusion configuration and toggle flags as a dict."""
        fusion_name = "decision_fusion"
        return dict(
            enabled=self.decision_fusion_enabled,
            mode=self.decision_fusion_mode,
            inference_enabled=self.is_model_inference_enabled(fusion_name),
            training_enabled=self.is_model_training_enabled(fusion_name),
            network_available=self.decision_fusion_network is not None,
            overconfidence_count=self.decision_fusion_overconfidence_count,
            max_overconfidence_threshold=self.max_overconfidence_threshold,
        )

    async def start_continuous_trading(self, symbols: Optional[List[str]] = None):
        """Start the continuous trading loop, using a decision model and trading executor.

        Sets ``self.running``, makes sure exactly one background
        ``_trading_decision_loop`` task is scheduled, and runs one initial
        decision per requested symbol.

        Args:
            symbols: Symbols that get an initial decision. Defaults to the
                primary symbol only.
        """
        if symbols is None:
            symbols = [self.symbol]  # Only trade the primary symbol

        self.running = True

        # Schedule the decision loop once. Starting it separately for both
        # task attributes would run two loops that each place decisions, and
        # repeated calls must not stack additional loops either.
        loop_task = getattr(self, "trade_loop_task", None)
        if loop_task is None or loop_task.done():
            loop_task = asyncio.create_task(self._trading_decision_loop())
            self.trade_loop_task = loop_task
        if not self.realtime_processing_task:
            self.realtime_processing_task = loop_task

        logger.info(f"Starting continuous trading for symbols: {symbols}")

        # Initial decision making to kickstart the process
        for symbol in symbols:
            await self.make_trading_decision(symbol)
            await asyncio.sleep(0.5)  # Small delay between initial decisions

        logger.info("Continuous trading loop initiated.")

<<<<<<< HEAD
    # UNUSED FUNCTION - Not called anywhere in codebase
=======
    async def _trading_decision_loop(self):
        """Repeatedly make trading decisions for the primary symbol.

        Runs while ``self.running`` is truthy. After each decision it pauses
        one second plus ``self.decision_frequency`` seconds; after an error it
        logs and waits five seconds before the next attempt.
        """
        logger.info("Trading decision loop started")
        while True:
            if not self.running:
                break
            try:
                # Decisions are made for the primary trading symbol only
                await self.make_trading_decision(self.symbol)
                await asyncio.sleep(1)

                await asyncio.sleep(self.decision_frequency)
            except Exception as loop_error:
                logger.error(f"Error in trading decision loop: {loop_error}")
                await asyncio.sleep(5)  # Back off before the next attempt

    def set_dashboard(self, dashboard):
        """Store the dashboard object that later receives orchestrator callbacks."""
        self.dashboard = dashboard
        logger.info("Dashboard reference set in orchestrator")

    def capture_cnn_prediction(
        self,
        symbol: str,
        direction: int,
        confidence: float,
        current_price: float,
        predicted_price: float,
    ):
        """Record a CNN prediction for dashboard visualization.

        The record is timestamped with the current time and appended to
        ``self.recent_cnn_predictions[symbol]``. Failures are logged at debug
        level and otherwise ignored.
        """
        try:
            record = dict(
                timestamp=datetime.now(),
                direction=direction,
                confidence=confidence,
                current_price=current_price,
                predicted_price=predicted_price,
            )
            self.recent_cnn_predictions[symbol].append(record)
            logger.debug(
                f"CNN prediction captured for {symbol}: {direction} with confidence {confidence:.3f}"
            )
        except Exception as capture_error:
            logger.debug(f"Error capturing CNN prediction: {capture_error}")

    def capture_dqn_prediction(
        self,
        symbol: str,
        action: int,
        confidence: float,
        current_price: float,
        q_values: List[float],
    ):
        """Record a DQN prediction for dashboard visualization.

        The record is timestamped with the current time and appended to
        ``self.recent_dqn_predictions[symbol]``. Failures are logged at debug
        level and otherwise ignored.
        """
        try:
            record = dict(
                timestamp=datetime.now(),
                action=action,
                confidence=confidence,
                current_price=current_price,
                q_values=q_values,
            )
            self.recent_dqn_predictions[symbol].append(record)
            logger.debug(
                f"DQN prediction captured for {symbol}: action {action} with confidence {confidence:.3f}"
            )
        except Exception as capture_error:
            logger.debug(f"Error capturing DQN prediction: {capture_error}")

    def _get_current_price(self, symbol: str) -> Optional[float]:
        """Get current price for a symbol - using dedicated live price API"""
        try:
            # Use the new low-latency live price method from data provider
            if hasattr(self.data_provider, "get_live_price_from_api"):
                return self.data_provider.get_live_price_from_api(symbol)
            else:
                # Fallback to old method if not available
                return self.data_provider.get_current_price(symbol)
        except Exception as e:
            logger.error(f"Error getting current price for {symbol}: {e}")
            return None

    async def _generate_fallback_prediction(
        self, symbol: str, current_price: float
    ) -> Optional[Prediction]:
        """Generate a basic momentum-based fallback prediction when no models are available.

        For each of the 1m/5m/15m timeframes the last ten closes are fetched
        from the data provider and the average of the newest five is compared
        with the average of the oldest five. The mean of those momentum values
        is mapped to BUY (> +1%), SELL (< -1%) or HOLD.

        Args:
            symbol: Symbol to fetch candles for.
            current_price: Accepted for interface compatibility; not used in
                the calculation.

        Returns:
            A ``Prediction`` with ``model_name="fallback_momentum"``, or None
            when no timeframe yields ten usable closes or an error occurs.
        """

        def _recent_closes(candles, count=10):
            """Return the last ``count`` close prices, or [] if unavailable."""
            # Compare against None explicitly: truth-testing a pandas
            # DataFrame raises ValueError.
            if candles is None or len(candles) < count:
                return []
            # DataFrame-like input, duck-typed so pandas stays optional
            if hasattr(candles, "columns"):
                if "close" not in candles.columns:
                    return []
                return candles["close"].iloc[-count:].tolist()
            if not isinstance(candles, list):
                return []
            first, window = candles[0], candles[-count:]
            if hasattr(first, "close"):
                return [candle.close for candle in window]
            if isinstance(first, dict) and "close" in first:
                return [candle["close"] for candle in window]
            if isinstance(first, (list, tuple)) and len(first) >= 5:
                # Assuming close is the 5th element of each row
                return [candle[4] for candle in window]
            return []

        try:
            momentum_signals = []
            for timeframe in ("1m", "5m", "15m"):
                try:
                    # Use whichever history accessor the DataProvider offers
                    data = None
                    if hasattr(self.data_provider, "get_historical_data"):
                        data = self.data_provider.get_historical_data(
                            symbol, timeframe, limit=20
                        )
                    elif hasattr(self.data_provider, "get_candles"):
                        data = self.data_provider.get_candles(
                            symbol, timeframe, limit=20
                        )
                    elif hasattr(self.data_provider, "get_data"):
                        data = self.data_provider.get_data(symbol, timeframe, limit=20)

                    prices = _recent_closes(data)
                    if len(prices) < 10:
                        continue

                    # Simple momentum: if recent price > average, bullish
                    recent_avg = sum(prices[-5:]) / 5
                    older_avg = sum(prices[:5]) / 5
                    momentum = (
                        (recent_avg - older_avg) / older_avg if older_avg > 0 else 0
                    )
                    momentum_signals.append(momentum)
                except Exception as timeframe_error:
                    logger.debug(
                        f"Fallback momentum unavailable for {symbol} {timeframe}: {timeframe_error}"
                    )
                    continue

            if not momentum_signals:
                return None

            avg_momentum = sum(momentum_signals) / len(momentum_signals)

            # Convert momentum to action (thresholds are +/-1%)
            if avg_momentum > 0.01:
                action = "BUY"
            elif avg_momentum < -0.01:
                action = "SELL"
            else:
                action = "HOLD"
            confidence = 0.5 if action == "HOLD" else min(0.7, abs(avg_momentum) * 10)

            # The chosen action gets the confidence; the other two share the rest
            remainder = (1 - confidence) / 2
            probabilities = {
                name: (confidence if name == action else remainder)
                for name in ("BUY", "SELL", "HOLD")
            }

            return Prediction(
                action=action,
                confidence=confidence,
                probabilities=probabilities,
                timeframe="mixed",
                timestamp=datetime.now(),
                model_name="fallback_momentum",
                metadata={
                    "momentum": avg_momentum,
                    "signals_count": len(momentum_signals),
                },
            )

        except Exception as e:
            logger.debug(f"Error generating fallback prediction for {symbol}: {e}")
            return None

>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
    def _initialize_cob_integration(self):
        """Create the COB integration and hook the orchestrator callbacks into it.

        When the COB module is unavailable only a warning is logged. If
        construction or callback registration fails, ``self.cob_integration``
        is set to None.
        """
        if not COB_INTEGRATION_AVAILABLE or COBIntegration is None:
            logger.warning(
                "COB Integration not available. Please install `cob_integration` module."
            )
            return

        try:
            # Primary symbol first, followed by the reference symbols
            tracked_symbols = [self.symbol] + self.ref_symbols
            self.cob_integration = COBIntegration(
                symbols=tracked_symbols,
                data_provider=self.data_provider,
            )
            logger.info("COB Integration initialized")

            # Register each callback only if the integration supports it
            callback_hooks = (
                ("add_cnn_callback", "_on_cob_cnn_features"),
                ("add_dqn_callback", "_on_cob_dqn_features"),
                ("add_dashboard_callback", "_on_cob_dashboard_data"),
            )
            for register_name, handler_name in callback_hooks:
                if hasattr(self.cob_integration, register_name):
                    register = getattr(self.cob_integration, register_name)
                    register(getattr(self, handler_name))

        except Exception as init_error:
            logger.warning(f"Failed to initialize COB Integration: {init_error}")
            self.cob_integration = None

    async def start_cob_integration(self):
        """Start the COB integration to begin streaming data.

        Awaits ``self.cob_integration.start()`` when an integration object
        exposing ``start`` is present; otherwise a warning is logged.
        Start-up failures are logged and not re-raised.
        """
        if not (self.cob_integration and hasattr(self.cob_integration, "start")):
            logger.warning(
                "COB Integration not initialized or start method not available."
            )
            return

        try:
            logger.info("Attempting to start COB integration...")
            await self.cob_integration.start()
            logger.info("COB Integration streaming started successfully.")
        except Exception as e:
            logger.error(f"Failed to start COB integration: {e}")

<<<<<<< HEAD
    # UNUSED FUNCTION - Not called anywhere in codebase
    def _start_cob_matrix_worker(self):
        """Start a background worker to continuously update COB matrices for models"""
        if not self.cob_integration:
            logger.warning("COB Integration not available, cannot start COB matrix worker.")
            return

        # UNUSED FUNCTION - Not called anywhere in codebase
        def matrix_worker():
            logger.info("COB Matrix Worker started.")
            while self.realtime_processing:
                try:
                    for symbol in self.symbols:
                        cob_snapshot = self.cob_integration.get_latest_cob_snapshot(symbol)
                        if cob_snapshot:
                            # Generate CNN features and update orchestrator's latest
                            cnn_features = self._generate_cob_cnn_features(symbol, cob_snapshot)
                            if cnn_features is not None:
                                self.latest_cob_features[symbol] = cnn_features

                            # Generate DQN state and update orchestrator's latest
                            dqn_state = self._generate_cob_dqn_features(symbol, cob_snapshot)
                            if dqn_state is not None:
                                self.latest_cob_state[symbol] = dqn_state

                            # Update COB feature history (for sequence models)
                            self.cob_feature_history[symbol].append({
                                'timestamp': cob_snapshot.timestamp,
                                'cnn_features': cnn_features.tolist() if cnn_features is not None and hasattr(cnn_features, 'tolist') else [],
                                'dqn_state': dqn_state.tolist() if dqn_state is not None and hasattr(dqn_state, 'tolist') else []
                            })
                            # Keep history within reasonable bounds
                            while len(self.cob_feature_history[symbol]) > 100:
                                self.cob_feature_history[symbol].pop(0)
                        else:
                            logger.debug(f"No COB snapshot available for {symbol}")
                    time.sleep(0.5) # Update every 0.5 seconds
                        
                except Exception as e:
                    logger.error(f"Error in COB matrix worker: {e}")
                    time.sleep(5) # Wait before retrying
        
        # Start the worker thread
        matrix_thread = threading.Thread(target=matrix_worker, daemon=True)
        matrix_thread.start()

    # UNUSED FUNCTION - Not called anywhere in codebase
    def _update_cob_matrix_for_symbol(self, symbol: str):
        """Updates the COB matrix and features for a specific symbol."""
        if not self.cob_integration:
            logger.warning("COB Integration not available, cannot update COB matrix.")
            return
        
        cob_snapshot = self.cob_integration.get_latest_cob_snapshot(symbol)
        if cob_snapshot:
            cnn_features = self._generate_cob_cnn_features(symbol, cob_snapshot)
            if cnn_features is not None:
                self.latest_cob_features[symbol] = cnn_features

            dqn_state = self._generate_cob_dqn_features(symbol, cob_snapshot)
            if dqn_state is not None:
                self.latest_cob_state[symbol] = dqn_state

            # Update COB feature history (for sequence models)
            self.cob_feature_history[symbol].append({
                'timestamp': cob_snapshot.timestamp,
                'cnn_features': cnn_features.tolist() if cnn_features is not None and hasattr(cnn_features, 'tolist') else [],
                'dqn_state': dqn_state.tolist() if dqn_state is not None and hasattr(dqn_state, 'tolist') else []
            })
            while len(self.cob_feature_history[symbol]) > 100:
                self.cob_feature_history[symbol].pop(0)
=======
    def _start_cob_integration_sync(self):
        """Start COB integration synchronously during initialization"""
        if self.cob_integration and hasattr(self.cob_integration, "start"):
            try:
                logger.info("Starting COB integration during initialization...")
                # If start is async, we need to run it in the event loop
                import asyncio

                try:
                    # Try to get current event loop
                    loop = asyncio.get_event_loop()
                    if loop.is_running():
                        # If loop is running, schedule the coroutine
                        asyncio.create_task(self.cob_integration.start())
                    else:
                        # If no loop is running, run it
                        loop.run_until_complete(self.cob_integration.start())
                except RuntimeError:
                    # No event loop, create one
                    asyncio.run(self.cob_integration.start())
                logger.info("COB Integration started during initialization")
            except Exception as e:
                logger.warning(
                    f"Failed to start COB integration during initialization: {e}"
                )
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
        else:
            logger.debug("COB Integration not available for startup")

    # Registered as the CNN feature callback in _initialize_cob_integration
    def _on_cob_cnn_features(self, symbol: str, cob_data: Dict):
        """Callback for when new COB CNN features are available"""
        if not self.realtime_processing:
            return
        try:
            # This is where you would feed the features to the CNN model for prediction
            # or store them for training. For now, we just log and store the latest.
            # self.latest_cob_features[symbol] = cob_data['features']
            # logger.debug(f"COB CNN features updated for {symbol}: {cob_data['features'][:5]}...")

            # If training is enabled, add to training data
            if self.training_enabled and self.enhanced_training_system:
                # Use a safe method check before calling
                if hasattr(self.enhanced_training_system, "add_cob_cnn_experience"):
                    self.enhanced_training_system.add_cob_cnn_experience(
                        symbol, cob_data
                    )

        except Exception as e:
            logger.error(f"Error in _on_cob_cnn_features for {symbol}: {e}")

    # Registered as the DQN feature callback in _initialize_cob_integration
    def _on_cob_dqn_features(self, symbol: str, cob_data: Dict):
        """Handle freshly published COB DQN features for ``symbol``.

        Ignored while real-time processing is off. A non-None ``state`` entry
        is stored in ``self.latest_cob_state``; a missing one is logged as a
        warning. With training enabled, the data is also forwarded to the
        enhanced training system if it provides ``add_cob_dqn_experience``.
        """
        if not self.realtime_processing:
            return
        try:
            # Keep the newest COB state available for the DQN model
            state = cob_data["state"] if "state" in cob_data else None
            if state is None:
                logger.warning(
                    f"COB data for {symbol} missing 'state' field: {list(cob_data.keys())}"
                )
            else:
                self.latest_cob_state[symbol] = state
                logger.debug(
                    f"COB DQN state updated for {symbol}: shape {np.array(state).shape}"
                )

            # Forward as training experience when training is switched on
            trainer = self.enhanced_training_system if self.training_enabled else None
            if trainer and hasattr(trainer, "add_cob_dqn_experience"):
                trainer.add_cob_dqn_experience(symbol, cob_data)

        except Exception as callback_error:
            logger.error(f"Error in _on_cob_dqn_features for {symbol}: {callback_error}")

    # Registered as the dashboard callback in _initialize_cob_integration
    def _on_cob_dashboard_data(self, symbol: str, cob_data: Dict):
        """Handle new COB data destined for the dashboard.

        Ignored while real-time processing is off. Stores the data in
        ``self.latest_cob_data``, invalidates the data provider's OHLCV cache
        when that method exists, and pushes the data to the dashboard if one
        is attached. Errors are logged, not raised.
        """
        if not self.realtime_processing:
            return
        try:
            self.latest_cob_data[symbol] = cob_data

            # Fresh COB data makes cached OHLCV data stale
            provider = self.data_provider
            if hasattr(provider, "invalidate_ohlcv_cache"):
                provider.invalidate_ohlcv_cache(symbol)
                logger.debug(
                    f"Invalidated data provider cache for {symbol} due to COB update"
                )

            # Push to the dashboard when it can receive the update
            dashboard = self.dashboard
            can_push = dashboard and hasattr(
                dashboard, "update_cob_data_from_orchestrator"
            )
            if not can_push:
                logger.debug(
                    f"📊 No dashboard connected to receive COB data for {symbol}"
                )
            else:
                dashboard.update_cob_data_from_orchestrator(symbol, cob_data)
                logger.debug(f"📊 Sent COB data for {symbol} to dashboard")

        except Exception as callback_error:
            logger.error(f"Error in _on_cob_dashboard_data for {symbol}: {callback_error}")

    # UNUSED FUNCTION - Not called anywhere in codebase
    def get_cob_features(self, symbol: str) -> Optional[np.ndarray]:
        """Return the newest COB features stored for ``symbol`` (CNN input), or None."""
        latest_by_symbol = self.latest_cob_features
        return latest_by_symbol.get(symbol)

    # UNUSED FUNCTION - Not called anywhere in codebase
    def get_cob_state(self, symbol: str) -> Optional[np.ndarray]:
        """Return the newest COB state stored for ``symbol`` (DQN input), or None."""
        latest_by_symbol = self.latest_cob_state
        return latest_by_symbol.get(symbol)

<<<<<<< HEAD
    # SINGLE-USE FUNCTION - Called only once in codebase
    def get_cob_snapshot(self, symbol: str) -> Optional[COBSnapshot]:
=======
    def get_cob_snapshot(self, symbol: str):
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
        """Get the latest raw COB snapshot for a symbol"""
        if self.cob_integration and hasattr(
            self.cob_integration, "get_latest_cob_snapshot"
        ):
            return self.cob_integration.get_latest_cob_snapshot(symbol)
        return None

<<<<<<< HEAD
    # SINGLE-USE FUNCTION - Called only once in codebase
    def get_cob_feature_matrix(self, symbol: str, sequence_length: int = 60) -> Optional[np.ndarray]:
=======
    def get_cob_feature_matrix(
        self, symbol: str, sequence_length: int = 60
    ) -> Optional[np.ndarray]:
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
        """Get a sequence of COB CNN features for sequence models"""
        if (
            symbol not in self.cob_feature_history
            or not self.cob_feature_history[symbol]
        ):
            return None

        features = [
            item["cnn_features"] for item in list(self.cob_feature_history[symbol])
        ][-sequence_length:]
        if not features:
            return None

        # Pad or truncate to ensure consistent length and shape
        expected_feature_size = 102  # From _generate_cob_cnn_features
        padded_features = []
        for f in features:
            if len(f) < expected_feature_size:
                padded_features.append(
                    np.pad(f, (0, expected_feature_size - len(f)), "constant").tolist()
                )
            elif len(f) > expected_feature_size:
                padded_features.append(f[:expected_feature_size].tolist())
            else:
                padded_features.append(f)

        # Ensure we have the desired sequence length by padding with zeros if necessary
        if len(padded_features) < sequence_length:
            padding = [
                [0.0] * expected_feature_size
                for _ in range(sequence_length - len(padded_features))
            ]
            padded_features = padding + padded_features
<<<<<<< HEAD
            
        return np.array(padded_features[-sequence_length:]).astype(np.float32) # Ensure correct length
    
    # Model management methods removed - all handled by unified ModelManager
    # Use self.model_manager for all model operations
    
    # Weight normalization removed - handled by ModelManager
    
    # UNUSED FUNCTION - Not called anywhere in codebase
    def add_decision_callback(self, callback):
=======

        return np.array(padded_features[-sequence_length:]).astype(
            np.float32
        )  # Ensure correct length

    def _initialize_default_weights(self):
        """Initialize default model weights from config"""
        self.model_weights = {
            "CNN": self.config.orchestrator.get("cnn_weight", 0.7),
            "RL": self.config.orchestrator.get("rl_weight", 0.3),
        }

        # Add weights for specific models if they exist
        if hasattr(self, "cnn_model") and self.cnn_model:
            self.model_weights["enhanced_cnn"] = 0.4

        # Only add DQN agent weight if it exists
        if hasattr(self, "rl_agent") and self.rl_agent:
            self.model_weights["dqn_agent"] = 0.3

        # Add COB RL model weight if it exists (HIGHEST PRIORITY)
        if hasattr(self, "cob_rl_agent") and self.cob_rl_agent:
            self.model_weights["cob_rl_model"] = 0.4

        # Add extrema trainer weight if it exists
        if hasattr(self, "extrema_trainer") and self.extrema_trainer:
            self.model_weights["extrema_trainer"] = 0.15

    def register_model(
        self, model: ModelInterface, weight: Optional[float] = None
    ) -> bool:
        """Register a new model with the orchestrator"""
        try:
            # Register with model registry
            if not self.model_registry.register_model(model):
                return False

            # Set weight
            if weight is not None:
                self.model_weights[model.name] = weight
            elif model.name not in self.model_weights:
                self.model_weights[model.name] = (
                    0.1  # Default low weight for new models
                )

            # Initialize performance tracking
            if model.name not in self.model_performance:
                self.model_performance[model.name] = {
                    "correct": 0,
                    "total": 0,
                    "accuracy": 0.0,
                }

            # Initialize model statistics tracking
            if model.name not in self.model_statistics:
                self.model_statistics[model.name] = ModelStatistics(
                    model_name=model.name
                )
                logger.debug(f"Initialized statistics tracking for {model.name}")

            # Initialize last inference storage for this model
            if model.name not in self.last_inference:
                self.last_inference[model.name] = None
                logger.debug(f"Initialized last inference storage for {model.name}")

            logger.info(
                f"Registered {model.name} model with weight {self.model_weights[model.name]}"
            )
            self._normalize_weights()
            return True

        except Exception as e:
            logger.error(f"Error registering model {model.name}: {e}")
            return False
    def unregister_model(self, model_name: str) -> bool:
        """Unregister a model"""
        try:
            if self.model_registry.unregister_model(model_name):
                if model_name in self.model_weights:
                    del self.model_weights[model_name]
                if model_name in self.model_performance:
                    del self.model_performance[model_name]
                if model_name in self.model_statistics:
                    del self.model_statistics[model_name]

                self._normalize_weights()
                logger.info(f"Unregistered {model_name} model")
                return True
            return False

        except Exception as e:
            logger.error(f"Error unregistering model {model_name}: {e}")
            return False

    def _normalize_weights(self):
        """Normalize model weights to sum to 1.0"""
        total_weight = sum(self.model_weights.values())
        if total_weight > 0:
            for model_name in self.model_weights:
                self.model_weights[model_name] /= total_weight

    async def add_decision_callback(self, callback):
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
        """Add a callback function to be called when decisions are made"""
        self.decision_callbacks.append(callback)
        logger.info(
            f"Decision callback registered: {callback.__name__ if hasattr(callback, '__name__') else 'unnamed'}"
        )
        return True

    async def make_trading_decision(self, symbol: str) -> Optional[TradingDecision]:
        """
        Make a trading decision for a symbol by combining all registered model outputs.

        Steps:
            1. Read the current price from the data provider.
            2. Collect predictions via ``_get_all_predictions``. When there are
               none, no decision is made (no fallback signal is generated).
            3. Choose the decision path from the "decision_fusion" inference and
               training toggles: neural fusion, neural fusion for training only
               with a programmatic decision for actions, or programmatic only.
            4. Record the decision (last 100 kept per symbol), notify the
               decision callbacks, add training samples and periodically ask the
               model registry to clean up.

        Args:
            symbol: Symbol to decide on.

        Returns:
            The decision from the selected path, or ``None`` when no price or no
            predictions are available, or when an exception was raised (logged).
        """
        import inspect  # local import: only needed for the callback dispatch below

        try:
            current_time = datetime.now()

            # EXECUTE EVERY SIGNAL: Remove decision frequency limit
            # Allow immediate execution of every signal from the decision model
            logger.debug(f"Processing signal for {symbol} - no frequency limit applied")

            # Get current market data
            current_price = self.data_provider.get_current_price(symbol)
            if current_price is None:
                logger.warning(f"No current price available for {symbol}")
                return None

            # Get predictions from all registered models
            predictions = await self._get_all_predictions(symbol)

            if not predictions:
                # NOTE(review): merge resolution - the other side of the conflict
                # called self._generate_fallback_prediction here. Skipping the
                # decision follows the module policy (no stubs / no synthetic
                # data) and does not depend on that helper existing.
                logger.warning(
                    "No model predictions available for %s; skipping decision per guidelines",
                    symbol,
                )
                return None

            # Check inference and training toggle states separately. These two
            # flags are required by every branch below.
            decision_fusion_inference_enabled = self.is_model_inference_enabled("decision_fusion")
            decision_fusion_training_enabled = self.is_model_training_enabled("decision_fusion")

            # If training is enabled, we should also inference the model for training purposes
            # but we may not use the predictions for actions/signals depending on inference toggle
            should_inference_for_training = decision_fusion_training_enabled and (
                self.decision_fusion_enabled
                and self.decision_fusion_mode == "neural"
                and self.decision_fusion_network is not None
            )

            if should_inference_for_training and decision_fusion_inference_enabled:
                # Use neural decision fusion for both training and actions
                logger.debug(f"Using neural decision fusion for {symbol} (inference enabled)")
                decision = self._make_decision_fusion_decision(
                    symbol=symbol,
                    predictions=predictions,
                    current_price=current_price,
                    timestamp=current_time,
                )
            elif should_inference_for_training and not decision_fusion_inference_enabled:
                # Inference for training only, but use programmatic for actions
                logger.info(f"Decision fusion inference disabled, using programmatic mode for {symbol} (training enabled)")

                # Make neural inference for training purposes only
                training_decision = self._make_decision_fusion_decision(
                    symbol=symbol,
                    predictions=predictions,
                    current_price=current_price,
                    timestamp=current_time,
                )

                # Store inference for decision fusion training
                self._store_decision_fusion_inference(
                    training_decision, predictions, current_price
                )

                # Use programmatic decision for actual actions
                decision = self._combine_predictions(
                    symbol=symbol,
                    price=current_price,
                    predictions=predictions,
                    timestamp=current_time,
                )
            else:
                # Use programmatic decision combination (no neural inference)
                if not decision_fusion_inference_enabled and not decision_fusion_training_enabled:
                    logger.info(f"Decision fusion model disabled (inference and training off), using programmatic mode for {symbol}")
                else:
                    logger.debug(f"Using programmatic decision combination for {symbol}")

                decision = self._combine_predictions(
                    symbol=symbol,
                    price=current_price,
                    predictions=predictions,
                    timestamp=current_time,
                )

                # Train decision fusion model even in programmatic mode if training is enabled
                if (
                    decision_fusion_training_enabled
                    and self.decision_fusion_enabled
                    and self.decision_fusion_network is not None
                ):
                    # Store inference for decision fusion (like other models)
                    self._store_decision_fusion_inference(
                        decision, predictions, current_price
                    )

                    # Train fusion model in programmatic mode at regular intervals
                    self.decision_fusion_decisions_count += 1
                    if (
                        self.decision_fusion_decisions_count % self.decision_fusion_training_interval == 0
                        and len(self.decision_fusion_training_data) >= self.decision_fusion_min_samples
                    ):
                        logger.info(f"Training decision fusion model in programmatic mode (decision #{self.decision_fusion_decisions_count})")
                        training_task = asyncio.create_task(
                            self._train_decision_fusion_programmatic()
                        )
                        # The event loop only keeps weak references to tasks, so
                        # hold one here until the task finishes.
                        pending_tasks = getattr(self, "_decision_fusion_tasks", None)
                        if pending_tasks is None:
                            pending_tasks = self._decision_fusion_tasks = set()
                        pending_tasks.add(training_task)
                        training_task.add_done_callback(pending_tasks.discard)

            # Update state
            self.last_decision_time[symbol] = current_time
            if symbol not in self.recent_decisions:
                self.recent_decisions[symbol] = []
            self.recent_decisions[symbol].append(decision)

            # Keep only recent decisions (last 100)
            if len(self.recent_decisions[symbol]) > 100:
                self.recent_decisions[symbol] = self.recent_decisions[symbol][-100:]

            # Call decision callbacks. Plain (non-async) callbacks are supported:
            # only an awaitable result is awaited, so a sync callback no longer
            # produces a spurious "can't be used in 'await'" error after running.
            for callback in self.decision_callbacks:
                try:
                    outcome = callback(decision)
                    if inspect.isawaitable(outcome):
                        await outcome
                except Exception as e:
                    logger.error(f"Error in decision callback: {e}")

            # Add training samples based on current market conditions
            await self._add_training_samples_from_predictions(
                symbol, predictions, current_price
            )

            # Clean up memory periodically (every 20 decisions per symbol).
            # A dedicated counter is used because the length of the capped
            # recent-decisions list stays at 100 once full, which would make a
            # length-based "% 20" check true on every call.
            # NOTE(review): merge resolution - one side states model cleanup is
            # handled by ModelManager, so the registry call is made only when a
            # registry with cleanup_all_models is actually present.
            cleanup_counts = getattr(self, "_decisions_since_cleanup", None)
            if cleanup_counts is None:
                cleanup_counts = self._decisions_since_cleanup = {}
            cleanup_counts[symbol] = cleanup_counts.get(symbol, 0) + 1
            if cleanup_counts[symbol] >= 20:
                cleanup_counts[symbol] = 0
                registry = getattr(self, "model_registry", None)
                cleanup_all_models = getattr(registry, "cleanup_all_models", None)
                if callable(cleanup_all_models):
                    cleanup_all_models()

            return decision

        except Exception as e:
            logger.error(f"Error making trading decision for {symbol}: {e}")
            return None

    async def _add_training_samples_from_predictions(
        self, symbol: str, predictions: List[Prediction], current_price: float
    ):
        """Add training samples to models based on current predictions and market conditions.

        For every prediction whose model name contains "cnn", a reward is
        computed with ``_calculate_sophisticated_reward`` from the recent price
        change and the current position state, and the CNN is trained through
        ``_train_cnn_model`` (using ``self.cnn_model`` when set, otherwise the
        model registered under the prediction's model name).

        When no recent price change can be derived from the data provider,
        nothing is trained: the module policy forbids substituting an assumed
        price change for missing market data.

        Args:
            symbol: Symbol the predictions belong to.
            predictions: Predictions produced for this decision.
            current_price: Current price used as the end point of the change.

        Errors are logged and swallowed; nothing is returned.
        """
        try:
            price_change_pct = self._recent_price_change_pct(symbol, current_price)
            if price_change_pct is None:
                logger.debug(
                    f"No recent price data for {symbol}; skipping training samples"
                )
                return

            # Get current position P&L for sophisticated reward calculation
            current_position_pnl = self._get_current_position_pnl(symbol)
            has_position = self._has_open_position(symbol)

            # Add training samples for CNN predictions using sophisticated reward system
            for prediction in predictions:
                if "cnn" not in prediction.model_name.lower():
                    continue

                # Extract price vector information if available
                predicted_price_vector = None
                direction = getattr(prediction, "price_direction", None)
                if direction:
                    predicted_price_vector = direction
                else:
                    metadata = getattr(prediction, "metadata", None)
                    if metadata and "price_direction" in metadata:
                        predicted_price_vector = metadata["price_direction"]

                # Calculate sophisticated reward using the PnL penalty/reward system
                sophisticated_reward, was_correct = self._calculate_sophisticated_reward(
                    predicted_action=prediction.action,
                    prediction_confidence=prediction.confidence,
                    price_change_pct=price_change_pct,
                    time_diff_minutes=1.0,  # Assume 1 minute for now
                    has_price_prediction=False,
                    symbol=symbol,
                    has_position=has_position,
                    current_position_pnl=current_position_pnl,
                    predicted_price_vector=predicted_price_vector,
                )

                # Create training record for the new training system
                training_record = {
                    "symbol": symbol,
                    "model_name": prediction.model_name,
                    "action": prediction.action,
                    "confidence": prediction.confidence,
                    "timestamp": prediction.timestamp,
                    "current_price": current_price,
                    "price_change_pct": price_change_pct,
                    "was_correct": was_correct,
                    "sophisticated_reward": sophisticated_reward,
                    "current_position_pnl": current_position_pnl,
                    "has_position": has_position,
                }

                # Prefer the directly attached CNN model; otherwise fall back to
                # the model registered under the prediction's name.
                if getattr(self, "cnn_model", None):
                    model = self.cnn_model
                    via_registry = False
                elif self.model_registry and prediction.model_name in self.model_registry.models:
                    model = self.model_registry.models[prediction.model_name]
                    via_registry = True
                else:
                    continue

                training_success = await self._train_cnn_model(
                    model=model,
                    model_name=prediction.model_name,
                    record=training_record,
                    prediction={"action": prediction.action, "confidence": prediction.confidence},
                    reward=sophisticated_reward,
                )

                if via_registry:
                    if training_success:
                        logger.debug(
                            f"CNN training via registry completed: {prediction.model_name}, "
                            f"reward={sophisticated_reward:.3f}, was_correct={was_correct}"
                        )
                    else:
                        logger.warning(f"CNN training via registry failed for {prediction.model_name}")
                elif training_success:
                    logger.debug(
                        f"CNN training completed: action={prediction.action}, reward={sophisticated_reward:.3f}, "
                        f"price_change={price_change_pct:.2f}%, was_correct={was_correct}, "
                        f"position_pnl={current_position_pnl:.2f}"
                    )
                else:
                    logger.warning(f"CNN training failed for {prediction.model_name}")

        except Exception as e:
            # exc_info attaches the traceback to the same log record.
            logger.error(
                f"Error adding training samples from predictions: {e}", exc_info=True
            )

    def _recent_price_change_pct(
        self, symbol: str, current_price: float, lookback: int = 10
    ) -> Optional[float]:
        """Return the percent change from a recent 1m price to ``current_price``.

        Prices are read with ``data_provider.get_price_at_index(symbol, i, '1m')``
        for ``i`` in ``range(lookback)``, stopping at the first ``None``; the
        second-to-last collected price is the reference. Returns ``None`` when
        fewer than two prices are available, the reference price is zero, or
        the lookup fails.
        """
        try:
            recent_prices = []
            for index in range(lookback):
                price = self.data_provider.get_price_at_index(symbol, index, '1m')
                if price is None:
                    break
                recent_prices.append(price)

            if len(recent_prices) < 2:
                return None

            reference_price = recent_prices[-2]
            if not reference_price:
                # A zero reference would divide by zero; treat as unavailable.
                return None
            return (current_price - reference_price) / reference_price * 100
        except Exception as e:
            logger.debug(f"Could not get recent prices for {symbol}: {e}")
            return None

    async def _get_all_predictions(self, symbol: str) -> List[Prediction]:
<<<<<<< HEAD
        """Get predictions from all registered models via ModelManager"""
        # TODO(Guideline: remove stubs / integrate existing code) Implement ModelManager-driven prediction aggregation.
        raise RuntimeError("_get_all_predictions requires a real ModelManager integration (guideline: no stubs / no synthetic data).")
    
    async def _get_cnn_predictions(self, model: CNNModelInterface, symbol: str) -> List[Prediction]:
        """Collect CNN predictions for ``symbol`` on the 1m, 5m and 1h timeframes.

        For each timeframe, features are requested from the data provider
        (window size 60) and passed to ``model.predict``. Every non-empty result
        becomes a ``Prediction`` named ``CNN_<timeframe>``. When an enhanced
        training system exposing ``store_model_prediction`` is attached and a
        positive current price is available, the prediction is also stored there.

        A failure on one timeframe is logged at debug level and does not stop
        the remaining timeframes.

        Returns:
            The list of predictions gathered (possibly empty).
        """
        collected = []

        try:
            for tf in ('1m', '5m', '1h'):
                try:
                    # Get features from data provider
                    inputs = self.data_provider.get_cnn_features_for_inference(symbol, tf, window_size=60)
                    if inputs is None or len(inputs) <= 0:
                        continue

                    # Get prediction from model
                    result = await model.predict(inputs)
                    if not result:
                        continue

                    model_label = f"CNN_{tf}"
                    signal = result.get('signal', 'HOLD')
                    confidence = result.get('confidence', 0.0)
                    if len(inputs) > 10:
                        feature_preview = inputs[:10].tolist()
                    else:
                        feature_preview = inputs.tolist()

                    cnn_prediction = Prediction(
                        model_name=model_label,
                        symbol=symbol,
                        signal=signal,
                        confidence=confidence,
                        reasoning=f"CNN {tf} prediction",
                        features=feature_preview,
                        metadata={'timeframe': tf}
                    )
                    collected.append(cnn_prediction)

                    # Store prediction in database for tracking
                    training_system = getattr(self, 'enhanced_training_system', None)
                    if not (training_system and hasattr(training_system, 'store_model_prediction')):
                        continue

                    price_now = self._get_current_price_safe(symbol)
                    if price_now > 0:
                        stored_id = self.enhanced_training_system.store_model_prediction(
                            model_name=model_label,
                            symbol=symbol,
                            prediction_type=cnn_prediction.signal,
                            confidence=cnn_prediction.confidence,
                            current_price=price_now
                        )
                        logger.debug(f"Stored CNN prediction {stored_id} for {symbol} {tf}")

                except Exception as e:
                    logger.debug(f"Error getting CNN prediction for {symbol} {tf}: {e}")

        except Exception as e:
            logger.error(f"Error in CNN predictions for {symbol}: {e}")

        return collected
    
    def _get_current_price_safe(self, symbol: str) -> float:
        """Safely get current price for a symbol"""
        try:
            # Try to get from data provider
            if hasattr(self.data_provider, 'get_latest_data'):
                latest = self.data_provider.get_latest_data(symbol)
                if latest and 'close' in latest:
                    return float(latest['close'])
            
            # Fallback values
            fallback_prices = {'ETH/USDT': 4300.0, 'BTC/USDT': 111000.0}
            return fallback_prices.get(symbol, 1000.0)
            
        except Exception as e:
            logger.debug(f"Error getting current price for {symbol}: {e}")
            return 0.0
    
    async def _get_cob_rl_prediction(self, model: COBRLModelInterface, symbol: str) -> Optional[Prediction]:
        """Get prediction from COB RL model"""
        try:
            # Get COB state from current market data
            cob_state = self._get_cob_state(symbol)
            if cob_state is None:
                return None
            
            # Get prediction from COB RL model
            if hasattr(model.model, 'act_with_confidence'):
                result = model.model.act_with_confidence(cob_state)
                if len(result) == 2:
=======
        """Get predictions from all registered models with input data storage"""
        predictions = []
        current_time = datetime.now()

        # Get the standard model input data once for all models
        # Prefer standardized input if available; fallback to legacy builder
        if hasattr(self.data_provider, "get_base_data_input"):
            base_data = self.data_provider.get_base_data_input(symbol)
        else:
            base_data = self.data_provider.build_base_data_input(symbol)
        if not base_data:
            logger.warning(f"Cannot build BaseDataInput for predictions: {symbol}")
            return predictions

        # Validate base_data has proper feature vector
        if hasattr(base_data, "get_feature_vector"):
            try:
                feature_vector = base_data.get_feature_vector()
                if feature_vector is None or (
                    isinstance(feature_vector, np.ndarray) and feature_vector.size == 0
                ):
                    logger.warning(
                        f"BaseDataInput has empty feature vector for {symbol}"
                    )
                    return predictions
            except Exception as e:
                logger.warning(
                    f"Error getting feature vector from BaseDataInput for {symbol}: {e}"
                )
                return predictions

        # log all registered models
        logger.debug(f"inferencing registered models: {self.model_registry.models}")

        for model_name, model in self.model_registry.models.items():
            try:
                # Respect inference toggle: skip inference entirely when disabled
                if not self.is_model_inference_enabled(model_name):
                    logger.debug(f"Inference disabled for {model_name}; skipping model call")
                    continue
                prediction = None
                model_input = base_data  # Use the same base data for all models

                # Track inference start time for statistics
                inference_start_time = time.time()

                if isinstance(model, CNNModelInterface):
                    # Get CNN predictions using the pre-built base data
                    cnn_predictions = await self._get_cnn_predictions(
                        model, symbol, base_data
                    )
                    inference_duration_ms = (time.time() - inference_start_time) * 1000
                    predictions.extend(cnn_predictions)
                    # Update statistics for CNN predictions
                    if cnn_predictions:
                        for cnn_pred in cnn_predictions:
                            self._update_model_statistics(
                                model_name,
                                cnn_pred,
                                inference_duration_ms=inference_duration_ms,
                            )
                            # Save audit image of inputs used for this inference
                            try:
                                from utils.audit_plotter import save_inference_audit_image
                                save_inference_audit_image(base_data, model_name=model_name, symbol=symbol, out_root="audit_inputs")
                            except Exception as _audit_ex:
                                logger.debug(f"Audit image save skipped: {str(_audit_ex)}")
                            await self._store_inference_data_async(
                                model_name, model_input, cnn_pred, current_time, symbol
                            )
                    else:
                        # Still update statistics even if no predictions (for timing)
                        self._update_model_statistics(
                            model_name, inference_duration_ms=inference_duration_ms
                        )

                elif isinstance(model, RLAgentInterface):
                    # Get RL prediction using the pre-built base data
                    rl_prediction = await self._get_rl_prediction(
                        model, symbol, base_data
                    )
                    inference_duration_ms = (time.time() - inference_start_time) * 1000
                    if rl_prediction:
                        predictions.append(rl_prediction)
                        prediction = rl_prediction
                        # Update statistics for RL prediction
                        self._update_model_statistics(
                            model_name,
                            prediction,
                            inference_duration_ms=inference_duration_ms,
                        )
                        # Save audit image of inputs used for this inference
                        try:
                            from utils.audit_plotter import save_inference_audit_image
                            save_inference_audit_image(base_data, model_name=model_name, symbol=symbol, out_root="audit_inputs")
                        except Exception as _audit_ex:
                            logger.debug(f"Audit image save skipped: {str(_audit_ex)}")
                        # Store input data for RL
                        await self._store_inference_data_async(
                            model_name, model_input, prediction, current_time, symbol
                        )
                    else:
                        # Still update statistics even if no prediction (for timing)
                        self._update_model_statistics(
                            model_name, inference_duration_ms=inference_duration_ms
                        )

                else:
                    # Generic model interface using the pre-built base data
                    generic_prediction = await self._get_generic_prediction(
                        model, symbol, base_data
                    )
                    inference_duration_ms = (time.time() - inference_start_time) * 1000
                    if generic_prediction:
                        predictions.append(generic_prediction)
                        prediction = generic_prediction
                        # Update statistics for generic prediction
                        self._update_model_statistics(
                            model_name,
                            prediction,
                            inference_duration_ms=inference_duration_ms,
                        )
                        # Save audit image of inputs used for this inference
                        try:
                            from utils.audit_plotter import save_inference_audit_image
                            save_inference_audit_image(base_data, model_name=model_name, symbol=symbol, out_root="audit_inputs")
                        except Exception as _audit_ex:
                            logger.debug(f"Audit image save skipped: {str(_audit_ex)}")
                        # Store input data for generic model
                        await self._store_inference_data_async(
                            model_name, model_input, prediction, current_time, symbol
                        )
                    else:
                        # Still update statistics even if no prediction (for timing)
                        self._update_model_statistics(
                            model_name, inference_duration_ms=inference_duration_ms
                        )

            except Exception as e:
                inference_duration_ms = (time.time() - inference_start_time) * 1000
                logger.error(f"Error getting prediction from {model_name}: {e}")
                # Still update statistics for failed inference (for timing)
                self._update_model_statistics(
                    model_name, inference_duration_ms=inference_duration_ms
                )
                continue

        # Note: Training is now triggered immediately within each prediction method
        # when previous inference data exists, rather than after all predictions

        return predictions

    def _update_model_statistics(
        self,
        model_name: str,
        prediction: Optional[Prediction] = None,
        loss: Optional[float] = None,
        inference_duration_ms: Optional[float] = None,
    ):
        """Record one inference for ``model_name`` and periodically log a digest.

        A ``ModelStatistics`` entry is created on first use. The values are
        forwarded to its ``update_inference_stats``; whenever the running
        inference count is a multiple of 10 a debug line with the rates,
        average timing and last prediction is emitted.

        Args:
            model_name: Key of the model inside ``self.model_statistics``.
            prediction: Prediction produced by the inference, if any.
            loss: Loss associated with the inference, if any.
            inference_duration_ms: Wall-clock duration of the inference.

        Failures are logged and never propagated to the caller.
        """
        try:
            registry = self.model_statistics
            if model_name not in registry:
                registry[model_name] = ModelStatistics(model_name=model_name)

            registry[model_name].update_inference_stats(
                prediction, loss, inference_duration_ms
            )

            tracker = registry[model_name]
            # Only every tenth inference is worth a log line.
            if tracker.total_inferences % 10 != 0:
                return

            if tracker.last_prediction is not None:
                last_prediction_str = tracker.last_prediction
            else:
                last_prediction_str = "None"

            if tracker.last_confidence is not None:
                last_confidence_str = f"{tracker.last_confidence:.3f}"
            else:
                last_confidence_str = "N/A"

            logger.debug(
                f"Model {model_name} stats: {tracker.total_inferences} inferences, "
                f"{tracker.inference_rate_per_minute:.1f}/min, "
                f"avg: {tracker.average_inference_time_ms:.1f}ms, "
                f"last: {last_prediction_str} ({last_confidence_str})"
            )

        except Exception as e:
            logger.error(f"Error updating statistics for {model_name}: {e}")

    def _update_model_training_statistics(
        self,
        model_name: str,
        loss: Optional[float] = None,
        training_duration_ms: Optional[float] = None,
    ):
        """Update training statistics for a specific model.

        A ``ModelStatistics`` entry is created on first use and the values are
        forwarded to its ``update_training_stats``. Whenever the running
        training count is a multiple of 5 a debug line with the training rate,
        average duration and current loss is emitted.

        Args:
            model_name: Key of the model inside ``self.model_statistics``.
            loss: Loss produced by the training step, if any.
            training_duration_ms: Wall-clock duration of the training step.

        Failures are logged and never propagated to the caller.
        """
        try:
            if model_name not in self.model_statistics:
                self.model_statistics[model_name] = ModelStatistics(
                    model_name=model_name
                )

            stats = self.model_statistics[model_name]
            stats.update_training_stats(loss, training_duration_ms)

            # Log training statistics periodically (every 5 trainings)
            if stats.total_trainings % 5 == 0:
                # Build the loss fragment separately: a conditional expression
                # placed after implicitly concatenated f-strings applies to the
                # WHOLE concatenation, which previously reduced the message to
                # just "loss: N/A". ``is not None`` keeps a genuine 0.0 loss.
                loss_str = (
                    f"loss: {stats.current_loss:.4f}"
                    if stats.current_loss is not None
                    else "loss: N/A"
                )
                logger.debug(
                    f"Model {model_name} training stats: {stats.total_trainings} trainings, "
                    f"{stats.training_rate_per_minute:.1f}/min, "
                    f"avg: {stats.average_training_time_ms:.1f}ms, "
                    f"{loss_str}"
                )

        except Exception as e:
            logger.error(f"Error updating training statistics for {model_name}: {e}")

    def get_model_statistics(
        self, model_name: Optional[str] = None
    ) -> Union[Dict[str, ModelStatistics], ModelStatistics, None]:
        """Look up tracked statistics.

        Args:
            model_name: Model to look up. When omitted (or empty) a shallow
                copy of the whole statistics mapping is returned instead.

        Returns:
            The entry for ``model_name`` (``None`` if unknown), the copied
            mapping of all entries, or ``None`` if the lookup failed.
        """
        try:
            if not model_name:
                return self.model_statistics.copy()
            return self.model_statistics.get(model_name)
        except Exception as e:
            logger.error(f"Error getting model statistics: {e}")
            return None

    def get_decision_fusion_performance(self) -> Dict[str, Any]:
        """Report how the ``decision_fusion`` model is doing.

        Returns:
            A dict that always carries ``enabled``, ``mode`` and ``status``.
            ``status`` is ``"not_initialized"`` when no statistics exist yet,
            ``"error"`` (plus an ``error`` message) when collecting them
            failed, and ``"active"`` otherwise. In the active case the dict
            also holds counters, losses, rates, timings, a
            ``performance_score`` (``1 - average_loss`` floored at 0, or 0.0
            without a loss) and, when history exists, up to the ten most
            recent predictions.
        """

        def _iso(moment):
            # Timestamps that were never set are reported as None.
            return moment.isoformat() if moment else None

        try:
            if "decision_fusion" not in self.model_statistics:
                return {
                    "enabled": self.decision_fusion_enabled,
                    "mode": self.decision_fusion_mode,
                    "status": "not_initialized"
                }

            fusion = self.model_statistics["decision_fusion"]

            report = {
                "enabled": self.decision_fusion_enabled,
                "mode": self.decision_fusion_mode,
                "status": "active",
                "total_decisions": fusion.total_inferences,
                "total_trainings": fusion.total_trainings,
                "current_loss": fusion.current_loss,
                "average_loss": fusion.average_loss,
                "best_loss": fusion.best_loss,
                "worst_loss": fusion.worst_loss,
                "last_training_time": _iso(fusion.last_training_time),
                "last_inference_time": _iso(fusion.last_inference_time),
                "training_rate_per_minute": fusion.training_rate_per_minute,
                "inference_rate_per_minute": fusion.inference_rate_per_minute,
                "average_training_time_ms": fusion.average_training_time_ms,
                "average_inference_time_ms": fusion.average_inference_time_ms
            }

            # Lower loss means a higher score; the score never drops below 0.
            mean_loss = fusion.average_loss
            report["performance_score"] = (
                0.0 if mean_loss is None else max(0.0, 1.0 - mean_loss)
            )

            # Attach the tail of the prediction history when there is one.
            if fusion.predictions_history:
                tail = list(fusion.predictions_history)[-10:]
                recent = []
                for entry in tail:
                    recent.append(
                        {
                            "action": entry["action"],
                            "confidence": entry["confidence"],
                            "timestamp": entry["timestamp"].isoformat()
                        }
                    )
                report["recent_predictions"] = recent

            return report

        except Exception as e:
            logger.error(f"Error getting decision fusion performance: {e}")
            return {
                "enabled": self.decision_fusion_enabled,
                "mode": self.decision_fusion_mode,
                "status": "error",
                "error": str(e)
            }

    def get_model_statistics_summary(self) -> Dict[str, Dict[str, Any]]:
        """Build a per-model snapshot of the tracked statistics from plain values.

        Timestamps are rendered via ``isoformat()`` and float metrics are
        rounded: per-minute rates and timings to 2 places, per-second rates,
        accuracy and confidence to 4, losses to 6. Optional metrics that are
        ``None`` stay ``None``.

        Returns:
            Mapping of model name to its summary dict, or an empty dict when
            building the summary failed (the error is logged).
        """

        def _iso(moment):
            # Timestamps that were never set are reported as None.
            return moment.isoformat() if moment else None

        def _rounded(value, digits):
            # Optional metrics stay None instead of being rounded.
            return None if value is None else round(value, digits)

        try:
            return {
                name: {
                    "last_inference_time": _iso(entry.last_inference_time),
                    "last_training_time": _iso(entry.last_training_time),
                    "total_inferences": entry.total_inferences,
                    "total_trainings": entry.total_trainings,
                    "inference_rate_per_minute": round(entry.inference_rate_per_minute, 2),
                    "inference_rate_per_second": round(entry.inference_rate_per_second, 4),
                    "training_rate_per_minute": round(entry.training_rate_per_minute, 2),
                    "training_rate_per_second": round(entry.training_rate_per_second, 4),
                    "average_inference_time_ms": round(entry.average_inference_time_ms, 2),
                    "average_training_time_ms": round(entry.average_training_time_ms, 2),
                    "current_loss": _rounded(entry.current_loss, 6),
                    "average_loss": _rounded(entry.average_loss, 6),
                    "best_loss": _rounded(entry.best_loss, 6),
                    "worst_loss": _rounded(entry.worst_loss, 6),
                    "accuracy": _rounded(entry.accuracy, 4),
                    "last_prediction": entry.last_prediction,
                    "last_confidence": _rounded(entry.last_confidence, 4),
                    "recent_predictions_count": len(entry.predictions_history),
                    "recent_losses_count": len(entry.losses),
                }
                for name, entry in self.model_statistics.items()
            }
        except Exception as e:
            logger.error(f"Error getting model statistics summary: {e}")
            return {}

    def log_model_statistics(self, detailed: bool = False):
        """Log current model statistics for monitoring.

        Emits one INFO line per model (or a multi-line block per model when
        ``detailed`` is true) and, if decision fusion is enabled and active,
        a trailing section with its performance figures. When no statistics
        exist a single notice is logged and nothing else happens.

        Args:
            detailed: Emit the verbose multi-line form instead of the compact
                one-line-per-model form.

        Errors raised while logging the per-model section are caught and
        logged.
        """

        def _fmt(value, spec):
            # Only a missing value (None) is shown as "N/A"; a real 0.0 loss
            # or confidence is formatted like any other number.
            return format(value, spec) if value is not None else "N/A"

        try:
            if not self.model_statistics:
                logger.info("No model statistics available")
                return

            logger.info("=== Model Statistics Summary ===")
            for model_name, stats in self.model_statistics.items():
                if detailed:
                    logger.info(f"{model_name}:")
                    logger.info(
                        f"  Total inferences: {stats.total_inferences} (avg: {stats.average_inference_time_ms:.1f}ms)"
                    )
                    logger.info(
                        f"  Total trainings: {stats.total_trainings} (avg: {stats.average_training_time_ms:.1f}ms)"
                    )
                    logger.info(
                        f"  Inference rate: {stats.inference_rate_per_minute:.1f}/min ({stats.inference_rate_per_second:.3f}/sec)"
                    )
                    logger.info(
                        f"  Training rate: {stats.training_rate_per_minute:.1f}/min ({stats.training_rate_per_second:.3f}/sec)"
                    )
                    logger.info(f"  Last inference: {stats.last_inference_time}")
                    logger.info(f"  Last training: {stats.last_training_time}")
                    logger.info(f"  Current loss: {_fmt(stats.current_loss, '.6f')}")
                    logger.info(f"  Average loss: {_fmt(stats.average_loss, '.6f')}")
                    logger.info(f"  Best loss: {_fmt(stats.best_loss, '.6f')}")
                    if stats.last_prediction:
                        # The confidence may be unset even when a prediction
                        # exists; formatting None with ':.3f' would raise and
                        # abort logging for every remaining model.
                        logger.info(
                            f"  Last prediction: {stats.last_prediction} ({_fmt(stats.last_confidence, '.3f')})"
                        )
                    else:
                        logger.info("  Last prediction: N/A")
                else:
                    inf_rate_str = f"{stats.inference_rate_per_minute:.1f}/min"
                    train_rate_str = (
                        f"{stats.training_rate_per_minute:.1f}/min"
                        if stats.total_trainings > 0
                        else "0/min"
                    )
                    inf_time_str = (
                        f"{stats.average_inference_time_ms:.1f}ms"
                        if stats.average_inference_time_ms > 0
                        else "N/A"
                    )
                    train_time_str = (
                        f"{stats.average_training_time_ms:.1f}ms"
                        if stats.average_training_time_ms > 0
                        else "N/A"
                    )
                    loss_str = _fmt(stats.current_loss, ".4f")
                    pred_str = (
                        f"{stats.last_prediction}({_fmt(stats.last_confidence, '.2f')})"
                        if stats.last_prediction
                        else "N/A"
                    )
                    logger.info(
                        f"{model_name}: Inf: {stats.total_inferences}@{inf_time_str} ({inf_rate_str}) | "
                        f"Train: {stats.total_trainings}@{train_time_str} ({train_rate_str}) | "
                        f"Loss: {loss_str} | Last: {pred_str}"
                    )

        except Exception as e:
            logger.error(f"Error logging model statistics: {e}")

        # Log decision fusion performance specifically
        if self.decision_fusion_enabled:
            fusion_perf = self.get_decision_fusion_performance()
            if fusion_perf.get("status") == "active":
                logger.info("=== Decision Fusion Performance ===")
                logger.info(f"Mode: {fusion_perf.get('mode', 'unknown')}")
                logger.info(f"Total decisions: {fusion_perf.get('total_decisions', 0)}")
                logger.info(f"Total trainings: {fusion_perf.get('total_trainings', 0)}")
                current_loss = fusion_perf.get('current_loss')
                avg_loss = fusion_perf.get('average_loss')
                perf_score = fusion_perf.get('performance_score', 0)
                train_rate = fusion_perf.get('training_rate_per_minute', 0)

                logger.info(f"Current loss: {_fmt(current_loss, '.4f')}")
                logger.info(f"Average loss: {_fmt(avg_loss, '.4f')}")
                logger.info(f"Performance score: {perf_score:.3f}")
                logger.info(f"Training rate: {train_rate:.2f}/min")

    async def _store_inference_data_async(
        self,
        model_name: str,
        model_input: Any,
        prediction: Prediction,
        timestamp: datetime,
        symbol: str = None,
    ):
        """Remember a model's newest inference and queue it for persistence.

        The record replaces ``self.last_inference[model_name]``, is appended
        to the model's bounded ``self.recent_inferences`` deque, and a
        background task is created to write it through
        ``_save_to_database_manager_async``. Nothing is stored when
        ``model_input`` is ``None`` or an empty dict.

        Args:
            model_name: Model that produced the prediction.
            model_input: Input the model was run on; kept in the record as-is.
            prediction: Prediction whose action, confidence, probabilities,
                timeframe and metadata are copied into the record.
            timestamp: Inference time; stored in ISO format.
            symbol: Traded symbol. Falls back to ``prediction.symbol`` and
                finally to ``"ETH/USDT"``.

        Failures are logged and never propagated to the caller.
        """
        try:
            logger.debug(
                f"Storing inference for {model_name}: {prediction.action} (confidence: {prediction.confidence:.3f})"
            )

            # Guard clauses: without usable input there is nothing to keep.
            if model_input is None:
                logger.warning(
                    f"Skipping inference storage for {model_name}: model_input is None"
                )
                return
            if isinstance(model_input, dict) and not model_input:
                logger.warning(
                    f"Skipping inference storage for {model_name}: model_input is empty dict"
                )
                return

            if symbol is None:
                # Prefer the symbol carried by the prediction, else the default.
                symbol = getattr(prediction, "symbol", "ETH/USDT")

            price_at_inference = self._get_current_price(symbol)

            # Keep only what later training needs.
            record = {
                "timestamp": timestamp.isoformat(),
                "symbol": symbol,
                "model_name": model_name,
                "model_input": model_input,
                "prediction": {
                    "action": prediction.action,
                    "confidence": prediction.confidence,
                    "probabilities": prediction.probabilities,
                    "timeframe": prediction.timeframe,
                },
                "metadata": prediction.metadata or {},
                "training_outcome": None,  # filled in once training has run
                "outcome_evaluated": False,
                "inference_price": price_at_inference,
            }

            # Newest record per model, used for immediate training.
            self.last_inference[model_name] = record

            # Best-effort append to the bounded in-memory history.
            try:
                buffers = self.recent_inferences
                if model_name not in buffers:
                    buffers[model_name] = deque(maxlen=self.recent_inference_maxlen)
                buffers[model_name].append(record)
            except Exception as e:
                logger.debug(f"Unable to append to recent buffer for {model_name}: {e}")

            # Persist in the background so the caller is not held up.
            asyncio.create_task(
                self._save_to_database_manager_async(model_name, record)
            )

            logger.debug(
                f"Stored last inference for {model_name} and queued database save"
            )

        except Exception as e:
            logger.error(f"Error storing inference data for {model_name}: {e}")

    async def _save_to_database_manager_async(
        self, model_name: str, inference_record: Dict
    ):
        """Write one inference record through ``self.db_manager``.

        The blocking work runs in the event loop's default executor. The
        record's ``model_input`` is converted to a NumPy array when it is a
        tensor-like object (has ``numpy``), an ``ndarray``, a list or a
        tuple; the first 16 hex characters of the MD5 of its bytes serve as
        ``input_features_hash`` (``"unknown"`` when no array is available).

        Args:
            model_name: Model the record belongs to.
            inference_record: Record dict with ``prediction``, ``symbol``,
                ``timestamp``, ``model_input`` and ``metadata`` entries.

        Errors inside the worker are logged, not raised.
        """
        import hashlib
        import asyncio

        def _persist_record():
            try:
                predicted = inference_record.get("prediction", {})
                traded_symbol = inference_record.get("symbol", "ETH/USDT")
                raw_timestamp = inference_record.get("timestamp", "")

                # ISO strings are parsed; anything else is passed through.
                when = (
                    datetime.fromisoformat(raw_timestamp)
                    if isinstance(raw_timestamp, str)
                    else raw_timestamp
                )

                raw_input = inference_record.get("model_input")
                features_digest = "unknown"
                features = None

                if raw_input is not None:
                    try:
                        if hasattr(raw_input, "numpy"):  # PyTorch tensor
                            features = raw_input.detach().cpu().numpy()
                        elif isinstance(raw_input, np.ndarray):
                            features = raw_input
                        elif isinstance(raw_input, (list, tuple)):
                            features = np.array(raw_input)

                        # Short digest of the raw bytes, used for deduplication.
                        if features is not None:
                            digest = hashlib.md5(features.tobytes())
                            features_digest = digest.hexdigest()[:16]
                    except Exception as e:
                        logger.debug(
                            f"Could not process input features for hashing: {e}"
                        )

                # Imported lazily, as in the rest of this method's helpers.
                from utils.database_manager import InferenceRecord

                db_record = InferenceRecord(
                    model_name=model_name,
                    timestamp=when,
                    symbol=traded_symbol,
                    action=predicted.get("action", "HOLD"),
                    confidence=predicted.get("confidence", 0.0),
                    probabilities=predicted.get("probabilities", {}),
                    input_features_hash=features_digest,
                    processing_time_ms=0.0,  # not tracked by the orchestrator
                    memory_usage_mb=0.0,  # not tracked by the orchestrator
                    input_features=features,
                    checkpoint_id=None,
                    metadata=inference_record.get("metadata", {}),
                )

                if self.db_manager.log_inference(db_record):
                    logger.debug(f"Saved inference to database for {model_name}")
                else:
                    logger.warning(
                        f"Failed to save inference to database for {model_name}"
                    )

            except Exception as e:
                logger.error(f"Error saving to database manager: {e}")

        # Off-load the blocking database call to the default thread pool.
        loop = asyncio.get_event_loop()
        await loop.run_in_executor(None, _persist_record)

        # Note: in-memory recent buffer is appended in _store_inference_data_async

    def get_last_inference_status(self) -> Dict[str, Any]:
        """Summarise the most recent inference stored for every model.

        Returns:
            Mapping of model name to a dict with ``timestamp``, ``symbol``,
            ``action``, ``confidence``, ``outcome_evaluated`` and
            ``training_outcome``; models whose stored record is empty or
            ``None`` map to ``None``.
        """

        def _digest(record):
            # Action and confidence live in the nested "prediction" dict.
            predicted = record.get("prediction", {})
            return {
                "timestamp": record.get("timestamp"),
                "symbol": record.get("symbol"),
                "action": predicted.get("action"),
                "confidence": predicted.get("confidence"),
                "outcome_evaluated": record.get("outcome_evaluated", False),
                "training_outcome": record.get("training_outcome"),
            }

        return {
            name: (_digest(record) if record else None)
            for name, record in self.last_inference.items()
        }

    def get_training_data_from_db(
        self,
        model_name: str,
        symbol: str = None,
        hours_back: int = 24,
        limit: int = 1000,
    ) -> List[Dict]:
        """Fetch stored inference records and convert them to training dicts.

        Args:
            model_name: Model whose records are requested.
            symbol: Symbol filter forwarded to the database manager.
            hours_back: Look-back window forwarded to the database manager.
            limit: Maximum number of records forwarded to the database manager.

        Returns:
            One dict per readable database record (malformed records are
            skipped with a warning); an empty list if the query itself failed.
        """

        def _to_training_dict(row):
            return {
                "model_name": row.model_name,
                "symbol": row.symbol,
                "timestamp": row.timestamp.isoformat(),
                "prediction": {
                    "action": row.action,
                    "confidence": row.confidence,
                    "probabilities": row.probabilities,
                    "timeframe": "1m",
                },
                "metadata": row.metadata or {},
                "model_input": row.input_features,  # full input features
                "input_features_hash": row.input_features_hash,
            }

        try:
            rows = self.db_manager.get_inference_records_for_training(
                model_name=model_name, symbol=symbol, hours_back=hours_back, limit=limit
            )

            converted = []
            for row in rows:
                try:
                    converted.append(_to_training_dict(row))
                except Exception as e:
                    # One unreadable row must not discard the rest.
                    logger.warning(f"Skipping malformed training record: {e}")

            logger.info(f"Retrieved {len(converted)} training records for {model_name}")
            return converted

        except Exception as e:
            logger.error(f"Error getting training data from database: {e}")
            return []

    def _prepare_cnn_input_data(
        self, ohlcv_data: Dict, cob_data: Any, technical_indicators: Dict
    ) -> torch.Tensor:
        """Prepare standardized input data for CNN models with proper GPU device placement"""
        try:
            # Create feature matrix from OHLCV data
            features = []

            # Add OHLCV features for each timeframe
            for tf in ["1s", "1m", "1h", "1d"]:
                if tf in ohlcv_data and not ohlcv_data[tf].empty:
                    df = ohlcv_data[tf].tail(50)  # Last 50 bars
                    features.extend(
                        [
                            df["close"].pct_change().fillna(0).values,
                            (
                                df["volume"].values / df["volume"].max()
                                if df["volume"].max() > 0
                                else np.zeros(len(df))
                            ),
                        ]
                    )

            # Add technical indicators
            for key, value in technical_indicators.items():
                if not np.isnan(value):
                    features.append([value])

            # Flatten and pad/truncate to standard size
            if features:
                feature_array = np.concatenate(
                    [np.array(f).flatten() for f in features]
                )
                # Pad or truncate to 300 features
                if len(feature_array) < 300:
                    feature_array = np.pad(
                        feature_array, (0, 300 - len(feature_array)), "constant"
                    )
                else:
                    feature_array = feature_array[:300]
                # Convert to tensor and move to GPU
                return torch.tensor(
                    feature_array.reshape(1, -1),
                    dtype=torch.float32,
                    device=self.device,
                )
            else:
                # Return zero tensor on GPU
                return torch.zeros((1, 300), dtype=torch.float32, device=self.device)

        except Exception as e:
            logger.error(f"Error preparing CNN input data: {e}")
            return torch.zeros((1, 300), dtype=torch.float32, device=self.device)

    def _prepare_rl_input_data(
        self, ohlcv_data: Dict, cob_data: Any, technical_indicators: Dict
    ) -> torch.Tensor:
        """Prepare standardized input data for RL models with proper GPU device placement"""
        try:
            # Create state representation
            state_features = []

            # Add price and volume features
            if "1m" in ohlcv_data and not ohlcv_data["1m"].empty:
                df = ohlcv_data["1m"].tail(20)
                state_features.extend(
                    [
                        df["close"].pct_change().fillna(0).values,
                        df["volume"].pct_change().fillna(0).values,
                        (df["high"] - df["low"]) / df["close"],  # Volatility proxy
                    ]
                )

            # Add technical indicators
            for key, value in technical_indicators.items():
                if not np.isnan(value):
                    state_features.append(value)

            # Flatten and standardize size
            if state_features:
                state_array = np.concatenate(
                    [np.array(f).flatten() for f in state_features]
                )
                # Pad or truncate to expected RL state size
                expected_size = 100  # Adjust based on your RL model
                if len(state_array) < expected_size:
                    state_array = np.pad(
                        state_array, (0, expected_size - len(state_array)), "constant"
                    )
                else:
                    state_array = state_array[:expected_size]
                # Convert to tensor and move to GPU
                return torch.tensor(
                    state_array, dtype=torch.float32, device=self.device
                )
            else:
                # Return zero tensor on GPU
                return torch.zeros(100, dtype=torch.float32, device=self.device)

        except Exception as e:
            logger.error(f"Error preparing RL input data: {e}")
            return torch.zeros(100, dtype=torch.float32, device=self.device)

    def _store_inference_data(
        self,
        symbol: str,
        model_name: str,
        model_input: Any,
        prediction: Prediction,
        timestamp: datetime,
    ):
        """Keep a replayable record of an inference and queue a database save.

        The record bundles the raw model input (with its shape and type),
        the prediction, the price reported by ``self.data_provider`` and the
        orchestrator's confidence threshold / training flag. It replaces
        ``self.last_inference[model_name]`` and is handed to
        ``_save_to_database_manager_async`` via ``asyncio.create_task``.

        Note: ``asyncio.create_task`` needs a running event loop; without one
        it raises ``RuntimeError``, which the handler below logs.

        Args:
            symbol: Traded symbol the inference refers to.
            model_name: Model that produced the prediction.
            model_input: Input the model was run on.
            prediction: Prediction to record.
            timestamp: Inference time.
        """
        try:
            # Price at prediction time, shared by several sections below.
            current_price = self.data_provider.get_current_price(symbol)

            input_snapshot = {
                "raw_input": model_input,
                "input_shape": getattr(model_input, "shape", None),
                "input_type": str(type(model_input)),
            }
            prediction_snapshot = {
                "action": prediction.action,
                "confidence": prediction.confidence,
                "probabilities": prediction.probabilities,
                "timeframe": prediction.timeframe,
            }
            market_snapshot = {
                "price": current_price,
                "timestamp": timestamp.isoformat(),
                "symbol": symbol,
            }
            metadata_snapshot = {
                "model_metadata": prediction.metadata or {},
                "orchestrator_state": {
                    "confidence_threshold": self.confidence_threshold,
                    "training_enabled": self.training_enabled,
                },
            }

            record = {
                "timestamp": timestamp,
                "symbol": symbol,
                "model_name": model_name,
                "current_price": current_price,
                "model_input": input_snapshot,
                "prediction": prediction_snapshot,
                "market_context": market_snapshot,
                "metadata": metadata_snapshot,
                # Filled in later, once the outcome has been evaluated.
                "training_outcome": None,
                "outcome_evaluated": False,
            }

            # Newest record per model, used for immediate training.
            self.last_inference[model_name] = record

            # Persist in the background through the database manager.
            asyncio.create_task(
                self._save_to_database_manager_async(model_name, record)
            )

            logger.debug(
                f"Stored last inference for {model_name} on {symbol} and queued database save"
            )

        except Exception as e:
            logger.error(f"Error storing inference data: {e}")

    def get_model_training_data(
        self, model_name: str, symbol: str = None
    ) -> List[Dict]:
        """Return the stored training records of one model.

        Thin wrapper around ``get_training_data_from_db`` that uses its
        default look-back window and limit.

        Args:
            model_name: Model whose records are requested.
            symbol: Optional symbol filter passed through unchanged.

        Returns:
            The list of training records, or an empty list on failure.
        """
        try:
            # The database manager is the single source of training data.
            records = self.get_training_data_from_db(model_name, symbol)

            logger.info(
                f"Retrieved {len(records)} training records for {model_name}"
            )
            return records

        except Exception as e:
            logger.error(f"Error getting model training data: {e}")
            return []

    async def _trigger_immediate_training_for_model(self, model_name: str, symbol: str):
        """Train a model on its last stored inference and log predicted vs. actual.

        Steps, in order:
        1. Look up ``self.last_inference[model_name]``; return if it is missing
           or already flagged ``outcome_evaluated``.
        2. Fetch the current price; return if it is unavailable.
        3. Best effort: attach a recent direction vector to the record's
           prediction under ``"price_direction"``.
        4. Delegate evaluation and training to ``_evaluate_and_train_on_record``.
        5. Log a summary (action, price movement, reward, losses, accuracy).

        Args:
            model_name: Key of the model in ``self.last_inference``.
            symbol: Symbol to use when the stored record carries no ``"symbol"``
                of its own. The record's symbol takes precedence because only
                one record is kept per model, so it may belong to a different
                symbol than the one passed in; mixing them would compare prices
                of two different instruments.

        Returns:
            None. Every exception is caught and logged here.
        """
        try:
            if model_name not in self.last_inference:
                logger.debug(f"No previous inference data for {model_name}")
                return

            inference_record = self.last_inference[model_name]

            # Skip if already evaluated
            if inference_record.get("outcome_evaluated", False):
                logger.debug(f"Skipping {model_name} - already evaluated")
                return

            # All price/position lookups below must refer to the instrument the
            # stored inference was made for.
            record_symbol = inference_record.get("symbol") or symbol

            # Get current price for outcome evaluation
            current_price = self._get_current_price(record_symbol)
            if current_price is None:
                logger.warning(
                    f"Cannot get current price for {record_symbol}, skipping immediate training for {model_name}"
                )
                return

            logger.info(
                f"Triggering immediate training for {model_name} with current price: {current_price}"
            )

            # Before evaluating the single record, compute a short-horizon direction vector
            # from recent inferences and attach to the prediction for vector supervision.
            try:
                vector = self._compute_recent_direction_vector(model_name, record_symbol)
                if vector is not None:
                    inference_record.setdefault("prediction", {})["price_direction"] = vector
            except Exception as e:
                logger.debug(f"Vector computation failed for {model_name}: {e}")

            # Evaluate the previous prediction and train the model immediately
            await self._evaluate_and_train_on_record(inference_record, current_price)

            # ---- Everything below only builds the log summary ----
            prediction = inference_record.get("prediction") or {}
            predicted_action = prediction.get("action", "UNKNOWN")
            predicted_confidence = prediction.get("confidence", 0.0)

            # Parse the prediction once: the direction (new format) drives the
            # correctness check, the derived price only tells the reward
            # function whether a price prediction was available.
            price_direction_data = prediction.get("price_direction")
            predicted_direction = None
            predicted_price = None

            if price_direction_data:
                if (
                    isinstance(price_direction_data, dict)
                    and "direction" in price_direction_data
                ):
                    predicted_direction = price_direction_data["direction"]
                    try:
                        confidence = price_direction_data.get("confidence", 1.0)
                        # Scale by confidence and direction strength (2% max change)
                        predicted_price_change_pct = (
                            predicted_direction * confidence * 0.02
                        )
                        predicted_price = current_price * (
                            1 + predicted_price_change_pct
                        )
                    except Exception as e:
                        logger.debug(f"Error processing price direction data: {e}")
            elif prediction.get("price_prediction"):
                # Fallback to old price prediction format
                price_prediction_data = prediction["price_prediction"]
                if (
                    isinstance(price_prediction_data, list)
                    and len(price_prediction_data) > 0
                ):
                    try:
                        predicted_price_change_pct = (
                            float(price_prediction_data[0]) * 0.01
                        )
                        predicted_price = current_price * (
                            1 + predicted_price_change_pct
                        )
                    except Exception as e:
                        # Previously swallowed silently; keep it best-effort
                        # but leave a trace.
                        logger.debug(f"Error processing price prediction data: {e}")

            # Get inference price and timestamp from record
            inference_price = inference_record.get("inference_price")
            timestamp = inference_record.get("timestamp")
            if isinstance(timestamp, str):
                timestamp = datetime.fromisoformat(timestamp)

            time_diff_seconds = (datetime.now() - timestamp).total_seconds()
            actual_price_change_pct = 0.0

            if inference_price is not None:
                # Use stored inference price for comparison
                actual_price_change_pct = (
                    (current_price - inference_price) / inference_price * 100
                )
                price_outcome = f"Inference: ${inference_price:.2f} ({time_diff_seconds:.1f}s ago) -> Current: ${current_price:.2f} ({actual_price_change_pct:+.2f}%)"
            else:
                # Fall back to historical price comparison if no inference price
                try:
                    historical_data = self.data_provider.get_historical_data(
                        record_symbol, "1m", limit=10
                    )
                    if historical_data is not None and not historical_data.empty:
                        historical_price = historical_data["close"].iloc[-1]
                        actual_price_change_pct = (
                            (current_price - historical_price) / historical_price * 100
                        )
                        price_outcome = f"Historical: ${historical_price:.2f} -> Current: ${current_price:.2f} ({actual_price_change_pct:+.2f}%)"
                    else:
                        price_outcome = (
                            f"Current: ${current_price:.2f} (no historical data)"
                        )
                except Exception as e:
                    logger.warning(f"Error calculating price change: {e}")
                    price_outcome = f"Current: ${current_price:.2f} (calculation error)"

            # Determine if prediction was correct based on predicted direction
            # (when available) or on the predicted action otherwise.
            if predicted_direction is not None:
                was_correct = (
                    # Predicted UP, price went UP
                    (predicted_direction > 0.1 and actual_price_change_pct > 0.1)
                    # Predicted DOWN, price went DOWN
                    or (predicted_direction < -0.1 and actual_price_change_pct < -0.1)
                    # Predicted SIDEWAYS, price stayed stable
                    or (
                        abs(predicted_direction) <= 0.1
                        and abs(actual_price_change_pct) < 0.5
                    )
                )
            else:
                was_correct = (
                    (predicted_action == "BUY" and actual_price_change_pct > 0.1)
                    or (predicted_action == "SELL" and actual_price_change_pct < -0.1)
                    or (
                        predicted_action == "HOLD"
                        and abs(actual_price_change_pct) < 0.5
                    )
                )

            outcome_status = "CORRECT" if was_correct else "INCORRECT"

            # Get model statistics for enhanced logging
            model_stats = self.get_model_statistics(model_name)
            current_loss = model_stats.current_loss if model_stats else None
            best_loss = model_stats.best_loss if model_stats else None
            avg_loss = model_stats.average_loss if model_stats else None

            # Calculate reward for logging, for the record's own symbol (same
            # convention as _evaluate_and_train_on_record).
            current_pnl = self._get_current_position_pnl(record_symbol)
            predicted_price_vector = price_direction_data if price_direction_data else None

            reward, _ = self._calculate_sophisticated_reward(
                predicted_action,
                predicted_confidence,
                actual_price_change_pct,
                time_diff_seconds / 60,  # Convert to minutes
                has_price_prediction=predicted_price is not None,
                symbol=record_symbol,
                current_position_pnl=current_pnl,
                predicted_price_vector=predicted_price_vector,
            )

            # Enhanced logging with detailed information
            logger.info(
                f"Completed immediate training for {model_name} - {outcome_status}"
            )
            logger.info(
                f"  Prediction: {predicted_action} (confidence: {predicted_confidence:.3f})"
            )
            logger.info(f"  {price_outcome}")
            logger.info(f"  Reward: {reward:.4f} | Time: {time_diff_seconds:.1f}s")

            # Safe formatting for loss values
            current_loss_str = (
                f"{current_loss:.4f}" if current_loss is not None else "N/A"
            )
            best_loss_str = f"{best_loss:.4f}" if best_loss is not None else "N/A"
            avg_loss_str = f"{avg_loss:.4f}" if avg_loss is not None else "N/A"
            logger.info(
                f"  Loss: {current_loss_str} | Best: {best_loss_str} | Avg: {avg_loss_str}"
            )
            logger.info(f"  Outcome: {outcome_status}")

            # Add comprehensive performance summary
            if model_name in self.model_performance:
                perf = self.model_performance[model_name]
                logger.info(
                    f"  Performance: {perf['directional_accuracy']:.1%} directional ({perf['directional_correct']}/{perf['total']}) | "
                    f"{perf['accuracy']:.1%} profitable ({perf['correct']}/{perf['total']})"
                )
                if perf["pivot_attempted"] > 0:
                    logger.info(
                        f"  Pivot Detection: {perf['pivot_accuracy']:.1%} ({perf['pivot_detected']}/{perf['pivot_attempted']})"
                    )

        except Exception as e:
            logger.error(f"Error in immediate training for {model_name}: {e}")
    async def _evaluate_and_train_on_record(self, record: Dict, current_price: float):
        """Score one stored inference against the current price and train on it.

        The method
        1. derives the percentage price change since the inference (from the
           record's ``inference_price`` or, failing that, from the latest 1m
           close returned by ``self.data_provider``),
        2. computes reward and profitability via
           ``_calculate_sophisticated_reward``,
        3. updates the counters in ``self.model_performance[model_name]``,
        4. awaits ``_train_model_on_outcome`` and
        5. flags the matching entry in ``self.last_inference`` as evaluated.

        Args:
            record: Inference record. ``model_name``, ``timestamp`` (datetime or
                ISO string) and ``symbol`` are required keys; ``prediction`` and
                ``inference_price`` are optional.
            current_price: Price to compare the inference against.

        Returns:
            None. Returns early (no training) when no reference price can be
            determined. Every exception is caught and logged here.
        """
        try:
            model_name = record["model_name"]
            prediction = record.get("prediction") or {}
            timestamp = record["timestamp"]

            # Convert timestamp string back to datetime if needed
            if isinstance(timestamp, str):
                timestamp = datetime.fromisoformat(timestamp)

            # Get inference price and calculate time difference
            inference_price = record.get("inference_price")
            time_diff_seconds = (datetime.now() - timestamp).total_seconds()
            time_diff_minutes = time_diff_seconds / 60  # minutes

            symbol = record["symbol"]
            price_change_pct = 0.0

            if inference_price is not None:
                # Use stored inference price for comparison
                price_change_pct = (
                    (current_price - inference_price) / inference_price * 100
                )
                logger.debug(
                    f"Using stored inference price: ${inference_price:.2f} ({time_diff_seconds:.1f}s ago) -> ${current_price:.2f} ({price_change_pct:+.2f}%)"
                )
            else:
                # Fall back to historical data if no inference price stored
                try:
                    historical_data = self.data_provider.get_historical_data(
                        symbol, "1m", limit=10
                    )
                    if historical_data is not None and not historical_data.empty:
                        historical_price = historical_data["close"].iloc[-1]
                        price_change_pct = (
                            (current_price - historical_price) / historical_price * 100
                        )
                        logger.debug(
                            f"Using historical price comparison: ${historical_price:.2f} -> ${current_price:.2f} ({price_change_pct:+.2f}%)"
                        )
                    else:
                        logger.warning(f"No historical data available for {symbol}")
                        return
                except Exception as e:
                    logger.warning(f"Error calculating price change: {e}")
                    return

            # Enhanced reward system based on prediction confidence and price movement magnitude
            predicted_action = prediction.get("action", "HOLD")
            prediction_confidence = prediction.get("confidence", 0.5)

            current_pnl = self._get_current_position_pnl(symbol)

            # Extract price vector from prediction metadata if available
            predicted_price_vector = prediction.get("price_direction") or None

            reward, was_correct = self._calculate_sophisticated_reward(
                predicted_action,
                prediction_confidence,
                price_change_pct,
                time_diff_minutes,
                has_price_prediction=inference_price is not None,
                symbol=symbol,
                has_position=None,  # Let method determine position status
                current_position_pnl=current_pnl,
                predicted_price_vector=predicted_price_vector,
            )

            # Make sure the per-model stats dict exists and carries every key
            # used below. Keys are back-filled individually so that entries
            # created by older code keep whatever counters they already have.
            if model_name not in self.model_performance:
                self.model_performance[model_name] = {}
            perf = self.model_performance[model_name]
            for stat_key, initial in (
                ("correct", 0),  # Profitability accuracy (backwards compatible)
                ("total", 0),
                ("accuracy", 0.0),  # Profitability accuracy (backwards compatible)
                ("directional_correct", 0),
                ("directional_accuracy", 0.0),
                ("pivot_detected", 0),
                ("pivot_attempted", 0),
                ("pivot_accuracy", 0.0),
            ):
                perf.setdefault(stat_key, initial)
            price_prediction_stats = perf.setdefault(
                "price_predictions", {"total": 0, "accurate": 0, "avg_error": 0.0}
            )

            # Calculate directional accuracy separately
            directional_correct = (
                (predicted_action == "BUY" and price_change_pct > 0)
                or (predicted_action == "SELL" and price_change_pct < 0)
                or (predicted_action == "HOLD" and abs(price_change_pct) < 0.05)
            )

            # Update all accuracy metrics
            perf["total"] += 1
            if was_correct:  # Profitability accuracy
                perf["correct"] += 1
            if directional_correct:
                perf["directional_correct"] += 1

            # Update pivot detection tracking
            is_significant_move = abs(price_change_pct) > 0.08  # 0.08% threshold for "significant"
            if predicted_action in ("BUY", "SELL") and is_significant_move:
                perf["pivot_attempted"] += 1
                if directional_correct:
                    perf["pivot_detected"] += 1

            # Calculate all accuracy percentages ("total" is >= 1 at this point)
            perf["accuracy"] = perf["correct"] / perf["total"]
            perf["directional_accuracy"] = perf["directional_correct"] / perf["total"]
            if perf["pivot_attempted"] > 0:
                perf["pivot_accuracy"] = perf["pivot_detected"] / perf["pivot_attempted"]
            else:
                perf["pivot_accuracy"] = 0.0

            # Track price prediction accuracy if available
            if inference_price is not None:
                price_prediction_stats["total"] += 1

                # The tracked "error" is the absolute price change since inference.
                prediction_error_pct = abs(price_change_pct)
                # Incremental running mean over all tracked predictions.
                price_prediction_stats["avg_error"] = (
                    price_prediction_stats["avg_error"]
                    * (price_prediction_stats["total"] - 1)
                    + prediction_error_pct
                ) / price_prediction_stats["total"]

                # Consider prediction accurate if error < 1%
                if prediction_error_pct < 1.0:
                    price_prediction_stats["accurate"] += 1

                logger.debug(
                    f"Price prediction accuracy for {model_name}: "
                    f"{price_prediction_stats['accurate']}/{price_prediction_stats['total']} "
                    f"({price_prediction_stats['avg_error']:.2f}% avg error)"
                )

            # Enhanced logging with new accuracy metrics
            logger.info(f"Training evaluation for {model_name}:")
            logger.info(
                f"  Action: {predicted_action} | Confidence: {prediction_confidence:.3f}"
            )
            logger.info(
                f"  Price change: {price_change_pct:+.3f}% | Time: {time_diff_seconds:.1f}s"
            )
            logger.info(f"  Reward: {reward:.4f} | Profitable: {was_correct} | Directional: {directional_correct}")
            logger.info(
                f"  Profitability: {perf['accuracy']:.1%} ({perf['correct']}/{perf['total']}) | "
                f"Directional: {perf['directional_accuracy']:.1%} ({perf['directional_correct']}/{perf['total']})"
            )
            if perf["pivot_attempted"] > 0:
                logger.info(
                    f"  Pivot Detection: {perf['pivot_accuracy']:.1%} ({perf['pivot_detected']}/{perf['pivot_attempted']})"
                )

            # Train the specific model based on sophisticated outcome
            await self._train_model_on_outcome(
                record, was_correct, price_change_pct, reward
            )

            # Mark this inference as evaluated to prevent re-training
            if (
                model_name in self.last_inference
                and self.last_inference[model_name] == record
            ):
                self.last_inference[model_name]["outcome_evaluated"] = True
                self.last_inference[model_name]["training_outcome"] = {
                    "was_correct": was_correct,
                    "reward": reward,
                    "price_change_pct": price_change_pct,
                    "evaluated_at": datetime.now().isoformat(),
                }

            price_pred_info = (
                f"inference: ${inference_price:.2f}"
                if inference_price is not None
                else "no inference price"
            )
            # Use predicted_action rather than prediction['action']: the
            # f-string is built even when debug logging is off, and a
            # prediction without "action" would raise KeyError here after
            # training has already succeeded.
            logger.debug(
                f"Evaluated {model_name} prediction: {'✓' if was_correct else '✗'} "
                f"({predicted_action}, {price_change_pct:.2f}% change, "
                f"confidence: {prediction_confidence:.3f}, {price_pred_info}, reward: {reward:.3f})"
            )

        except Exception as e:
            logger.error(f"Error evaluating and training on record: {e}")

    def _is_pivot_point(self, price_change_pct: float, prediction_confidence: float, time_diff_minutes: float) -> tuple[bool, str, float]:
        """
        Detect if this is a significant pivot point worth trading.
        Pivot points are the key moments where markets change direction or momentum.
        
        Returns:
            tuple: (is_pivot, pivot_type, pivot_strength)
        """
        abs_change = abs(price_change_pct)
        
        # Pivot point thresholds (much more realistic for crypto)
        minor_pivot = 0.08   # 0.08% - small but tradeable pivot
        medium_pivot = 0.25  # 0.25% - significant pivot
        major_pivot = 0.6    # 0.6% - major pivot
        massive_pivot = 1.2  # 1.2% - massive pivot
        
        # Time-based multipliers (faster pivots are more valuable)
        time_multiplier = 1.0
        if time_diff_minutes < 2.0:      # Very fast pivot
            time_multiplier = 2.0
        elif time_diff_minutes < 5.0:    # Fast pivot
            time_multiplier = 1.5
        elif time_diff_minutes > 15.0:   # Slow pivot - less valuable
            time_multiplier = 0.7
            
        # Confidence multiplier (high confidence pivots are more valuable)
        confidence_multiplier = 0.5 + (prediction_confidence * 1.5)  # 0.5 to 2.0
        
        if abs_change >= massive_pivot:
            return True, "MASSIVE_PIVOT", 10.0 * time_multiplier * confidence_multiplier
        elif abs_change >= major_pivot:
            return True, "MAJOR_PIVOT", 5.0 * time_multiplier * confidence_multiplier
        elif abs_change >= medium_pivot:
            return True, "MEDIUM_PIVOT", 2.5 * time_multiplier * confidence_multiplier
        elif abs_change >= minor_pivot:
            return True, "MINOR_PIVOT", 1.2 * time_multiplier * confidence_multiplier
        else:
            return False, "NO_PIVOT", 0.1  # Very small reward for noise
    
    def _calculate_sophisticated_reward(
        self,
        predicted_action: str,
        prediction_confidence: float,
        price_change_pct: float,
        time_diff_minutes: float,
        has_price_prediction: bool = False,
        symbol: str = None,
        has_position: bool = None,
        current_position_pnl: float = 0.0,
        predicted_price_vector: dict = None,
    ) -> tuple[float, bool]:
        """
        PIVOT-POINT FOCUSED REWARD SYSTEM
        
        This system heavily rewards models for correctly identifying pivot points - 
        the actual profitable trading opportunities in the market. Small movements 
        are treated as noise and given minimal rewards.
        
        Key Features:
        - Separate directional accuracy vs profitability accuracy tracking
        - Heavy rewards for successful pivot point detection
        - Minimal penalties for noise (small movements)
        - Time-weighted rewards (faster detection = better)
        - Confidence-weighted rewards (higher confidence = better)
        
        Args:
            predicted_action: The predicted action ('BUY', 'SELL', 'HOLD')
            prediction_confidence: Model's confidence in the prediction (0.0 to 1.0)
            price_change_pct: Actual price change percentage
            time_diff_minutes: Time elapsed since prediction
            has_price_prediction: Whether the model made a price prediction
            symbol: Trading symbol (for position lookup)
            has_position: Whether we currently have a position (if None, will be looked up)
            current_position_pnl: Current unrealized P&L of open position (0.0 if no position)
            predicted_price_vector: Dict with 'direction' (-1 to 1) and 'confidence' (0 to 1)

        Returns:
            tuple: (reward, directional_correct, profitability_correct, pivot_detected)
        """
        try:
            # Store original action for directional accuracy tracking
            original_action = predicted_action
            
            # PIVOT POINT DETECTION
            is_pivot, pivot_type, pivot_strength = self._is_pivot_point(
                price_change_pct, prediction_confidence, time_diff_minutes
            )
            
            # DIRECTIONAL ACCURACY (simple direction prediction)
            directional_correct = False
            if predicted_action == "BUY" and price_change_pct > 0:
                directional_correct = True
            elif predicted_action == "SELL" and price_change_pct < 0:
                directional_correct = True
            elif predicted_action == "HOLD" and abs(price_change_pct) < 0.05:  # Very small movement
                directional_correct = True
                
            # PROFITABILITY ACCURACY (fee-aware profitable trades)
            fee_cost = 0.10  # 0.10% round trip fee cost (realistic for most exchanges)
            profitability_correct = False
            
            if predicted_action == "BUY" and price_change_pct > fee_cost:
                profitability_correct = True
            elif predicted_action == "SELL" and price_change_pct < -fee_cost:
                profitability_correct = True
            elif predicted_action == "HOLD" and abs(price_change_pct) < fee_cost:
                profitability_correct = True

            # Determine current position status if not provided
            if has_position is None and symbol:
                has_position = self._has_open_position(symbol)
                # Get current position P&L if we have a position
                if has_position and current_position_pnl == 0.0:
                    current_position_pnl = self._get_current_position_pnl(symbol)
            elif has_position is None:
                has_position = False

            # PIVOT POINT REWARD CALCULATION
            base_reward = 0.0
            pivot_bonus = 0.0
            
            # For backwards compatibility, use profitability_correct as the main "was_correct"
            was_correct = profitability_correct
            
            # MASSIVE REWARDS FOR SUCCESSFUL PIVOT POINT DETECTION
            if is_pivot and directional_correct:
                # Base pivot reward
                base_reward = pivot_strength
                
                # EXTRAORDINARY bonuses for successful pivot predictions
                if pivot_type == "MASSIVE_PIVOT":
                    pivot_bonus = 50.0 * prediction_confidence  # Up to 50x reward!
                    logger.info(f"MASSIVE PIVOT SUCCESS: {pivot_type} detected with {prediction_confidence:.2f} confidence = {pivot_bonus:.1f}x bonus!")
                elif pivot_type == "MAJOR_PIVOT":
                    pivot_bonus = 20.0 * prediction_confidence  # Up to 20x reward!
                    logger.info(f"MAJOR PIVOT SUCCESS: {pivot_type} detected with {prediction_confidence:.2f} confidence = {pivot_bonus:.1f}x bonus!")
                elif pivot_type == "MEDIUM_PIVOT":
                    pivot_bonus = 8.0 * prediction_confidence   # Up to 8x reward!
                    logger.info(f"MEDIUM PIVOT SUCCESS: {pivot_type} detected with {prediction_confidence:.2f} confidence = {pivot_bonus:.1f}x bonus!")
                elif pivot_type == "MINOR_PIVOT":
                    pivot_bonus = 3.0 * prediction_confidence   # Up to 3x reward!
                    logger.info(f"MINOR PIVOT SUCCESS: {pivot_type} detected with {prediction_confidence:.2f} confidence = {pivot_bonus:.1f}x bonus!")
                    
                # Additional time-based bonus for early detection
                if time_diff_minutes < 1.0:
                    time_bonus = pivot_bonus * 0.5  # 50% bonus for very fast detection
                    pivot_bonus += time_bonus
                    logger.info(f"EARLY DETECTION BONUS: Detected {pivot_type} in {time_diff_minutes:.1f}m = +{time_bonus:.1f} bonus")
                
                base_reward += pivot_bonus
                
            elif is_pivot and not directional_correct:
                # MODERATE penalty for missing pivot points (still valuable to learn from)
                base_reward = -pivot_strength * 0.3  # Small penalty to encourage learning
                logger.debug(f"MISSED PIVOT: {pivot_type} missed, small penalty = {base_reward:.2f}")
                
            elif not is_pivot and directional_correct:
                # Small reward for correct direction on non-pivots (noise)
                base_reward = 0.2 * prediction_confidence
                logger.debug(f"NOISE CORRECT: Correct direction on noise movement = {base_reward:.2f}")
                
            else:
                # Very small penalty for wrong direction on noise (don't overtrain on noise)
                base_reward = -0.1 * prediction_confidence
                logger.debug(f"NOISE INCORRECT: Wrong direction on noise movement = {base_reward:.2f}")
            
            # POSITION-AWARE ADJUSTMENTS (conviction-aware; learned bias via reward shaping)
            if has_position:
                # Derive conviction from prediction_confidence (0..1)
                conviction = max(0.0, min(1.0, float(prediction_confidence)))
                # Estimate expected move magnitude if provided by vector; else 0
                expected_move_pct = 0.0
                try:
                    if predicted_price_vector and isinstance(predicted_price_vector, dict):
                        # Accept either a normalized magnitude or compute from price fields if present
                        if 'expected_move_pct' in predicted_price_vector:
                            expected_move_pct = float(predicted_price_vector.get('expected_move_pct', 0.0))
                        elif 'predicted_price' in predicted_price_vector and 'current_price' in predicted_price_vector:
                            cp = float(predicted_price_vector.get('current_price') or 0.0)
                            pp = float(predicted_price_vector.get('predicted_price') or 0.0)
                            if cp > 0 and pp > 0:
                                expected_move_pct = ((pp - cp) / cp) * 100.0
                except Exception:
                    expected_move_pct = 0.0

                # Normalize expected move impact into [0,1]
                expected_move_norm = max(0.0, min(1.0, abs(expected_move_pct) / 2.0))  # 2% move caps to 1.0

                # Conviction-tolerant drawdown penalty (cut losers early unless strong conviction for recovery)
                if current_position_pnl < 0:
                    pnl_loss = abs(current_position_pnl)
                    # Scale negative PnL into [0,1] using a soft scale (1% -> 1.0 cap)
                    loss_norm = max(0.0, min(1.0, pnl_loss / 1.0))
                    tolerance = (1.0 - min(0.9, conviction * expected_move_norm))  # high conviction reduces penalty
                    penalty = loss_norm * tolerance
                    base_reward -= 1.0 * penalty
                    logger.debug(
                        f"CONVICTION DRAWdown: pnl={current_position_pnl:.3f}, conv={conviction:.2f}, exp={expected_move_norm:.2f}, penalty={penalty:.3f}"
                    )
                else:
                    # Let winners run when conviction supports it
                    gain = max(0.0, current_position_pnl)
                    gain_norm = max(0.0, min(1.0, gain / 1.0))
                    run_bonus = 0.2 * gain_norm * (0.5 + 0.5 * conviction)
                    # Small nudge to keep holding if directionally correct
                    if predicted_action == "HOLD" and price_change_pct > 0:
                        base_reward += run_bonus
                        logger.debug(f"RUN BONUS: gain={gain:.3f}, conv={conviction:.2f}, bonus={run_bonus:.3f}")
            
            # PRICE VECTOR BONUS (if available)
            if predicted_price_vector and isinstance(predicted_price_vector, dict):
                vector_bonus = self._calculate_price_vector_bonus(
                    predicted_price_vector, price_change_pct, abs(price_change_pct), prediction_confidence
                )
                if vector_bonus > 0:
                    base_reward += vector_bonus
                    logger.debug(f"PRICE VECTOR BONUS: +{vector_bonus:.3f}")

            # Time decay factor (pivot detection should be fast)
            time_decay = max(0.3, 1.0 - (time_diff_minutes / 30.0))  # Decay over 30 minutes, min 30%
            
            # Apply time decay
            final_reward = base_reward * time_decay
            
            # Clamp reward to reasonable range (higher range for pivot bonuses)
            final_reward = max(-10.0, min(100.0, final_reward))
            
            # Log detailed accuracy information
            logger.debug(
                f"REWARD CALCULATION: action={predicted_action}, confidence={prediction_confidence:.3f}, "
                f"price_change={price_change_pct:.3f}%, pivot={is_pivot}/{pivot_type}, "
                f"directional_correct={directional_correct}, profitability_correct={profitability_correct}, "
                f"reward={final_reward:.3f}"
            )
            
            return final_reward, was_correct

        except Exception as e:
            logger.error(f"Error calculating sophisticated reward: {e}")
            # Fallback to simple directional accuracy
            simple_correct = (
                (predicted_action == "BUY" and price_change_pct > 0) or
                (predicted_action == "SELL" and price_change_pct < 0) or
                (predicted_action == "HOLD" and abs(price_change_pct) < 0.05)
            )
            return (1.0 if simple_correct else -0.1, simple_correct)

    def _calculate_price_vector_bonus(
        self, 
        predicted_vector: dict, 
        actual_price_change_pct: float, 
        abs_movement: float,
        prediction_confidence: float
    ) -> float:
        """
        Calculate bonus reward for accurate price direction and magnitude predictions
        
        Args:
            predicted_vector: Dict with 'direction' (-1 to 1) and 'confidence' (0 to 1)
            actual_price_change_pct: Actual price change percentage
            abs_movement: Absolute value of price movement
            prediction_confidence: Overall model confidence
            
        Returns:
            Bonus reward value (0 or positive)
        """
        try:
            predicted_direction = predicted_vector.get('direction', 0.0)
            vector_confidence = predicted_vector.get('confidence', 0.0)
            
            # Skip if vector prediction is too weak
            if abs(predicted_direction) < 0.1 or vector_confidence < 0.3:
                return 0.0
                
            # Calculate direction accuracy
            actual_direction = 1.0 if actual_price_change_pct > 0 else -1.0 if actual_price_change_pct < 0 else 0.0
            direction_accuracy = 0.0
            
            if actual_direction != 0.0:  # Only if there was actual movement
                # Check if predicted direction matches actual direction
                if (predicted_direction > 0 and actual_direction > 0) or (predicted_direction < 0 and actual_direction < 0):
                    direction_accuracy = min(abs(predicted_direction), 1.0)  # Stronger prediction = higher bonus
                    
                    # MAGNITUDE ACCURACY BONUS
                    # Convert predicted direction to expected magnitude (scaled by confidence)
                    predicted_magnitude = abs(predicted_direction) * vector_confidence * 2.0  # Scale to ~2% max
                    magnitude_error = abs(predicted_magnitude - abs_movement)
                    
                    # Bonus for accurate magnitude prediction (lower error = higher bonus)
                    if magnitude_error < 1.0:  # Within 1% error
                        magnitude_accuracy = max(0, 1.0 - magnitude_error)  # 0 to 1.0
                        
                        # COMBINED BONUS CALCULATION
                        base_vector_bonus = direction_accuracy * magnitude_accuracy * vector_confidence
                        
                        # Scale bonus based on movement size (bigger movements get bigger bonuses)
                        if abs_movement > 2.0:  # Massive movements
                            scale_factor = 3.0
                        elif abs_movement > 1.0:  # Rapid movements
                            scale_factor = 2.0
                        elif abs_movement > 0.5:  # Strong movements
                            scale_factor = 1.5
                        else:
                            scale_factor = 1.0
                        
                        final_bonus = base_vector_bonus * scale_factor * prediction_confidence
                        
                        logger.debug(f"VECTOR ANALYSIS: pred_dir={predicted_direction:.3f}, actual_dir={actual_direction:.3f}, "
                                   f"pred_mag={predicted_magnitude:.3f}, actual_mag={abs_movement:.3f}, "
                                   f"dir_acc={direction_accuracy:.3f}, mag_acc={magnitude_accuracy:.3f}, bonus={final_bonus:.3f}")
                        
                        return min(final_bonus, 2.0)  # Cap bonus at 2.0
                        
            return 0.0
            
        except Exception as e:
            logger.error(f"Error calculating price vector bonus: {e}")
            return 0.0

    def _compute_recent_direction_vector(self, model_name: str, symbol: str) -> Optional[Dict[str, float]]:
        """
        Compute a price direction vector from recent stored inferences by comparing
        current price with prices at the times of those inferences.

        Returns a dict: {'direction': float in [-1,1], 'confidence': float in [0,1]}
        """
        try:
            from statistics import median
            recent = self.recent_inferences.get(model_name)
            if not recent or len(recent) < 2:
                return None

            # Gather tuples (delta_pct, age_seconds) for last N inferences with stored price
            deltas = []
            now_price = self._get_current_price(symbol)
            if now_price is None or now_price <= 0:
                return None

            for rec in list(recent):
                infer_price = rec.get("inference_price")
                ts = rec.get("timestamp")
                if isinstance(ts, str):
                    try:
                        ts = datetime.fromisoformat(ts)
                    except Exception:
                        ts = None
                if infer_price is None or infer_price <= 0 or ts is None:
                    continue

                pct = (now_price - infer_price) / infer_price * 100.0
                age_sec = max(1.0, (datetime.now() - ts).total_seconds())
                deltas.append((pct, age_sec))

            if not deltas:
                return None

            # Weight recent observations more: weight = 1 / sqrt(age_seconds)
            weighted_sum = 0.0
            weight_total = 0.0
            magnitudes = []
            for pct, age in deltas:
                w = 1.0 / (age ** 0.5)
                weighted_sum += pct * w
                weight_total += w
                magnitudes.append(abs(pct))

            if weight_total <= 0:
                return None

            avg_pct = weighted_sum / weight_total  # signed percentage

            # Map avg_pct to direction in [-1, 1] using tanh on scaled percent (2% -> ~1)
            scale = 2.0
            direction = float(np.tanh(avg_pct / scale))

            # Confidence combines recency, agreement, and magnitude
            # Use normalized median magnitude capped at 2%
            med_mag = median(magnitudes) if magnitudes else 0.0
            mag_norm = max(0.0, min(1.0, med_mag / 2.0))

            # Agreement: fraction of deltas with the same sign as avg_pct
            if avg_pct > 0:
                agree = sum(1 for pct, _ in deltas if pct > 0) / len(deltas)
            elif avg_pct < 0:
                agree = sum(1 for pct, _ in deltas if pct < 0) / len(deltas)
            else:
                agree = 0.5

            # Recency: average weight normalized
            recency = max(0.0, min(1.0, (weight_total / len(deltas)) * (1.0 / (1.0 ** 0.5))))

            confidence = float(max(0.0, min(1.0, 0.5 * agree + 0.4 * mag_norm + 0.1 * recency)))

            return {"direction": direction, "confidence": confidence}

        except Exception as e:
            logger.debug(f"Error computing recent direction vector for {model_name}: {e}")
            return None

    async def _train_model_on_outcome(
        self,
        record: Dict,
        was_correct: bool,
        price_change_pct: float,
        sophisticated_reward: float = None,
    ):
        """Train models on outcome - now includes decision fusion"""
        try:
            model_name = record.get("model_name")
            if not model_name:
                logger.warning("No model name in training record")
                return

            # Calculate reward if not provided
            if sophisticated_reward is None:
                symbol = record.get("symbol", self.symbol)
                current_pnl = self._get_current_position_pnl(symbol)
                
                # Extract price vector from record if available
                predicted_price_vector = record.get("price_direction") or record.get("predicted_price_vector")
                
                sophisticated_reward, _ = self._calculate_sophisticated_reward(
                    record.get("action", "HOLD"),
                    record.get("confidence", 0.5),
                    price_change_pct,
                    record.get("time_diff_minutes", 1.0),
                    record.get("has_price_prediction", False),
                    symbol=symbol,
                    current_position_pnl=current_pnl,
                    predicted_price_vector=predicted_price_vector,
                )

            # Train decision fusion model if it's the model being evaluated
            if model_name == "decision_fusion":
                await self._train_decision_fusion_on_outcome(
                    record, was_correct, price_change_pct, sophisticated_reward
                )
                return

            # Original training logic for other models
            """Universal training for any model based on prediction outcome with sophisticated reward system"""
            try:
                model_name = record["model_name"]
                model_input = record["model_input"]
                prediction = record["prediction"]

                # Use sophisticated reward if provided, otherwise fallback to simple reward
                reward = (
                    sophisticated_reward
                    if sophisticated_reward is not None
                    else (1.0 if was_correct else -0.5)
                )

                # Get the actual model from registry
                model_interface = None
                if hasattr(self, "model_registry") and self.model_registry:
                    model_interface = self.model_registry.models.get(model_name)
                    logger.debug(
                        f"Found model interface {model_name} in registry: {type(model_interface).__name__}"
                    )
                else:
                    logger.debug(f"No model registry available for {model_name}")

                if not model_interface:
                    logger.warning(
                        f"Model {model_name} not found in registry, skipping training"
                    )
                    return

                # Get the underlying model from the interface
                underlying_model = getattr(model_interface, "model", None)
                if not underlying_model:
                    logger.warning(
                        f"No underlying model found for {model_name}, skipping training"
                    )
                    return

                logger.debug(
                    f"Training {model_name} with reward={reward:.3f} (was_correct={was_correct})"
                )
                logger.debug(f"Model interface type: {type(model_interface).__name__}")
                logger.debug(f"Underlying model type: {type(underlying_model).__name__}")

                # Debug: Log available training methods on both interface and underlying model
                interface_methods = []
                underlying_methods = []

                for method in [
                    "train_on_outcome",
                    "add_experience",
                    "remember",
                    "replay",
                    "add_training_sample",
                    "train",
                    "train_with_reward",
                    "update_loss",
                ]:
                    if hasattr(model_interface, method):
                        interface_methods.append(method)
                    if hasattr(underlying_model, method):
                        underlying_methods.append(method)

                logger.debug(f"Available methods on interface: {interface_methods}")
                logger.debug(f"Available methods on underlying model: {underlying_methods}")

                training_success = False

                # Try training based on model type and available methods
                if isinstance(model_interface, RLAgentInterface):
                    # RL Agent Training
                    training_success = await self._train_rl_model(
                        underlying_model, model_name, model_input, prediction, reward
                    )

                elif isinstance(model_interface, CNNModelInterface):
                    # CNN Model Training
                    training_success = await self._train_cnn_model(
                        underlying_model, model_name, record, prediction, reward
                    )

                elif "extrema" in model_name.lower():
                    # Extrema Trainer - doesn't need traditional training
                    logger.debug(
                        f"Extrema trainer {model_name} doesn't require outcome-based training"
                    )
                    training_success = True

                elif "cob_rl" in model_name.lower():
                    # COB RL Model Training
                    training_success = await self._train_cob_rl_model(
                        underlying_model, model_name, model_input, prediction, reward
                    )

                else:
                    # Generic model training
                    training_success = await self._train_generic_model(
                        underlying_model, model_name, model_input, prediction, reward
                    )

                if training_success:
                    logger.debug(f"Successfully trained {model_name} on outcome")
                else:
                    logger.warning(f"Failed to train {model_name} on outcome")

            except Exception as e:
                logger.error(f"Error in universal training for {model_name}: {e}")
                # Fallback to basic training if available
                try:
                    await self._train_model_fallback(
                        model_name, underlying_model, model_input, prediction, reward
                    )
                except Exception as fallback_error:
                    logger.error(f"Fallback training also failed for {model_name}: {fallback_error}")

        except Exception as e:
            logger.error(f"Error training model {model_name} on outcome: {e}")

    async def _train_rl_model(
        self, model, model_name: str, model_input, prediction: Dict, reward: float
    ) -> bool:
        """Store one experience on an RL model and run a replay step when possible.

        The predicted action is mapped to its index in ("SELL", "HOLD", "BUY"),
        ``model_input`` is converted to a flat, finite numpy state, and the
        experience is stored through ``model.remember`` with ``next_state`` set
        to the same state and ``done=True``. Once ``len(model.memory)`` reaches
        ``model.batch_size`` (32 when the attribute is missing),
        ``model.replay()`` is called and its loss is recorded.

        Returns:
            True when the experience was stored but memory is still below the
            batch size, or when replay returned a positive loss. False for an
            invalid action, an unusable state, a model without ``remember``, a
            zero / missing / non-positive loss, or any exception.
        """
        try:
            # Position in this tuple is the action index handed to the model
            action_order = ("SELL", "HOLD", "BUY")
            if prediction["action"] not in action_order:
                logger.warning(f"Invalid action {prediction['action']} for RL training")
                return False
            action_idx = action_order.index(prediction["action"])

            # Build the state array and reject anything that is not an ndarray
            state = self._convert_to_rl_state(model_input, model_name)
            if state is None:
                logger.warning(
                    f"Failed to convert model_input to RL state for {model_name}"
                )
                return False
            if not isinstance(state, np.ndarray):
                logger.warning(
                    f"State is not numpy array for {model_name}: {type(state)}"
                )
                return False

            # Object arrays must be coercible to float32 to be usable
            if state.dtype == object:
                logger.warning(
                    f"State contains object dtype for {model_name}, attempting conversion"
                )
                try:
                    state = state.astype(np.float32)
                except (ValueError, TypeError) as e:
                    logger.error(
                        f"Cannot convert object state to float32 for {model_name}: {e}"
                    )
                    return False

            # Flatten to 1D, then replace NaN / +inf / -inf with 0.0 / 1.0 / -1.0
            if state.ndim > 1:
                state = state.flatten()
            state = np.nan_to_num(state, nan=0.0, posinf=1.0, neginf=-1.0)

            logger.debug(
                f"Converted state for {model_name}: shape={state.shape}, dtype={state.dtype}"
            )

            # Models without an experience buffer cannot be trained here
            if not hasattr(model, "remember"):
                return False

            model.remember(
                state=state,
                action=action_idx,
                reward=reward,
                next_state=state,  # Simplified - using same state
                done=True,
            )
            logger.debug(
                f"Added experience to {model_name}: action={prediction['action']}, reward={reward:.3f}"
            )

            stored = len(getattr(model, "memory", []))
            needed = getattr(model, "batch_size", 32)
            if stored < needed:
                logger.debug(
                    f"Not enough experiences for {model_name}: {stored}/{needed}"
                )
                return True  # Experience added successfully, training will happen later

            logger.debug(
                f"Training {model_name} with {stored} experiences"
            )

            # Ensure model is in training mode
            if hasattr(model, "policy_net"):
                model.policy_net.train()

            started_at = time.time()
            training_loss = model.replay()
            training_duration_ms = (time.time() - started_at) * 1000

            if training_loss is not None and training_loss > 0:
                self.update_model_loss(model_name, training_loss)
                self._update_model_training_statistics(
                    model_name, training_loss, training_duration_ms
                )
                logger.debug(
                    f"RL training completed for {model_name}: loss={training_loss:.4f}, time={training_duration_ms:.1f}ms"
                )
                return True

            if training_loss == 0.0:
                logger.warning(
                    f"RL training returned zero loss for {model_name} - possible gradient issue"
                )
                # Timing is still recorded, but the step counts as failed
                self._update_model_training_statistics(
                    model_name, training_duration_ms=training_duration_ms
                )
                return False

            # No loss (or a negative one) came back: record timing only
            self._update_model_training_statistics(
                model_name, training_duration_ms=training_duration_ms
            )
            return False

        except Exception as e:
            logger.error(f"Error training RL model {model_name}: {e}")
            return False

    def _convert_to_rl_state(
        self, model_input, model_name: str
    ) -> Optional[np.ndarray]:
        """Convert various model input formats to RL state numpy array"""
        try:
            # Method 1: BaseDataInput with get_feature_vector
            if hasattr(model_input, "get_feature_vector"):
                state = model_input.get_feature_vector()
                if isinstance(state, np.ndarray):
                    return state
                logger.debug(f"get_feature_vector returned non-array: {type(state)}")

            # Method 2: Already a numpy array
            if isinstance(model_input, np.ndarray):
                return model_input

            # Method 3: Dictionary with feature data
            if isinstance(model_input, dict):
                # Check if dictionary is empty - this is the main issue!
                if not model_input:
                    logger.warning(
                        f"Empty dictionary passed as model_input for {model_name}, using build_base_data_input fallback"
                    )
                    # Use the same data source as the new training system
                    try:
                        # Try to get symbol from the record context or use default
                        symbol = "ETH/USDT"  # Default symbol
                        base_data = self.build_base_data_input(symbol)
                        if base_data and hasattr(base_data, "get_feature_vector"):
                            state = base_data.get_feature_vector()
                            if isinstance(state, np.ndarray) and state.size > 0:
                                logger.info(
                                    f"Generated fresh state for {model_name} from build_base_data_input: shape={state.shape}"
                                )
                                return state
                    except Exception as e:
                        logger.debug(f"build_base_data_input fallback failed for {model_name}: {e}")
                    
                    # Fallback to data provider method
                    return self._generate_fresh_state_fallback(model_name)

                # Try to extract features from dictionary
                if "features" in model_input:
                    features = model_input["features"]
                    if isinstance(features, np.ndarray):
                        return features

                # Try to build features from dictionary values
                feature_list = []
                for key, value in model_input.items():
                    if isinstance(value, (int, float)):
                        feature_list.append(value)
                    elif isinstance(value, np.ndarray):
                        feature_list.extend(value.flatten())
                    elif isinstance(value, (list, tuple)):
                        for item in value:
                            if isinstance(item, (int, float)):
                                feature_list.append(item)

                if feature_list:
                    return np.array(feature_list, dtype=np.float32)
                else:
                    logger.warning(
                        f"No numerical features found in dictionary for {model_name}, using data provider fallback"
                    )
                    return self._generate_fresh_state_fallback(model_name)

            # Method 4: List or tuple
            if isinstance(model_input, (list, tuple)):
                try:
                    return np.array(model_input, dtype=np.float32)
                except (ValueError, TypeError):
                    logger.warning(
                        f"Cannot convert list/tuple to numpy array for {model_name}"
                    )

            # Method 5: Single numeric value
            if isinstance(model_input, (int, float)):
                return np.array([model_input], dtype=np.float32)

            # Method 6: Final fallback - generate fresh state
            logger.warning(
                f"Cannot convert model_input to RL state for {model_name}: {type(model_input)}, using fresh state fallback"
            )
            return self._generate_fresh_state_fallback(model_name)

        except Exception as e:
            logger.error(
                f"Error converting model_input to RL state for {model_name}: {e}"
            )
            return self._generate_fresh_state_fallback(model_name)

    def _generate_fresh_state_fallback(self, model_name: str) -> np.ndarray:
        """Build a replacement state vector when the stored model input is unusable.

        Sources are tried in order and the first non-empty numpy array wins:
          1. ``self.build_base_data_input("ETH/USDT")``
          2. ``self.data_provider.build_base_data_input("ETH/USDT")``
          3. ``get_current_state()`` on the model's registry interface
          4. an all-zero float32 vector: 500 entries when the name contains
             "cnn", else 2000 when it contains "cob", else 403

        A failure in sources 1-3 is logged at debug level and the next source
        is tried. Any other error yields a 403-entry zero vector.

        NOTE(review): the all-zero default looks at odds with the module-level
        policy against placeholder data - confirm it is intended.

        Args:
            model_name: Used for log messages, the registry lookup and the
                choice of default size.

        Returns:
            A state array; this method always returns an ndarray.
        """

        def _is_usable(candidate) -> bool:
            # Only a non-empty ndarray counts as a state
            return isinstance(candidate, np.ndarray) and candidate.size > 0

        try:
            # Source 1: the orchestrator's own base data input
            try:
                default_symbol = "ETH/USDT"  # Default symbol
                base_input = self.build_base_data_input(default_symbol)
                if base_input and hasattr(base_input, "get_feature_vector"):
                    vector = base_input.get_feature_vector()
                    if _is_usable(vector):
                        logger.info(
                            f"Generated fresh state for {model_name} from build_base_data_input: shape={vector.shape}"
                        )
                        return vector
            except Exception as e:
                logger.debug(
                    f"build_base_data_input fresh state generation failed for {model_name}: {e}"
                )

            # Source 2: the data provider's base data input
            if hasattr(self, "data_provider") and self.data_provider:
                try:
                    provider_input = self.data_provider.build_base_data_input("ETH/USDT")
                    if provider_input and hasattr(provider_input, "get_feature_vector"):
                        vector = provider_input.get_feature_vector()
                        if _is_usable(vector):
                            logger.info(
                                f"Generated fresh state for {model_name} from data provider: shape={vector.shape}"
                            )
                            return vector
                except Exception as e:
                    logger.debug(
                        f"Data provider fresh state generation failed for {model_name}: {e}"
                    )

            # Source 3: the registered model interface's own current state
            if hasattr(self, "model_registry") and self.model_registry:
                try:
                    interface = self.model_registry.models.get(model_name)
                    if interface and hasattr(interface, "get_current_state"):
                        vector = interface.get_current_state()
                        if _is_usable(vector):
                            logger.info(
                                f"Generated fresh state for {model_name} from model interface: shape={vector.shape}"
                            )
                            return vector
                except Exception as e:
                    logger.debug(
                        f"Model interface fresh state generation failed for {model_name}: {e}"
                    )

            # Source 4: zero vector sized by model family ("cnn" is checked before "cob")
            lowered_name = model_name.lower()
            if "cnn" in lowered_name:
                default_state_size = 500  # Larger for CNN models
            elif "cob" in lowered_name:
                default_state_size = 2000  # Much larger for COB models
            else:
                default_state_size = 403  # Expected state size for DQN models

            logger.warning(
                f"Using default zero state for {model_name} with size {default_state_size}"
            )
            return np.zeros(default_state_size, dtype=np.float32)

        except Exception as e:
            logger.error(f"Error generating fresh state fallback for {model_name}: {e}")
            # Ultimate fallback
            return np.zeros(403, dtype=np.float32)

    async def _train_cnn_model(
        self, model, model_name: str, record: Dict, prediction: Dict, reward: float
    ) -> bool:
        """Train CNN model directly (no adapter)"""
        try:
            # Direct CNN model training (no adapter)
            if (
                hasattr(self, "cnn_model")
                and self.cnn_model
                and "cnn" in model_name.lower()
            ):
                symbol = record.get("symbol", "ETH/USDT")
                actual_action = prediction["action"]

                # Create training sample from record
                model_input = record.get("model_input")
                
                # If model_input is None, try to generate fresh state for training
                if model_input is None:
                    logger.debug(f"No stored model input for {model_name}, generating fresh state")
                    try:
                        # Generate fresh input state for training
                        if hasattr(self, 'data_provider') and self.data_provider:
                            # Use data provider to generate current market state
                            fresh_state = self._generate_fresh_state_fallback(model_name)
                            if fresh_state is not None and len(fresh_state) > 0:
                                model_input = fresh_state
                                logger.debug(f"Generated fresh training state for {model_name}: shape={fresh_state.shape if hasattr(fresh_state, 'shape') else len(fresh_state)}")
                            else:
                                logger.warning(f"Failed to generate fresh state for {model_name}")
                        else:
                            logger.warning(f"No data provider available for generating fresh state for {model_name}")
                    except Exception as e:
                        logger.warning(f"Error generating fresh state for {model_name}: {e}")
                
                if model_input is not None:
                    # Convert to tensor and ensure device placement
                    device = next(self.cnn_model.parameters()).device

                    if hasattr(model_input, "get_feature_vector"):
                        features = model_input.get_feature_vector()
                    elif isinstance(model_input, np.ndarray):
                        features = model_input
                    else:
                        features = np.array(model_input, dtype=np.float32)

                    features_tensor = torch.tensor(
                        features, dtype=torch.float32, device=device
                    )
                    if features_tensor.dim() == 1:
                        features_tensor = features_tensor.unsqueeze(0)

                    # Convert action to index
                    actions = ["BUY", "SELL", "HOLD"]
                    action_idx = (
                        actions.index(actual_action) if actual_action in actions else 2
                    )
                    action_tensor = torch.tensor(
                        [action_idx], dtype=torch.long, device=device
                    )
                    reward_tensor = torch.tensor(
                        [reward], dtype=torch.float32, device=device
                    )

                    # Perform training step
                    self.cnn_model.train()
                    self.cnn_optimizer.zero_grad()

                    # Forward pass
                    (
                        q_values,
                        extrema_pred,
                        price_direction_pred,
                        features_refined,
                        advanced_pred,
                    ) = self.cnn_model(features_tensor)

                    # Calculate primary Q-value loss
                    q_values_selected = q_values.gather(
                        1, action_tensor.unsqueeze(1)
                    ).squeeze(1)
                    target_q = reward_tensor  # Simplified target
                    q_loss = nn.MSELoss()(q_values_selected, target_q)

                    # Calculate auxiliary losses for price direction and extrema
                    total_loss = q_loss

                    # Price direction loss
                    if (
                        price_direction_pred is not None
                        and price_direction_pred.shape[0] > 0
                    ):
                        # Supervised vector target from recent inferences if available
                        vector_target = None
                        try:
                            vector_target = self._compute_recent_direction_vector(model_name, symbol)
                        except Exception:
                            vector_target = None

                        price_direction_loss = self._calculate_cnn_price_direction_loss(
                            price_direction_pred, reward_tensor, action_tensor, vector_target
                        )
                        if price_direction_loss is not None:
                            total_loss = total_loss + 0.2 * price_direction_loss

                    # Extrema loss
                    if extrema_pred is not None and extrema_pred.shape[0] > 0:
                        extrema_loss = self._calculate_cnn_extrema_loss(
                            extrema_pred, reward_tensor, action_tensor
                        )
                        if extrema_loss is not None:
                            total_loss = total_loss + 0.1 * extrema_loss

                    loss = total_loss

                    # Backward pass
                    training_start_time = time.time()
                    loss.backward()

                    # Gradient clipping
                    torch.nn.utils.clip_grad_norm_(
                        self.cnn_model.parameters(), max_norm=1.0
                    )

                    # Optimizer step
                    self.cnn_optimizer.step()
                    training_duration_ms = (time.time() - training_start_time) * 1000

                    # Update statistics
                    current_loss = loss.item()
                    self.update_model_loss(model_name, current_loss)
                    self._update_model_training_statistics(
                        model_name, current_loss, training_duration_ms
                    )

                    logger.debug(
                        f"CNN direct training completed: loss={current_loss:.4f}, time={training_duration_ms:.1f}ms"
                    )
                    return True
                else:
                    logger.warning(f"No model input available for CNN training for {model_name}. This prevents the model from learning.")
                    
                    # Try one more time to generate training data from current market conditions
                    try:
                        if hasattr(self, 'data_provider') and self.data_provider:
                            # Create minimal training sample from current market data
                            symbol = record.get("symbol", "ETH/USDT")
                            current_price = self._get_current_price(symbol)
                            
                            # Get variables from function scope
                            actual_action = prediction["action"]
                            pred_confidence = prediction.get("confidence", 0.5)
                            
                            # Create a basic feature vector (this is a fallback)
                            basic_features = np.array([
                                current_price / 10000.0,  # Normalized price
                                pred_confidence,           # Model confidence
                                reward,                    # Current reward
                                1.0 if actual_action == "BUY" else 0.0,
                                1.0 if actual_action == "SELL" else 0.0,
                                1.0 if actual_action == "HOLD" else 0.0
                            ], dtype=np.float32)
                            
                            # Pad to expected size if needed
                            expected_size = 512  # Adjust based on your model's expected input size
                            if len(basic_features) < expected_size:
                                padding = np.zeros(expected_size - len(basic_features), dtype=np.float32)
                                basic_features = np.concatenate([basic_features, padding])
                            
                            logger.info(f"Created fallback training features for {model_name}: shape={basic_features.shape}")
                            
                            # Now perform training with the fallback features
                            device = next(self.cnn_model.parameters()).device
                            features_tensor = torch.tensor(basic_features, dtype=torch.float32, device=device).unsqueeze(0)
                            
                            # Convert action to index
                            actions = ["BUY", "SELL", "HOLD"]
                            action_idx = actions.index(actual_action) if actual_action in actions else 2
                            action_tensor = torch.tensor([action_idx], dtype=torch.long, device=device)
                            reward_tensor = torch.tensor([reward], dtype=torch.float32, device=device)
                            
                            # Perform minimal training step
                            self.cnn_model.train()
                            self.cnn_optimizer.zero_grad()
                            
                            # Forward pass
                            q_values, _, _, _, _ = self.cnn_model(features_tensor)
                            
                            # Calculate basic loss
                            q_values_selected = q_values.gather(1, action_tensor.unsqueeze(1)).squeeze(1)
                            loss = nn.MSELoss()(q_values_selected, reward_tensor)
                            
                            # Backward pass
                            loss.backward()
                            torch.nn.utils.clip_grad_norm_(self.cnn_model.parameters(), max_norm=1.0)
                            self.cnn_optimizer.step()
                            
                            logger.info(f"Fallback CNN training completed for {model_name}: loss={loss.item():.4f}")
                            return True
                            
                    except Exception as fallback_error:
                        logger.error(f"Fallback CNN training failed for {model_name}: {fallback_error}")
                    
                    # If we reach here, even fallback training failed
                    logger.error(f"All CNN training methods failed for {model_name}. Model will not learn from this prediction.")
                    return False

            # Try model interface training methods
            elif hasattr(model, "add_training_sample"):
                symbol = record.get("symbol", "ETH/USDT")
                actual_action = prediction["action"]
                model.add_training_sample(symbol, actual_action, reward)
                logger.debug(
                    f"Added training sample to {model_name}: action={actual_action}, reward={reward:.3f}"
                )

                # If model has train method, trigger training
                if hasattr(model, "train") and callable(getattr(model, "train")):
                    try:
                        training_start_time = time.time()
                        training_results = model.train(epochs=1)
                        training_duration_ms = (
                            time.time() - training_start_time
                        ) * 1000

                        if training_results and "loss" in training_results:
                            current_loss = training_results["loss"]
                            self.update_model_loss(model_name, current_loss)
                            self._update_model_training_statistics(
                                model_name, current_loss, training_duration_ms
                            )
                            logger.debug(
                                f"Model {model_name} training completed: loss={current_loss:.4f}"
                            )
                        else:
                            self._update_model_training_statistics(
                                model_name, training_duration_ms=training_duration_ms
                            )
                    except Exception as e:
                        logger.error(f"Error training {model_name}: {e}")

                return True

            # Basic acknowledgment for other training methods
            elif hasattr(model, "train"):
                logger.debug(f"Using basic train method for {model_name}")
                logger.debug(
                    f"CNN model {model_name} training acknowledged (basic train method available)"
                )
                return True

            return False

        except Exception as e:
            logger.error(f"Error training CNN model {model_name}: {e}")
            return False

    async def _train_cob_rl_model(
        self, model, model_name: str, model_input, prediction: Dict, reward: float
    ) -> bool:
        """Train COB RL model"""
        try:
            # COB RL models might have specific training methods
            if hasattr(model, "remember"):
                action_names = ["SELL", "HOLD", "BUY"]
                action_idx = action_names.index(prediction["action"])

                # Convert model_input to proper format
                state = self._convert_to_rl_state(model_input, model_name)
                if state is None:
                    logger.warning(
                        f"Failed to convert model_input for COB RL training: {type(model_input)}"
                    )
                    return False

                model.remember(
                    state=state,
                    action=action_idx,
                    reward=reward,
                    next_state=state,
                    done=True,
                )
                logger.debug(
                    f"Added experience to COB RL model: action={prediction['action']}, reward={reward:.3f}"
                )

                # Trigger training if enough experiences
                if hasattr(model, "train") and hasattr(model, "memory"):
                    memory_size = (
                        len(model.memory) if hasattr(model.memory, "__len__") else 0
                    )
                    if memory_size >= getattr(model, "batch_size", 32):
                        training_loss = model.train()
                        if training_loss is not None:
                            self.update_model_loss(model_name, training_loss)
                            logger.debug(
                                f"COB RL training completed: loss={training_loss:.4f}"
                            )
                            return True
                return True  # Experience added successfully

            # Try alternative training methods for COB RL
            elif hasattr(model, "update_model") or hasattr(model, "train"):
                logger.debug(
                    f"Using alternative training method for COB RL model {model_name}"
                )
                # For now, just acknowledge that training was attempted
                logger.debug(f"COB RL model {model_name} training acknowledged")
                return True

            # If no training methods available, still return success to avoid warnings
            logger.debug(
                f"COB RL model {model_name} doesn't require traditional training"
            )
            return True

        except Exception as e:
            logger.error(f"Error training COB RL model {model_name}: {e}")
            return False

    async def _train_generic_model(
        self, model, model_name: str, model_input, prediction: Dict, reward: float
    ) -> bool:
        """Train generic model with available methods"""
        try:
            # Try various generic training methods
            if hasattr(model, "train_with_reward"):
                loss = model.train_with_reward(model_input, reward)
                if loss is not None:
                    self.update_model_loss(model_name, loss)
                    logger.debug(
                        f"Generic training completed for {model_name}: loss={loss:.4f}"
                    )
                    return True

            elif hasattr(model, "update_loss"):
                model.update_loss(reward)
                logger.debug(f"Updated loss for {model_name}: reward={reward:.3f}")
                return True

            elif hasattr(model, "train_on_outcome"):
                target = 1 if reward > 0 else 0
                loss = model.train_on_outcome(model_input, target)
                if loss is not None:
                    self.update_model_loss(model_name, loss)
                    logger.debug(
                        f"Outcome training completed for {model_name}: loss={loss:.4f}"
                    )
                    return True

            return False

        except Exception as e:
            logger.error(f"Error training generic model {model_name}: {e}")
            return False

    async def _train_model_fallback(
        self, model_name: str, model, model_input, prediction: Dict, reward: float
    ) -> bool:
        """Fallback training methods for models that don't fit standard patterns"""
        try:
            # Try to access direct model instances for legacy support
            if (
                "dqn" in model_name.lower()
                and hasattr(self, "rl_agent")
                and self.rl_agent
            ):
                return await self._train_rl_model(
                    self.rl_agent, model_name, model_input, prediction, reward
                )

            elif (
                "cnn" in model_name.lower()
                and hasattr(self, "cnn_model")
                and self.cnn_model
            ):
                # Create a fake record for CNN training
                fake_record = {"symbol": "ETH/USDT", "model_input": model_input}
                return await self._train_cnn_model(
                    self.cnn_model, model_name, fake_record, prediction, reward
                )

            elif (
                "cob" in model_name.lower()
                and hasattr(self, "cob_rl_agent")
                and self.cob_rl_agent
            ):
                return await self._train_cob_rl_model(
                    self.cob_rl_agent, model_name, model_input, prediction, reward
                )

            return False

        except Exception as e:
            logger.error(f"Error in fallback training for {model_name}: {e}")
            return False

    def _calculate_rsi(self, prices: pd.Series, period: int = 14) -> float:
        """Calculate RSI indicator"""
        try:
            delta = prices.diff()
            gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
            loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
            rs = gain / loss
            rsi = 100 - (100 / (1 + rs))
            return rsi.iloc[-1] if not rsi.empty else 50.0
        except:
            return 50.0

    async def _get_cnn_predictions(
        self, model: CNNModelInterface, symbol: str, base_data=None
    ) -> List[Prediction]:
        """Get predictions from the CNN model for ``symbol``.

        Inference is attempted in two stages:

        1. Direct inference on ``self.cnn_model`` (when set) using the feature
           vector of ``base_data``.
        2. If stage 1 produced nothing, ``model.model.act(...)`` is used as a
           fallback when the wrapped model exposes ``act``.

        When a prediction was produced and ``model.name`` has an entry in
        ``self.last_inference``, immediate training is triggered for it.

        Args:
            model: CNN model wrapper; ``model.name`` and ``model.model`` are used.
            symbol: Trading symbol the prediction is for.
            base_data: Pre-built input exposing ``get_feature_vector()``. When
                None it is built via
                ``self.data_provider.build_base_data_input(symbol)``.

        Returns:
            A list holding zero or one ``Prediction``. Errors are logged and
            never raised to the caller.
        """
        predictions = []
        try:
            # Bind torch once for BOTH inference stages. It used to be imported
            # only inside the direct-inference branch, which left the name
            # unbound in the fallback whenever self.cnn_model was not set.
            import torch as torch_module

            # Use pre-built base data if provided, otherwise build it
            if base_data is None:
                base_data = self.data_provider.build_base_data_input(symbol)
                if not base_data:
                    logger.warning(
                        f"Cannot build BaseDataInput for CNN prediction: {symbol}"
                    )
                    return predictions

            # ---- Stage 1: direct CNN model inference (no adapter needed) ----
            if hasattr(self, "cnn_model") and self.cnn_model:
                try:
                    features = base_data.get_feature_vector()

                    # Keep the input on the same device as the model parameters
                    device = next(self.cnn_model.parameters()).device
                    features_tensor = torch_module.tensor(
                        features, dtype=torch_module.float32, device=device
                    )

                    # Ensure batch dimension
                    if features_tensor.dim() == 1:
                        features_tensor = features_tensor.unsqueeze(0)

                    self.cnn_model.eval()

                    with torch_module.no_grad():
                        (
                            q_values,
                            extrema_pred,
                            price_pred,
                            features_refined,
                            advanced_pred,
                        ) = self.cnn_model(features_tensor)

                        # Convert Q-values to probabilities using softmax
                        action_probs = torch_module.softmax(q_values, dim=1)
                        action_idx = torch_module.argmax(action_probs, dim=1).item()
                        confidence = float(action_probs[0, action_idx].item())

                        # Action index order of the CNN head: BUY=0, SELL=1, HOLD=2
                        actions = ["BUY", "SELL", "HOLD"]
                        action = actions[action_idx]

                        probabilities = {
                            "BUY": float(action_probs[0, 0].item()),
                            "SELL": float(action_probs[0, 1].item()),
                            "HOLD": float(action_probs[0, 2].item()),
                        }

                        # Both stay None when the model returns no price head, so
                        # the metadata below never references an unbound name.
                        price_direction_data = None
                        price_prediction = None
                        if price_pred is not None:
                            # Process price direction predictions
                            if hasattr(
                                model.model, "process_price_direction_predictions"
                            ):
                                try:
                                    price_direction_data = (
                                        model.model.process_price_direction_predictions(
                                            price_pred
                                        )
                                    )
                                except Exception as e:
                                    logger.debug(
                                        f"Error processing CNN price direction: {e}"
                                    )

                            # Old list format kept for compatibility
                            price_prediction = (
                                price_pred.squeeze(0).cpu().numpy().tolist()
                            )

                        prediction = Prediction(
                            action=action,
                            confidence=confidence,
                            probabilities=probabilities,
                            timeframe="multi",  # Multi-timeframe prediction
                            timestamp=datetime.now(),
                            model_name=model.name,  # Use the actual model name
                            metadata={
                                # Reuse the vector computed above instead of
                                # building it a second time
                                "feature_size": len(features),
                                "data_sources": [
                                    "ohlcv_1s",
                                    "ohlcv_1m",
                                    "ohlcv_1h",
                                    "ohlcv_1d",
                                    "btc",
                                    "cob",
                                    "indicators",
                                ],
                                "price_prediction": price_prediction,
                                "price_direction": price_direction_data,
                                "extrema_prediction": (
                                    extrema_pred.squeeze(0).cpu().numpy().tolist()
                                    if extrema_pred is not None
                                    else None
                                ),
                            },
                        )
                        predictions.append(prediction)

                        logger.debug(
                            f"Added CNN prediction: {action} ({confidence:.3f})"
                        )

                except Exception as e:
                    # exc_info routes the traceback through logging rather than
                    # printing it straight to stderr
                    logger.error(f"Error using direct CNN model: {e}", exc_info=True)

            # ---- Stage 2: fallback through the wrapped model's act() ----
            if not predictions:
                logger.debug(
                    f"No CNN predictions generated for {symbol} - this is expected if CNN model is not properly initialized"
                )

                try:
                    # Use the already available base_data (no need to rebuild)
                    if not base_data:
                        logger.warning(
                            f"No BaseDataInput available for CNN fallback: {symbol}"
                        )
                        return predictions

                    # Unified feature vector covering all timeframes
                    feature_vector = base_data.get_feature_vector()

                    if hasattr(model.model, "act"):
                        device = torch_module.device(
                            "cuda" if torch_module.cuda.is_available() else "cpu"
                        )
                        features_tensor = torch_module.tensor(
                            feature_vector, dtype=torch_module.float32, device=device
                        )

                        # Call the model's act method without exploration
                        action_idx, confidence, action_probs = model.model.act(
                            features_tensor, explore=False
                        )

                        action_names = [
                            "BUY",
                            "SELL",
                            "HOLD",
                        ]  # Note: enhanced_cnn uses this order
                        best_action = action_names[action_idx]

                        # Get price direction vectors from CNN model if available
                        price_direction_data = None
                        if hasattr(model.model, "get_price_direction_vector"):
                            try:
                                price_direction_data = (
                                    model.model.get_price_direction_vector()
                                )
                            except Exception as e:
                                logger.debug(
                                    f"Error getting price direction from CNN: {e}"
                                )

                        pred = Prediction(
                            action=best_action,
                            confidence=float(confidence),
                            probabilities={
                                "BUY": float(action_probs[0]),
                                "SELL": float(action_probs[1]),
                                "HOLD": float(action_probs[2]),
                            },
                            timeframe="unified",  # Indicates this uses all timeframes
                            timestamp=datetime.now(),
                            model_name=model.name,
                            metadata={
                                "feature_vector_size": len(feature_vector),
                                "unified_input": True,
                                "fallback_method": "direct_model_inference",
                                "price_direction": price_direction_data,
                            },
                        )
                        predictions.append(pred)

                        # Note: Inference data will be stored in main prediction loop to avoid duplication

                        # Capture for dashboard: shift the price by up to 1%,
                        # scaled by confidence, in the predicted direction
                        current_price = self._get_current_price(symbol)
                        if current_price is not None:
                            predicted_price = current_price * (
                                1
                                + (
                                    0.01
                                    * (
                                        confidence
                                        if best_action == "BUY"
                                        else -confidence if best_action == "SELL" else 0
                                    )
                                )
                            )
                            self.capture_cnn_prediction(
                                symbol,
                                direction=action_idx,
                                confidence=confidence,
                                current_price=current_price,
                                predicted_price=predicted_price,
                            )

                        logger.info(
                            f"CNN fallback successful for {symbol}: {best_action} (confidence: {confidence:.3f})"
                        )

                    else:
                        # Reached only when nothing was predicted AND the wrapped
                        # model offers no act(); report exactly that.
                        logger.debug(
                            f"CNN model {model.name} has no act() method - fallback inference unavailable"
                        )

                except Exception as e:
                    logger.error(f"CNN fallback inference failed for {symbol}: {e}")
                    # Don't continue with old timeframe-by-timeframe approach

            # Trigger immediate training if previous inference data exists for this model
            if predictions and model.name in self.last_inference:
                logger.debug(
                    f"Triggering immediate training for CNN model {model.name} with previous inference data"
                )
                await self._trigger_immediate_training_for_model(model.name, symbol)

        except Exception as e:
            logger.error(f"Orch: Error getting CNN predictions: {e}")
        return predictions

    async def _get_rl_prediction(
        self, model: RLAgentInterface, symbol: str, base_data=None
    ) -> Optional[Prediction]:
        """Get prediction from RL agent using pre-built base data"""
        # NOTE(review): this method contains unresolved git merge-conflict markers
        # (">>>>>>> d49a...", "<<<<<<< HEAD", "=======") and is not valid Python
        # until they are resolved. The top half reads like the incoming RL-agent
        # implementation while the bottom half builds a "COB_RL" prediction, so
        # two different methods appear to have been spliced together - TODO
        # resolve against both branches; the last "<<<<<<< HEAD" region is not
        # closed inside this method.
        try:
            # Use pre-built base data if provided, otherwise build it
            if base_data is None:
                base_data = self.data_provider.build_base_data_input(symbol)
                if not base_data:
                    logger.warning(
                        f"Cannot build BaseDataInput for RL prediction: {symbol}"
                    )
                    return None

            # Convert BaseDataInput to RL state format
            # NOTE(review): state_features is not read anywhere in the visible
            # part of this method.
            state_features = base_data.get_feature_vector()

            # Get current state for RL agent using the pre-built base data
            # NOTE(review): called with (symbol, base_data), but the
            # _get_rl_state definition that follows this method in the file is
            # declared as (self, symbol) - presumably the other branch's
            # version; confirm which signature survives the merge.
            state = self._get_rl_state(symbol, base_data)
            if state is None:
                return None

            # Get RL agent's action, confidence, and q_values from the underlying model
            if hasattr(model.model, "act_with_confidence"):
                # Call act_with_confidence and handle different return formats
                result = model.model.act_with_confidence(state)

                if len(result) == 3:
                    # EnhancedCNN format: (action, confidence, q_values)
                    action_idx, confidence, raw_q_values = result
                elif len(result) == 2:
                    # DQN format: (action, confidence)
                    # NOTE(review): conflict marker below closes a region whose
                    # opening marker lies above this method.
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
                    action_idx, confidence = result
                else:
                    # NOTE(review): HEAD side of a conflict starts here. It uses
                    # `cob_state`, which is never assigned in this method.
<<<<<<< HEAD
                    action_idx = result[0] if isinstance(result, (list, tuple)) else result
                    confidence = 0.6
            else:
                action_idx = model.model.act(cob_state)
                confidence = 0.6
            
            # Convert to action name
            action_names = ['BUY', 'SELL', 'HOLD']
            if 0 <= action_idx < len(action_names):
                action = action_names[action_idx]
                # NOTE(review): incoming side of the same conflict starts here.
=======
                    logger.error(
                        f"Unexpected return format from act_with_confidence: {len(result)} values"
                    )
                    return None
            elif hasattr(model.model, "act"):
                action_idx = model.model.act(state, explore=False)
                confidence = 0.7  # Default confidence for basic act method
                raw_q_values = None  # No raw q_values from simple act
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
            else:
                return None
            # NOTE(review): another HEAD-side region opens here and is not closed
            # within this method. Everything below refers to `cob_state` and to
            # "COB_RL" names, and builds Prediction with symbol=/signal=/
            # reasoning=/features= keywords, whereas _get_cnn_predictions builds
            # it with action=/probabilities=/timeframe=/timestamp= - confirm
            # which Prediction definition applies.
<<<<<<< HEAD
            
            # Store prediction in database for tracking
            if (hasattr(self, 'enhanced_training_system') and 
                self.enhanced_training_system and 
                hasattr(self.enhanced_training_system, 'store_model_prediction')):
                
                current_price = self._get_current_price_safe(symbol)
                if current_price > 0:
                    prediction_id = self.enhanced_training_system.store_model_prediction(
                        model_name=f"COB_RL_{model.model_name}" if hasattr(model, 'model_name') else "COB_RL",
                        symbol=symbol,
                        prediction_type=action,
                        confidence=confidence,
                        current_price=current_price
                    )
                    logger.debug(f"Stored COB RL prediction {prediction_id} for {symbol}")
            
            # Create prediction object
            prediction = Prediction(
                model_name=f"COB_RL_{model.model_name}" if hasattr(model, 'model_name') else "COB_RL",
                symbol=symbol,
                signal=action,
                confidence=confidence,
                reasoning=f"COB RL model prediction based on order book imbalance",
                features=cob_state.tolist() if isinstance(cob_state, np.ndarray) else [],
                metadata={
                    'action_idx': action_idx,
                    'cob_state_size': len(cob_state) if cob_state is not None else 0
                }
            )
            
            return prediction
        
        except Exception as e:
            logger.error(f"Error getting COB RL prediction for {symbol}: {e}")
            return None
    
    async def _get_generic_prediction(self, model, symbol: str) -> Optional[Prediction]:
        """Stub for the generic-model prediction path; never yields a prediction.

        Only emits a debug log naming the model and symbol. If producing that
        log raises, the failure is reported through ``logger.error`` instead.

        Args:
            model: Model object; only interpolated into the log message.
            symbol: Trading symbol; only interpolated into the log message.

        Returns:
            Always ``None``.
        """
        # NOTE(review): no inference is implemented here. Any later definition
        # of the same method name in this class would shadow this stub.
        try:
            logger.debug(f"Getting generic prediction from {model} for {symbol}")
        except Exception as e:
            logger.error(f"Error getting generic prediction for {symbol}: {e}")
        return None
        
    def _get_rl_state(self, symbol: str) -> Optional[np.ndarray]:
        """Build RL state vector for DQN agent"""
        try:
            # Use data provider to get comprehensive RL state
            if hasattr(self.data_provider, 'get_dqn_state_for_inference'):
                symbols_timeframes = [(symbol, '1m'), (symbol, '5m'), (symbol, '1h')]
                state = self.data_provider.get_dqn_state_for_inference(symbols_timeframes, target_size=100)
                if state is not None:
                    return state
            
            # Fallback: build basic state from market data
            market_features = []
            
            # Get latest price data
            latest_data = self.data_provider.get_latest_data(symbol)
            if latest_data and 'close' in latest_data:
                current_price = float(latest_data['close'])
                market_features.extend([
                    current_price,
                    latest_data.get('volume', 0.0),
                    latest_data.get('high', current_price) - latest_data.get('low', current_price),  # Range
                    latest_data.get('open', current_price)
                ])
            else:
                market_features.extend([4300.0, 100.0, 10.0, 4295.0])  # Default values
            
            # Pad to standard size
            while len(market_features) < 100:
                market_features.append(0.0)
            
            return np.array(market_features[:100], dtype=np.float32)
            
=======

            action_names = ["SELL", "HOLD", "BUY"]
            action = action_names[action_idx]

            # Convert raw_q_values to list if they are a tensor
            q_values_for_capture = None
            if raw_q_values is not None and hasattr(raw_q_values, "tolist"):
                q_values_for_capture = raw_q_values.tolist()
            elif raw_q_values is not None and isinstance(raw_q_values, list):
                q_values_for_capture = raw_q_values

            # Create prediction object with safe probability calculation
            probabilities = {}
            if q_values_for_capture and len(q_values_for_capture) == len(action_names):
                # Use actual q_values if they match the expected length
                probabilities = {
                    action_names[i]: float(q_values_for_capture[i])
                    for i in range(len(action_names))
                }
            else:
                # Use default uniform probabilities if q_values are unavailable or mismatched
                default_prob = 1.0 / len(action_names)
                probabilities = {name: default_prob for name in action_names}
                if q_values_for_capture:
                    logger.warning(
                        f"Q-values length mismatch: expected {len(action_names)}, got {len(q_values_for_capture)}. Using default probabilities."
                    )

            # Get price direction vectors from DQN model if available
            price_direction_data = None
            if hasattr(model.model, "get_price_direction_vector"):
                try:
                    price_direction_data = model.model.get_price_direction_vector()
                except Exception as e:
                    logger.debug(f"Error getting price direction from DQN: {e}")

            prediction = Prediction(
                action=action,
                confidence=float(confidence),
                probabilities=probabilities,
                timeframe="mixed",  # RL uses mixed timeframes
                timestamp=datetime.now(),
                model_name=model.name,
                metadata={
                    "state_size": len(state),
                    "price_direction": price_direction_data,
                },
            )

            # Capture DQN prediction for dashboard visualization
            current_price = self._get_current_price(symbol)
            if current_price:
                # Only pass q_values if they exist, otherwise pass empty list
                q_values_to_pass = (
                    q_values_for_capture if q_values_for_capture is not None else []
                )
                self.capture_dqn_prediction(
                    symbol,
                    action_idx,
                    float(confidence),
                    current_price,
                    q_values_to_pass,
                )

            # Trigger immediate training if previous inference data exists for this model
            if prediction and model.name in self.last_inference:
                logger.debug(
                    f"Triggering immediate training for RL model {model.name} with previous inference data"
                )
                await self._trigger_immediate_training_for_model(model.name, symbol)

            return prediction

>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
        except Exception as e:
            logger.debug(f"Error building RL state for {symbol}: {e}")
            return None
<<<<<<< HEAD
    
    # SINGLE-USE FUNCTION - Called only once in codebase
    def _get_cob_state(self, symbol: str) -> Optional[np.ndarray]:
        """Build COB state vector for COB RL agent"""
        try:
            # Get COB data from integration
            if hasattr(self, 'cob_integration') and self.cob_integration:
                cob_snapshot = self.cob_integration.get_cob_snapshot(symbol)
                if cob_snapshot:
                    # Extract features from COB snapshot
                    features = []
                    
                    # Add bid/ask imbalance
                    bid_volume = sum([level['volume'] for level in cob_snapshot.get('bids', [])])
                    ask_volume = sum([level['volume'] for level in cob_snapshot.get('asks', [])])
                    if bid_volume + ask_volume > 0:
                        imbalance = (bid_volume - ask_volume) / (bid_volume + ask_volume)
                    else:
                        imbalance = 0.0
                    features.append(imbalance)
                    
                    # Add spread
                    if cob_snapshot.get('bids') and cob_snapshot.get('asks'):
                        spread = cob_snapshot['asks'][0]['price'] - cob_snapshot['bids'][0]['price']
                        features.append(spread)
                    else:
                        features.append(0.0)
                    
                    # Pad to standard size
                    while len(features) < 50:
                        features.append(0.0)
                    
                    return np.array(features[:50], dtype=np.float32)
            
            # Fallback state
            return np.zeros(50, dtype=np.float32)
            
        except Exception as e:
            logger.debug(f"Error building COB state for {symbol}: {e}")
            return None
    
    
    async def _get_generic_prediction(self, model: ModelInterface, symbol: str) -> Optional[Prediction]:
        """Get prediction from generic model"""
        try:
            # Get feature matrix for the model
            feature_matrix = self.data_provider.get_feature_matrix(
                symbol=symbol,
                timeframes=self.config.timeframes[:3],  # Use first 3 timeframes
                window_size=20
            )
            
            if feature_matrix is not None:
                # Ensure feature_matrix is properly shaped and limited
                if isinstance(feature_matrix, np.ndarray):
                    # Flatten and limit features to prevent shape mismatches
                    feature_matrix = feature_matrix.flatten()
                    if len(feature_matrix) > 2000:  # Limit to 2000 features for generic models
                        feature_matrix = feature_matrix[:2000]
                    elif len(feature_matrix) < 2000:  # Pad with zeros
                        padded = np.zeros(2000)
                        padded[:len(feature_matrix)] = feature_matrix
                        feature_matrix = padded
                
                prediction_result = model.predict(feature_matrix)
                
                # Handle different return formats from model.predict()
                if prediction_result is None:
                    return None
                
                # Check if it's a tuple (action_probs, confidence)
                if isinstance(prediction_result, tuple) and len(prediction_result) == 2:
                    action_probs, confidence = prediction_result
                elif isinstance(prediction_result, dict):
                    # Handle dictionary return format
                    action_probs = prediction_result.get('probabilities', None)
                    confidence = prediction_result.get('confidence', 0.7)
                else:
                    # Assume it's just action probabilities
                    action_probs = prediction_result
                    confidence = 0.7  # Default confidence
                
                if action_probs is not None:
                    action_names = ['SELL', 'HOLD', 'BUY']
                    best_action_idx = np.argmax(action_probs)
                    best_action = action_names[best_action_idx]
                    
                    prediction = Prediction(
                        action=best_action,
                        confidence=float(confidence),
                        probabilities={name: float(prob) for name, prob in zip(action_names, action_probs)},
                        timeframe='mixed',
                        timestamp=datetime.now(),
                        model_name=model.name,
                        metadata={'generic_model': True}
=======

    async def _get_generic_prediction(
        self, model: ModelInterface, symbol: str, base_data=None
    ) -> Optional[Prediction]:
        """Get prediction from generic model using pre-built base data"""
        try:
            # Use pre-built base data if provided, otherwise build it
            if base_data is None:
                base_data = self.data_provider.build_base_data_input(symbol)
                if not base_data:
                    logger.warning(
                        f"Cannot build BaseDataInput for generic prediction: {symbol}"
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
                    )
                    return None

            # Convert to feature vector for generic models
            feature_vector = base_data.get_feature_vector()

            # For backward compatibility, reshape to matrix format if model expects it
            # Most generic models expect a 2D matrix, so reshape the unified vector
            feature_matrix = feature_vector.reshape(1, -1)  # Shape: (1, 7850)

            prediction_result = model.predict(feature_matrix)

            # Handle different return formats from model.predict()
            if prediction_result is None:
                return None

            # Check if it's a tuple (action_probs, confidence)
            if isinstance(prediction_result, tuple) and len(prediction_result) == 2:
                action_probs, confidence = prediction_result
            elif isinstance(prediction_result, dict):
                # Handle dictionary return format
                action_probs = prediction_result.get("probabilities", None)
                confidence = prediction_result.get("confidence", 0.7)
            else:
                # Assume it's just action probabilities (e.g., a list or numpy array)
                action_probs = prediction_result
                confidence = 0.7  # Default confidence

            if action_probs is not None:
                # Ensure action_probs is a numpy array for argmax
                if not isinstance(action_probs, np.ndarray):
                    action_probs = np.array(action_probs)

                action_names = ["SELL", "HOLD", "BUY"]
                best_action_idx = np.argmax(action_probs)
                best_action = action_names[best_action_idx]

                prediction = Prediction(
                    action=best_action,
                    confidence=float(confidence),
                    probabilities={
                        name: float(prob)
                        for name, prob in zip(action_names, action_probs)
                    },
                    timeframe="unified",  # Now uses unified multi-timeframe data
                    timestamp=datetime.now(),
                    model_name=model.name,
                    metadata={
                        "generic_model": True,
                        "unified_input": True,
                        "feature_vector_size": len(feature_vector),
                    },
                )

                return prediction

            return None

        except Exception as e:
            logger.error(f"Error getting generic prediction: {e}")
            return None

    def _get_rl_state(self, symbol: str, base_data=None) -> Optional[np.ndarray]:
        """Get current state for RL agent using pre-built base data"""
        try:
            # Use pre-built base data if provided, otherwise build it
            if base_data is None:
                base_data = self.data_provider.build_base_data_input(symbol)
                if not base_data:
                    logger.debug(f"Cannot build BaseDataInput for RL state: {symbol}")
                    return None

            # Validate base_data has the required method
            if not hasattr(base_data, 'get_feature_vector'):
                logger.debug(f"BaseDataInput for {symbol} missing get_feature_vector method")
                return None

            # Get unified feature vector (7850 features including all timeframes and COB data)
            feature_vector = base_data.get_feature_vector()
            
<<<<<<< HEAD
            if feature_matrix is not None:
                # Flatten the feature matrix for RL agent
                # Shape: (n_timeframes, window_size, n_features) -> (n_timeframes * window_size * n_features,)
                state = feature_matrix.flatten()
                
                # Add extrema features if available
                if self.extrema_trainer:
                    try:
                        extrema_features = self.extrema_trainer.get_context_features_for_model(symbol)
                        if extrema_features is not None:
                            state = np.concatenate([state, extrema_features.flatten()])
                            logger.debug(f"Enhanced RL state with Extrema data for {symbol}")
                    except Exception as extrema_error:
                        logger.debug(f"Could not enhance RL state with Extrema data: {extrema_error}")

                # Get real-time portfolio information from the trading executor
                position_size = 0.0
                balance = 1.0  # Default to a normalized value if not available
                unrealized_pnl = 0.0

            if self.trading_executor:
                position = self.trading_executor.get_current_position(symbol)
                if position:
                    position_size = position.get('quantity', 0.0)

                if hasattr(self.trading_executor, "get_balance"):
                    current_balance = self.trading_executor.get_balance()
                else:
                    # TODO(Guideline: ensure integrations call real APIs) Expose a balance accessor on TradingExecutor for decision-state enrichment.
                    logger.warning("TradingExecutor lacks get_balance(); implement real balance access per guidelines")
                    current_balance = {}
                if current_balance and current_balance.get('total', 0) > 0:
                    balance = min(1.0, current_balance.get('free', 0) / current_balance.get('total', 1))

                    unrealized_pnl = self._get_current_position_pnl(symbol, self.data_provider.get_current_price(symbol))

                additional_state = np.array([position_size, balance, unrealized_pnl])
                
                return np.concatenate([state, additional_state])
=======
            # Validate feature vector
            if feature_vector is None or len(feature_vector) == 0:
                logger.debug(f"Empty feature vector for RL state: {symbol}")
                return None
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
            
            # Check if all features are zero (invalid state)
            if all(f == 0 for f in feature_vector):
                logger.debug(f"All features are zero for RL state: {symbol}")
                return None
            
            # Convert to numpy array if needed
            if not isinstance(feature_vector, np.ndarray):
                feature_vector = np.array(feature_vector, dtype=np.float32)

            # Return the full unified feature vector for RL agent
            # The DQN agent is now initialized with the correct size to match this
            return feature_vector

        except Exception as e:
            logger.error(f"Error creating RL state for {symbol}: {e}")
            return None
    
<<<<<<< HEAD
    # SINGLE-USE FUNCTION - Called only once in codebase
    def _combine_predictions(self, symbol: str, price: float, 
                           predictions: List[Prediction],
                           timestamp: datetime) -> TradingDecision:
        """Combine all predictions into a final decision with aggressiveness and P&L feedback"""
        try:
            reasoning = {
                'predictions': len(predictions),
                # 'weights': {},  # Now handled by ModelManager
                'models_used': [pred.model_name for pred in predictions]
=======
    def _determine_decision_source(self, models_used: List[str], confidence: float) -> str:
        """Determine the source of a trading decision based on contributing models"""
        try:
            if not models_used:
                return "no_models"
            
            # If only one model contributed, use that as source
            if len(models_used) == 1:
                model_name = models_used[0]
                # Map internal model names to user-friendly names
                model_mapping = {
                    "dqn_agent": "DQN",
                    "cnn_model": "CNN", 
                    "cob_rl": "COB-RL",
                    "decision_fusion": "Fusion",
                    "extrema_trainer": "Extrema",
                    "transformer": "Transformer"
                }
                return model_mapping.get(model_name, model_name)
            
            # Multiple models - determine primary contributor
            # Priority order: COB-RL > DQN > CNN > Others
            priority_order = ["cob_rl", "dqn_agent", "cnn_model", "decision_fusion", "transformer", "extrema_trainer"]
            
            for priority_model in priority_order:
                if priority_model in models_used:
                    model_mapping = {
                        "cob_rl": "COB-RL",
                        "dqn_agent": "DQN",
                        "cnn_model": "CNN",
                        "decision_fusion": "Fusion",
                        "transformer": "Transformer",
                        "extrema_trainer": "Extrema"
                    }
                    primary_model = model_mapping.get(priority_model, priority_model)
                    
                    # If high confidence, show primary model
                    if confidence > 0.7:
                        return primary_model
                    else:
                        # Lower confidence, show it's a combination
                        return f"{primary_model}+{len(models_used)-1}"
            
            # Fallback: show number of models
            return f"Ensemble({len(models_used)})"
            
        except Exception as e:
            logger.error(f"Error determining decision source: {e}")
            return "orchestrator"

    def _combine_predictions(
        self,
        symbol: str,
        price: float,
        predictions: List[Prediction],
        timestamp: datetime,
    ) -> TradingDecision:
        """Combine all predictions into a final decision with aggressiveness and P&L feedback"""
        try:
            reasoning = {
                "predictions": len(predictions),
                "weights": self.model_weights.copy(),
                "models_used": [pred.model_name for pred in predictions],
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
            }

            # Get current position P&L for feedback
            current_position_pnl = self._get_current_position_pnl(symbol, price)

            # Initialize action scores
            action_scores = {"BUY": 0.0, "SELL": 0.0, "HOLD": 0.0}
            total_weight = 0.0

            # Process all predictions (filter out disabled models)
            for pred in predictions:
                # Check if model inference is enabled
                if not self.is_model_inference_enabled(pred.model_name):
                    logger.debug(f"Skipping disabled model {pred.model_name} in decision making")
                    continue
                # Check routing toggle: even if inference happened, we may ignore it in decision fusion/programmatic fusion
                if not self.is_model_routing_enabled(pred.model_name):
                    logger.debug(f"Routing disabled for {pred.model_name}; excluding from decision aggregation")
                    continue
                
                # DEBUG: Log individual model predictions
                logger.debug(f"Model {pred.model_name}: {pred.action} (confidence: {pred.confidence:.3f})")
                
                # Get model weight
<<<<<<< HEAD
                model_weight = 0.1  # Default weight, now managed by ModelManager
                
=======
                model_weight = self.model_weights.get(pred.model_name, 0.1)

>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
                # Weight by confidence and timeframe importance
                timeframe_weight = self._get_timeframe_weight(pred.timeframe)
                weighted_confidence = pred.confidence * timeframe_weight * model_weight

                action_scores[pred.action] += weighted_confidence
                total_weight += weighted_confidence

            # Normalize scores
            if total_weight > 0:
                for action in action_scores:
                    action_scores[action] /= total_weight

            # Choose best action - safe way to handle max with key function
            if action_scores:
                # Add small random component to break ties and prevent pure bias
                import random
                for action in action_scores:
                    # Add tiny random noise (±0.001) to break exact ties
                    action_scores[action] += random.uniform(-0.001, 0.001)
                
                best_action = max(action_scores.keys(), key=lambda k: action_scores[k])
                best_confidence = action_scores[best_action]
                
                # DEBUG: Log action scores to understand bias
                logger.debug(f"Action scores for {symbol}: BUY={action_scores['BUY']:.3f}, SELL={action_scores['SELL']:.3f}, HOLD={action_scores['HOLD']:.3f}")
                logger.debug(f"Selected action: {best_action} (confidence: {best_confidence:.3f})")
            else:
                best_action = "HOLD"
                best_confidence = 0.0

            # Calculate aggressiveness-adjusted thresholds
            entry_threshold, exit_threshold = self._calculate_aggressiveness_thresholds(
                current_position_pnl, symbol
            )

            # SIGNAL CONFIRMATION: Only execute signals that meet confirmation criteria
            # Apply confidence thresholds and signal accumulation for trend confirmation
            reasoning["execute_every_signal"] = False
            reasoning["models_aggregated"] = [pred.model_name for pred in predictions]
            reasoning["aggregated_confidence"] = best_confidence

            # Calculate dynamic aggressiveness based on recent performance
            entry_aggressiveness = self._calculate_dynamic_entry_aggressiveness(symbol)

            # Adjust confidence threshold based on entry aggressiveness
            # Higher aggressiveness = lower threshold (more trades)
            # entry_aggressiveness: 0.0 = very conservative, 1.0 = very aggressive
            base_threshold = self.confidence_threshold
            aggressiveness_factor = (
                1.0 - entry_aggressiveness
            )  # Invert: high agg = low factor
            dynamic_threshold = base_threshold * aggressiveness_factor

            # Ensure minimum threshold for safety (don't go below 1% confidence)
            dynamic_threshold = max(0.01, dynamic_threshold)

            # Apply dynamic confidence threshold for signal confirmation
            if best_action != "HOLD":
                if best_confidence < dynamic_threshold:
                    logger.debug(
                        f"Signal below dynamic confidence threshold: {best_action} {symbol} "
                        f"(confidence: {best_confidence:.3f} < {dynamic_threshold:.3f}, "
                        f"base: {base_threshold:.3f}, aggressiveness: {entry_aggressiveness:.2f})"
                    )
                    best_action = "HOLD"
                    best_confidence = 0.0
                else:
                    logger.info(
                        f"SIGNAL ACCEPTED: {best_action} {symbol} "
                        f"(confidence: {best_confidence:.3f} >= {dynamic_threshold:.3f}, "
                        f"aggressiveness: {entry_aggressiveness:.2f})"
                    )
                    # Add signal to accumulator for trend confirmation
                    signal_data = {
                        "action": best_action,
                        "confidence": best_confidence,
                        "timestamp": timestamp,
                        "models": reasoning["models_aggregated"],
                    }

                    # Check if we have enough confirmations
                    confirmed_action = self._check_signal_confirmation(
                        symbol, signal_data
                    )
                    if confirmed_action:
                        logger.info(
                            f"SIGNAL CONFIRMED: {confirmed_action} (confidence: {best_confidence:.3f}) "
                            f"from aggregated models: {reasoning['models_aggregated']}"
                        )
                        best_action = confirmed_action
                        reasoning["signal_confirmed"] = True
                        reasoning["confirmations_received"] = len(
                            self.signal_accumulator[symbol]
                        )
                    else:
                        logger.debug(
                            f"Signal accumulating: {best_action} {symbol} "
                            f"({len(self.signal_accumulator[symbol])}/{self.required_confirmations} confirmations)"
                        )
                        best_action = "HOLD"
                        best_confidence = 0.0
                        reasoning["rejected_reason"] = "awaiting_confirmation"

            # Add P&L-based decision adjustment
            best_action, best_confidence = self._apply_pnl_feedback(
                best_action, best_confidence, current_position_pnl, symbol, reasoning
            )

            # Get memory usage stats
            try:
<<<<<<< HEAD
                memory_usage = self.model_manager.get_storage_stats() if hasattr(self.model_manager, 'get_storage_stats') else {}
=======
                memory_usage = {}
                if hasattr(self.model_registry, "get_memory_stats"):
                    memory_usage = self.model_registry.get_memory_stats()
                else:
                    # Fallback memory usage calculation
                    for model_name in self.model_weights:
                        memory_usage[model_name] = 50.0  # Default MB estimate
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
            except Exception:
                memory_usage = {}

            # Get exit aggressiveness (entry aggressiveness already calculated above)
            exit_aggressiveness = self._calculate_dynamic_exit_aggressiveness(
                symbol, current_position_pnl
            )

            # Determine decision source based on contributing models
            source = self._determine_decision_source(reasoning.get("models_used", []), best_confidence)

            # Create final decision
            decision = TradingDecision(
                action=best_action,
                confidence=best_confidence,
                symbol=symbol,
                price=price,
                timestamp=timestamp,
                reasoning=reasoning,
                memory_usage=memory_usage.get("models", {}) if memory_usage else {},
                source=source,
                entry_aggressiveness=entry_aggressiveness,
                exit_aggressiveness=exit_aggressiveness,
                current_position_pnl=current_position_pnl,
            )

            # logger.info(f"Decision for {symbol}: {best_action} (confidence: {best_confidence:.3f}, "
            #            f"entry_agg: {entry_aggressiveness:.2f}, exit_agg: {exit_aggressiveness:.2f}, "
            #            f"pnl: ${current_position_pnl:.2f})")

            # Trigger training on each decision (especially for executed trades)
            self._trigger_training_on_decision(decision, price)

            return decision

        except Exception as e:
            logger.error(f"Error combining predictions for {symbol}: {e}")
            # Return safe default
            return TradingDecision(
                action="HOLD",
                confidence=0.0,
                symbol=symbol,
                source="error_fallback",
                price=price,
                timestamp=timestamp,
                reasoning={"error": str(e)},
                memory_usage={},
                entry_aggressiveness=0.5,
                exit_aggressiveness=0.5,
                current_position_pnl=0.0,
            )

    # SINGLE-USE FUNCTION - Called only once in codebase
    def _get_timeframe_weight(self, timeframe: str) -> float:
        """Get importance weight for a timeframe"""
        # Higher timeframes get more weight in decision making
        weights = {
            "1m": 0.1,
            "5m": 0.2,
            "15m": 0.3,
            "30m": 0.4,
            "1h": 0.6,
            "4h": 0.8,
            "1d": 1.0,
        }
        return weights.get(timeframe, 0.5)
<<<<<<< HEAD
    
    # Model performance and weight adaptation removed - handled by ModelManager
    # Use self.model_manager for all model performance tracking
    
    # UNUSED FUNCTION - Not called anywhere in codebase
    def get_recent_decisions(self, symbol: str, limit: int = 10) -> List[TradingDecision]:
=======

    def update_model_performance(self, model_name: str, was_correct: bool):
        """Record one prediction outcome for ``model_name``.

        Increments the model's ``total`` counter (and ``correct`` when
        ``was_correct`` is truthy), then refreshes its ``accuracy``. Models
        without an entry in ``self.model_performance`` are ignored.
        """
        if model_name not in self.model_performance:
            return

        record = self.model_performance[model_name]
        record["total"] += 1
        if was_correct:
            record["correct"] += 1

        # Refresh the derived accuracy from the updated counters.
        seen = record["total"]
        record["accuracy"] = record["correct"] / seen if seen > 0 else 0.0

    def adapt_weights(self):
        """Dynamically adapt model weights based on performance

        For every entry in ``self.model_performance`` with at least one
        recorded outcome, ``self.model_weights[model_name]`` is set to
        ``correct / total``. Models without recorded outcomes keep whatever
        weight they already have. Errors are logged and never raised.
        """
        try:
            for model_name, performance in self.model_performance.items():
                total = performance["total"]
                if total > 0:
                    # The weight is simply the model's observed accuracy.
                    self.model_weights[model_name] = performance["correct"] / total

                # A tracked model with no outcomes yet may have no weight entry.
                # Indexing it unconditionally raised KeyError, and the outer
                # handler then skipped every remaining model.
                if model_name in self.model_weights:
                    logger.info(
                        f"Adapted {model_name} weight: {self.model_weights[model_name]}"
                    )

        except Exception as e:
            logger.error(f"Error adapting weights: {e}")

    # NOTE(review): this header is the incoming side of an unresolved merge
    # conflict; the HEAD side declares the same method on a single line just
    # before the "=======" separator. The conflict-end marker below must be
    # removed together with whichever header is dropped.
    def get_recent_decisions(
        self, symbol: str, limit: int = 10
    ) -> List[TradingDecision]:
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
        """Get recent decisions for a symbol

        Args:
            symbol: Key into ``self.recent_decisions``.
            limit: Number of trailing entries to return.

        Returns:
            The last ``limit`` entries stored for ``symbol``, or an empty list
            when the symbol has no entry.
        """
        # NOTE(review): limit=0 returns the whole history because [-0:] is [0:].
        if symbol in self.recent_decisions:
            return self.recent_decisions[symbol][-limit:]
        return []
<<<<<<< HEAD
    
    # UNUSED FUNCTION - Not called anywhere in codebase
    # NOTE(review): unresolved merge conflict. Two versions of this method are
    # present and share the two closing lines after the conflict-end marker.
    # The HEAD version reports only configuration (including decision_frequency);
    # the incoming version also reports model_performance, weights and
    # recent_activity but leaves decision_frequency commented out. Pick one side
    # and delete the markers; which attributes exist on the class cannot be
    # determined from this part of the file.
    def get_performance_metrics(self) -> Dict[str, Any]:
        """Get performance metrics for the orchestrator

        HEAD-side version: returns a dict with a single ``configuration``
        entry holding ``confidence_threshold`` and ``decision_frequency``.
        """
        return {
            # 'model_performance': {},  # Now handled by ModelManager
            # 'weights': {},  # Now handled by ModelManager
            'configuration': {
                'confidence_threshold': self.confidence_threshold,
                'decision_frequency': self.decision_frequency
=======
    def get_performance_metrics(self) -> Dict[str, Any]:
        """Get performance metrics for the orchestrator

        Incoming-side version: returns shallow copies of
        ``self.model_performance`` and ``self.model_weights``, the
        ``confidence_threshold`` configuration value, and the number of stored
        decisions per symbol.
        """
        return {
            "model_performance": self.model_performance.copy(),
            "weights": self.model_weights.copy(),
            "configuration": {
                "confidence_threshold": self.confidence_threshold,
                # 'decision_frequency': self.decision_frequency
            },
            "recent_activity": {
                symbol: len(decisions)
                for symbol, decisions in self.recent_decisions.items()
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
            },
        }
<<<<<<< HEAD
    
    # UNUSED FUNCTION - Not called anywhere in codebase
    def get_model_states(self) -> Dict[str, Dict]:
        """Get current model states with REAL checkpoint data - SSOT for dashboard"""
        try:
            # Cache checkpoint data to avoid repeated loading
            if not hasattr(self, '_checkpoint_cache'):
                self._checkpoint_cache = {}
                self._checkpoint_cache_time = {}
            
            # Only refresh checkpoint data every 60 seconds to avoid spam
            import time
            current_time = time.time()
            cache_expiry = 60  # seconds
            
            from NN.training.model_manager import load_best_checkpoint
            
            # Update each model with REAL checkpoint data (cached)
            # Note: COB_RL removed - functionality integrated into Enhanced CNN
            for model_name in ['dqn_agent', 'enhanced_cnn', 'extrema_trainer', 'decision', 'transformer']:
=======

    def get_model_states(self) -> Dict[str, Dict]:
        """Get current model states with REAL checkpoint data - SSOT for dashboard"""
        try:
            # ENHANCED: Load actual checkpoint metadata for each model
            from utils.checkpoint_manager import load_best_checkpoint

            # Update each model with REAL checkpoint data
            for model_name in [
                "dqn_agent",
                "enhanced_cnn",
                "extrema_trainer",
                "decision",
                "cob_rl",
            ]:
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
                try:
                    # Check if we need to refresh cache for this model
                    needs_refresh = (
                        model_name not in self._checkpoint_cache or
                        current_time - self._checkpoint_cache_time.get(model_name, 0) > cache_expiry
                    )
                    
                    if needs_refresh:
                        result = load_best_checkpoint(model_name)
                        self._checkpoint_cache[model_name] = result
                        self._checkpoint_cache_time[model_name] = current_time
                    
                    result = self._checkpoint_cache[model_name]
                    if result:
                        file_path, metadata = result

                        # Map model names to internal keys
                        internal_key = {
<<<<<<< HEAD
                            'dqn_agent': 'dqn',
                            'enhanced_cnn': 'cnn', 
                            'extrema_trainer': 'extrema_trainer',
                            'decision': 'decision',
                            'transformer': 'transformer'
=======
                            "dqn_agent": "dqn",
                            "enhanced_cnn": "cnn",
                            "extrema_trainer": "extrema_trainer",
                            "decision": "decision",
                            "cob_rl": "cob_rl",
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
                        }.get(model_name, model_name)

                        if internal_key in self.model_states:
                            # Load REAL checkpoint data
                            self.model_states[internal_key]["current_loss"] = getattr(
                                metadata, "loss", None
                            ) or getattr(metadata, "val_loss", None)
                            self.model_states[internal_key]["best_loss"] = getattr(
                                metadata, "loss", None
                            ) or getattr(metadata, "val_loss", None)
                            self.model_states[internal_key]["checkpoint_loaded"] = True
                            self.model_states[internal_key][
                                "checkpoint_filename"
                            ] = metadata.checkpoint_id
                            self.model_states[internal_key]["performance_score"] = (
                                getattr(metadata, "performance_score", 0.0)
                            )
                            self.model_states[internal_key]["created_at"] = str(
                                getattr(metadata, "created_at", "Unknown")
                            )

                            # Set initial loss from checkpoint if available
                            if self.model_states[internal_key]["initial_loss"] is None:
                                # Try to infer initial loss from performance improvement
                                if hasattr(metadata, "accuracy") and metadata.accuracy:
                                    # Estimate initial loss from current accuracy (inverse relationship)
                                    estimated_initial = max(
                                        0.1, 2.0 - (metadata.accuracy * 2.0)
                                    )
                                    self.model_states[internal_key][
                                        "initial_loss"
                                    ] = estimated_initial

                            logger.debug(
                                f"Loaded REAL checkpoint data for {model_name}: loss={self.model_states[internal_key]['current_loss']}"
                            )
                    else:
                        # No checkpoint found - mark as fresh
                        internal_key = {
                            "dqn_agent": "dqn",
                            "enhanced_cnn": "cnn",
                            "extrema_trainer": "extrema_trainer",
                            "decision": "decision",
                            "cob_rl": "cob_rl",
                        }.get(model_name, model_name)

                        if internal_key in self.model_states:
                            self.model_states[internal_key]["checkpoint_loaded"] = False
                            self.model_states[internal_key][
                                "checkpoint_filename"
                            ] = "none (fresh start)"

                except Exception as e:
                    logger.debug(f"No checkpoint found for {model_name}: {e}")

            # ADDITIONAL: Update from live training if models are actively training
            if (
                self.rl_agent
                and hasattr(self.rl_agent, "losses")
                and len(self.rl_agent.losses) > 0
            ):
                recent_losses = self.rl_agent.losses[-10:]  # Last 10 training steps
                if recent_losses:
                    live_loss = sum(recent_losses) / len(recent_losses)
                    # Only update if we have a live loss that's different from checkpoint
                    if (
                        abs(live_loss - (self.model_states["dqn"]["current_loss"] or 0))
                        > 0.001
                    ):
                        self.model_states["dqn"]["current_loss"] = live_loss
                        logger.debug(
                            f"Updated DQN with live training loss: {live_loss:.4f}"
                        )

            if self.cnn_model and hasattr(self.cnn_model, "training_loss"):
                if (
                    self.cnn_model.training_loss
                    and abs(
                        self.cnn_model.training_loss
                        - (self.model_states["cnn"]["current_loss"] or 0)
                    )
                    > 0.001
                ):
                    self.model_states["cnn"][
                        "current_loss"
                    ] = self.cnn_model.training_loss
                    logger.debug(
                        f"Updated CNN with live training loss: {self.cnn_model.training_loss:.4f}"
                    )

            if self.extrema_trainer and hasattr(
                self.extrema_trainer, "best_detection_accuracy"
            ):
                # Convert accuracy to loss estimate
                if self.extrema_trainer.best_detection_accuracy > 0:
                    estimated_loss = max(
                        0.001, 1.0 - self.extrema_trainer.best_detection_accuracy
                    )
                    self.model_states["extrema_trainer"][
                        "current_loss"
                    ] = estimated_loss
                    self.model_states["extrema_trainer"]["best_loss"] = estimated_loss

            # NO LONGER SETTING SYNTHETIC INITIAL LOSS VALUES
            # Keep all None values as None if no real data is available
            # This prevents the "fake progress" issue where Current Loss = Initial Loss

            # Only set initial_loss from actual training history if available
            for model_key, model_state in self.model_states.items():
                # Leave initial_loss as None if no real training history exists
                # Leave current_loss as None if model isn't actively training
                # Leave best_loss as None if no checkpoints exist with real performance data
                pass  # No synthetic data generation

            return self.model_states

        except Exception as e:
            logger.error(f"Error getting model states: {e}")
            # Return None values instead of synthetic data
            return {
                "dqn": {
                    "initial_loss": None,
                    "current_loss": None,
                    "best_loss": None,
                    "checkpoint_loaded": False,
                },
                "cnn": {
                    "initial_loss": None,
                    "current_loss": None,
                    "best_loss": None,
                    "checkpoint_loaded": False,
                },
                "cob_rl": {
                    "initial_loss": None,
                    "current_loss": None,
                    "best_loss": None,
                    "checkpoint_loaded": False,
                },
                "decision": {
                    "initial_loss": None,
                    "current_loss": None,
                    "best_loss": None,
                    "checkpoint_loaded": False,
                },
                "extrema_trainer": {
                    "initial_loss": None,
                    "current_loss": None,
                    "best_loss": None,
                    "checkpoint_loaded": False,
                },
            }

    # SINGLE-USE FUNCTION - Called only once in codebase
    def _initialize_decision_fusion(self):
        """Initialize the decision fusion neural network for learning model effectiveness

        Builds the network from the ``decision_fusion`` section of the
        orchestrator config, moves it to ``self.device``, resets the fusion
        training buffers, tries to restore the best saved checkpoint and
        creates the Adam optimizer.

        A checkpoint that cannot be loaded only produces a warning. Any other
        failure is logged and disables decision fusion
        (``self.decision_fusion_enabled = False``); nothing is raised.
        """
        try:
            if not self.decision_fusion_enabled:
                return

            # Create enhanced decision fusion network
            class DecisionFusionNet(nn.Module):
                """MLP with four linear layers producing BUY/SELL/HOLD probabilities."""

                def __init__(self, input_size=128, hidden_size=256):
                    super().__init__()
                    self.input_size = input_size
                    self.hidden_size = hidden_size

                    # Enhanced architecture for complex decision making.
                    # forward() needs fc4 and the three layer norms, so the
                    # four-layer side of the former merge conflict is kept.
                    self.fc1 = nn.Linear(input_size, hidden_size)
                    self.fc2 = nn.Linear(hidden_size, hidden_size)
                    self.fc3 = nn.Linear(hidden_size, hidden_size // 2)
                    self.fc4 = nn.Linear(hidden_size // 2, 3)  # BUY, SELL, HOLD

                    self.dropout = nn.Dropout(0.3)
                    # Use LayerNorm instead of BatchNorm1d for single-sample training compatibility
                    self.layer_norm1 = nn.LayerNorm(hidden_size)
                    self.layer_norm2 = nn.LayerNorm(hidden_size)
                    self.layer_norm3 = nn.LayerNorm(hidden_size // 2)

                def forward(self, x):
                    x = torch.relu(self.layer_norm1(self.fc1(x)))
                    x = self.dropout(x)
                    x = torch.relu(self.layer_norm2(self.fc2(x)))
                    x = self.dropout(x)
                    x = torch.relu(self.layer_norm3(self.fc3(x)))
                    x = self.dropout(x)
                    return torch.softmax(self.fc4(x), dim=1)

                def save(self, filepath: str):
                    """Save the decision fusion network"""
                    torch.save(
                        {
                            "model_state_dict": self.state_dict(),
                            "input_size": self.input_size,
                            "hidden_size": self.hidden_size,
                        },
                        filepath,
                    )
                    logger.info(f"Decision fusion network saved to {filepath}")

                def load(self, filepath: str):
                    """Load the decision fusion network"""
                    checkpoint = torch.load(
                        filepath,
                        map_location=self.device if hasattr(self, "device") else "cpu",
                    )
                    self.load_state_dict(checkpoint["model_state_dict"])
                    logger.info(f"Decision fusion network loaded from {filepath}")

            # Get decision fusion configuration
            decision_fusion_config = self.config.orchestrator.get("decision_fusion", {})
            input_size = decision_fusion_config.get("input_size", 128)
            hidden_size = decision_fusion_config.get("hidden_size", 256)
            learning_rate = decision_fusion_config.get("learning_rate", 0.001)

            self.decision_fusion_network = DecisionFusionNet(
                input_size=input_size, hidden_size=hidden_size
            )
            # Move decision fusion network to the device
            self.decision_fusion_network.to(self.device)

            # Initialize decision fusion mode
            self.decision_fusion_mode = decision_fusion_config.get("mode", "neural")
            self.decision_fusion_enabled = decision_fusion_config.get("enabled", True)
            self.decision_fusion_history_length = decision_fusion_config.get(
                "history_length", 20
            )
            self.decision_fusion_training_interval = decision_fusion_config.get(
                "training_interval", 100
            )
            self.decision_fusion_min_samples = decision_fusion_config.get(
                "min_samples_for_training", 50
            )

            # Initialize decision fusion training data
            self.decision_fusion_training_data = []
            self.decision_fusion_decisions_count = 0

            # Try to load existing checkpoint
            try:
                from utils.checkpoint_manager import load_best_checkpoint

                result = load_best_checkpoint("decision_fusion")
                if result:
                    file_path, metadata = result
                    # Load the checkpoint into the network
                    checkpoint = torch.load(file_path, map_location=self.device)

                    # Load model state
                    if "model_state_dict" in checkpoint:
                        self.decision_fusion_network.load_state_dict(
                            checkpoint["model_state_dict"]
                        )

                    # Update model states under the "decision_fusion" key.
                    # The checkpoint loss seeds all three loss fields.
                    checkpoint_loss = metadata.performance_metrics.get("loss", 0.0)
                    fusion_state = self.model_states.setdefault("decision_fusion", {})
                    fusion_state["initial_loss"] = checkpoint_loss
                    fusion_state["current_loss"] = checkpoint_loss
                    fusion_state["best_loss"] = checkpoint_loss
                    fusion_state["checkpoint_loaded"] = True
                    fusion_state["checkpoint_filename"] = metadata.checkpoint_id

                    loss_str = f"{checkpoint_loss:.4f}"
                    logger.info(
                        f"Decision fusion network loaded from checkpoint: {metadata.checkpoint_id} (loss={loss_str})"
                    )
                else:
                    logger.info(
                        "No existing decision fusion checkpoint found, starting fresh"
                    )
            except Exception as e:
                logger.warning(f"Error loading decision fusion checkpoint: {e}")
                logger.info("Decision fusion network starting fresh")

            # Initialize optimizer for decision fusion training
            self.decision_fusion_optimizer = torch.optim.Adam(
                self.decision_fusion_network.parameters(),
                lr=learning_rate,
            )

            logger.info(f"Decision fusion network initialized on device: {self.device}")
            logger.info(f"Decision fusion mode: {self.decision_fusion_mode}")
            logger.info(f"Decision fusion optimizer initialized with lr={learning_rate}")

        except Exception as e:
            logger.warning(f"Decision fusion initialization failed: {e}")
            self.decision_fusion_enabled = False
<<<<<<< HEAD
    
    # SINGLE-USE FUNCTION - Called only once in codebase
=======

    async def _train_decision_fusion_programmatic(self):
        """Train decision fusion model in programmatic mode

        Runs a single optimisation step over the most recent (up to 100)
        recorded samples that carry both ``input_features`` and ``outcome``.
        Does nothing when there is no network, when fewer than
        ``decision_fusion_min_samples`` samples are recorded, or when fewer
        than 10 of the recent samples are usable. Errors are logged, not raised.
        """
        try:
            if (
                not self.decision_fusion_network
                or len(self.decision_fusion_training_data) < self.decision_fusion_min_samples
            ):
                return

            logger.info(f"Training decision fusion model with {len(self.decision_fusion_training_data)} samples")

            # Prepare training data from the last 100 samples
            usable_samples = [
                sample
                for sample in self.decision_fusion_training_data[-100:]
                if 'input_features' in sample and 'outcome' in sample
            ]
            if len(usable_samples) < 10:  # Need minimum samples
                return

            inputs = [sample['input_features'] for sample in usable_samples]
            # Convert outcome to target (1.0 for correct, 0.0 for incorrect)
            targets = [
                1.0 if sample['outcome']['correct'] else 0.0
                for sample in usable_samples
            ]

            # Convert to tensors
            inputs_tensor = torch.tensor(inputs, dtype=torch.float32, device=self.device)
            targets_tensor = torch.tensor(targets, dtype=torch.float32, device=self.device)

            # Reuse the persistent optimizer so its running state and the
            # configured learning rate carry over between steps; building a new
            # Adam on every call threw both away.
            optimizer = getattr(self, "decision_fusion_optimizer", None)
            if optimizer is None:
                optimizer = torch.optim.Adam(self.decision_fusion_network.parameters(), lr=0.001)
                self.decision_fusion_optimizer = optimizer

            # Training step
            self.decision_fusion_network.train()
            optimizer.zero_grad()
            outputs = self.decision_fusion_network(inputs_tensor)
            # NOTE(review): the network built in _initialize_decision_fusion
            # ends in a 3-way softmax, while targets holds one scalar per
            # sample, so these shapes look incompatible for MSELoss - confirm
            # the intended training objective before relying on this step.
            loss = torch.nn.MSELoss()(outputs.squeeze(), targets_tensor)
            loss.backward()
            optimizer.step()

            # Update statistics
            current_loss = loss.item()
            self.update_model_loss("decision_fusion", current_loss)

            logger.info(f"Decision fusion training completed: loss={current_loss:.4f}, samples={len(inputs)}")

            # Save checkpoint: ensure first save after minimum samples, then periodic saves
            if (len(self.decision_fusion_training_data) == self.decision_fusion_min_samples) or \
               (self.decision_fusion_decisions_count % (self.decision_fusion_training_interval * 5) == 0):
                self._save_decision_fusion_checkpoint()

        except Exception as e:
            logger.error(f"Error training decision fusion in programmatic mode: {e}")

    def _save_decision_fusion_checkpoint(self):
        """Save decision fusion model checkpoint

        Writes the network (and optimizer, when present) through
        ``self.checkpoint_manager`` and, on success, records the checkpoint in
        ``self.model_states['decision_fusion']``. Does nothing without a
        network or a checkpoint manager. Errors are logged, not raised.
        """
        try:
            if not self.decision_fusion_network or not self.checkpoint_manager:
                return

            # Get current performance score
            model_stats = self.model_statistics.get('decision_fusion')
            performance_score = 0.5  # Default score

            if model_stats and model_stats.accuracy is not None:
                performance_score = model_stats.accuracy
            elif hasattr(self, 'decision_fusion_performance_score'):
                performance_score = self.decision_fusion_performance_score

            # Convert performance to loss
            checkpoint_loss = 1.0 - performance_score

            # Create checkpoint data
            checkpoint_data = {
                'model_state_dict': self.decision_fusion_network.state_dict(),
                'optimizer_state_dict': self.decision_fusion_optimizer.state_dict() if hasattr(self, 'decision_fusion_optimizer') else None,
                'epoch': self.decision_fusion_decisions_count,
                'loss': checkpoint_loss,
                'performance_score': performance_score,
                'timestamp': datetime.now().isoformat(),
                'model_name': 'decision_fusion',
                'training_data_count': len(self.decision_fusion_training_data)
            }

            # Save checkpoint using checkpoint manager
            checkpoint_path = self.checkpoint_manager.save_model_checkpoint(
                model_name="decision_fusion",
                model_data=checkpoint_data,
                loss=checkpoint_loss,
                performance_score=performance_score
            )

            if not checkpoint_path:
                logger.warning("Failed to save decision fusion checkpoint")
                return

            logger.info(f"Decision fusion checkpoint saved: {checkpoint_path}")

            # Update model state
            fusion_state = self.model_states.setdefault('decision_fusion', {})

            # best_loss may be stored as None before any real loss is known;
            # min(None, x) would raise and report this successful save as an
            # error, so None is treated as "no best loss yet".
            previous_best = fusion_state.get('best_loss')
            if previous_best is None:
                previous_best = float('inf')

            fusion_state.update({
                'checkpoint_loaded': True,
                'checkpoint_filename': checkpoint_path.name if hasattr(checkpoint_path, 'name') else str(checkpoint_path),
                'current_loss': checkpoint_loss,
                'best_loss': min(previous_best, checkpoint_loss),
                'last_training': datetime.now(),
                'performance_score': performance_score
            })

            logger.info(f"Decision fusion model state updated with checkpoint info")

        except Exception as e:
            logger.error(f"Error saving decision fusion checkpoint: {e}")

    def _create_decision_fusion_input(
        self,
        symbol: str,
        predictions: List[Prediction],
        current_price: float,
        timestamp: datetime,
    ) -> torch.Tensor:
        """Create input features for the decision fusion network

        The feature vector is assembled in this order: market data, per-model
        statistics and recent prediction history, the current predictions,
        position/P&L state and time-of-day values. It is then zero-padded or
        truncated to ``self.decision_fusion_network.input_size``.

        Args:
            symbol: Symbol whose market data and position are looked up.
            predictions: Current predictions; ``action`` and ``confidence`` are read.
            current_price: Price used as a feature and to normalise the bands.
            timestamp: Source of the hour/minute/weekday features.

        Returns:
            A float32 tensor on ``self.device`` with a leading batch dimension
            of 1. On any error an all-zero tensor of the same width is returned.
        """
        try:
            # Action encoding: BUY=0, SELL=1, HOLD=2 (unknown actions count as HOLD)
            action_codes = {"BUY": 0.0, "SELL": 1.0, "HOLD": 2.0}
            history_length = self.decision_fusion_history_length
            features = []

            # 1. Market data features (standard input)
            market_data = self._get_current_market_data(symbol)
            if market_data:
                features.extend(
                    [
                        current_price,
                        market_data.get("volume", 0.0),
                        market_data.get("rsi", 50.0) / 100.0,  # Normalize RSI
                        market_data.get("macd", 0.0),
                        market_data.get("bollinger_upper", current_price)
                        / current_price
                        - 1.0,
                        market_data.get("bollinger_lower", current_price)
                        / current_price
                        - 1.0,
                    ]
                )
            else:
                # Fallback features
                features.extend([current_price, 0.0, 0.5, 0.0, 0.0, 0.0])

            # 2. Model prediction features (recent decisions per model)
            for model_name in ("dqn", "cnn", "transformer", "cob_rl"):
                model_stats = self.model_statistics.get(model_name)
                if not model_stats:
                    # No model stats available: same width, all zeros
                    features.extend(
                        [0.0, 0.0, 0.0, 0.0, 0.0] + [0.0, 0.0] * history_length
                    )
                    continue

                # Model performance metrics
                features.extend(
                    [
                        model_stats.accuracy or 0.0,
                        model_stats.average_loss or 0.0,
                        model_stats.best_loss or 0.0,
                        model_stats.total_inferences or 0.0,
                        model_stats.total_trainings or 0.0,
                    ]
                )

                # Recent predictions, oldest first
                recent_predictions = list(model_stats.predictions_history)[
                    -history_length:
                ]
                for pred in recent_predictions:
                    features.extend(
                        [action_codes.get(pred["action"], 2.0), pred["confidence"]]
                    )

                # Pad with zeros when the history is shorter than the window
                padding_needed = history_length - len(recent_predictions)
                features.extend([0.0, 0.0] * padding_needed)

            # 3. Current predictions features
            for pred in predictions:
                features.extend([action_codes.get(pred.action, 2.0), pred.confidence])

            # 4. Position and P&L features
            current_position_pnl = self._get_current_position_pnl(symbol, current_price)
            has_position = self._has_open_position(symbol)
            features.extend(
                [
                    current_position_pnl,
                    1.0 if has_position else 0.0,
                    self.entry_aggressiveness,
                    self.exit_aggressiveness,
                ]
            )

            # 5. Time-based features
            features.extend(
                [
                    timestamp.hour / 24.0,  # Hour of day (0-1)
                    timestamp.minute / 60.0,  # Minute of hour (0-1)
                    timestamp.weekday() / 7.0,  # Day of week (0-1)
                ]
            )

            # Ensure we have the expected input size
            expected_size = self.decision_fusion_network.input_size
            if len(features) < expected_size:
                features.extend([0.0] * (expected_size - len(features)))
            elif len(features) > expected_size:
                features = features[:expected_size]

            feature_tensor = torch.tensor(
                features, dtype=torch.float32, device=self.device
            )

            # Log input feature statistics for debugging. The statistics come
            # from the tensor itself and only when DEBUG is enabled: numpy is
            # an optional import in this module, and a failure while logging
            # must not replace the real features with the zero fallback.
            if features and logger.isEnabledFor(logging.DEBUG):
                logger.debug(f"Decision fusion input features: size={len(features)}, "
                           f"mean={feature_tensor.mean().item():.4f}, "
                           f"std={feature_tensor.std(unbiased=False).item():.4f}, "
                           f"min={feature_tensor.min().item():.4f}, "
                           f"max={feature_tensor.max().item():.4f}")

            return feature_tensor.unsqueeze(0)

        except Exception as e:
            logger.error(f"Error creating decision fusion input: {e}")
            # Return zero tensor as fallback
            return torch.zeros(
                1, self.decision_fusion_network.input_size, device=self.device
            )

    def _make_decision_fusion_decision(
        self,
        symbol: str,
        predictions: List[Prediction],
        current_price: float,
        timestamp: datetime,
    ) -> TradingDecision:
        """Use the decision fusion network to make a trading decision.

        The fusion input tensor is built from ``predictions``, the network is
        run without gradients, and the arg-max over its outputs (read in the
        order BUY, SELL, HOLD) becomes the action, with the matching output
        value as confidence. P&L feedback is then applied and the result is
        wrapped in a ``TradingDecision``, added to the fusion training buffer
        and passed to ``_trigger_training_on_decision``.

        The method falls back to ``self._combine_predictions`` when:
          * the network output is not a finite 1-D vector with at least three
            entries,
          * the overconfidence counter reaches
            ``self.max_overconfidence_threshold`` (the model is then disabled
            via ``disable_decision_fusion_temporarily``), or
          * any exception is raised along the way.

        Args:
            symbol: Symbol the decision is made for.
            predictions: Model predictions feeding the fusion network.
            current_price: Price used for P&L lookup and stored on the decision.
            timestamp: Timestamp stored on the decision and used for features.

        Returns:
            The fused ``TradingDecision`` or the result of
            ``_combine_predictions`` on fallback.
        """
        try:
            # Create input features
            input_features = self._create_decision_fusion_input(
                symbol, predictions, current_price, timestamp
            )

            # DEBUG: Log decision fusion input features
            logger.info("=== DECISION FUSION INPUT FEATURES ===")
            logger.info(f"  Input shape: {input_features.shape}")
            logger.info(f"  Input features mean: {input_features.mean().item():.4f}")
            logger.info(f"  Input features std: {input_features.std().item():.4f}")

            # Get decision fusion network prediction
            with torch.no_grad():
                output = self.decision_fusion_network(input_features)
                probabilities = output.squeeze().cpu().numpy()

            # A NaN/inf or malformed output must never become a trade: argmax
            # over NaNs still returns an index, which would yield a decision
            # with NaN confidence. Raising here routes to the fallback below.
            if (
                probabilities.ndim != 1
                or probabilities.size < 3
                or not np.isfinite(probabilities).all()
            ):
                raise ValueError(
                    f"invalid decision fusion output "
                    f"(shape={probabilities.shape}, values={probabilities})"
                )

            # DEBUG: Log decision fusion outputs
            logger.info("=== DECISION FUSION OUTPUTS ===")
            logger.info(f"  Raw output shape: {output.shape}")
            logger.info(f"  Probabilities: BUY={probabilities[0]:.4f}, SELL={probabilities[1]:.4f}, HOLD={probabilities[2]:.4f}")
            logger.info(f"  Probability sum: {probabilities.sum():.4f}")

            # Convert probabilities to action and confidence
            action_idx = np.argmax(probabilities)
            actions = ["BUY", "SELL", "HOLD"]
            best_action = actions[action_idx]
            best_confidence = float(probabilities[action_idx])

            # DEBUG: Check for overconfidence
            if best_confidence > 0.95:
                self.decision_fusion_overconfidence_count += 1
                logger.warning(f"DECISION FUSION OVERCONFIDENCE DETECTED: {best_confidence:.3f} for {best_action} (count: {self.decision_fusion_overconfidence_count})")

                if self.decision_fusion_overconfidence_count >= self.max_overconfidence_threshold:
                    logger.error(f"Decision fusion overconfidence threshold reached ({self.max_overconfidence_threshold}). Disabling model.")
                    self.disable_decision_fusion_temporarily("overconfidence threshold exceeded")
                    # Fallback to programmatic method
                    return self._combine_predictions(
                        symbol, current_price, predictions, timestamp
                    )

            # Get current position P&L
            current_position_pnl = self._get_current_position_pnl(symbol, current_price)

            # Create reasoning
            reasoning = {
                "method": "decision_fusion_neural",
                "predictions_count": len(predictions),
                "models_used": [pred.model_name for pred in predictions],
                "fusion_probabilities": {
                    "BUY": float(probabilities[0]),
                    "SELL": float(probabilities[1]),
                    "HOLD": float(probabilities[2]),
                },
                "input_features_size": input_features.shape[1],
                "decision_fusion_mode": self.decision_fusion_mode,
            }

            # Apply P&L feedback (receives the reasoning dict as well)
            best_action, best_confidence = self._apply_pnl_feedback(
                best_action, best_confidence, current_position_pnl, symbol, reasoning
            )

            # Memory usage is best-effort: a failure here must not block the
            # decision, but it is logged instead of being silently dropped.
            memory_usage = {}
            try:
                if hasattr(self.model_registry, "get_memory_stats"):
                    memory_usage = self.model_registry.get_memory_stats()
            except Exception as mem_err:
                logger.debug(f"Could not read model registry memory stats: {mem_err}")

            # Determine decision source, honoring routing toggles: only count models whose routing is enabled
            try:
                routed_models = [m for m in reasoning.get("models_used", []) if self.is_model_routing_enabled(m)]
            except Exception:
                routed_models = reasoning.get("models_used", [])
            source = self._determine_decision_source(routed_models, best_confidence)

            # Create final decision
            decision = TradingDecision(
                action=best_action,
                confidence=best_confidence,
                symbol=symbol,
                price=current_price,
                timestamp=timestamp,
                reasoning=reasoning,
                memory_usage=memory_usage.get("models", {}) if memory_usage else {},
                source=source,
                entry_aggressiveness=self.entry_aggressiveness,
                exit_aggressiveness=self.exit_aggressiveness,
                current_position_pnl=current_position_pnl,
            )

            # Add to training data for future training
            self._add_decision_fusion_training_sample(
                decision, predictions, current_price
            )

            # Trigger training on decision
            self._trigger_training_on_decision(decision, current_price)

            return decision

        except Exception as e:
            logger.error(f"Error in decision fusion decision: {e}")
            # Fallback to programmatic method
            return self._combine_predictions(
                symbol, current_price, predictions, timestamp
            )

    def _store_decision_fusion_inference(
        self,
        decision: TradingDecision,
        predictions: List[Prediction],
        current_price: float,
    ):
        """Store a decision fusion inference for later training.

        Rebuilds the fusion input tensor for ``decision``, schedules
        ``_store_inference_data_async`` on the running event loop and updates
        the "decision_fusion" inference statistics. Errors are logged and
        swallowed.

        The stored ``Prediction`` carries the probabilities recorded under
        ``decision.reasoning["fusion_probabilities"]`` when that entry has all
        of BUY/SELL/HOLD. Otherwise the legacy uniform values are used.

        Args:
            decision: Decision whose action/confidence/symbol/timestamp are stored.
            predictions: Predictions used to rebuild the fusion input.
            current_price: Price used to rebuild the fusion input.
        """
        try:
            # Create input features for decision fusion
            input_features = self._create_decision_fusion_input(
                decision.symbol, predictions, current_price, decision.timestamp
            )

            # Prefer the probabilities the network actually produced over
            # placeholder values.
            action_names = ("BUY", "SELL", "HOLD")
            reasoning = getattr(decision, "reasoning", None)
            fusion_probs = (
                reasoning.get("fusion_probabilities")
                if isinstance(reasoning, dict)
                else None
            )
            if isinstance(fusion_probs, dict) and all(
                name in fusion_probs for name in action_names
            ):
                probabilities = {name: float(fusion_probs[name]) for name in action_names}
            else:
                # NOTE(review): legacy placeholder kept only for compatibility
                # with consumers that expect all three keys.
                logger.debug("No fusion probabilities on decision; using legacy uniform values")
                probabilities = {"BUY": 0.33, "SELL": 0.33, "HOLD": 0.34}

            # One Prediction object serves both storage and statistics.
            fusion_prediction = Prediction(
                action=decision.action,
                confidence=decision.confidence,
                probabilities=probabilities,
                timeframe="1m",
                timestamp=decision.timestamp,
                model_name="decision_fusion",
            )

            # Store in database for later training. The loop is checked first
            # so that no coroutine is created (and left un-awaited) when this
            # method is called outside a running event loop.
            try:
                running_loop = asyncio.get_running_loop()
            except RuntimeError:
                running_loop = None

            if running_loop is None:
                logger.warning(
                    "No running event loop; skipping async storage of decision fusion inference"
                )
            else:
                running_loop.create_task(
                    self._store_inference_data_async(
                        "decision_fusion",
                        input_features,
                        fusion_prediction,
                        decision.timestamp,
                        decision.symbol,
                    )
                )

            # Update inference statistics
            self._update_model_statistics("decision_fusion", prediction=fusion_prediction)

            logger.debug(f"Stored decision fusion inference: {decision.action} (confidence: {decision.confidence:.3f})")

        except Exception as e:
            logger.error(f"Error storing decision fusion inference: {e}")

    def _add_decision_fusion_training_sample(
        self,
        decision: TradingDecision,
        predictions: List[Prediction],
        current_price: float,
    ):
        """Add decision fusion training sample (legacy method - kept for compatibility).

        Appends a sample (rebuilt input tensor plus the decision's action and
        confidence as targets) to ``self.decision_fusion_training_data``,
        increments ``self.decision_fusion_decisions_count``, updates the
        "decision_fusion" inference statistics and, every
        ``self.decision_fusion_training_interval`` decisions once at least
        ``self.decision_fusion_min_samples`` samples are buffered, calls
        ``_train_decision_fusion_network``. Errors are logged and swallowed.

        The statistics ``Prediction`` carries the probabilities recorded under
        ``decision.reasoning["fusion_probabilities"]`` when that entry has all
        of BUY/SELL/HOLD. Otherwise the legacy uniform values are used.

        Args:
            decision: Decision providing the target action/confidence.
            predictions: Predictions used to rebuild the fusion input.
            current_price: Price stored with the sample.
        """
        try:
            # Create training sample
            training_sample = {
                "input_features": self._create_decision_fusion_input(
                    decision.symbol, predictions, current_price, decision.timestamp
                ),
                "target_action": decision.action,
                "target_confidence": decision.confidence,
                "timestamp": decision.timestamp,
                "price": current_price,
            }

            self.decision_fusion_training_data.append(training_sample)
            self.decision_fusion_decisions_count += 1

            # Prefer the probabilities the network actually produced over
            # placeholder values.
            action_names = ("BUY", "SELL", "HOLD")
            reasoning = getattr(decision, "reasoning", None)
            fusion_probs = (
                reasoning.get("fusion_probabilities")
                if isinstance(reasoning, dict)
                else None
            )
            if isinstance(fusion_probs, dict) and all(
                name in fusion_probs for name in action_names
            ):
                probabilities = {name: float(fusion_probs[name]) for name in action_names}
            else:
                # NOTE(review): legacy placeholder kept only for compatibility
                # with consumers that expect all three keys.
                probabilities = {"BUY": 0.33, "SELL": 0.33, "HOLD": 0.34}

            # Update inference statistics for decision fusion
            self._update_model_statistics(
                "decision_fusion",
                prediction=Prediction(
                    action=decision.action,
                    confidence=decision.confidence,
                    probabilities=probabilities,
                    timeframe="1m",
                    timestamp=decision.timestamp,
                    model_name="decision_fusion",
                ),
            )

            # Train decision fusion network periodically
            interval_reached = (
                self.decision_fusion_decisions_count
                % self.decision_fusion_training_interval
                == 0
            )
            if (
                interval_reached
                and len(self.decision_fusion_training_data)
                >= self.decision_fusion_min_samples
            ):
                self._train_decision_fusion_network()

        except Exception as e:
            logger.error(f"Error adding decision fusion training sample: {e}")
    def _train_decision_fusion_network(self):
        """Train the decision fusion network on collected data.

        Does nothing when fewer than ``self.decision_fusion_min_samples``
        samples are buffered. Otherwise all buffered samples are concatenated
        into one batch, a single Adam step (lr=0.001) is taken against one-hot
        targets ordered BUY, SELL, HOLD, training statistics are updated and
        the buffer is cleared. Errors are logged and swallowed; in that case
        the buffer is left as it was.

        The network is always switched back to eval mode after the step so
        that subsequent inference does not run in training mode.
        """
        try:
            if (
                len(self.decision_fusion_training_data)
                < self.decision_fusion_min_samples
            ):
                return

            logger.info(
                f"Training decision fusion network with {len(self.decision_fusion_training_data)} samples"
            )

            # Prepare training data
            inputs = []
            targets = []

            for sample in self.decision_fusion_training_data:
                inputs.append(sample["input_features"])

                # Create target (one-hot encoding)
                action_idx = {"BUY": 0, "SELL": 1, "HOLD": 2}[sample["target_action"]]
                target = torch.zeros(3, device=self.device)
                target[action_idx] = 1.0
                targets.append(target)

            # Each stored input already has a leading batch dimension, so the
            # inputs are concatenated while the 1-D targets are stacked.
            inputs = torch.cat(inputs, dim=0)
            targets = torch.stack(targets, dim=0)

            # Train the network
            optimizer = torch.optim.Adam(
                self.decision_fusion_network.parameters(), lr=0.001
            )
            # NOTE(review): CrossEntropyLoss applies log-softmax itself; confirm
            # the network emits logits here rather than probabilities.
            criterion = nn.CrossEntropyLoss()

            self.decision_fusion_network.train()
            try:
                optimizer.zero_grad()

                outputs = self.decision_fusion_network(inputs)
                loss = criterion(outputs, targets)

                loss.backward()
                optimizer.step()
            finally:
                # Restore inference mode even if the step raised.
                self.decision_fusion_network.eval()

            # Update model statistics for decision fusion
            self._update_model_training_statistics(
                "decision_fusion",
                loss=loss.item(),
                training_duration_ms=None
            )

            # Measure and log performance
            self._measure_decision_fusion_performance(loss.item())

            logger.info(f"Decision fusion training completed. Loss: {loss.item():.4f}")

            # Clear training data after training
            self.decision_fusion_training_data = []

        except Exception as e:
            logger.error(f"Error training decision fusion network: {e}")

    async def _train_decision_fusion_on_outcome(
        self,
        record: Dict,
        was_correct: bool,
        price_change_pct: float,
        sophisticated_reward: float,
    ):
        """Train decision fusion model based on outcome (like other models).

        Takes one Adam step (lr=0.001) on the stored input features of
        ``record`` against a one-hot target derived from the realized price
        move. Returns without training when decision fusion is disabled, the
        network is missing, or the stored features are not a ``(1, N)``
        tensor. Errors are logged and swallowed.

        Target selection (threshold 0.001 on ``price_change_pct``):
          * BUY with a move above +0.001, SELL with a move below -0.001, or
            HOLD with an absolute move under 0.001 keeps the predicted action.
          * A wrong BUY becomes SELL on a negative move, otherwise HOLD.
          * A wrong SELL becomes BUY on a positive move, otherwise HOLD.
          * Any other wrong prediction (HOLD) becomes BUY on a positive move,
            otherwise SELL.

        Args:
            record: Stored inference record; ``"input_features"`` and
                ``"action"`` are read from it.
            was_correct: Not used by this method.
            price_change_pct: Realized price change since the inference.
            sophisticated_reward: Only included in the log message.
        """
        try:
            if not self.decision_fusion_enabled or self.decision_fusion_network is None:
                return

            # Get the stored input features
            input_features = record.get("input_features")
            if input_features is None:
                logger.warning("No input features found for decision fusion training")
                return

            # Validate input features
            if not isinstance(input_features, torch.Tensor):
                logger.warning(f"Invalid input features type: {type(input_features)}")
                return

            if input_features.dim() != 2 or input_features.size(0) != 1:
                logger.warning(f"Invalid input features shape: {input_features.shape}")
                return

            # Create target based on outcome
            predicted_action = record.get("action", "HOLD")

            # Determine if the decision was correct based on price movement
            # Use realistic microstructure thresholds (approx 0.1%)
            if predicted_action == "BUY" and price_change_pct > 0.001:
                target_action = "BUY"
            elif predicted_action == "SELL" and price_change_pct < -0.001:
                target_action = "SELL"
            elif predicted_action == "HOLD" and abs(price_change_pct) < 0.001:
                target_action = "HOLD"
            else:
                # Decision was wrong - use opposite action as target
                if predicted_action == "BUY":
                    target_action = "SELL" if price_change_pct < 0 else "HOLD"
                elif predicted_action == "SELL":
                    target_action = "BUY" if price_change_pct > 0 else "HOLD"
                else:  # HOLD
                    # The target follows the sign of the move. The previous
                    # "> 0.1" cutoff labelled modest upward moves as SELL.
                    target_action = "BUY" if price_change_pct > 0 else "SELL"

            # Create target tensor
            action_idx = {"BUY": 0, "SELL": 1, "HOLD": 2}[target_action]
            target = torch.zeros(3, device=self.device)
            target[action_idx] = 1.0

            # Train the network
            self.decision_fusion_network.train()
            try:
                optimizer = torch.optim.Adam(
                    self.decision_fusion_network.parameters(), lr=0.001
                )
                criterion = nn.CrossEntropyLoss()

                optimizer.zero_grad()

                # Forward pass - LayerNorm works with single samples
                output = self.decision_fusion_network(input_features)
                loss = criterion(output, target.unsqueeze(0))

                # Log training details for debugging
                logger.debug(f"Decision fusion training: input_shape={input_features.shape}, "
                            f"output_shape={output.shape}, target_shape={target.unsqueeze(0).shape}, "
                            f"loss={loss.item():.4f}")

                # Backward pass
                loss.backward()
                optimizer.step()
            finally:
                # Set back to eval mode for inference, even if the step raised
                self.decision_fusion_network.eval()

            # Update training statistics
            self._update_model_training_statistics(
                "decision_fusion",
                loss=loss.item()
            )

            # Measure and log performance
            self._measure_decision_fusion_performance(loss.item())

            logger.info(
                f"Decision fusion trained on outcome: {predicted_action} -> {target_action} "
                f"(price_change: {price_change_pct:+.3f}%, reward: {sophisticated_reward:.4f}, loss: {loss.item():.4f})"
            )

        except Exception as e:
            logger.error(f"Error training decision fusion on outcome: {e}")

    def _measure_decision_fusion_performance(self, loss: float):
        """Record a decision fusion training loss and refresh dashboard state.

        The loss is pushed into the "decision_fusion" ``ModelStatistics``
        entry (created on first use). Once more than one loss is recorded, the
        average and trend over the last ten losses and a score of
        ``max(0, 1 - average)`` are logged and written to
        ``self.model_states["decision_fusion"]``. Errors are logged and
        swallowed.

        Args:
            loss: Loss value of the training step that just completed.
        """
        model_key = "decision_fusion"
        try:
            # Lazily create the statistics holder
            if model_key not in self.model_statistics:
                self.model_statistics[model_key] = ModelStatistics(model_key)

            fusion_stats = self.model_statistics[model_key]
            fusion_stats.update_training_stats(loss=loss)

            # Metrics need at least two recorded losses
            if len(fusion_stats.losses) <= 1:
                return

            window = list(fusion_stats.losses)[-10:]  # Last 10 losses
            window_len = len(window)
            avg_loss = sum(window) / window_len
            loss_trend = (window[-1] - window[0]) / window_len

            # Lower loss maps to a higher score, floored at zero
            performance_score = max(0.0, 1.0 - avg_loss)

            logger.info(f"Decision Fusion Performance: avg_loss={avg_loss:.4f}, trend={loss_trend:.4f}, score={performance_score:.3f}")

            # Publish to the model state consumed by the dashboard
            last_trained = fusion_stats.last_training_time
            self.model_states.setdefault(model_key, {}).update({
                "current_loss": loss,
                "average_loss": avg_loss,
                "performance_score": performance_score,
                "training_count": fusion_stats.total_trainings,
                "loss_trend": loss_trend,
                "last_training_time": last_trained.isoformat() if last_trained else None,
            })

        except Exception as e:
            logger.error(f"Error measuring decision fusion performance: {e}")

    def _initialize_transformer_model(self):
        """Build the trading transformer and restore its best checkpoint if any.

        Sets ``self.primary_transformer`` and
        ``self.primary_transformer_trainer`` from
        ``create_trading_transformer`` and records the checkpoint status under
        ``self.model_states["transformer"]``. A missing or unreadable
        checkpoint results in a fresh start. If construction itself fails,
        both attributes are set to None and a warning is logged.
        """

        def _fresh_start_state():
            # State recorded whenever no checkpoint could be restored
            return {
                "initial_loss": None,
                "current_loss": None,
                "best_loss": None,
                "checkpoint_loaded": False,
                "checkpoint_filename": "none (fresh start)",
            }

        try:
            from NN.models.advanced_transformer_trading import (
                create_trading_transformer,
                TradingTransformerConfig,
            )

            # Architecture settings for the transformer
            transformer_config = TradingTransformerConfig(
                d_model=512,
                n_heads=8,
                n_layers=8,
                seq_len=100,
                n_actions=3,
                use_multi_scale_attention=True,
                use_market_regime_detection=True,
                use_uncertainty_estimation=True,
                use_deep_attention=True,
                use_residual_connections=True,
                use_layer_norm_variants=True,
            )

            # The factory returns the model together with its trainer
            model, trainer = create_trading_transformer(transformer_config)
            self.primary_transformer = model
            self.primary_transformer_trainer = trainer

            # Checkpoint restore is optional; failures fall back to a fresh start
            try:
                from utils.checkpoint_manager import load_best_checkpoint

                checkpoint = load_best_checkpoint("transformer", "transformer")
                if not checkpoint:
                    logger.info(
                        "No existing transformer checkpoint found, starting fresh"
                    )
                    self.model_states["transformer"] = _fresh_start_state()
                else:
                    file_path, metadata = checkpoint
                    self.primary_transformer_trainer.load_model(file_path)
                    checkpoint_loss = metadata.performance_metrics.get("loss", None)
                    self.model_states["transformer"] = {
                        "initial_loss": None,
                        "current_loss": checkpoint_loss,
                        "best_loss": checkpoint_loss,
                        "checkpoint_loaded": True,
                        "checkpoint_filename": metadata.checkpoint_id,
                    }
                    logger.info(
                        f"Transformer model loaded from checkpoint: {metadata.checkpoint_id}"
                    )
            except Exception as e:
                logger.warning(f"Error loading transformer checkpoint: {e}")
                logger.info("Transformer model starting fresh")
                self.model_states["transformer"] = _fresh_start_state()

            logger.info("Transformer model initialized")

        except Exception as e:
            logger.warning(f"Transformer model initialization failed: {e}")
            self.primary_transformer = None
            self.primary_transformer_trainer = None

>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
    def _initialize_enhanced_training_system(self):
        """Initialize the enhanced real-time training system.

        Returns early (leaving training enabled state untouched) when
        ``self.training_enabled`` is false or ``ENHANCED_TRAINING_AVAILABLE``
        is false. Otherwise ``EnhancedRealtimeTrainingSystem`` is imported
        locally, instantiated into ``self.enhanced_training_system`` and
        training is started via ``start_enhanced_training``.

        An ``ImportError`` disables training. Any other exception disables
        training and resets ``self.enhanced_training_system`` to None.

        NOTE(review): this resolves a merge conflict in favour of the HEAD
        side (local import plus auto-start). The incoming side relied on a
        module-level ``EnhancedRealtimeTrainingSystem`` name and did not
        auto-start; confirm that callers of the public wrapper do not start
        training a second time.
        """
        try:
            if not self.training_enabled:
                logger.info("Enhanced training system disabled")
                return

            if not ENHANCED_TRAINING_AVAILABLE:
                logger.info(
                    "EnhancedRealtimeTrainingSystem not available - using built-in training"
                )
                # Keep training enabled - we have built-in training capabilities
                return

            # Initialize enhanced training system directly (no external training_integration module needed)
            try:
                from NN.training.enhanced_realtime_training import EnhancedRealtimeTrainingSystem

                self.enhanced_training_system = EnhancedRealtimeTrainingSystem(
                    orchestrator=self,
                    data_provider=self.data_provider,
                    dashboard=None,  # Will be set by dashboard when available
                )

                logger.info("Enhanced training system initialized successfully")

                # Auto-start training by default
                logger.info("🚀 Auto-starting enhanced real-time training...")
                self.start_enhanced_training()

            except ImportError as e:
                logger.error(f"Failed to import EnhancedRealtimeTrainingSystem: {e}")
                self.training_enabled = False
                return

            logger.info("Enhanced real-time training system initialized")
            logger.info("  - Real-time model training: ENABLED")
            logger.info("  - Comprehensive feature extraction: ENABLED")
            logger.info("  - Enhanced reward calculation: ENABLED")
            logger.info("  - Forward-looking predictions: ENABLED")

        except Exception as e:
            logger.error(f"Error initializing enhanced training system: {e}")
            self.training_enabled = False
            self.enhanced_training_system = None

    # Public wrapper to match dashboard expectation
    def initialize_enhanced_training_system(self):
        """Public entry point delegating to ``_initialize_enhanced_training_system``.

        Returns:
            Whatever the private initializer returns; None if it raises.
        """
        try:
            return self._initialize_enhanced_training_system()
        except Exception as e:
            logger.error(f"Error in initialize_enhanced_training_system: {e}")
            return None

    # SINGLE-USE FUNCTION - Called only once in codebase
    def start_enhanced_training(self):
        """Start the enhanced real-time training system.

        The enhanced reward system integration is scheduled on the running
        event loop in both the "training available" and the "training not
        available" case; failures there are logged and never change the
        return value.

        Returns:
            True when ``start_training`` was called on the enhanced training
            system. False when training is disabled or unavailable, when the
            system has no ``start_training`` method, or when an exception
            occurred.
        """

        def _launch_reward_system(started_message):
            # Best effort: schedule the reward system integration as a task.
            try:
                from core.enhanced_reward_system_integration import start_enhanced_rewards_for_orchestrator

                # Check for a loop first so no coroutine is created (and left
                # un-awaited) when called from synchronous code.
                try:
                    running_loop = asyncio.get_running_loop()
                except RuntimeError as loop_err:
                    logger.error(f"Error starting enhanced reward system: {loop_err}")
                    return

                # Fire and forget task to start integration
                running_loop.create_task(
                    start_enhanced_rewards_for_orchestrator(
                        self, symbols=[self.symbol] + self.ref_symbols
                    )
                )
                logger.info(started_message)
            except Exception as e:
                logger.error(f"Error starting enhanced reward system: {e}")

        try:
            if not self.training_enabled or not self.enhanced_training_system:
                logger.warning("Enhanced training system not available")
                # Still start enhanced reward system + timeframe coordinator unconditionally
                _launch_reward_system(
                    "Enhanced reward system started (without enhanced training)"
                )
                return False

            if not hasattr(self.enhanced_training_system, "start_training"):
                logger.warning(
                    "Enhanced training system does not have start_training method"
                )
                return False

            self.enhanced_training_system.start_training()
            logger.info("Enhanced real-time training started")

            # Start Enhanced Reward System integration
            _launch_reward_system("Enhanced reward system started")
            return True

        except Exception as e:
            logger.error(f"Error starting enhanced training: {e}")
            return False

    # UNUSED FUNCTION - Not called anywhere in codebase
    def stop_enhanced_training(self):
        """Stop the enhanced real-time training system.

        Returns:
            True when ``stop_training`` was called on the enhanced training
            system. False when there is no system, it has no
            ``stop_training`` method, or an exception occurred.
        """
        try:
            training_system = self.enhanced_training_system
            if training_system and hasattr(training_system, "stop_training"):
                training_system.stop_training()
                logger.info("Enhanced real-time training stopped")
                return True
            return False

        except Exception as e:
            logger.error(f"Error stopping enhanced training: {e}")
            return False

<<<<<<< HEAD
    # UNUSED FUNCTION - Not called anywhere in codebase
=======
    def _initialize_text_export_manager(self):
        """Create ``self.text_export_manager`` and apply the symbol/export settings.

        The main symbol comes from ``self.symbol``; the first reference symbol
        is ``self.ref_symbols[0]`` when available and 'BTC/USDT' otherwise.
        The remaining settings are fixed values. On any exception the error is
        logged and ``self.text_export_manager`` is set to None.
        """
        try:
            manager = TextExportManager(
                data_provider=self.data_provider,
                orchestrator=self
            )
            self.text_export_manager = manager

            # Resolve the symbols once and reuse them for config and logging
            main_symbol = self.symbol
            if self.ref_symbols:
                first_reference = self.ref_symbols[0]
            else:
                first_reference = 'BTC/USDT'
            second_reference = 'SPX'  # Default to SPX for now
            export_dir = 'NN/training/samples/txt'

            self.text_export_manager.export_config.update({
                'main_symbol': main_symbol,
                'ref1_symbol': first_reference,
                'ref2_symbol': second_reference,
                'ref3_symbol': 'SOL/USDT',
                'export_dir': export_dir,
                'export_format': 'PIPE',
            })

            logger.info("Text export manager initialized")
            logger.info(f"  - Main symbol: {main_symbol}")
            logger.info(f"  - Reference symbols: {first_reference}, {second_reference}")
            logger.info(f"  - Export directory: {export_dir}")

        except Exception as e:
            logger.error(f"Error initializing text export manager: {e}")
            self.text_export_manager = None

    def _initialize_llm_proxy(self):
        """Create ``self.llm_proxy`` from the 'llm_proxy' section of ``self.config``.

        Options missing from the section fall back to the defaults listed in
        the body. On any exception the error is logged and ``self.llm_proxy``
        is set to None.
        """
        try:
            # Config section may be absent; every option then uses its default
            section = self.config.get('llm_proxy', {})

            # Option name -> value used when the section does not provide it
            option_defaults = {
                'base_url': 'http://localhost:1234',
                'model': 'openai/gpt-oss-20b',
                'temperature': 0.7,
                'max_tokens': -1,
                'timeout': 30,
                'api_key': None,
            }
            resolved_options = {
                option: section.get(option, fallback)
                for option, fallback in option_defaults.items()
            }
            proxy_settings = LLMConfig(**resolved_options)

            self.llm_proxy = LLMProxy(
                config=proxy_settings,
                data_dir='NN/training/samples/txt'
            )

            logger.info("LLM proxy initialized")
            logger.info(f"  - Model: {proxy_settings.model}")
            logger.info(f"  - Base URL: {proxy_settings.base_url}")
            logger.info(f"  - Temperature: {proxy_settings.temperature}")

        except Exception as e:
            logger.error(f"Error initializing LLM proxy: {e}")
            self.llm_proxy = None

    def start_text_export(self) -> bool:
        """Begin text data export through the text export manager.

        Returns:
            The result of the manager's ``start_export``; False when the
            manager is missing/falsy or an exception occurred.
        """
        try:
            manager = getattr(self, 'text_export_manager', None)
            if manager:
                return manager.start_export()

            logger.warning("Text export manager not initialized")
            return False
        except Exception as e:
            logger.error(f"Error starting text export: {e}")
            return False

    def stop_text_export(self) -> bool:
        """Halt text data export through the text export manager.

        Returns:
            True when there is no manager to stop; otherwise the result of
            the manager's ``stop_export``. False when an exception occurred.
        """
        try:
            manager = getattr(self, 'text_export_manager', None)
            if manager:
                return manager.stop_export()

            # Nothing was running, so stopping trivially succeeds
            return True
        except Exception as e:
            logger.error(f"Error stopping text export: {e}")
            return False

    def get_text_export_status(self) -> Dict[str, Any]:
        """Report the text export status.

        Returns:
            Dict[str, Any]: The manager's ``get_export_status()`` result, or a
            disabled/uninitialized placeholder carrying an ``error`` message
            when the manager is missing or the call fails.
        """
        try:
            manager = getattr(self, 'text_export_manager', None)
            if manager:
                return manager.get_export_status()
            return {'enabled': False, 'initialized': False, 'error': 'Not initialized'}
        except Exception as exc:
            logger.error(f"Error getting text export status: {exc}")
            return {'enabled': False, 'initialized': False, 'error': str(exc)}

    def start_llm_proxy(self) -> bool:
        """Start the LLM proxy for trading signals.

        Returns:
            bool: True once ``llm_proxy.start()`` has returned; False when the
            proxy is not initialized or starting it raised.
        """
        try:
            proxy = getattr(self, 'llm_proxy', None)
            if not proxy:
                logger.warning("LLM proxy not initialized")
                return False

            proxy.start()
            logger.info("LLM proxy started")
            return True
        except Exception as exc:
            logger.error(f"Error starting LLM proxy: {exc}")
            return False

    def stop_llm_proxy(self) -> bool:
        """Stop the LLM proxy.

        Returns:
            bool: True when the proxy was stopped or there was nothing to
            stop; False when stopping it raised.
        """
        try:
            proxy = getattr(self, 'llm_proxy', None)
            if proxy:
                proxy.stop()
                logger.info("LLM proxy stopped")
            # A missing proxy is treated as already stopped
            return True
        except Exception as exc:
            logger.error(f"Error stopping LLM proxy: {exc}")
            return False

    def get_llm_proxy_status(self) -> Dict[str, Any]:
        """Report the LLM proxy status.

        Returns:
            Dict[str, Any]: The proxy's ``get_status()`` result, or a
            disabled/uninitialized placeholder carrying an ``error`` message
            when the proxy is missing or the call fails.
        """
        try:
            proxy = getattr(self, 'llm_proxy', None)
            if proxy:
                return proxy.get_status()
            return {'enabled': False, 'initialized': False, 'error': 'Not initialized'}
        except Exception as exc:
            logger.error(f"Error getting LLM proxy status: {exc}")
            return {'enabled': False, 'initialized': False, 'error': str(exc)}

    def get_latest_llm_signal(self, symbol: str = 'ETH'):
        """Fetch the most recent LLM trading signal for ``symbol``.

        Returns:
            Whatever ``llm_proxy.get_latest_signal(symbol)`` returns, or None
            when the proxy is missing or the lookup raises.
        """
        try:
            proxy = getattr(self, 'llm_proxy', None)
            return proxy.get_latest_signal(symbol) if proxy else None
        except Exception as exc:
            logger.error(f"Error getting LLM signal: {exc}")
            return None

    def update_llm_config(self, new_config: Dict[str, Any]) -> bool:
        """Update LLM proxy configuration

        A fresh ``LLMConfig`` is built from ``new_config``; keys that are
        absent fall back to the hard-coded defaults below (they are NOT taken
        from the currently active configuration). A running proxy is stopped
        while the configuration is swapped and is always started again
        afterwards, even when ``update_config`` raises, so a failed update
        does not leave the proxy stopped.

        Args:
            new_config: Mapping with any of ``base_url``, ``model``,
                ``temperature``, ``max_tokens``, ``timeout``, ``api_key``.

        Returns:
            bool: True if the configuration was applied, False if the proxy is
            not initialized or any step raised.
        """
        try:
            if not hasattr(self, 'llm_proxy') or not self.llm_proxy:
                logger.warning("LLM proxy not initialized")
                return False

            # Build the config first so an invalid value fails before the
            # running proxy is touched
            llm_proxy_config = LLMConfig(
                base_url=new_config.get('base_url', 'http://localhost:1234'),
                model=new_config.get('model', 'openai/gpt-oss-20b'),
                temperature=new_config.get('temperature', 0.7),
                max_tokens=new_config.get('max_tokens', -1),
                timeout=new_config.get('timeout', 30),
                api_key=new_config.get('api_key')
            )

            # Stop current proxy
            was_running = self.llm_proxy.is_running
            if was_running:
                self.llm_proxy.stop()

            try:
                # Update config
                self.llm_proxy.update_config(llm_proxy_config)
            finally:
                # Restart if it was running, whether or not the update worked
                if was_running:
                    self.llm_proxy.start()

            logger.info("LLM proxy configuration updated")
            return True

        except Exception as e:
            logger.error(f"Error updating LLM config: {e}")
            return False

>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
    def get_enhanced_training_stats(self) -> Dict[str, Any]:
        """Get enhanced training system statistics with orchestrator integration

        Returns:
            Dict[str, Any]: The training system's own statistics (when it
            exposes ``get_training_statistics``) extended with:

            - ``training_enabled`` / ``system_available`` flags
            - ``orchestrator_integration``: model, COB and decision counters
            - ``model_training_status``: per-model memory size, training
              steps, last loss and checkpoint flag
            - ``prediction_tracking``: sizes of the recent prediction buffers
            - ``cob_integration_stats``: only when COB integration is set

            When the training system is missing, or any step raises, a small
            dict carrying an ``error`` message is returned instead.
        """
        try:
            if not self.enhanced_training_system:
                return {
                    "training_enabled": False,
                    "system_available": ENHANCED_TRAINING_AVAILABLE,
                    "error": "Training system not initialized",
                }

            # Get base stats from enhanced training system. Work on a copy so
            # the orchestrator keys added below are never written into a dict
            # the training system may still hold a reference to.
            stats: Dict[str, Any] = {}
            if hasattr(self.enhanced_training_system, "get_training_statistics"):
                stats = dict(
                    self.enhanced_training_system.get_training_statistics() or {}
                )

            stats["training_enabled"] = self.training_enabled
            stats["system_available"] = ENHANCED_TRAINING_AVAILABLE

            model_mappings = {
                "dqn": self.rl_agent,
                "cnn": self.cnn_model,
                "cob_rl": self.cob_rl_agent,
                "decision": self.decision_model,
            }

            # Add orchestrator-specific training integration data
            stats["orchestrator_integration"] = {
                "models_connected": sum(
                    1 for m in model_mappings.values() if m is not None
                ),
                "cob_integration_active": self.cob_integration is not None,
                "decision_fusion_enabled": self.decision_fusion_enabled,
                "symbols_tracking": len(self.symbols),
                "recent_decisions_count": sum(
                    len(decisions) for decisions in self.recent_decisions.values()
                ),
                # One side of the merge dropped model_weights from the
                # orchestrator ("now handled by ModelManager"); read it
                # defensively so this works whether or not the attribute exists.
                "model_weights": dict(getattr(self, "model_weights", None) or {}),
                "realtime_processing": self.realtime_processing,
            }

            # Add model-specific training status from orchestrator
            stats["model_training_status"] = {}
            for model_name, model in model_mappings.items():
                if not model:
                    stats["model_training_status"][model_name] = {
                        "model_loaded": False,
                        "memory_usage": 0,
                        "training_steps": 0,
                        "last_loss": None,
                        "checkpoint_loaded": False,
                    }
                    continue

                model_stats = {
                    "model_loaded": True,
                    "memory_usage": 0,
                    "training_steps": 0,
                    "last_loss": None,
                    "checkpoint_loaded": self.model_states.get(model_name, {}).get(
                        "checkpoint_loaded", False
                    ),
                }

                # Get memory usage (number of entries in the model's memory)
                if hasattr(model, "memory") and model.memory:
                    model_stats["memory_usage"] = len(model.memory)

                # Get training steps
                if hasattr(model, "training_steps"):
                    model_stats["training_steps"] = model.training_steps

                # Get last loss
                if hasattr(model, "losses") and model.losses:
                    model_stats["last_loss"] = model.losses[-1]

                stats["model_training_status"][model_name] = model_stats

            # Add prediction tracking stats
            stats["prediction_tracking"] = {
                "dqn_predictions_tracked": sum(
                    len(preds) for preds in self.recent_dqn_predictions.values()
                ),
                "cnn_predictions_tracked": sum(
                    len(preds) for preds in self.recent_cnn_predictions.values()
                ),
                "accuracy_history_tracked": sum(
                    len(history)
                    for history in self.prediction_accuracy_history.values()
                ),
                "symbols_with_predictions": [
                    symbol
                    for symbol in self.symbols
                    if len(self.recent_dqn_predictions.get(symbol, [])) > 0
                    or len(self.recent_cnn_predictions.get(symbol, [])) > 0
                ],
            }

            # Add COB integration stats if available
            if self.cob_integration:
                stats["cob_integration_stats"] = {
                    "latest_cob_data_symbols": list(self.latest_cob_data.keys()),
                    "cob_features_available": list(self.latest_cob_features.keys()),
                    "cob_state_available": list(self.latest_cob_state.keys()),
                    "feature_history_length": {
                        symbol: len(history)
                        for symbol, history in self.cob_feature_history.items()
                    },
                }

            return stats

        except Exception as e:
            logger.error(f"Error getting training stats: {e}")
            return {
                "training_enabled": self.training_enabled,
                "system_available": ENHANCED_TRAINING_AVAILABLE,
                "error": str(e),
            }

    # UNUSED FUNCTION - Not called anywhere in codebase
    def set_training_dashboard(self, dashboard):
        """Hand the dashboard reference to the enhanced training system.

        Does nothing when no training system is set. Errors are logged and
        never propagated.
        """
        try:
            training_system = self.enhanced_training_system
            if not training_system:
                return

            training_system.dashboard = dashboard
            logger.info("Dashboard reference set for enhanced training system")

        except Exception as exc:
            logger.error(f"Error setting training dashboard: {exc}")

    def set_cold_start_training_enabled(self, enabled: bool) -> bool:
        """Switch cold start training (excessive training during cold start) on or off.

        Stores the flag on ``self.cold_start_enabled`` and sets
        ``self.training_frequency`` to "high" when enabled, "normal" otherwise.

        Args:
            enabled: Whether to enable cold start training

        Returns:
            bool: True if setting was applied successfully
        """
        try:
            # Store the setting
            self.cold_start_enabled = enabled

            # Pick the training frequency and log line for the selected mode
            if enabled:
                frequency = "high"
                message = "ORCHESTRATOR: Cold start training ENABLED - Excessive training on every signal"
            else:
                frequency = "normal"
                message = "ORCHESTRATOR: Cold start training DISABLED - Normal training frequency"

            self.training_frequency = frequency
            logger.info(message)
            return True

        except Exception as exc:
            logger.error(f"Error setting cold start training: {exc}")
            return False

    def get_universal_data_stream(self, current_time: Optional[datetime] = None):
        """Get universal data stream for external consumers like dashboard - DELEGATED to data provider

        The data provider's ``universal_adapter`` is used when the provider
        has that attribute; otherwise the orchestrator's own
        ``universal_adapter`` is used.

        Args:
            current_time: Forwarded unchanged to the adapter.

        Returns:
            The adapter's stream, or None when no adapter is available or the
            call raises (the error is logged).
        """
        try:
            if self.data_provider and hasattr(self.data_provider, "universal_adapter"):
                return self.data_provider.universal_adapter.get_universal_data_stream(
                    current_time
                )
            elif self.universal_adapter:
                return self.universal_adapter.get_universal_data_stream(current_time)
            return None
        except Exception as e:
            logger.error(f"Error getting universal data stream: {e}")
            return None

    # UNUSED FUNCTION - Not called anywhere in codebase
    def get_universal_data_for_model(
        self, model_type: str = "cnn"
    ) -> Optional[Dict[str, Any]]:
        """Get formatted universal data for specific model types - DELEGATED to data provider

        Fetches the universal data stream from the same adapter that
        ``get_universal_data_stream`` would use and passes it through that
        adapter's ``format_for_model``.

        Args:
            model_type: Model identifier forwarded to ``format_for_model``.

        Returns:
            The formatted data, or None when no adapter is available, the
            stream is empty/falsy, or the call raises (the error is logged).
        """
        try:
            if self.data_provider and hasattr(self.data_provider, "universal_adapter"):
                stream = (
                    self.data_provider.universal_adapter.get_universal_data_stream()
                )
                if stream:
                    return self.data_provider.universal_adapter.format_for_model(
                        stream, model_type
                    )
            elif self.universal_adapter:
                stream = self.universal_adapter.get_universal_data_stream()
                if stream:
                    return self.universal_adapter.format_for_model(stream, model_type)
            return None
        except Exception as e:
            logger.error(f"Error getting universal data for {model_type}: {e}")
            return None

    def get_cob_data(self, symbol: str) -> Optional[Dict[str, Any]]:
        """Fetch the latest COB data for ``symbol`` - DELEGATED to data provider

        Returns:
            The provider's ``get_latest_cob_data(symbol)`` result, or None
            when there is no data provider or the call raises.
        """
        try:
            provider = self.data_provider
            return provider.get_latest_cob_data(symbol) if provider else None
        except Exception as exc:
            logger.error(f"Error getting COB data for {symbol}: {exc}")
            return None

    def get_combined_model_data(self, symbol: str) -> Optional[Dict[str, Any]]:
        """Fetch combined OHLCV + COB data for models - DELEGATED to data provider

        Returns:
            The provider's ``get_combined_ohlcv_cob_data(symbol)`` result, or
            None when there is no data provider or the call raises.
        """
        try:
            provider = self.data_provider
            return provider.get_combined_ohlcv_cob_data(symbol) if provider else None
        except Exception as exc:
            logger.error(f"Error getting combined model data for {symbol}: {exc}")
            return None

    def _get_current_position_pnl(self, symbol: str, current_price: float = None) -> float:
        """Get current position P&L for the symbol"""
        try:
            if self.trading_executor and hasattr(
                self.trading_executor, "get_current_position"
            ):
                position = self.trading_executor.get_current_position(symbol)
                if position:
                    # If current_price is provided, calculate P&L manually
                    if current_price is not None:
                        entry_price = position.get("price", 0)
                        size = position.get("size", 0)
                        side = position.get("side", "LONG")

                        if entry_price and size > 0:
                            if side.upper() == "LONG":
                                pnl = (current_price - entry_price) * size
                            else:  # SHORT
                                pnl = (entry_price - current_price) * size
                            return pnl
                    else:
                        # Use unrealized_pnl from position if available
                        if position.get("size", 0) > 0:
                            return float(position.get("unrealized_pnl", 0.0))
            return 0.0
        except Exception as e:
            logger.debug(f"Error getting position P&L for {symbol}: {e}")
            return 0.0

    def _has_open_position(self, symbol: str) -> bool:
        """Check if there's an open position for the symbol"""
        try:
            if self.trading_executor and hasattr(
                self.trading_executor, "get_current_position"
            ):
                position = self.trading_executor.get_current_position(symbol)
                return position is not None and position.get("size", 0) > 0
            return False
        except Exception:
            return False
<<<<<<< HEAD
    
    # SINGLE-USE FUNCTION - Called only once in codebase
    def _calculate_aggressiveness_thresholds(self, current_pnl: float, symbol: str) -> tuple:
=======


    def _calculate_position_enhanced_reward_for_dqn(self, base_reward, action, position_pnl, has_position):
        """
        Calculate position-enhanced reward for DQN to incentivize profitable trades and closing losing ones
        
        Args:
            base_reward: Original reward from confidence/execution
            action: Action taken ('BUY', 'SELL', 'HOLD')
            position_pnl: Current position P&L
            has_position: Whether we have an open position
            
        Returns:
            Enhanced reward that incentivizes profitable behavior
        """
        try:
            enhanced_reward = base_reward
            
            if has_position and position_pnl != 0.0:
                # Position-based reward adjustments (similar to CNN but tuned for DQN)
                pnl_factor = position_pnl / 100.0  # Normalize P&L to reasonable scale
                
                if position_pnl > 0:  # Profitable position
                    if action == "HOLD":
                        # Reward holding profitable positions (let winners run)
                        enhanced_reward += abs(pnl_factor) * 0.4
                    elif action in ["BUY", "SELL"]:
                        # Moderate reward for taking action on profitable positions
                        enhanced_reward += abs(pnl_factor) * 0.2
                        
                elif position_pnl < 0:  # Losing position
                    if action == "HOLD":
                        # Strong penalty for holding losing positions (cut losses)
                        enhanced_reward -= abs(pnl_factor) * 1.0
                    elif action in ["BUY", "SELL"]:
                        # Strong reward for taking action to close losing positions
                        enhanced_reward += abs(pnl_factor) * 0.8
                        
            # Ensure reward doesn't become extreme (DQN is more sensitive to reward scale)
            enhanced_reward = max(-2.0, min(2.0, enhanced_reward))
            
            return enhanced_reward
            
        except Exception as e:
            logger.error(f"Error calculating position-enhanced reward for DQN: {e}")
            return base_reward

    def _close_all_positions(self):
        """Close all open positions when clearing session"""
        try:
            if not self.trading_executor:
                logger.debug("No trading executor available - cannot close positions")
                return

            # Get list of symbols to check for positions
            symbols_to_check = [self.symbol] + self.ref_symbols
            positions_closed = 0

            for symbol in symbols_to_check:
                try:
                    # Check if there's an open position
                    if self._has_open_position(symbol):
                        logger.info(f"Closing open position for {symbol}")

                        # Get current position details
                        if hasattr(self.trading_executor, "get_current_position"):
                            position = self.trading_executor.get_current_position(
                                symbol
                            )
                            if position:
                                side = position.get("side", "LONG")
                                size = position.get("size", 0)

                                # Determine close action (opposite of current position)
                                close_action = (
                                    "SELL" if side.upper() == "LONG" else "BUY"
                                )

                                # Execute close order
                                if hasattr(self.trading_executor, "execute_trade"):
                                    result = self.trading_executor.execute_trade(
                                        symbol=symbol,
                                        action=close_action,
                                        size=size,
                                        reason="Session clear - closing all positions",
                                    )

                                    if result and result.get("success"):
                                        positions_closed += 1
                                        logger.info(
                                            f"Closed {side} position for {symbol}: {size} units"
                                        )
                                    else:
                                        logger.warning(
                                            f"⚠️ Failed to close position for {symbol}: {result}"
                                        )
                                else:
                                    logger.warning(
                                        f"Trading executor has no execute_trade method"
                                    )

                except Exception as e:
                    logger.error(f"Error closing position for {symbol}: {e}")
                    continue

            if positions_closed > 0:
                logger.info(
                    f"Closed {positions_closed} open positions during session clear"
                )
            else:
                logger.debug("No open positions to close")

        except Exception as e:
            logger.error(f"Error closing positions during session clear: {e}")

    def _calculate_aggressiveness_thresholds(
        self, current_pnl: float, symbol: str
    ) -> tuple:
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
        """Calculate confidence thresholds based on aggressiveness settings"""
        # Base thresholds
        base_entry_threshold = self.confidence_threshold
        base_exit_threshold = self.confidence_threshold_close

        # Get aggressiveness settings (could be from config or adaptive)
        entry_agg = getattr(self, "entry_aggressiveness", 0.5)
        exit_agg = getattr(self, "exit_aggressiveness", 0.5)

        # Adjust thresholds based on aggressiveness
        # More aggressive = lower threshold (more trades)
        # Less aggressive = higher threshold (fewer, higher quality trades)
        entry_threshold = base_entry_threshold * (
            1.5 - entry_agg
        )  # 0.5 agg = 1.0x, 1.0 agg = 0.5x
        exit_threshold = base_exit_threshold * (1.5 - exit_agg)

        # Ensure minimum thresholds
        entry_threshold = max(0.05, entry_threshold)
        exit_threshold = max(0.02, exit_threshold)

        return entry_threshold, exit_threshold
<<<<<<< HEAD
    
    # SINGLE-USE FUNCTION - Called only once in codebase
    def _apply_pnl_feedback(self, action: str, confidence: float, current_pnl: float, 
                           symbol: str, reasoning: dict) -> tuple:
=======

    def _apply_pnl_feedback(
        self,
        action: str,
        confidence: float,
        current_pnl: float,
        symbol: str,
        reasoning: dict,
    ) -> tuple:
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
        """Apply P&L-based feedback to decision making"""
        try:
            # If we have a losing position, be more aggressive about cutting losses
            if current_pnl < -10.0:  # Losing more than $10
                if action == "SELL" and self._has_open_position(symbol):
                    # Boost confidence for exit signals when losing
                    confidence = min(1.0, confidence * 1.2)
                    reasoning["pnl_loss_cut_boost"] = True
                elif action == "BUY":
                    # Reduce confidence for new entries when losing
                    confidence *= 0.8
                    reasoning["pnl_loss_entry_reduction"] = True

            # If we have a winning position, be more conservative about exits
            elif current_pnl > 5.0:  # Winning more than $5
                if action == "SELL" and self._has_open_position(symbol):
                    # Reduce confidence for exit signals when winning (let profits run)
                    confidence *= 0.9
                    reasoning["pnl_profit_hold"] = True
                elif action == "BUY":
                    # Slightly boost confidence for entries when on a winning streak
                    confidence = min(1.0, confidence * 1.05)
                    reasoning["pnl_winning_streak_boost"] = True

            reasoning["current_pnl"] = current_pnl
            return action, confidence

        except Exception as e:
            logger.debug(f"Error applying P&L feedback: {e}")
            return action, confidence

    # SINGLE-USE FUNCTION - Called only once in codebase
    def _calculate_dynamic_entry_aggressiveness(self, symbol: str) -> float:
        """Calculate dynamic entry aggressiveness based on recent performance"""
        try:
            # Start with base aggressiveness
            base_agg = getattr(self, "entry_aggressiveness", 0.5)

            # Get recent decisions for this symbol
            recent_decisions = self.get_recent_decisions(symbol, limit=10)
            if len(recent_decisions) < 3:
                return base_agg

            # Calculate win rate
            winning_decisions = sum(
                1 for d in recent_decisions if d.reasoning.get("was_profitable", False)
            )
            win_rate = winning_decisions / len(recent_decisions)

            # Adjust aggressiveness based on performance
            if win_rate > 0.7:  # High win rate - be more aggressive
                return min(1.0, base_agg + 0.2)
            elif win_rate < 0.3:  # Low win rate - be more conservative
                return max(0.1, base_agg - 0.2)
            else:
                return base_agg

        except Exception as e:
            logger.debug(f"Error calculating dynamic entry aggressiveness: {e}")
            return 0.5
<<<<<<< HEAD
    
    # SINGLE-USE FUNCTION - Called only once in codebase
    def _calculate_dynamic_exit_aggressiveness(self, symbol: str, current_pnl: float) -> float:
=======

    def _calculate_dynamic_exit_aggressiveness(
        self, symbol: str, current_pnl: float
    ) -> float:
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b
        """Calculate dynamic exit aggressiveness based on P&L and market conditions"""
        try:
            # Start with base aggressiveness
            base_agg = getattr(self, "exit_aggressiveness", 0.5)

            # Adjust based on current P&L
            if current_pnl < -20.0:  # Large loss - be very aggressive about cutting
                return min(1.0, base_agg + 0.3)
            elif current_pnl < -5.0:  # Small loss - be more aggressive
                return min(1.0, base_agg + 0.1)
            elif current_pnl > 20.0:  # Large profit - be less aggressive (let it run)
                return max(0.1, base_agg - 0.2)
            elif current_pnl > 5.0:  # Small profit - slightly less aggressive
                return max(0.2, base_agg - 0.1)
            else:
                return base_agg

        except Exception as e:
            logger.debug(f"Error calculating dynamic exit aggressiveness: {e}")
            return 0.5

    # UNUSED FUNCTION - Not called anywhere in codebase
    def set_trading_executor(self, trading_executor):
        """Attach the executor consulted for open positions and P&L.

        Args:
            trading_executor: Object stored on ``self.trading_executor``.
        """
        self.trading_executor = trading_executor
        logger.info("Trading executor set for position tracking and P&L feedback")

<<<<<<< HEAD
    # SINGLE-USE FUNCTION - Called only once in codebase
    def _get_current_price(self, symbol: str) -> float:
        """Get current price for symbol"""
        try:
            # Try to get from data provider
            if self.data_provider:
                try:
                    # Try different methods to get current price
                    if hasattr(self.data_provider, 'get_latest_data'):
                        latest_data = self.data_provider.get_latest_data(symbol)
                        if latest_data and 'price' in latest_data:
                            return float(latest_data['price'])
                        elif latest_data and 'close' in latest_data:
                            return float(latest_data['close'])
                    elif hasattr(self.data_provider, 'get_current_price'):
                        return float(self.data_provider.get_current_price(symbol))
                    elif hasattr(self.data_provider, 'get_latest_candle'):
                        latest_candle = self.data_provider.get_latest_candle(symbol, '1m')
                        if latest_candle and 'close' in latest_candle:
                            return float(latest_candle['close'])
                except Exception as e:
                    logger.debug(f"Could not get price from data provider: {e}")
            # Try to get from universal adapter
            if self.universal_adapter:
                try:
                    data_stream = self.universal_adapter.get_latest_data(symbol)
                    if data_stream and hasattr(data_stream, 'current_price'):
                        return float(data_stream.current_price)
                except Exception as e:
                    logger.debug(f"Could not get price from universal adapter: {e}")
            # TODO(Guideline: no synthetic fallback) Provide a real-time or cached market price here instead of hardcoding.
            raise RuntimeError("Current price unavailable; per guidelines do not substitute synthetic values.")
        except Exception as e:
            logger.error(f"Error getting current price for {symbol}: {e}")
            # Return default price based on symbol
            raise RuntimeError("Current price unavailable; per guidelines do not substitute synthetic values.")

    # SINGLE-USE FUNCTION - Called only once in codebase
    def _generate_fallback_prediction(self, symbol: str) -> Dict[str, Any]:
        """Fallback predictions were removed to avoid synthetic signals."""
        # TODO(Guideline: no synthetic data / no stubs) Provide a real degraded-mode signal pipeline or remove this hook entirely.
        raise RuntimeError("Fallback predictions disabled per guidelines; supply real model output instead.")

    # UNUSED FUNCTION - Not called anywhere in codebase
    def capture_dqn_prediction(self, symbol: str, action_idx: int, confidence: float, price: float, q_values: List[float] = None):
        """Record a DQN prediction for dashboard visualization.

        The record is appended to ``self.recent_dqn_predictions[symbol]``, a
        deque created on first use with room for 100 entries.

        Args:
            symbol: Symbol the prediction belongs to.
            action_idx: Index into SELL/HOLD/BUY (0, 1, 2).
            confidence: Confidence stored with the record.
            price: Price stored with the record.
            q_values: Stored as given; when empty or None the list
                [0.33, 0.33, 0.34] is stored instead.
        """
        try:
            if symbol not in self.recent_dqn_predictions:
                self.recent_dqn_predictions[symbol] = deque(maxlen=100)

            action_labels = ('SELL', 'HOLD', 'BUY')
            self.recent_dqn_predictions[symbol].append(
                {
                    'timestamp': datetime.now(),
                    'action': action_labels[action_idx],
                    'confidence': confidence,
                    'price': price,
                    'q_values': q_values if q_values else [0.33, 0.33, 0.34],
                }
            )
        except Exception as exc:
            logger.debug(f"Error capturing DQN prediction: {exc}")

    # UNUSED FUNCTION - Not called anywhere in codebase
    def capture_cnn_prediction(self, symbol: str, direction: int, confidence: float, current_price: float, predicted_price: float):
        """Record a CNN prediction for dashboard visualization.

        The record is appended to ``self.recent_cnn_predictions[symbol]``, a
        deque created on first use with room for 50 entries.

        Args:
            symbol: Symbol the prediction belongs to.
            direction: Index into DOWN/SAME/UP (0, 1, 2).
            confidence: Confidence stored with the record.
            current_price: Price at prediction time.
            predicted_price: Price the model predicted.
        """
        try:
            if symbol not in self.recent_cnn_predictions:
                self.recent_cnn_predictions[symbol] = deque(maxlen=50)

            direction_labels = ('DOWN', 'SAME', 'UP')
            self.recent_cnn_predictions[symbol].append(
                {
                    'timestamp': datetime.now(),
                    'direction': direction_labels[direction],
                    'confidence': confidence,
                    'current_price': current_price,
                    'predicted_price': predicted_price,
                }
            )
        except Exception as exc:
            logger.debug(f"Error capturing CNN prediction: {exc}")

    async def _get_cob_rl_prediction(self, model: COBRLModelInterface, symbol: str) -> Optional[Prediction]:
        """Ask the COB RL model for a prediction on ``symbol``.

        Fetches a single-step COB feature matrix through
        ``self.get_cob_feature_matrix``, flattens it, feeds it to
        ``model.predict`` and wraps the result in a ``Prediction``.

        Args:
            model: COB RL model interface whose ``predict`` and ``name`` are used.
            symbol: Trading symbol to predict for.

        Returns:
            A ``Prediction`` with timeframe ``'cob'``, or None when no feature
            matrix is available, the model returns a falsy result, or any
            exception occurs (logged at error level).
        """
        try:
            feature_matrix = self.get_cob_feature_matrix(symbol, sequence_length=1)
            if feature_matrix is None:
                return None

            # The model expects a 1D array of features
            result = model.predict(feature_matrix.flatten())
            if not result:
                return None

            # Direction index -> action label; unknown indices fall back to HOLD
            direction_map = {0: 'SELL', 1: 'HOLD', 2: 'BUY'}
            action = direction_map.get(result['predicted_direction'], 'HOLD')
            confidence = float(result['confidence'])
            probabilities = {}
            for idx, prob in enumerate(result['probabilities']):
                probabilities[direction_map.get(idx, 'HOLD')] = float(prob)

            return Prediction(
                action=action,
                confidence=confidence,
                probabilities=probabilities,
                timeframe='cob',
                timestamp=datetime.now(),
                model_name=model.name,
                metadata={'value': result['value']}
            )
        except Exception as e:
            logger.error(f"Error getting COB RL prediction: {e}")
            return None

    def _initialize_data_stream_monitor(self) -> None:
        """Create the data stream monitor and make sure it is streaming.

        The monitor is owned by the orchestrator so that no external process
        control is needed. On any failure a warning is logged and
        ``self.data_stream_monitor`` is set to None.
        """
        try:
            from data_stream_monitor import get_data_stream_monitor

            monitor = get_data_stream_monitor(
                orchestrator=self,
                data_provider=self.data_provider,
                training_system=getattr(self, 'training_manager', None)
            )
            self.data_stream_monitor = monitor

            # Only start streaming when the monitor does not report it as running
            already_streaming = getattr(monitor, 'is_streaming', False)
            if not already_streaming:
                monitor.start_streaming()
            logger.info("Data stream monitor initialized and started by orchestrator")
        except Exception as e:
            logger.warning(f"Data stream monitor initialization failed: {e}")
            self.data_stream_monitor = None

    # UNUSED FUNCTION - Not called anywhere in codebase
    def start_data_stream(self) -> bool:
        """Start data streaming if not already active.

        Initializes the data stream monitor on demand, then starts streaming
        unless the monitor already reports ``is_streaming``.

        Returns:
            True when a monitor is available after this call (it was already
            streaming or ``start_streaming()`` was invoked); False when no
            monitor could be obtained or an exception occurred.
        """
        try:
            if not getattr(self, 'data_stream_monitor', None):
                self._initialize_data_stream_monitor()

            monitor = getattr(self, 'data_stream_monitor', None)
            if not monitor:
                # Initialization sets the monitor to None when it fails; report
                # that instead of claiming the stream was started.
                logger.error("Failed to start data stream: data stream monitor is not available")
                return False

            if not monitor.is_streaming:
                monitor.start_streaming()
            return True
        except Exception as e:
            logger.error(f"Failed to start data stream: {e}")
            return False

    # UNUSED FUNCTION - Not called anywhere in codebase
    def stop_data_stream(self) -> bool:
        """Stop the data stream when a monitor exists and is streaming.

        Returns:
            True when nothing went wrong (including when there was nothing to
            stop); False when an exception was raised and logged.
        """
        try:
            monitor = getattr(self, 'data_stream_monitor', None)
            if monitor and monitor.is_streaming:
                monitor.stop_streaming()
        except Exception as e:
            logger.error(f"Failed to stop data stream: {e}")
            return False
        else:
            return True

    # SINGLE-USE FUNCTION - Called only once in codebase
    def get_data_stream_status(self) -> Dict[str, Any]:
        """Return current data stream status and buffer sizes.

        Returns:
            Dict with ``connected`` (the monitor has both an orchestrator and a
            data provider), ``streaming`` (the monitor's ``is_streaming`` as a
            bool) and ``buffers`` (stream name -> number of buffered items).
            Without a monitor the defaults (False, False, {}) are returned. If
            reading the monitor fails part-way, the fields collected so far are
            returned and the error is logged at debug level.
        """
        status = {
            'connected': False,
            'streaming': False,
            'buffers': {}
        }
        monitor = getattr(self, 'data_stream_monitor', None)
        if not monitor:
            return status
        try:
            status['connected'] = monitor.orchestrator is not None and monitor.data_provider is not None
            status['streaming'] = bool(monitor.is_streaming)
            status['buffers'] = {name: len(buf) for name, buf in monitor.data_streams.items()}
        except Exception as e:
            # Best effort: keep whatever was gathered, but leave a trace instead
            # of swallowing the failure silently.
            logger.debug(f"Error reading data stream status: {e}")
        return status

    # UNUSED FUNCTION - Not called anywhere in codebase
    def save_data_snapshot(self, filepath: Optional[str] = None) -> str:
        """Save a snapshot of current data stream buffers to a file.

        Args:
            filepath: Optional path for the snapshot file. If None or empty, a
                timestamped name under ``data_snapshots/`` is generated.

        Returns:
            Path to the saved snapshot file.

        Raises:
            RuntimeError: If the data stream monitor is not initialized.
            Exception: Whatever the monitor's ``save_snapshot`` raises; it is
                logged and re-raised.
        """
        if not getattr(self, 'data_stream_monitor', None):
            raise RuntimeError("Data stream monitor not initialized")

        if not filepath:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filepath = f"data_snapshots/snapshot_{timestamp}.json"

        # Ensure directory exists. A bare filename has an empty dirname and
        # os.makedirs("") raises FileNotFoundError, so skip it in that case.
        target_dir = os.path.dirname(filepath)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        try:
            self.data_stream_monitor.save_snapshot(filepath)
            logger.info(f"Data snapshot saved to: {filepath}")
            return filepath
        except Exception as e:
            logger.error(f"Failed to save data snapshot: {e}")
            raise

    # UNUSED FUNCTION - Not called anywhere in codebase
    def get_stream_summary(self) -> Dict[str, Any]:
        """Get a summary of current data stream activity.

        Returns:
            Dict with ``status`` (result of ``get_data_stream_status``),
            ``total_samples`` (sum of all buffer sizes), ``active_streams``
            (names of non-empty buffers) and ``last_update`` (ISO timestamp of
            this call). When a monitor exists, ``sample_data`` maps each
            non-empty stream to its latest item; it is omitted if collecting the
            samples fails (the error is logged at debug level).
        """
        status = self.get_data_stream_status()
        buffer_sizes = status.get('buffers', {})
        summary = {
            'status': status,
            'total_samples': sum(buffer_sizes.values()),
            'active_streams': [name for name, count in buffer_sizes.items() if count > 0],
            'last_update': datetime.now().isoformat()
        }

        # Add the latest sample of every non-empty stream if a monitor is available
        monitor = getattr(self, 'data_stream_monitor', None)
        if monitor:
            try:
                summary['sample_data'] = {
                    stream_name: buffer[-1]  # Latest sample
                    for stream_name, buffer in monitor.data_streams.items()
                    if len(buffer) > 0
                }
            except Exception as e:
                # Best effort: the summary is still useful without samples, but
                # do not hide the failure completely.
                logger.debug(f"Error collecting stream sample data: {e}")

        return summary

    # UNUSED FUNCTION - Not called anywhere in codebase
    def get_cob_data(self, symbol: str, limit: int = 300) -> List:
        """Return COB history for ``symbol`` from the COB integration.

        Args:
            symbol: Trading symbol to look up.
            limit: Passed through to ``cob_integration.get_cob_history``.

        Returns:
            Whatever ``get_cob_history`` returns, or an empty list when no COB
            integration is set or an error occurs (logged at error level).
        """
        try:
            integration = getattr(self, 'cob_integration', None)
            if not integration:
                return []
            return integration.get_cob_history(symbol, limit)
        except Exception as e:
            logger.error(f"Error getting COB data: {e}")
            return []

    # SINGLE-USE FUNCTION - Called only once in codebase
    def _load_historical_data_for_models(self):
        """Load 300 historical candles for all required timeframes and symbols for model training"""
        logger.info("Loading 300 historical candles for model training and RL context...")
        
        try:
            # Required data for models:
            # ETH/USDT: 1m, 1h, 1d (300 candles each)
            # BTC/USDT: 1m (300 candles)
            
            symbols_timeframes = [
                ('ETH/USDT', '1m'),
                ('ETH/USDT', '1h'), 
                ('ETH/USDT', '1d'),
                ('BTC/USDT', '1m')
            ]
            
            loaded_data = {}
            total_candles = 0
            
            for symbol, timeframe in symbols_timeframes:
                try:
                    logger.info(f"Loading {symbol} {timeframe} historical data...")
                    df = self.data_provider.get_historical_data(symbol, timeframe, limit=300)
                    
                    if df is not None and not df.empty:
                        loaded_data[f"{symbol}_{timeframe}"] = df
                        total_candles += len(df)
                        logger.info(f"Loaded {len(df)} {timeframe} candles for {symbol}")
                        
                        # Store in data provider's historical cache for quick access
                        cache_key = f"{symbol}_{timeframe}_300"
                        if not hasattr(self.data_provider, 'model_data_cache'):
                            self.data_provider.model_data_cache = {}
                        self.data_provider.model_data_cache[cache_key] = df
                        
                    else:
                        logger.warning(f"❌ No {timeframe} data available for {symbol}")
                        
                except Exception as e:
                    logger.error(f"Error loading {symbol} {timeframe} data: {e}")
            
            # Initialize model context data
            if hasattr(self, 'extrema_trainer') and self.extrema_trainer:
                logger.info("Initializing ExtremaTrainer with historical context...")
                self.extrema_trainer.initialize_context_data()
            
            # CRITICAL: Initialize ALL models with historical data (using data provider's normalized methods)
            self._initialize_models_with_historical_data(symbols_timeframes)
            
            logger.info(f"🎯 Historical data loading complete: {total_candles} total candles loaded")
            logger.info(f"📊 Available datasets: {list(loaded_data.keys())}")
            
        except Exception as e:
            logger.error(f"Error in historical data loading: {e}")

    # SINGLE-USE FUNCTION - Called only once in codebase
    def _initialize_models_with_historical_data(self, symbols_timeframes: List[Tuple[str, str]]):
        """Initialize all NN models with historical data using data provider's normalized methods"""
        try:
            logger.info("Initializing models with normalized historical data from data provider...")
            
            # Use data provider's multi-symbol feature preparation
            symbol_features = self.data_provider.get_multi_symbol_features_for_inference(symbols_timeframes, limit=300)
            
            # Initialize CNN with multi-symbol data
            if hasattr(self, 'cnn_model') and self.cnn_model:
                logger.info("Initializing CNN with multi-symbol historical features...")
                self._initialize_cnn_with_provider_data()
            
            # Initialize DQN with multi-symbol states
            if hasattr(self, 'rl_agent') and self.rl_agent:
                logger.info("Initializing DQN with multi-symbol state vectors...")
                self._initialize_dqn_with_provider_data(symbols_timeframes)
            
            # Initialize Transformer with sequence data
            if hasattr(self, 'transformer_model') and self.transformer_model:
                logger.info("Initializing Transformer with multi-symbol sequences...")
                self._initialize_transformer_with_provider_data(symbols_timeframes)
            
            # Initialize Decision Fusion with comprehensive features
            if hasattr(self, 'decision_fusion') and self.decision_fusion:
                logger.info("Initializing Decision Fusion with multi-symbol features...")
                self._initialize_decision_with_provider_data(symbol_features)
                
            logger.info("All models initialized with data provider's normalized historical data")
            
        except Exception as e:
            logger.error(f"Error initializing models with historical data: {e}")

    # SINGLE-USE FUNCTION - Called only once in codebase
    def _initialize_cnn_with_provider_data(self):
        """Initialize CNN using data provider's normalized feature extraction"""
        try:
            # Create combined feature matrix: [ETH_1m, ETH_1h, ETH_1d, BTC_1m]
            combined_features = []
            
            # ETH features (1m, 1h, 1d)
            for timeframe in ['1m', '1h', '1d']:
                features = self.data_provider.get_cnn_features_for_inference('ETH/USDT', timeframe, window_size=60)
                if features is not None:
                    combined_features.append(features)
            
            # BTC features (1m)
            btc_features = self.data_provider.get_cnn_features_for_inference('BTC/USDT', '1m', window_size=60)
            if btc_features is not None:
                combined_features.append(btc_features)
            
            if combined_features:
                # Concatenate all features
                full_features = np.concatenate(combined_features)
                logger.info(f"CNN initialized with {len(full_features)} multi-symbol normalized features")
                
                # Store for model access
                if not hasattr(self, 'model_historical_features'):
                    self.model_historical_features = {}
                self.model_historical_features['cnn'] = full_features
                
        except Exception as e:
            logger.error(f"Error initializing CNN with provider data: {e}")

    # SINGLE-USE FUNCTION - Called only once in codebase
    def _initialize_dqn_with_provider_data(self, symbols_timeframes: List[Tuple[str, str]]):
        """Initialize DQN using data provider's normalized state vector creation"""
        try:
            # Use data provider's DQN state creation
            state_vector = self.data_provider.get_dqn_state_for_inference(symbols_timeframes, target_size=100)
            
            if state_vector is not None:
                logger.info(f"DQN initialized with {len(state_vector)} dimensional normalized multi-symbol state")
                
                # Store for model access
                if not hasattr(self, 'model_historical_features'):
                    self.model_historical_features = {}
                self.model_historical_features['dqn'] = state_vector
                
        except Exception as e:
            logger.error(f"Error initializing DQN with provider data: {e}")

    # SINGLE-USE FUNCTION - Called only once in codebase
    def _initialize_transformer_with_provider_data(self, symbols_timeframes: List[Tuple[str, str]]):
        """Initialize Transformer using data provider's normalized sequence creation"""
        try:
            # Use data provider's transformer sequence creation
            sequences = self.data_provider.get_transformer_sequences_for_inference(symbols_timeframes, seq_length=150)
            
            if sequences:
                logger.info(f"Transformer initialized with {len(sequences)} normalized multi-symbol sequences")
                
                # Store for model access
                if not hasattr(self, 'model_historical_features'):
                    self.model_historical_features = {}
                self.model_historical_features['transformer'] = sequences
                
        except Exception as e:
            logger.error(f"Error initializing Transformer with provider data: {e}")

    # SINGLE-USE FUNCTION - Called only once in codebase
    def _initialize_decision_with_provider_data(self, symbol_features: Dict[str, Dict[str, pd.DataFrame]]):
        """Initialize Decision Fusion using data provider's feature aggregation"""
        try:
            # Aggregate all available features for decision fusion
            all_features = {}
            
            for symbol in symbol_features:
                for timeframe in symbol_features[symbol]:
                    data = symbol_features[symbol][timeframe]
                    if data is not None and not data.empty:
                        key = f"{symbol}_{timeframe}"
                        all_features[key] = {
                            'latest_price': data['close'].iloc[-1],
                            'volume': data['volume'].iloc[-1],
                            'price_change': data['close'].pct_change().iloc[-1] if len(data) > 1 else 0,
                            'volatility': data['close'].std() if len(data) > 1 else 0
                        }
            
            if all_features:
                logger.info(f"Decision Fusion initialized with {len(all_features)} normalized symbol-timeframe combinations")
                
                # Store for model access
                if not hasattr(self, 'model_historical_features'):
                    self.model_historical_features = {}
                self.model_historical_features['decision'] = all_features
                
        except Exception as e:
            logger.error(f"Error initializing Decision Fusion with provider data: {e}")

    # UNUSED FUNCTION - Not called anywhere in codebase
    def get_ohlcv_data(self, symbol: str, timeframe: str, limit: int = 300) -> List:
        """Return OHLCV candles for ``symbol``/``timeframe`` as a list of dicts.

        Args:
            symbol: Trading symbol to fetch.
            timeframe: Candle timeframe passed to the data provider.
            limit: Maximum number of candles requested from the data provider.

        Returns:
            One dict per candle with ``timestamp`` (ISO string when the index
            value supports ``isoformat``, otherwise ``str`` of it) and float
            ``open``/``high``/``low``/``close``/``volume``. Empty list when the
            provider has no data or an error occurs (logged at error level).
        """
        try:
            ohlcv_df = self.data_provider.get_ohlcv(symbol, timeframe, limit=limit)
            if ohlcv_df is None or ohlcv_df.empty:
                return []

            # Convert to list of dictionaries, one per row in frame order
            return [
                {
                    'timestamp': index_value.isoformat() if hasattr(index_value, 'isoformat') else str(index_value),
                    'open': float(candle['open']),
                    'high': float(candle['high']),
                    'low': float(candle['low']),
                    'close': float(candle['close']),
                    'volume': float(candle['volume'])
                }
                for index_value, candle in ohlcv_df.iterrows()
            ]
        except Exception as e:
            logger.error(f"Error getting OHLCV data: {e}")
            return []

    def chain_inference(self, symbol: str, n_steps: int = 10) -> List[Dict]:
        """
        Run up to ``n_steps`` chained predictions, feeding each result into the next step.

        The first step starts from ``self._get_current_market_data(symbol)``.
        In every step each available model (``cnn_model``, ``dqn_model``,
        ``cob_rl_agent``) is asked to predict on the current data; a failing
        model is skipped with a debug log. The step's predictions are merged via
        ``self._combine_predictions`` and the merged result is folded back into
        the data via ``self._update_data_with_prediction``. The chain stops
        early when there is no market data, no model produced a prediction, or
        a step raises.

        Args:
            symbol: Trading symbol (e.g., 'ETH/USDT')
            n_steps: Number of chained predictions to generate

        Returns:
            List of combined prediction dicts, each with ``timestamp`` (now plus
            ``step + 1`` minutes) and ``step``; empty list on a top-level error.
        """
        try:
            logger.info(f"🔗 Starting chained inference for {symbol} with {n_steps} steps")

            # (attribute holding the model, label stored in results, label used in log lines)
            model_slots = (
                ('cnn_model', 'CNN', 'CNN'),
                ('dqn_model', 'DQN', 'DQN'),
                ('cob_rl_agent', 'COB_RL', 'COB RL'),
            )

            predictions = []
            current_data = None

            for step in range(n_steps):
                try:
                    # Market data is only fetched once; later steps reuse the updated data
                    if step == 0:
                        current_data = self._get_current_market_data(symbol)
                        if not current_data:
                            logger.warning(f"No market data available for {symbol}")
                            break

                    # Run inference with available models
                    step_predictions = []
                    for attr_name, result_label, log_label in model_slots:
                        if not getattr(self, attr_name, None):
                            continue
                        try:
                            model_output = getattr(self, attr_name).predict(current_data)
                            if model_output:
                                step_predictions.append({
                                    'model': result_label,
                                    'prediction': model_output,
                                    'confidence': model_output.get('confidence', 0.5)
                                })
                        except Exception as e:
                            logger.debug(f"{log_label} inference error: {e}")

                    if not step_predictions:
                        logger.warning(f"No model predictions available for step {step}")
                        break

                    # Combine predictions (simple average for now)
                    combined_prediction = self._combine_predictions(step_predictions)

                    # Add timestamp for future prediction
                    prediction_time = datetime.now() + timedelta(minutes=step + 1)
                    combined_prediction['timestamp'] = prediction_time
                    combined_prediction['step'] = step
                    predictions.append(combined_prediction)

                    # Update current_data for next iteration using the prediction
                    current_data = self._update_data_with_prediction(current_data, combined_prediction)

                    logger.debug(f"Step {step}: Generated prediction for {prediction_time}")

                except Exception as e:
                    logger.error(f"Error in chained inference step {step}: {e}")
                    break

            logger.info(f"Chained inference completed: {len(predictions)} predictions generated")
            return predictions

        except Exception as e:
            logger.error(f"Error in chained inference: {e}")
            return []
    
    def _get_current_market_data(self, symbol: str) -> Optional[Dict]:
        """Get current market data for inference"""
        try:
            # This would get real market data - placeholder for now
            return {
                'symbol': symbol,
                'timestamp': datetime.now(),
                'price': 4300.0,  # Placeholder
                'volume': 1000.0,
                'features': [4300.0, 4305.0, 4295.0, 4302.0, 1000.0]  # OHLCV placeholder
            }
        except Exception as e:
            logger.error(f"Error getting market data: {e}")
            return None
    
    def _combine_predictions(self, predictions: List[Dict]) -> Dict:
        """Combine multiple model predictions into a single prediction"""
        try:
            if not predictions:
                return {}
            
            # Simple averaging for now
            avg_confidence = sum(p['confidence'] for p in predictions) / len(predictions)
            
            # Use the prediction with highest confidence
            best_pred = max(predictions, key=lambda x: x['confidence'])
            
            return {
                'prediction': best_pred['prediction'],
                'confidence': avg_confidence,
                'models_used': len(predictions),
                'model': best_pred['model']
            }
            
        except Exception as e:
            logger.error(f"Error combining predictions: {e}")
            return {}
    
    def _update_data_with_prediction(self, current_data: Dict, prediction: Dict) -> Dict:
        """Update current data with the prediction for next iteration"""
        try:
            # Simple update - use predicted price as new current price
            updated_data = current_data.copy()
            pred_data = prediction.get('prediction', {})
            
            if 'price' in pred_data:
                updated_data['price'] = pred_data['price']
            
            # Update timestamp
            updated_data['timestamp'] = prediction.get('timestamp', datetime.now())
            
            return updated_data
            
        except Exception as e:
            logger.error(f"Error updating data with prediction: {e}")
            return current_data
=======
    def get_profitability_reward_multiplier(self) -> float:
        """Fetch the profitability reward multiplier from the trading executor.

        Returns:
            float: The value reported by the executor's
            ``get_profitability_reward_multiplier`` (documented upstream as
            0.0 to 2.0 - not enforced here), or 0.0 when there is no executor,
            the executor lacks that method, or an error occurs (logged).
        """
        try:
            executor = self.trading_executor
            supports_multiplier = executor and hasattr(
                executor, "get_profitability_reward_multiplier"
            )
            if not supports_multiplier:
                return 0.0

            multiplier = executor.get_profitability_reward_multiplier()
            logger.debug(
                f"Current profitability reward multiplier: {multiplier:.2f}"
            )
            return multiplier
        except Exception as e:
            logger.error(f"Error getting profitability reward multiplier: {e}")
            return 0.0

    def calculate_enhanced_reward(
        self, base_pnl: float, confidence: float = 1.0
    ) -> float:
        """Scale a profitable trade's P&L by the profitability multiplier.

        Args:
            base_pnl: Base P&L from the trade
            confidence: Confidence level of the prediction; accepted for
                interface compatibility, it does not affect the result.

        Returns:
            float: ``base_pnl * (1 + multiplier)`` when both the P&L and the
            multiplier are positive; otherwise ``base_pnl`` unchanged (also on
            error, which is logged).
        """
        try:
            # Get the dynamic profitability multiplier
            profitability_multiplier = self.get_profitability_reward_multiplier()

            # Losing/flat trades and a zero multiplier get no enhancement
            if not (base_pnl > 0 and profitability_multiplier > 0):
                return base_pnl

            # Enhance profitable trades with the multiplier
            enhanced_reward = base_pnl * (1.0 + profitability_multiplier)
            logger.debug(
                f"Enhanced reward: ${base_pnl:.2f} → ${enhanced_reward:.2f} (multiplier: {profitability_multiplier:.2f})"
            )
            return enhanced_reward

        except Exception as e:
            logger.error(f"Error calculating enhanced reward: {e}")
            return base_pnl

    def _trigger_training_on_decision(
        self, decision: TradingDecision, current_price: float
    ):
        """Feed a trading decision into the training pipeline.

        Does nothing unless training is enabled and an enhanced training system
        is present. Otherwise the decision is queued for training (when the
        training system supports it), immediate training is requested for
        non-HOLD actions (priority "high" above 0.7 confidence, else "medium"),
        and finally all models are trained on the decision. Errors are logged,
        not raised.

        Args:
            decision: The decision that was just made.
            current_price: Price recorded alongside the decision.
        """
        try:
            # Only train if training is enabled and we have the enhanced training system
            if not self.training_enabled:
                return
            trainer = self.enhanced_training_system
            if not trainer:
                return

            symbol, action, confidence = decision.symbol, decision.action, decision.confidence
            is_trade = action != "HOLD"

            # Create training data from the decision
            training_data = {
                "symbol": symbol,
                "action": action,
                "confidence": confidence,
                "price": current_price,
                "timestamp": decision.timestamp,
                "executed": is_trade,  # Assume non-HOLD actions are executed
                "entry_aggressiveness": decision.entry_aggressiveness,
                "exit_aggressiveness": decision.exit_aggressiveness,
                "reasoning": decision.reasoning,
            }

            # Add to enhanced training system for immediate learning
            if hasattr(trainer, "add_decision_for_training"):
                trainer.add_decision_for_training(training_data)
                logger.debug(
                    f"🎓 Added decision to training queue: {action} {symbol} (conf: {confidence:.3f})"
                )

            # Trigger immediate training for executed trades (higher priority)
            if is_trade and hasattr(trainer, "trigger_immediate_training"):
                priority = "high" if confidence > 0.7 else "medium"
                trainer.trigger_immediate_training(symbol=symbol, priority=priority)
                logger.info(
                    f"🚀 Triggered immediate training for executed trade: {action} {symbol}"
                )

            # Train all models on the decision outcome
            self._train_models_on_decision(decision, current_price)

        except Exception as e:
            logger.error(f"Error triggering training on decision: {e}")

    def _train_models_on_decision(
        self, decision: TradingDecision, current_price: float
    ):
        """Train all models on the decision outcome

        This provides immediate feedback to models about their predictions,
        allowing them to learn from each signal they generate.
        """
        try:
            symbol = decision.symbol
            action = decision.action
            confidence = decision.confidence

            # Get current market data for training context - use same data source as CNN model
            base_data = self.build_base_data_input(symbol)
            if not base_data:
                logger.warning(f"No base data available for training {symbol}, skipping model training")
                return

            # Track if any model was trained for checkpoint saving
            models_trained = []

            # Train DQN agent if available and enabled
            if self.rl_agent and hasattr(self.rl_agent, "remember") and self.is_model_training_enabled("dqn"):
                try:
                    # Validate base_data before creating state
                    if not base_data or not hasattr(base_data, 'get_feature_vector'):
                        logger.debug(f"⚠️ Skipping DQN training for {symbol}: no valid base_data")
                    else:
                        # Check if base_data has actual features
                        features = base_data.get_feature_vector()
                        if not features or len(features) == 0 or all(f == 0 for f in features):
                            logger.debug(f"⚠️ Skipping DQN training for {symbol}: no valid features in base_data")
                        else:
                            # Create state representation from base_data (same as CNN model)
                            state = self._create_state_from_base_data(symbol, base_data)
                            
                            # Skip training if no valid state could be created
                            if state is None:
                                logger.debug(f"⚠️ Skipping DQN training for {symbol}: could not create valid state")
                            else:
                                # Map action to DQN action space - CONSISTENT ACTION MAPPING
                                action_mapping = {"BUY": 0, "SELL": 1, "HOLD": 2}
                                dqn_action = action_mapping.get(action, 2)

                                # Get position information for enhanced rewards
                                has_position = self._has_open_position(symbol)
                                position_pnl = self._get_current_position_pnl(symbol) if has_position else 0.0

                                # Calculate position-enhanced reward
                                base_reward = confidence if action != "HOLD" else 0.1
                                enhanced_reward = self._calculate_position_enhanced_reward_for_dqn(
                                    base_reward, action, position_pnl, has_position
                                )

                                # Add experience to DQN
                                self.rl_agent.remember(
                                    state=state,
                                    action=dqn_action,
                                    reward=enhanced_reward,
                                    next_state=state,  # Will be updated with actual outcome later
                                    done=False,
                                )

                                models_trained.append("dqn")
                                logger.debug(
                                    f"🧠 Added DQN experience: {action} {symbol} (reward: {enhanced_reward:.3f}, P&L: ${position_pnl:.2f})"
                                )

                except Exception as e:
                    logger.debug(f"Error training DQN on decision: {e}")

            # Train CNN model if available and enabled
            if self.cnn_model and hasattr(self.cnn_model, "add_training_data") and self.is_model_training_enabled("cnn"):
                try:
                    # Create CNN input features from base_data (same as inference)
                    cnn_features = self._create_cnn_features_from_base_data(
                        symbol, base_data
                    )

                    # Create target based on action
                    target_mapping = {
                        "BUY": 0,  # Action indices for CNN
                        "SELL": 1,
                        "HOLD": 2,
                    }
                    target_action = target_mapping.get(action, 2)

                    # Get position information for enhanced rewards
                    has_position = self._has_open_position(symbol)
                    position_pnl = self._get_current_position_pnl(symbol) if has_position else 0.0

                    # Calculate base reward from confidence and add position-based enhancement
                    base_reward = confidence if action != "HOLD" else 0.1

                    # Add training data with position-based reward enhancement
                    self.cnn_model.add_training_data(
                        cnn_features, 
                        target_action, 
                        base_reward,
                        position_pnl=position_pnl,
                        has_position=has_position
                    )

                    models_trained.append("cnn")
                    logger.debug(f"🔍 Added CNN training sample: {action} {symbol} (P&L: ${position_pnl:.2f})")

                except Exception as e:
                    logger.debug(f"Error training CNN on decision: {e}")

            # Train COB RL model if available, enabled, and we have COB data
            if self.cob_rl_agent and symbol in self.latest_cob_data and self.is_model_training_enabled("cob_rl"):
                try:
                    cob_data = self.latest_cob_data[symbol]
                    if hasattr(self.cob_rl_agent, "remember"):
                        # Create COB state representation
                        cob_state = self._create_cob_state_for_training(
                            symbol, cob_data
                        )

                        # Add COB experience
                        self.cob_rl_agent.remember(
                            state=cob_state,
                            action=action,
                            reward=confidence,
                            next_state=cob_state,  # Add required next_state parameter
                            done=False,  # Add required done parameter
                        )

                        models_trained.append("cob_rl")
                        logger.debug(f"📊 Added COB RL experience: {action} {symbol}")

                except Exception as e:
                    logger.debug(f"Error training COB RL on decision: {e}")

            # Train decision fusion model if available and enabled
            if self.decision_fusion_network and self.is_model_training_enabled("decision_fusion"):
                try:
                    # Create decision fusion input
                    # Build market_data on demand (avoid undefined reference)
                    market_snapshot = self._get_current_market_data(symbol)
                    fusion_input = self._create_decision_fusion_training_input(
                        symbol, market_snapshot if market_snapshot else {}
                    )

                    # Create target based on action
                    target_mapping = {
                        "BUY": [1, 0, 0],
                        "SELL": [0, 1, 0],
                        "HOLD": [0, 0, 1],
                    }
                    target = target_mapping.get(action, [0, 0, 1])

                    # Decision fusion network doesn't have add_training_sample method
                    # Instead, we'll store the training data for later batch training
                    if not hasattr(self, 'decision_fusion_training_data'):
                        self.decision_fusion_training_data = []
                    
                    # Convert target list to action string for compatibility
                    target_action = "BUY" if target[0] == 1 else "SELL" if target[1] == 1 else "HOLD"
                    
                    self.decision_fusion_training_data.append({
                        'input_features': fusion_input,
                        'target_action': target_action,
                        'weight': confidence,
                        'timestamp': datetime.now()
                    })
                    
                    # Train the network if we have enough samples
                    if len(self.decision_fusion_training_data) >= 5:  # Train every 5 samples
                        self._train_decision_fusion_network()
                        self.decision_fusion_training_data = []  # Clear after training

                    models_trained.append("decision_fusion")
                    logger.debug(f"🤝 Added decision fusion training sample: {action} {symbol}")

                except Exception as e:
                    logger.debug(f"Error training decision fusion on decision: {e}")

            # CRITICAL FIX: Save checkpoints after training
            if models_trained:
                self._save_training_checkpoints(models_trained, confidence)

        except Exception as e:
            logger.error(f"Error training models on decision: {e}")

    def _save_training_checkpoints(
        self, models_trained: List[str], performance_score: float
    ):
        """Persist checkpoints for the models that just received training data.

        Each model gets a proxy loss of ``1.0 - performance_score``. A checkpoint
        is written when that loss beats the best loss recorded for the model in
        ``self.model_states``, or on every third call of this method. Per-model
        bookkeeping (current/best loss, last checkpoint path, save count) is
        written back to ``self.model_states``.

        Args:
            models_trained: Names of the trained models. Only "dqn", "cnn",
                "cob_rl" and "decision_fusion" are recognised; other names
                are ignored.
            performance_score: Score of the triggering decision; a higher
                score yields a lower proxy loss.
        """
        # Model name -> orchestrator attribute that holds the model object.
        attr_by_model = {
            "dqn": "rl_agent",
            "cnn": "cnn_model",
            "cob_rl": "cob_rl_agent",
            "decision_fusion": "decision_fusion_network",
        }

        try:
            if not self.checkpoint_manager:
                return

            # One tick per call, shared by every model in this batch.
            self.training_iterations += 1

            for model_name in models_trained:
                try:
                    attr_name = attr_by_model.get(model_name)
                    if attr_name is None:
                        continue

                    model_obj = getattr(self, attr_name)
                    if not model_obj:
                        continue

                    # Higher confidence is treated as lower loss.
                    current_loss = 1.0 - performance_score

                    state = self.model_states.get(model_name, {})
                    best_loss = state.get("best_loss", float("inf"))

                    state["current_loss"] = current_loss
                    state["last_training"] = datetime.now()

                    improved = current_loss < best_loss
                    periodic = self.training_iterations % 3 == 0

                    if improved or periodic:
                        checkpoint_path = self.checkpoint_manager.save_checkpoint(
                            model=model_obj,
                            model_name=model_name,
                            performance=current_loss,
                            metadata={
                                "loss": current_loss,
                                "performance_score": performance_score,
                                "training_iterations": self.training_iterations,
                                "timestamp": datetime.now().isoformat(),
                                "model_type": model_name,
                            },
                        )

                        if checkpoint_path:
                            if improved:
                                state["best_loss"] = current_loss
                                state["best_checkpoint"] = checkpoint_path
                                logger.info(
                                    f"💾 Saved BEST checkpoint for {model_name}: {checkpoint_path} (loss: {current_loss:.4f})"
                                )
                            else:
                                logger.debug(
                                    f"💾 Saved periodic checkpoint for {model_name}: {checkpoint_path}"
                                )

                            state["last_checkpoint"] = checkpoint_path
                            state["checkpoints_saved"] = (
                                state.get("checkpoints_saved", 0) + 1
                            )

                    # Store the (possibly new) state dict back under its name.
                    self.model_states[model_name] = state

                except Exception as exc:
                    logger.error(f"Error saving checkpoint for {model_name}: {exc}")

        except Exception as exc:
            logger.error(f"Error saving training checkpoints: {exc}")

    def _get_current_market_data(self, symbol: str) -> Optional[Dict]:
        """Fetch a snapshot of recent 1m market data for training context.

        Returns:
            A dict with the last 50 candles ("ohlcv"), the latest close
            ("current_price"), the latest "volume" and the last index value
            ("timestamp"). None when there is no data provider, no data, or
            an error occurs.
        """
        try:
            if not self.data_provider:
                logger.warning(f"No data provider available for {symbol}")
                return None

            # Request 100 candles; only the most recent 50 are exported below.
            candles = self.data_provider.get_historical_data(symbol, "1m", limit=100)
            if candles is None or candles.empty:
                logger.warning(f"No historical data available for {symbol}")
                return None

            recent_records = candles.tail(50).to_dict("records")
            last_close = float(candles["close"].iloc[-1])
            last_volume = float(candles["volume"].iloc[-1])
            return {
                "ohlcv": recent_records,
                "current_price": last_close,
                "volume": last_volume,
                "timestamp": candles.index[-1],
            }
        except Exception as exc:
            logger.error(f"Error getting market data for training {symbol}: {exc}")
            return None

    def _create_state_from_base_data(self, symbol: str, base_data: Any) -> Optional[np.ndarray]:
        """Create the DQN state vector from base_data (same features as the CNN model).

        Args:
            symbol: Trading symbol; used only in log messages.
            base_data: Object exposing ``get_feature_vector()``.

        Returns:
            A float32 array of exactly 403 values (zero-padded or truncated),
            or None when ``base_data`` is invalid, yields no features, yields
            only zeros, or an error occurs.
        """
        state_size = 403  # DQN input width
        try:
            # Validate base_data
            if not base_data or not hasattr(base_data, 'get_feature_vector'):
                logger.debug(f"Invalid base_data for {symbol}: {type(base_data)}")
                return None

            # Get feature vector from base_data (same as CNN model)
            features = base_data.get_feature_vector()

            # Explicit None/length checks instead of ``not features``: the
            # truth value of a multi-element numpy array raises ValueError,
            # which made every array-valued feature vector end up as None.
            if features is None or len(features) == 0:
                logger.debug(f"No features available from base_data for {symbol}")
                return None

            # An all-zero vector is treated as an invalid state.
            if not (np.asarray(features) != 0).any():
                logger.debug(f"All features are zero for {symbol}")
                return None

            state = np.array(features, dtype=np.float32)

            # Force the fixed input width: pad short vectors, truncate long ones.
            if len(state) < state_size:
                padded_state = np.zeros(state_size, dtype=np.float32)
                padded_state[:len(state)] = state
                state = padded_state
            elif len(state) > state_size:
                state = state[:state_size]

            return state

        except Exception as e:
            logger.error(f"Error creating state from base_data for {symbol}: {e}")
            return None


    def _create_cnn_features_from_base_data(
        self, symbol: str, base_data: Any
    ) -> np.ndarray:
        """Create CNN training features from base_data (same as inference).

        Args:
            symbol: Trading symbol; used only in log messages.
            base_data: Object exposing ``get_feature_vector()``.

        Returns:
            A float32 array of shape (1, 403). Short feature vectors are
            zero-padded and long ones truncated. When ``base_data`` is
            invalid, has no features, or an error occurs, an all-zero
            (1, 403) array is returned.
        """
        input_size = 403  # CNN input width

        def _empty_input() -> np.ndarray:
            # float32 so the fallback has the same dtype as the regular path.
            # NOTE(review): an all-zero sample is a placeholder; callers may
            # want to skip it rather than train on it - confirm with policy.
            return np.zeros((1, input_size), dtype=np.float32)

        try:
            # Validate base_data
            if not base_data or not hasattr(base_data, 'get_feature_vector'):
                logger.warning(f"Invalid base_data for CNN training {symbol}: {type(base_data)}")
                return _empty_input()

            # Get feature vector from base_data (same as CNN inference)
            features = base_data.get_feature_vector()

            # Explicit None/length checks instead of ``not features``: the
            # truth value of a multi-element numpy array raises ValueError.
            if features is None or len(features) == 0:
                logger.warning(f"No features available from base_data for CNN training {symbol}, using default")
                return _empty_input()

            # Single-sample batch: one row holding the whole feature vector.
            cnn_features = np.array(features, dtype=np.float32).reshape(1, -1)

            width = cnn_features.shape[1]
            if width < input_size:
                padded_features = _empty_input()
                padded_features[0, :width] = cnn_features[0]
                cnn_features = padded_features
            elif width > input_size:
                cnn_features = cnn_features[:, :input_size]

            return cnn_features

        except Exception as e:
            logger.error(f"Error creating CNN features from base_data for {symbol}: {e}")
            return _empty_input()


    def _create_cob_state_for_training(self, symbol: str, cob_data: Dict) -> np.ndarray:
        """Flatten a COB snapshot into a 2000-element training state.

        Layout: (price, size) for up to 10 bids, then (price, size) for up to
        10 asks, then the stats spread, mid_price, bid_volume, ask_volume and
        imbalance; the remainder is zero padding. Missing keys count as 0.
        On any error an all-zero array of length 2000 is returned.
        """
        state_size = 2000
        depth = 10  # order book levels taken from each side
        stat_keys = ("spread", "mid_price", "bid_volume", "ask_volume", "imbalance")
        try:
            # Bids first, then asks; two values per level.
            values = [
                field_value
                for side in ("bids", "asks")
                for level in cob_data.get(side, [])[:depth]
                for field_value in (level.get("price", 0), level.get("size", 0))
            ]

            stats = cob_data.get("stats", {})
            values += [stats.get(key, 0) for key in stat_keys]

            cob_state = np.array(values[:state_size])
            missing = state_size - len(cob_state)
            if missing > 0:
                cob_state = np.pad(cob_state, (0, missing), "constant")

            return cob_state

        except Exception as exc:
            logger.debug(f"Error creating COB state for training: {exc}")
            return np.zeros(2000)

    def _create_decision_fusion_training_input(self, symbol: str, market_data: Dict) -> np.ndarray:
        """Build the 100-element decision fusion input from market data.

        The last 20 candles of ``market_data["ohlcv"]`` are flattened as
        open, high, low, close, volume (missing fields count as 0) and the
        result is zero-padded to 100 values. With no candles, or on error,
        an all-zero array of length 100 is returned.
        """
        input_size = 100
        columns = ("open", "high", "low", "close", "volume")
        try:
            candles = market_data.get("ohlcv", [])
            if not candles:
                return np.zeros(100)  # Default state size

            # 20 candles x 5 columns fills the input exactly.
            flat = [
                candle.get(column, 0)
                for candle in candles[-20:]
                for column in columns
            ]

            state = np.array(flat[:input_size])
            shortfall = input_size - len(state)
            if shortfall > 0:
                state = np.pad(state, (0, shortfall), "constant")

            return state

        except Exception as exc:
            logger.debug(f"Error creating decision fusion input: {exc}")
            return np.zeros(100)

    def _check_signal_confirmation(
        self, symbol: str, signal_data: Dict
    ) -> Optional[str]:
        """Check if we have enough signal confirmations for trend confirmation with rate limiting.

        The signal is appended to the per-symbol accumulator (after expired
        entries are dropped). Once ``self.required_confirmations`` signals are
        available, the most frequent action among the latest ones is confirmed
        if it reaches the consensus threshold and was not confirmed within
        ``self.min_signal_interval``.

        Args:
            symbol: Trading symbol the signal belongs to.
            signal_data: Dict with "timestamp", "action" and "confidence".

        Returns:
            The confirmed action, or None when the signal is rate limited,
            confirmations are insufficient, there is no consensus, or an
            error occurs.
        """
        try:
            current_time = signal_data["timestamp"]
            action = signal_data["action"]

            # Initialize per-symbol tracking on first use. The accumulator is
            # included: indexing it for an unseen symbol raised KeyError, which
            # was swallowed below and blocked that symbol's signals forever.
            self.last_signal_time.setdefault(symbol, {})
            confirmed_by_action = self.last_confirmed_signal.setdefault(symbol, {})
            self.signal_accumulator.setdefault(symbol, [])

            # RATE LIMITING: Check if we recently confirmed the same signal
            if action in confirmed_by_action:
                last_confirmed = confirmed_by_action[action]
                time_since_last = current_time - last_confirmed["timestamp"]
                if time_since_last < self.min_signal_interval:
                    logger.debug(
                        f"Rate limiting: {action} signal for {symbol} too recent "
                        f"({time_since_last.total_seconds():.1f}s < {self.min_signal_interval.total_seconds()}s)"
                    )
                    return None

            # Clean up expired signals
            self.signal_accumulator[symbol] = [
                s
                for s in self.signal_accumulator[symbol]
                if (current_time - s["timestamp"]).total_seconds()
                < self.signal_timeout_seconds
            ]

            # Add new signal
            self.signal_accumulator[symbol].append(signal_data)

            # Check if we have enough confirmations
            if len(self.signal_accumulator[symbol]) < self.required_confirmations:
                return None

            # Only the most recent `required_confirmations` signals vote.
            recent_signals = self.signal_accumulator[symbol][
                -self.required_confirmations :
            ]

            # Count action consensus
            action_counts = {}
            for voter in recent_signals:
                voted_action = voter["action"]
                action_counts[voted_action] = action_counts.get(voted_action, 0) + 1

            # Ties resolve to the action seen first among the recent signals.
            dominant_action = max(action_counts, key=action_counts.get)
            consensus_count = action_counts[dominant_action]

            # Require at least 2/3 consensus (and never fewer than 2 votes)
            if consensus_count < max(2, self.required_confirmations * 0.67):
                return None

            # ADDITIONAL RATE LIMITING: the dominant action can differ from the
            # incoming signal's action, so it gets its own recency check.
            if dominant_action in confirmed_by_action:
                last_confirmed = confirmed_by_action[dominant_action]
                time_since_last = current_time - last_confirmed["timestamp"]
                if time_since_last < self.min_signal_interval:
                    logger.debug(
                        f"Rate limiting: Preventing duplicate {dominant_action} confirmation for {symbol}"
                    )
                    return None

            # Record this confirmation
            confirmed_by_action[dominant_action] = {
                "timestamp": current_time,
                "confidence": signal_data["confidence"],
            }

            # Clear accumulator after confirmation
            self.signal_accumulator[symbol] = []

            logger.info(
                f"Signal confirmed after rate limiting: {dominant_action} for {symbol}"
            )
            return dominant_action

        except Exception as e:
            logger.error(f"Error checking signal confirmation for {symbol}: {e}")
            return None

    def _initialize_checkpoint_manager(self):
        """Set up the checkpoint manager and the per-model state table.

        ``self.model_states`` is only created when it is missing or None, so
        state prepared earlier is kept. On any failure the error is logged
        and ``self.checkpoint_manager`` is set to None.
        """
        tracked_models = ("dqn", "cnn", "cob_rl", "extrema")
        try:
            from utils.checkpoint_manager import get_checkpoint_manager

            self.checkpoint_manager = get_checkpoint_manager()

            # Build the performance-tracking table only if nobody did before.
            if getattr(self, "model_states", None) is None:
                self.model_states = {
                    model_name: {
                        "initial_loss": None,
                        "current_loss": None,
                        "best_loss": float("inf"),
                        "checkpoint_loaded": False,
                    }
                    for model_name in tracked_models
                }

            logger.info("Checkpoint manager initialized for model persistence")
        except Exception as exc:
            logger.error(f"Error initializing checkpoint manager: {exc}")
            self.checkpoint_manager = None
    def autosave_models(self):
        """Attempt to autosave best model checkpoints periodically.

        For the CNN and the COB RL agent, a checkpoint is written through
        ``self.checkpoint_manager.save_model_checkpoint`` when the model's
        ``current_loss`` in ``self.model_states`` is set and not worse than
        its ``best_loss``. The operation is best-effort: a failure for one
        model is logged at debug level and does not stop the other from being
        saved, and nothing is raised to the caller.
        """
        # (model_states key, checkpoint name, attribute holding the model, log label)
        targets = (
            ("cnn", "enhanced_cnn", "cnn_model", "CNN"),
            ("cob_rl", "cob_rl", "cob_rl_agent", "COB RL"),
        )
        try:
            if not self.checkpoint_manager:
                return

            for state_key, checkpoint_name, model_attr, label in targets:
                try:
                    stats = self.model_states.get(state_key, {})
                    if not stats:
                        continue

                    current_loss = stats.get('current_loss')
                    best_loss = stats.get('best_loss')
                    if current_loss is None or best_loss is None:
                        continue

                    # Save only when the current loss matches or beats the best.
                    if not current_loss <= best_loss:
                        continue

                    path = self.checkpoint_manager.save_model_checkpoint(
                        model_name=checkpoint_name,
                        model=getattr(self, model_attr),
                        metrics={'loss': float(current_loss)},
                        metadata={'source': 'autosave'}
                    )
                    if path:
                        logger.info(f"Autosaved {label} checkpoint: {path}")
                except Exception as e:
                    # Stay best-effort, but leave a trace instead of hiding it.
                    logger.debug(f"Autosave failed for {label}: {e}")
        except Exception as e:
            logger.debug(f"Autosave models skipped: {e}")

    def _schedule_database_cleanup(self):
        """Run one cleanup pass over the inference log database.

        Asks ``self.inference_logger`` to drop records older than 30 days and
        logs the outcome. Failures are logged, not raised.
        """
        retention_days = 30  # inference records older than this are removed
        try:
            self.inference_logger.cleanup_old_logs(days_to_keep=retention_days)
        except Exception as exc:
            logger.error(f"Database cleanup failed: {exc}")
        else:
            logger.info("Database cleanup completed")

    def log_model_inference(
        self,
        model_name: str,
        symbol: str,
        action: str,
        confidence: float,
        probabilities: Dict[str, float],
        input_features: Any,
        processing_time_ms: float,
        checkpoint_id: str = None,
        metadata: Dict[str, Any] = None,
    ) -> bool:
        """
        Record one model inference through the shared inference logging helper

        Thin pass-through to the module-level ``log_model_inference`` function:
        every argument is forwarded by keyword and that function's return
        value is returned unchanged. Intended as the single entry point that
        replaces scattered logger.info() calls throughout the codebase.
        """
        # Inside a method body this name resolves to the module-level helper,
        # not to this method, so the call below does not recurse.
        record = {
            "model_name": model_name,
            "symbol": symbol,
            "action": action,
            "confidence": confidence,
            "probabilities": probabilities,
            "input_features": input_features,
            "processing_time_ms": processing_time_ms,
            "checkpoint_id": checkpoint_id,
            "metadata": metadata,
        }
        return log_model_inference(**record)

    def get_model_inference_stats(
        self, model_name: str, hours: int = 24
    ) -> Dict[str, Any]:
        """Return inference statistics for ``model_name`` over the last ``hours`` hours.

        Delegates to ``self.inference_logger.get_model_stats``.
        """
        stats_source = self.inference_logger
        model_stats = stats_source.get_model_stats(model_name, hours)
        return model_stats

    def get_checkpoint_metadata_fast(self, model_name: str) -> Optional[Any]:
        """
        Look up the best checkpoint's metadata for ``model_name``

        Delegates to ``self.db_manager.get_best_checkpoint_metadata`` so the
        metadata can be read without loading the full checkpoint.
        """
        metadata_store = self.db_manager
        best_metadata = metadata_store.get_best_checkpoint_metadata(model_name)
        return best_metadata

    # === DATA MANAGEMENT ===

    def _log_data_status(self):
        """Emit the data provider status banner at INFO level."""
        status_lines = (
            "=== Data Provider Status ===",
            "Data provider is running and optimized for BaseDataInput building",
        )
        try:
            for line in status_lines:
                logger.info(line)
        except Exception as exc:
            logger.error(f"Error logging data status: {exc}")

    def update_data_cache(
        self, data_type: str, symbol: str, data: Any, source: str = "orchestrator"
    ) -> bool:
        """
        Signal the data provider that new data arrived for a symbol

        The only action taken is invalidating the provider's OHLCV cache for
        ``symbol`` (when the provider offers ``invalidate_ohlcv_cache``).
        ``data`` and ``source`` are accepted but not used here; ``data_type``
        only appears in the error log message.

        Args:
            data_type: Type of data ('ohlcv_1s', 'technical_indicators', etc.)
            symbol: Trading symbol
            data: Data that arrived
            source: Source of the update

        Returns:
            bool: True unless the invalidation raised an error
        """
        absent = object()  # sentinel: provider has no invalidation hook
        try:
            invalidate = getattr(self.data_provider, "invalidate_ohlcv_cache", absent)
            if invalidate is not absent:
                invalidate(symbol)
        except Exception as exc:
            logger.error(f"Error updating data cache {data_type}/{symbol}: {exc}")
            return False
        return True

    def get_latest_data(self, data_type: str, symbol: str, count: int = 1) -> List[Any]:
        """
        Get latest data from FIFO queue

        Args:
            data_type: Type of data
            symbol: Trading symbol
            count: Number of latest items to retrieve

        Returns:
            List of latest data items, oldest first. Empty when the queue is
            unknown or empty, when ``count`` is zero or negative, or when an
            error occurs.
        """
        try:
            if (
                data_type not in self.data_queues
                or symbol not in self.data_queues[data_type]
            ):
                return []

            # Asking for no items yields no items (this used to return the
            # newest item for any count below 2, including 0 and negatives).
            if count <= 0:
                return []

            with self.data_queue_locks[data_type][symbol]:
                queue = self.data_queues[data_type][symbol]
                if not queue:
                    return []

                if count == 1:
                    # Common case: avoid copying the whole queue.
                    return [queue[-1]]

                return list(queue)[-count:]

        except Exception as e:
            logger.error(f"Error getting latest data {data_type}/{symbol}: {e}")
            return []

    def get_queue_data(
        self, data_type: str, symbol: str, max_items: int = None
    ) -> List[Any]:
        """
        Copy the contents of a FIFO queue into a list

        Args:
            data_type: Type of data
            symbol: Trading symbol
            max_items: Keep only this many newest items (None or 0 for all)

        Returns:
            List of data items, oldest first; empty when the queue is unknown
            or an error occurs
        """
        try:
            known = (
                data_type in self.data_queues
                and symbol in self.data_queues[data_type]
            )
            if not known:
                return []

            # Copy under the lock; trimming the private copy needs no lock.
            with self.data_queue_locks[data_type][symbol]:
                snapshot = list(self.data_queues[data_type][symbol])

            needs_trim = bool(max_items) and len(snapshot) > max_items
            return snapshot[-max_items:] if needs_trim else snapshot

        except Exception as exc:
            logger.error(f"Error getting queue data {data_type}/{symbol}: {exc}")
            return []

    def get_queue_status(self) -> Dict[str, Dict[str, int]]:
        """Report the current length of every data queue.

        Returns:
            Mapping of data type -> symbol -> number of queued items. Each
            length is read while holding that queue's lock.
        """

        def _locked_length(kind: str, sym: str, items: Any) -> int:
            with self.data_queue_locks[kind][sym]:
                return len(items)

        return {
            kind: {
                sym: _locked_length(kind, sym, items)
                for sym, items in per_symbol.items()
            }
            for kind, per_symbol in self.data_queues.items()
        }

    def get_detailed_queue_status(self) -> Dict[str, Any]:
        """
        Describe every data queue in detail

        Returns:
            Nested dict (data type -> symbol -> info) where each info dict holds
            the item count, the queue capacity, the fill percentage, the ISO
            timestamps of the oldest/newest items (when items expose a
            ``timestamp`` attribute) and a short summary of the newest item.
        """
        report = {}

        for data_type, symbol_queues in self.data_queues.items():
            per_symbol = {}
            report[data_type] = per_symbol

            for symbol, queue in symbol_queues.items():
                with self.data_queue_locks[data_type][symbol]:
                    items = list(queue)
                    capacity = queue.maxlen

                    # Unbounded queues (no maxlen) report 0 usage
                    if capacity:
                        usage = len(items) / capacity * 100
                    else:
                        usage = 0

                    entry = {
                        "count": len(items),
                        "max_size": capacity,
                        "usage_percent": usage,
                        "oldest_timestamp": None,
                        "newest_timestamp": None,
                        "data_type_info": None,
                    }

                    if items:
                        oldest, newest = items[0], items[-1]
                        try:
                            # Timestamp range, only for items that carry one
                            if hasattr(oldest, "timestamp"):
                                entry["oldest_timestamp"] = oldest.timestamp.isoformat()
                                entry["newest_timestamp"] = newest.timestamp.isoformat()

                            # One-line summary that depends on the queue category
                            if data_type.startswith("ohlcv_"):
                                if hasattr(newest, "close"):
                                    entry["data_type_info"] = (
                                        f"latest_price={newest.close:.2f}"
                                    )
                            elif data_type == "technical_indicators":
                                if isinstance(newest, dict):
                                    # Only the first three indicator names are listed
                                    indicators = list(newest.keys())[:3]
                                    entry["data_type_info"] = f"indicators={indicators}"
                            elif data_type == "cob_data":
                                entry["data_type_info"] = "cob_snapshot"
                            elif data_type == "model_predictions":
                                if hasattr(newest, "action"):
                                    entry["data_type_info"] = (
                                        f"latest_action={newest.action}"
                                    )
                        except Exception as exc:
                            # Introspection is best-effort; surface the failure in the report
                            entry["data_type_info"] = f"error_getting_info: {exc}"

                per_symbol[symbol] = entry

        return report

    def log_queue_status(self, detailed: bool = False):
        """
        Write the current queue status to the log for debugging

        Args:
            detailed: When True, log one line per symbol with fill level and
                newest-item summary; otherwise log one line per data type
                listing the per-symbol counts
        """
        if not detailed:
            counts_by_type = self.get_queue_status()
            logger.info("=== Queue Status ===")
            for data_type, symbols in counts_by_type.items():
                summary = ", ".join(
                    f"{symbol}:{count}" for symbol, count in symbols.items()
                )
                logger.info(f"{data_type}: {summary}")
            return

        report = self.get_detailed_queue_status()
        logger.info("=== Detailed Queue Status ===")
        for data_type, symbols in report.items():
            logger.info(f"{data_type}:")
            for symbol, info in symbols.items():
                extra = info.get('data_type_info', 'no_info')
                logger.info(
                    f"  {symbol}: {info['count']}/{info['max_size']} ({info['usage_percent']:.1f}%) - {extra}"
                )

    def ensure_minimum_data(self, data_type: str, symbol: str, min_count: int) -> bool:
        """
        Tell whether a queue currently holds at least ``min_count`` items

        Args:
            data_type: Queue category to inspect
            symbol: Trading symbol whose queue is inspected
            min_count: Number of items required

        Returns:
            bool: True when the queue exists and is long enough; False when it
            is too short, not registered, or cannot be read
        """
        try:
            registered = (
                data_type in self.data_queues
                and symbol in self.data_queues[data_type]
            )
            if not registered:
                return False

            # Measure the queue while holding its lock
            with self.data_queue_locks[data_type][symbol]:
                current_size = len(self.data_queues[data_type][symbol])

            return current_size >= min_count

        except Exception as exc:
            logger.error(f"Error checking minimum data {data_type}/{symbol}: {exc}")
            return False

    def build_base_data_input(self, symbol: str) -> Optional[Any]:
        """
        Build the model input for a symbol and attach current position details

        The heavy lifting is delegated to the data provider's own
        build_base_data_input; this method only decorates the result with a
        ``position_info`` dict.

        Args:
            symbol: Trading symbol

        Returns:
            The provider's base data object with ``position_info`` set, the
            provider's falsy result unchanged when it produced nothing, or
            None when an error occurs
        """
        try:
            base_data = self.data_provider.build_base_data_input(symbol)

            # Nothing to decorate - hand back whatever the provider returned
            if not base_data:
                return base_data

            current_price = self.data_provider.get_current_price(symbol)
            has_position = self._has_open_position(symbol)
            if current_price:
                position_pnl = self._get_current_position_pnl(symbol, current_price)
            else:
                # Without a price the PnL cannot be evaluated
                position_pnl = 0.0

            position_info = {
                'has_position': has_position,
                'position_pnl': position_pnl,
                'position_size': 0.0,
                'entry_price': 0.0,
                'time_in_position_minutes': 0.0
            }

            executor_can_report = (
                has_position
                and self.trading_executor
                and hasattr(self.trading_executor, "get_current_position")
            )
            if executor_can_report:
                # Best-effort enrichment; defaults stay in place on failure
                try:
                    position = self.trading_executor.get_current_position(symbol)
                    if position:
                        position_info['position_size'] = position.get("size", 0.0)
                        position_info['entry_price'] = position.get("price", 0.0)
                        entry_time = position.get("entry_time")
                        if entry_time:
                            held_for = datetime.now() - entry_time
                            position_info['time_in_position_minutes'] = (
                                held_for.total_seconds() / 60.0
                            )
                except Exception as exc:
                    logger.debug(f"Error getting position details for {symbol}: {exc}")

            base_data.position_info = position_info
            return base_data

        except Exception as exc:
            logger.error(f"Error building BaseDataInput for {symbol}: {exc}")
            return None

    def _get_latest_indicators(self, symbol: str) -> Dict[str, float]:
        """Get latest technical indicators from queue"""
        try:
            indicators_data = self.get_latest_data("technical_indicators", symbol, 1)
            if indicators_data:
                return indicators_data[0]
            return {}
        except Exception as e:
            logger.error(f"Error getting indicators for {symbol}: {e}")
            return {}

    def _get_latest_cob_data(self, symbol: str) -> Optional[Any]:
        """Get latest COB data from queue"""
        try:
            cob_data = self.get_latest_data("cob_data", symbol, 1)
            if cob_data:
                return cob_data[0]
            return None
        except Exception as e:
            logger.error(f"Error getting COB data for {symbol}: {e}")
            return None

    def _get_recent_model_predictions(self, symbol: str) -> Dict[str, Any]:
        """Get recent model predictions from queue"""
        try:
            predictions_data = self.get_latest_data("model_predictions", symbol, 5)

            # Convert to dict format expected by BaseDataInput
            predictions_dict = {}
            for i, pred in enumerate(predictions_data):
                predictions_dict[f"model_{i}"] = pred

            return predictions_dict
        except Exception as e:
            logger.error(f"Error getting model predictions for {symbol}: {e}")
            return {}

    def _initialize_data_queue_integration(self):
        """Initialize integration between data provider and FIFO queues"""
        try:
            # Register callbacks with data provider to populate FIFO queues
            if hasattr(self.data_provider, "register_data_callback"):
                # Register for different data types
                self.data_provider.register_data_callback("ohlcv", self._on_ohlcv_data)
                self.data_provider.register_data_callback(
                    "technical_indicators", self._on_indicators_data
                )
                self.data_provider.register_data_callback("cob", self._on_cob_data)
                logger.info("Data provider callbacks registered for FIFO queues")
            else:
                # Fallback: Start a background thread to poll data
                self._start_data_polling_thread()
                logger.info("Started data polling thread for FIFO queues")

        except Exception as e:
            logger.error(f"Error initializing data queue integration: {e}")

    def _on_ohlcv_data(self, symbol: str, timeframe: str, data: Any):
        """Callback for new OHLCV data"""
        try:
            data_type = f"ohlcv_{timeframe}"
            if data_type in self.data_queues and symbol in self.data_queues[data_type]:
                self.update_data_queue(data_type, symbol, data)
        except Exception as e:
            logger.error(f"Error processing OHLCV data callback: {e}")

    def _on_indicators_data(self, symbol: str, indicators: Dict[str, float]):
        """Callback for new technical indicators"""
        try:
            self.update_data_queue("technical_indicators", symbol, indicators)
        except Exception as e:
            logger.error(f"Error processing indicators data callback: {e}")

    def _on_cob_data(self, symbol: str, cob_data: Any):
        """Callback for new COB data"""
        try:
            self.update_data_queue("cob_data", symbol, cob_data)
        except Exception as e:
            logger.error(f"Error processing COB data callback: {e}")

    def _start_data_polling_thread(self):
        """Start background thread to poll data and populate queues"""

        def data_polling_worker():
            """Background worker to poll data and update queues"""
            poll_count = 0
            while self.running:
                try:
                    poll_count += 1

                    # Log polling activity every 30 seconds
                    if poll_count % 30 == 1:
                        logger.info(
                            f"Data polling cycle #{poll_count} - checking data sources"
                        )
                    # Poll OHLCV data for all symbols and timeframes
                    for symbol in [self.symbol] + self.ref_symbols:
                        for timeframe in ["1s", "1m", "1h", "1d"]:
                            try:
                                # Get latest data from data provider using correct method
                                if hasattr(self.data_provider, "get_latest_candles"):
                                    df = self.data_provider.get_latest_candles(
                                        symbol, timeframe, limit=1
                                    )
                                    if df is not None and not df.empty:
                                        # Convert DataFrame row to OHLCVBar
                                        latest_row = df.iloc[-1]
                                        from core.data_models import OHLCVBar

                                        ohlcv_bar = OHLCVBar(
                                            symbol=symbol,
                                            timestamp=(
                                                latest_row.name
                                                if hasattr(
                                                    latest_row.name, "to_pydatetime"
                                                )
                                                else datetime.now()
                                            ),
                                            open=float(latest_row["open"]),
                                            high=float(latest_row["high"]),
                                            low=float(latest_row["low"]),
                                            close=float(latest_row["close"]),
                                            volume=float(latest_row["volume"]),
                                            timeframe=timeframe,
                                        )
                                        self.update_data_queue(
                                            f"ohlcv_{timeframe}", symbol, ohlcv_bar
                                        )
                                elif hasattr(self.data_provider, "get_historical_data"):
                                    df = self.data_provider.get_historical_data(
                                        symbol, timeframe, limit=1
                                    )
                                    if df is not None and not df.empty:
                                        # Convert DataFrame row to OHLCVBar
                                        latest_row = df.iloc[-1]
                                        from core.data_models import OHLCVBar

                                        ohlcv_bar = OHLCVBar(
                                            symbol=symbol,
                                            timestamp=(
                                                latest_row.name
                                                if hasattr(
                                                    latest_row.name, "to_pydatetime"
                                                )
                                                else datetime.now()
                                            ),
                                            open=float(latest_row["open"]),
                                            high=float(latest_row["high"]),
                                            low=float(latest_row["low"]),
                                            close=float(latest_row["close"]),
                                            volume=float(latest_row["volume"]),
                                            timeframe=timeframe,
                                        )
                                        self.update_data_queue(
                                            f"ohlcv_{timeframe}", symbol, ohlcv_bar
                                        )
                            except Exception as e:
                                logger.debug(f"Error polling {symbol} {timeframe}: {e}")

                    # Poll technical indicators
                    for symbol in [self.symbol] + self.ref_symbols:
                        try:
                            # Get recent data and calculate basic indicators
                            df = None
                            if hasattr(self.data_provider, "get_latest_candles"):
                                df = self.data_provider.get_latest_candles(
                                    symbol, "1m", limit=50
                                )
                            elif hasattr(self.data_provider, "get_historical_data"):
                                df = self.data_provider.get_historical_data(
                                    symbol, "1m", limit=50
                                )

                            if df is not None and not df.empty and len(df) >= 20:
                                # Calculate basic technical indicators
                                indicators = {}
                                try:
                                    # Use our own RSI implementation to avoid ta library deprecation warnings
                                    if len(df) >= 14:
                                        indicators["rsi"] = self._calculate_rsi(
                                            df["close"], period=14
                                        )
                                    indicators["sma_20"] = (
                                        df["close"].rolling(20).mean().iloc[-1]
                                    )
                                    indicators["ema_12"] = (
                                        df["close"].ewm(span=12).mean().iloc[-1]
                                    )
                                    indicators["ema_26"] = (
                                        df["close"].ewm(span=26).mean().iloc[-1]
                                    )
                                    indicators["macd"] = (
                                        indicators["ema_12"] - indicators["ema_26"]
                                    )

                                    # Remove NaN values
                                    indicators = {
                                        k: float(v)
                                        for k, v in indicators.items()
                                        if not pd.isna(v)
                                    }

                                    if indicators:
                                        self.update_data_queue(
                                            "technical_indicators", symbol, indicators
                                        )
                                except Exception as ta_e:
                                    logger.debug(
                                        f"Error calculating indicators for {symbol}: {ta_e}"
                                    )
                        except Exception as e:
                            logger.debug(f"Error polling indicators for {symbol}: {e}")

                    # Poll COB data (primary symbol only)
                    try:
                        if hasattr(self.data_provider, "get_latest_cob_data"):
                            cob_data = self.data_provider.get_latest_cob_data(
                                self.symbol
                            )
                            if cob_data and isinstance(cob_data, dict) and cob_data:
                                self.update_data_queue(
                                    "cob_data", self.symbol, cob_data
                                )
                    except Exception as e:
                        logger.debug(f"Error polling COB data: {e}")

                    # Sleep between polls
                    time.sleep(1)  # Poll every second

                except Exception as e:
                    logger.error(f"Error in data polling worker: {e}")
                    time.sleep(5)  # Wait longer on error

        # Start the polling thread
        self.data_polling_thread = threading.Thread(
            target=data_polling_worker, daemon=True
        )
        self.data_polling_thread.start()
        logger.info("Data polling thread started")

        # Populate initial data
        self._populate_initial_queue_data()

    def _populate_initial_queue_data(self):
        """Populate FIFO queues with initial historical data"""
        try:
            logger.info("Populating FIFO queues with initial data...")

            # Get initial OHLCV data for all symbols and timeframes
            for symbol in [self.symbol] + self.ref_symbols:
                for timeframe in ["1s", "1m", "1h", "1d"]:
                    try:
                        # Determine how much data to fetch based on timeframe
                        limits = {"1s": 500, "1m": 300, "1h": 300, "1d": 300}
                        limit = limits.get(timeframe, 300)

                        # Get historical data
                        df = None
                        if hasattr(self.data_provider, "get_historical_data"):
                            df = self.data_provider.get_historical_data(
                                symbol, timeframe, limit=limit
                            )

                        if df is not None and not df.empty:
                            logger.info(
                                f"Loading {len(df)} {timeframe} bars for {symbol}"
                            )

                            # Convert DataFrame to OHLCVBar objects and add to queue
                            from core.data_models import OHLCVBar

                            for idx, row in df.iterrows():
                                try:
                                    ohlcv_bar = OHLCVBar(
                                        symbol=symbol,
                                        timestamp=(
                                            idx
                                            if hasattr(idx, "to_pydatetime")
                                            else datetime.now()
                                        ),
                                        open=float(row["open"]),
                                        high=float(row["high"]),
                                        low=float(row["low"]),
                                        close=float(row["close"]),
                                        volume=float(row["volume"]),
                                        timeframe=timeframe,
                                    )
                                    self.update_data_queue(
                                        f"ohlcv_{timeframe}", symbol, ohlcv_bar
                                    )
                                except Exception as bar_e:
                                    logger.debug(f"Error creating OHLCV bar: {bar_e}")
                        else:
                            logger.warning(
                                f"No historical data available for {symbol} {timeframe}"
                            )

                    except Exception as e:
                        logger.warning(
                            f"Error loading initial data for {symbol} {timeframe}: {e}"
                        )

            # Calculate and populate technical indicators
            logger.info("Calculating technical indicators...")
            for symbol in [self.symbol] + self.ref_symbols:
                try:
                    # Use 1m data to calculate indicators
                    if self.ensure_minimum_data("ohlcv_1m", symbol, 50):
                        minute_data = self.get_queue_data("ohlcv_1m", symbol, 100)
                        if minute_data and len(minute_data) >= 20:
                            # Convert to DataFrame for indicator calculation
                            df_data = []
                            for bar in minute_data:
                                df_data.append(
                                    {
                                        "timestamp": bar.timestamp,
                                        "open": bar.open,
                                        "high": bar.high,
                                        "low": bar.low,
                                        "close": bar.close,
                                        "volume": bar.volume,
                                    }
                                )

                            df = pd.DataFrame(df_data)
                            df.set_index("timestamp", inplace=True)

                            # Calculate indicators
                            indicators = {}
                            try:
                                # Use our own RSI implementation to avoid ta library deprecation warnings
                                if len(df) >= 14:
                                    indicators["rsi"] = self._calculate_rsi(
                                        df["close"], period=14
                                    )
                                if len(df) >= 20:
                                    indicators["sma_20"] = (
                                        df["close"].rolling(20).mean().iloc[-1]
                                    )
                                if len(df) >= 12:
                                    indicators["ema_12"] = (
                                        df["close"].ewm(span=12).mean().iloc[-1]
                                    )
                                if len(df) >= 26:
                                    indicators["ema_26"] = (
                                        df["close"].ewm(span=26).mean().iloc[-1]
                                    )
                                    if "ema_12" in indicators:
                                        indicators["macd"] = (
                                            indicators["ema_12"] - indicators["ema_26"]
                                        )

                                # Bollinger Bands
                                if len(df) >= 20:
                                    bb_period = 20
                                    bb_std = 2
                                    sma = df["close"].rolling(bb_period).mean()
                                    std = df["close"].rolling(bb_period).std()
                                    indicators["bb_upper"] = (
                                        sma + (std * bb_std)
                                    ).iloc[-1]
                                    indicators["bb_lower"] = (
                                        sma - (std * bb_std)
                                    ).iloc[-1]
                                    indicators["bb_middle"] = sma.iloc[-1]

                                # Remove NaN values
                                indicators = {
                                    k: float(v)
                                    for k, v in indicators.items()
                                    if not pd.isna(v)
                                }

                                if indicators:
                                    self.update_data_queue(
                                        "technical_indicators", symbol, indicators
                                    )
                                    logger.info(
                                        f"Calculated {len(indicators)} indicators for {symbol}"
                                    )

                            except Exception as ta_e:
                                logger.warning(
                                    f"Error calculating indicators for {symbol}: {ta_e}"
                                )

                except Exception as e:
                    logger.warning(f"Error processing indicators for {symbol}: {e}")

            # Log final queue status
            logger.info("Initial data population completed")
            self.log_queue_status(detailed=True)

        except Exception as e:
            logger.error(f"Error populating initial queue data: {e}")

    def _try_fallback_data_strategy(
        self, symbol: str, missing_data: List[Tuple[str, int, int]]
    ) -> bool:
        """
        Try to fill missing data using fallback strategies

        Args:
            symbol: Trading symbol
            missing_data: List of (data_type, actual_count, min_count) tuples

        Returns:
            bool: True if fallback successful
        """
        try:
            from core.data_models import OHLCVBar

            for data_type, actual_count, min_count in missing_data:
                needed_count = min_count - actual_count

                if data_type == "ohlcv_1s" and needed_count > 0:
                    # Try to use 1m data to generate 1s data (simple interpolation)
                    if self.ensure_minimum_data("ohlcv_1m", symbol, 10):
                        logger.info(
                            f"Using 1m data to generate {needed_count} 1s bars for {symbol}"
                        )

                        # Get some 1m data
                        minute_data = self.get_queue_data("ohlcv_1m", symbol, 10)
                        if minute_data:
                            # Generate synthetic 1s bars from 1m data
                            for i, minute_bar in enumerate(
                                minute_data[-5:]
                            ):  # Use last 5 minutes
                                # Create 60 synthetic 1s bars from each 1m bar
                                for second in range(60):
                                    if (
                                        len(self.data_queues["ohlcv_1s"][symbol])
                                        >= min_count
                                    ):
                                        break

                                    # Simple interpolation (not perfect but functional)
                                    synthetic_bar = OHLCVBar(
                                        symbol=symbol,
                                        timestamp=minute_bar.timestamp,
                                        open=minute_bar.open,
                                        high=minute_bar.high,
                                        low=minute_bar.low,
                                        close=minute_bar.close,
                                        volume=minute_bar.volume
                                        / 60,  # Distribute volume
                                        timeframe="1s",
                                    )
                                    self.update_data_queue(
                                        "ohlcv_1s", symbol, synthetic_bar
                                    )

                elif data_type == "ohlcv_1h" and needed_count > 0:
                    # Try to use 1m data to generate 1h data
                    if self.ensure_minimum_data("ohlcv_1m", symbol, 60):
                        logger.info(
                            f"Using 1m data to generate {needed_count} 1h bars for {symbol}"
                        )

                        minute_data = self.get_queue_data("ohlcv_1m", symbol, 300)
                        if minute_data and len(minute_data) >= 60:
                            # Group 1m bars into 1h bars
                            for hour_start in range(0, len(minute_data) - 60, 60):
                                if (
                                    len(self.data_queues["ohlcv_1h"][symbol])
                                    >= min_count
                                ):
                                    break

                                hour_bars = minute_data[hour_start : hour_start + 60]
                                if len(hour_bars) == 60:
                                    # Aggregate 1m bars into 1h bar
                                    hour_bar = OHLCVBar(
                                        symbol=symbol,
                                        timestamp=hour_bars[0].timestamp,
                                        open=hour_bars[0].open,
                                        high=max(bar.high for bar in hour_bars),
                                        low=min(bar.low for bar in hour_bars),
                                        close=hour_bars[-1].close,
                                        volume=sum(bar.volume for bar in hour_bars),
                                        timeframe="1h",
                                    )
                                    self.update_data_queue("ohlcv_1h", symbol, hour_bar)

            # Check if we now have minimum data
            all_satisfied = True
            for data_type, _, min_count in missing_data:
                if not self.ensure_minimum_data(data_type, symbol, min_count):
                    all_satisfied = False
                    break

            return all_satisfied

        except Exception as e:
            logger.error(f"Error in fallback data strategy: {e}")
            return False
>>>>>>> d49a473ed6f4aef55bfdd47d6370e53582be6b7b