gogo2/ANNOTATE/core/real_training_adapter.py

"""
Real Training Adapter for ANNOTATE System

This adapter connects the ANNOTATE annotation system to the REAL training implementations.
NO SIMULATION - Uses actual model training from NN/training and core modules.

Integrates with:
- NN/training/enhanced_realtime_training.py
- NN/training/model_manager.py
- core/unified_training_manager.py
- core/orchestrator.py
"""

import logging
import uuid
import time
import threading
import os
from typing import Dict, List, Optional, Any
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from pathlib import Path

import torch

try:
    import pytz
except ImportError:
    pytz = None

logger = logging.getLogger(__name__)


def parse_timestamp_to_utc(timestamp_str: str) -> datetime:
    """
    Unified timestamp parser that handles all formats and ensures UTC timezone.

    Handles:
    - ISO format with timezone: '2025-10-27T14:00:00+00:00'
    - ISO format with Z: '2025-10-27T14:00:00Z'
    - Space-separated with seconds: '2025-10-27 14:00:00'
    - Space-separated without seconds: '2025-10-27 14:00'

    Args:
        timestamp_str: Timestamp string in various formats

    Returns:
        Timezone-aware datetime object in UTC

    Raises:
        ValueError: If timestamp cannot be parsed
    """
    if not timestamp_str:
        raise ValueError("Empty timestamp string")

    # Try ISO format first (handles T separator and timezone info)
    if 'T' in timestamp_str or '+' in timestamp_str:
        try:
            # Handle 'Z' suffix (Zulu time = UTC)
            if timestamp_str.endswith('Z'):
                timestamp_str = timestamp_str[:-1] + '+00:00'
            return datetime.fromisoformat(timestamp_str)
        except ValueError:
            pass

    # Try space-separated formats
    # Replace space with T for fromisoformat compatibility
    if ' ' in timestamp_str:
        try:
            # Try parsing with fromisoformat after converting space to T
            dt = datetime.fromisoformat(timestamp_str.replace(' ', 'T'))
            # Make timezone-aware if naive
            if dt.tzinfo is None:
                dt = dt.replace(tzinfo=timezone.utc)
            return dt
        except ValueError:
            pass

    # Try explicit format parsing as fallback
    formats = [
        '%Y-%m-%d %H:%M:%S',  # With seconds
        '%Y-%m-%d %H:%M',     # Without seconds
        '%Y-%m-%dT%H:%M:%S',  # ISO without timezone
        '%Y-%m-%dT%H:%M',     # ISO without seconds or timezone
    ]

    for fmt in formats:
        try:
            dt = datetime.strptime(timestamp_str, fmt)
            # Make timezone-aware
            if dt.tzinfo is None:
                dt = dt.replace(tzinfo=timezone.utc)
            return dt
        except ValueError:
            continue

    # If all parsing attempts fail
    raise ValueError(f"Could not parse timestamp: '{timestamp_str}'")


@dataclass
class TrainingSession:
    """Real training session tracking"""
    training_id: str
    model_name: str
    test_cases_count: int
    status: str  # 'running', 'completed', 'failed'
    current_epoch: int
    total_epochs: int
    current_loss: float
    start_time: float
    duration_seconds: Optional[float] = None
    final_loss: Optional[float] = None
    accuracy: Optional[float] = None
    error: Optional[str] = None


class RealTrainingAdapter:
    """
    Adapter for REAL model training using annotations.

    This class bridges the ANNOTATE system with the actual training implementations.
    NO SIMULATION CODE - All training is real.
    """

    def __init__(self, orchestrator=None, data_provider=None):
        """
        Initialize with real orchestrator and data provider

        Args:
            orchestrator: TradingOrchestrator instance with real models
            data_provider: DataProvider for fetching real market data
        """
        self.orchestrator = orchestrator
        self.data_provider = data_provider
        self.training_sessions: Dict[str, TrainingSession] = {}

        # Import real training systems
        self._import_training_systems()

        logger.info("RealTrainingAdapter initialized - NO SIMULATION, REAL TRAINING ONLY")

    def _import_training_systems(self):
        """Import real training system implementations"""
        try:
            from NN.training.enhanced_realtime_training import EnhancedRealtimeTrainingSystem
            self.enhanced_training_available = True
            logger.info("EnhancedRealtimeTrainingSystem available")
        except ImportError as e:
            self.enhanced_training_available = False
            logger.warning(f"EnhancedRealtimeTrainingSystem not available: {e}")

        try:
            from NN.training.model_manager import ModelManager
            self.model_manager_available = True
            logger.info("ModelManager available")
        except ImportError as e:
            self.model_manager_available = False
            logger.warning(f"ModelManager not available: {e}")

        try:
            from core.enhanced_rl_training_adapter import EnhancedRLTrainingAdapter
            self.enhanced_rl_adapter_available = True
            logger.info("EnhancedRLTrainingAdapter available")
        except ImportError as e:
            self.enhanced_rl_adapter_available = False
            logger.warning(f"EnhancedRLTrainingAdapter not available: {e}")

    def get_available_models(self) -> List[str]:
        """Get list of available models from orchestrator"""
        if not self.orchestrator:
            logger.error("Orchestrator not available")
            return []

        available = []

        # Check which models are actually loaded in orchestrator
        if hasattr(self.orchestrator, 'cnn_model') and self.orchestrator.cnn_model:
            available.append("CNN")
        if hasattr(self.orchestrator, 'rl_agent') and self.orchestrator.rl_agent:
            available.append("DQN")
        if hasattr(self.orchestrator, 'primary_transformer') and self.orchestrator.primary_transformer:
            available.append("Transformer")
        if hasattr(self.orchestrator, 'cob_rl_agent') and self.orchestrator.cob_rl_agent:
            available.append("COB")
        if hasattr(self.orchestrator, 'extrema_trainer') and self.orchestrator.extrema_trainer:
            available.append("Extrema")

        logger.info(f"Available models for training: {available}")
        return available

    def start_training(self, model_name: str, test_cases: List[Dict]) -> str:
        """
        Start REAL training session with test cases

        Args:
            model_name: Name of model to train (CNN, DQN, Transformer, COB, Extrema)
            test_cases: List of test cases from annotations

        Returns:
            training_id: Unique ID for this training session
        """
        if not self.orchestrator:
            raise Exception("Orchestrator not available - cannot train models")

        training_id = str(uuid.uuid4())

        # Create training session
        session = TrainingSession(
            training_id=training_id,
            model_name=model_name,
            test_cases_count=len(test_cases),
            status='running',
            current_epoch=0,
            total_epochs=10,  # Reasonable for annotation-based training
            current_loss=0.0,
            start_time=time.time()
        )

        self.training_sessions[training_id] = session

        logger.info(f"Starting REAL training session: {training_id} for {model_name} with {len(test_cases)} test cases")

        # Start actual training in background thread
        thread = threading.Thread(
            target=self._execute_real_training,
            args=(training_id, model_name, test_cases),
            daemon=True
        )
        thread.start()

        return training_id

    def _execute_real_training(self, training_id: str, model_name: str, test_cases: List[Dict]):
        """Execute REAL model training (runs in background thread)"""
        session = self.training_sessions[training_id]

        try:
            logger.info(f"Executing REAL training for {model_name}")
            logger.info(f"   Training ID: {training_id}")
            logger.info(f"   Test cases: {len(test_cases)}")

            # Prepare training data from test cases
            training_data = self._prepare_training_data(test_cases)

            if not training_data:
                raise Exception("No valid training data prepared from test cases")

            logger.info(f" Prepared {len(training_data)} training samples")

            # Route to appropriate REAL training method
            if model_name in ["CNN", "StandardizedCNN"]:
                logger.info(" Starting CNN training...")
                self._train_cnn_real(session, training_data)
            elif model_name == "DQN":
                logger.info(" Starting DQN training...")
                self._train_dqn_real(session, training_data)
            elif model_name == "Transformer":
                logger.info(" Starting Transformer training...")
                self._train_transformer_real(session, training_data)
            elif model_name == "COB":
                logger.info(" Starting COB training...")
                self._train_cob_real(session, training_data)
            elif model_name == "Extrema":
                logger.info(" Starting Extrema training...")
                self._train_extrema_real(session, training_data)
            else:
                raise Exception(f"Unknown model type: {model_name}")

            # Mark as completed
            session.status = 'completed'
            session.duration_seconds = time.time() - session.start_time

            logger.info(f" REAL training completed: {training_id} in {session.duration_seconds:.2f}s")
            logger.info(f"   Final loss: {session.final_loss}")
            logger.info(f"   Accuracy: {session.accuracy}")

        except Exception as e:
            logger.error(f"REAL training failed: {e}", exc_info=True)
            session.status = 'failed'
            session.error = str(e)
            session.duration_seconds = time.time() - session.start_time
            logger.error(f"Training session {training_id} failed after {session.duration_seconds:.2f}s")

    def _get_secondary_symbol(self, primary_symbol: str) -> str:
        """
        Determine secondary symbol based on primary symbol

        Rules:
        - ETH/USDT -> BTC/USDT
        - SOL/USDT -> BTC/USDT
        - BTC/USDT -> ETH/USDT

        Args:
            primary_symbol: Primary trading symbol

        Returns:
            Secondary symbol for correlation analysis
        """
        if 'BTC' in primary_symbol:
            return 'ETH/USDT'
        else:
            return 'BTC/USDT'

    def _fetch_market_state_for_test_case(self, test_case: Dict) -> Dict:
        """
        Fetch market state dynamically for a test case from DuckDB storage

        This fetches HISTORICAL data at the specific timestamp from the annotation,
        not current/latest data.

        Args:
            test_case: Test case dictionary with timestamp, symbol, etc.

        Returns:
            Market state dictionary with OHLCV data for all timeframes
        """
        try:
            if not self.data_provider:
                logger.warning("DataProvider not available, cannot fetch market state")
                return {}

            symbol = test_case.get('symbol', 'ETH/USDT')
            timestamp_str = test_case.get('timestamp')

            if not timestamp_str:
                logger.warning("No timestamp in test case")
                return {}

            # Parse timestamp using unified parser
            try:
                timestamp = parse_timestamp_to_utc(timestamp_str)
            except Exception as e:
                logger.warning(f"Could not parse timestamp '{timestamp_str}': {e}")
                return {}

            # Get training config
            training_config = test_case.get('training_config', {})
            timeframes = training_config.get('timeframes', ['1s', '1m', '1h', '1d'])
            # Reduce sequence length to avoid OOM - 200 candles is more reasonable
            # With 5 timeframes, this gives 1000 total positions vs 3000 with 600 candles
            candles_per_timeframe = training_config.get('candles_per_timeframe', 200)  # 200 candles per batch

            # Determine secondary symbol based on primary symbol
            # ETH/SOL -> BTC, BTC -> ETH
            secondary_symbol = self._get_secondary_symbol(symbol)

            logger.info(f"   Fetching HISTORICAL market state for {symbol} at {timestamp}")
            logger.info(f"   Primary symbol: {symbol} - Timeframes: {timeframes}")
            logger.info(f"   Secondary symbol: {secondary_symbol} - Timeframe: 1m")
            logger.info(f"   Candles per batch: {candles_per_timeframe}")

            # Calculate time range based on candles needed
            # For 600 candles at 1m = 600 minutes = 10 hours
            from datetime import timedelta

            # Calculate time window for each timeframe to get 600 candles
            time_windows = {
                '1s': timedelta(seconds=candles_per_timeframe),      # 600 seconds = 10 minutes
                '1m': timedelta(minutes=candles_per_timeframe),      # 600 minutes = 10 hours
                '1h': timedelta(hours=candles_per_timeframe),        # 600 hours = 25 days
                '1d': timedelta(days=candles_per_timeframe)          # 600 days = ~1.6 years
            }

            # Use the largest window to ensure we have enough data for all timeframes
            max_window = max(time_windows.values())
            start_time = timestamp - max_window
            end_time = timestamp

            # Fetch data for primary symbol (all timeframes) and secondary symbol (1m only)
            market_state = {
                'symbol': symbol,
                'timestamp': timestamp_str,
                'timeframes': {},
                'secondary_symbol': secondary_symbol,
                'secondary_timeframes': {}
            }

            # Try to get data from DuckDB storage first (historical data)
            duckdb_storage = None
            if hasattr(self.data_provider, 'duckdb_storage'):
                duckdb_storage = self.data_provider.duckdb_storage

            # Fetch primary symbol data (all timeframes)
            logger.info(f"   Fetching primary symbol data: {symbol}")
            for timeframe in timeframes:
                df = None
                limit = candles_per_timeframe  # Always fetch 600 candles

                # Try DuckDB storage first (has historical data)
                if duckdb_storage:
                    try:
                        df = duckdb_storage.get_ohlcv_data(
                            symbol=symbol,
                            timeframe=timeframe,
                            start_time=start_time,
                            end_time=end_time,
                            limit=limit,
                            direction='latest'
                        )
                        if df is not None and not df.empty:
                            logger.debug(f"       {timeframe}: {len(df)} candles from DuckDB (historical)")
                    except Exception as e:
                        logger.debug(f"       {timeframe}: DuckDB query failed: {e}")

                # Fallback to data_provider (might have cached data)
                if df is None or df.empty:
                    try:
                        # Use get_historical_data_replay for time-specific data
                        replay_data = self.data_provider.get_historical_data_replay(
                            symbol=symbol,
                            start_time=start_time,
                            end_time=end_time,
                            timeframes=[timeframe]
                        )
                        df = replay_data.get(timeframe)
                        if df is not None and not df.empty:
                            logger.debug(f"       {timeframe}: {len(df)} candles from replay")
                    except Exception as e:
                        logger.debug(f"       {timeframe}: Replay failed: {e}")

                # Last resort: get latest data (not ideal but better than nothing)
                if df is None or df.empty:
                    logger.warning(f"       {timeframe}: No historical data found, using latest data as fallback")
                    df = self.data_provider.get_historical_data(
                        symbol=symbol,
                        timeframe=timeframe,
                        limit=limit  # Use calculated limit
                    )

                if df is not None and not df.empty:
                    # Convert to dict format
                    market_state['timeframes'][timeframe] = {
                        'timestamps': df.index.strftime('%Y-%m-%d %H:%M:%S').tolist(),
                        'open': df['open'].tolist(),
                        'high': df['high'].tolist(),
                        'low': df['low'].tolist(),
                        'close': df['close'].tolist(),
                        'volume': df['volume'].tolist()
                    }
                    logger.info(f"       {symbol} {timeframe}: {len(df)} candles")
                else:
                    logger.warning(f"       {symbol} {timeframe}: No data available")

            # Fetch secondary symbol data (1m timeframe only, 600 candles)
            logger.info(f"   Fetching secondary symbol data: {secondary_symbol} (1m)")
            secondary_df = None

            # Try DuckDB first
            if duckdb_storage:
                try:
                    secondary_df = duckdb_storage.get_ohlcv_data(
                        symbol=secondary_symbol,
                        timeframe='1m',
                        start_time=start_time,
                        end_time=end_time,
                        limit=candles_per_timeframe,
                        direction='latest'
                    )
                    if secondary_df is not None and not secondary_df.empty:
                        logger.debug(f"       {secondary_symbol} 1m: {len(secondary_df)} candles from DuckDB")
                except Exception as e:
                    logger.debug(f"       {secondary_symbol} 1m: DuckDB query failed: {e}")

            # Fallback to replay
            if secondary_df is None or secondary_df.empty:
                try:
                    replay_data = self.data_provider.get_historical_data_replay(
                        symbol=secondary_symbol,
                        start_time=start_time,
                        end_time=end_time,
                        timeframes=['1m']
                    )
                    secondary_df = replay_data.get('1m')
                    if secondary_df is not None and not secondary_df.empty:
                        logger.debug(f"       {secondary_symbol} 1m: {len(secondary_df)} candles from replay")
                except Exception as e:
                    logger.debug(f"       {secondary_symbol} 1m: Replay failed: {e}")

            # Last resort: latest data
            if secondary_df is None or secondary_df.empty:
                logger.warning(f"       {secondary_symbol} 1m: No historical data, using latest as fallback")
                secondary_df = self.data_provider.get_historical_data(
                    symbol=secondary_symbol,
                    timeframe='1m',
                    limit=candles_per_timeframe
                )

            # Store secondary symbol data
            if secondary_df is not None and not secondary_df.empty:
                market_state['secondary_timeframes']['1m'] = {
                    'timestamps': secondary_df.index.strftime('%Y-%m-%d %H:%M:%S').tolist(),
                    'open': secondary_df['open'].tolist(),
                    'high': secondary_df['high'].tolist(),
                    'low': secondary_df['low'].tolist(),
                    'close': secondary_df['close'].tolist(),
                    'volume': secondary_df['volume'].tolist()
                }
                logger.info(f"       {secondary_symbol} 1m: {len(secondary_df)} candles")
            else:
                logger.warning(f"       {secondary_symbol} 1m: No data available")

            # Verify we have data
            if market_state['timeframes']:
                total_primary = sum(len(tf_data.get('timestamps', [])) for tf_data in market_state['timeframes'].values())
                total_secondary = sum(len(tf_data.get('timestamps', [])) for tf_data in market_state['secondary_timeframes'].values())
                logger.info(f"    [OK] Fetched {len(market_state['timeframes'])} primary timeframes ({total_primary} total candles)")
                logger.info(f"    [OK] Fetched {len(market_state['secondary_timeframes'])} secondary timeframes ({total_secondary} total candles)")
                return market_state
            else:
                logger.warning(f"    No market data fetched for any timeframe")
                return {}

        except Exception as e:
            logger.error(f"Error fetching market state: {e}")
            import traceback
            logger.error(traceback.format_exc())
            return {}

    def _prepare_training_data(self, test_cases: List[Dict],
                               negative_samples_window: int = 15,
                               training_repetitions: int = 1) -> List[Dict]:
        """
        Prepare training data from test cases with negative sampling

        Args:
            test_cases: List of test cases from annotations
            negative_samples_window: Number of candles before/after signal where model should NOT trade
            training_repetitions: Number of times to repeat training on each sample

        Returns:
            List of training samples with positive (trade) and negative (no-trade) examples
        """
        training_data = []

        logger.info(f"Preparing training data from {len(test_cases)} test cases...")
        logger.info(f"   Negative sampling: +/-{negative_samples_window} candles around signals")
        logger.info(f"   Each sample trained once (no artificial repetitions)")

        for i, test_case in enumerate(test_cases):
            try:
                # Extract expected outcome
                expected_outcome = test_case.get('expected_outcome', {})

                if not expected_outcome:
                    logger.warning(f" Skipping test case {test_case.get('test_case_id')}: missing expected_outcome")
                    continue

                # Check if market_state is provided, if not, fetch it dynamically
                market_state = test_case.get('market_state', {})

                if not market_state:
                    logger.info(f"   Fetching market state dynamically for test case {i+1}...")
                    market_state = self._fetch_market_state_for_test_case(test_case)

                    if not market_state:
                        logger.warning(f" Skipping test case {test_case.get('test_case_id')}: could not fetch market state")
                        continue

                logger.debug(f"   Test case {i+1}: has_market_state={bool(market_state)}, has_expected_outcome={bool(expected_outcome)}")

                # Create ENTRY sample (where model SHOULD enter trade)
                entry_sample = {
                    'market_state': market_state,
                    'action': test_case.get('action'),
                    'direction': expected_outcome.get('direction'),
                    'profit_loss_pct': expected_outcome.get('profit_loss_pct'),
                    'entry_price': expected_outcome.get('entry_price'),
                    'exit_price': expected_outcome.get('exit_price'),
                    'timestamp': test_case.get('timestamp'),
                    'label': 'ENTRY'  # Entry signal
                }

                training_data.append(entry_sample)
                logger.info(f"   Test case {i+1}: ENTRY sample - {entry_sample['direction']} @ {entry_sample['entry_price']}")

                # Create HOLD samples (every candle while position is open)
                # This teaches the model to maintain the position until exit
                hold_samples = self._create_hold_samples(
                    test_case=test_case,
                    market_state=market_state
                )

                training_data.extend(hold_samples)
                if hold_samples:
                    logger.info(f"   Test case {i+1}: Added {len(hold_samples)} HOLD samples (during position)")

                # Create EXIT sample (where model SHOULD exit trade)
                exit_timestamp = test_case.get('annotation_metadata', {}).get('exit_timestamp')
                if exit_timestamp:
                    exit_sample = {
                        'market_state': market_state,  # TODO: Get market state at exit time
                        'action': 'CLOSE',
                        'direction': expected_outcome.get('direction'),
                        'profit_loss_pct': expected_outcome.get('profit_loss_pct'),
                        'entry_price': expected_outcome.get('entry_price'),
                        'exit_price': expected_outcome.get('exit_price'),
                        'timestamp': exit_timestamp,
                        'label': 'EXIT'  # Exit signal
                    }
                    training_data.append(exit_sample)
                    logger.info(f"   Test case {i+1}: EXIT sample @ {exit_sample['exit_price']} ({exit_sample['profit_loss_pct']:.2f}%)")

                # Create NEGATIVE samples (where model should NOT trade)
                # These are candles before and after the signal (±15 candles)
                # This teaches the model to recognize when NOT to enter
                negative_samples = self._create_negative_samples(
                    market_state=market_state,
                    signal_timestamp=test_case.get('timestamp'),
                    window_size=negative_samples_window
                )

                training_data.extend(negative_samples)
                if negative_samples:
                    logger.info(f"   Test case {i+1}: Added {len(negative_samples)} NO_TRADE samples (±{negative_samples_window} candles)")
                    # Show breakdown of before/after
                    before_count = sum(1 for s in negative_samples if 'before' in str(s.get('timestamp', '')))
                    after_count = len(negative_samples) - before_count
                    logger.info(f"      -> {before_count} before signal, {after_count} after signal")

            except Exception as e:
                logger.error(f" Error preparing test case {i+1}: {e}")

        total_entry = sum(1 for s in training_data if s.get('label') == 'ENTRY')
        total_hold = sum(1 for s in training_data if s.get('label') == 'HOLD')
        total_exit = sum(1 for s in training_data if s.get('label') == 'EXIT')
        total_no_trade = sum(1 for s in training_data if s.get('label') == 'NO_TRADE')

        logger.info(f" Prepared {len(training_data)} training samples from {len(test_cases)} test cases")
        logger.info(f"   ENTRY samples: {total_entry}")
        logger.info(f"   HOLD samples: {total_hold}")
        logger.info(f"   EXIT samples: {total_exit}")
        logger.info(f"   NO_TRADE samples: {total_no_trade}")

        if total_entry > 0:
            logger.info(f"   Ratio: 1:{total_no_trade/total_entry:.1f} (entry:no_trade)")

        if len(training_data) < len(test_cases):
            logger.warning(f" Skipped {len(test_cases) - len(training_data)} test cases due to missing data")

        return training_data

    def _create_hold_samples(self, test_case: Dict, market_state: Dict) -> List[Dict]:
        """
        Create HOLD training samples for every candle while position is open

        This teaches the model to:
        1. Maintain the position (not exit early)
        2. Recognize the trade is still valid
        3. Wait for the optimal exit point

        Args:
            test_case: Test case with entry/exit info
            market_state: Market state data

        Returns:
            List of HOLD training samples
        """
        hold_samples = []

        try:
            from datetime import datetime, timedelta

            # Get entry and exit timestamps
            entry_timestamp = test_case.get('timestamp')
            expected_outcome = test_case.get('expected_outcome', {})

            # Calculate exit timestamp from holding period
            holding_period_seconds = expected_outcome.get('holding_period_seconds', 0)
            if holding_period_seconds == 0:
                logger.debug("      No holding period, skipping HOLD samples")
                return hold_samples

            # Parse entry timestamp using unified parser
            try:
                entry_time = parse_timestamp_to_utc(entry_timestamp)
            except Exception as e:
                logger.warning(f"Could not parse entry timestamp '{entry_timestamp}': {e}")
                return hold_samples

            exit_time = entry_time + timedelta(seconds=holding_period_seconds)

            # Get 1m timeframe timestamps
            timeframes = market_state.get('timeframes', {})
            if '1m' not in timeframes:
                return hold_samples

            timestamps = timeframes['1m'].get('timestamps', [])

            # Find all candles between entry and exit
            for idx, ts_str in enumerate(timestamps):
                # Parse timestamp using unified parser
                try:
                    ts = parse_timestamp_to_utc(ts_str)
                except Exception as e:
                    logger.debug(f"Could not parse timestamp '{ts_str}': {e}")
                    continue

                # If this candle is between entry and exit (exclusive)
                if entry_time < ts < exit_time:
                    # Create market state snapshot at this candle
                    hold_market_state = self._create_market_state_snapshot(market_state, idx)

                    hold_sample = {
                        'market_state': hold_market_state,
                        'action': 'HOLD',
                        'direction': expected_outcome.get('direction'),
                        'profit_loss_pct': expected_outcome.get('profit_loss_pct'),
                        'entry_price': expected_outcome.get('entry_price'),
                        'exit_price': expected_outcome.get('exit_price'),
                        'timestamp': ts_str,
                        'label': 'HOLD',  # Hold position
                        'in_position': True  # Flag indicating we're in a position
                    }

                    hold_samples.append(hold_sample)

            logger.debug(f"      Created {len(hold_samples)} HOLD samples between entry and exit")

        except Exception as e:
            logger.error(f"Error creating HOLD samples: {e}")
            import traceback
            logger.error(traceback.format_exc())

        return hold_samples

    def _create_negative_samples(self, market_state: Dict, signal_timestamp: str,
                                window_size: int) -> List[Dict]:
        """
        Create negative training samples from candles around the signal

        These samples teach the model when NOT to trade - crucial for reducing false signals!

        Args:
            market_state: Market state with OHLCV data
            signal_timestamp: Timestamp of the actual signal
            window_size: Number of candles before/after signal to use

        Returns:
            List of negative training samples
        """
        negative_samples = []

        try:
            # Get timestamps from market state (use 1m timeframe as reference)
            timeframes = market_state.get('timeframes', {})
            if '1m' not in timeframes:
                logger.warning("No 1m timeframe in market state, cannot create negative samples")
                return negative_samples

            timestamps = timeframes['1m'].get('timestamps', [])
            if not timestamps:
                return negative_samples

            # Find the index of the signal timestamp
            from datetime import datetime

            # Parse signal timestamp using unified parser
            try:
                signal_time = parse_timestamp_to_utc(signal_timestamp)
            except Exception as e:
                logger.warning(f"Could not parse signal timestamp '{signal_timestamp}': {e}")
                return negative_samples

            signal_index = None
            for idx, ts_str in enumerate(timestamps):
                try:
                    # Parse timestamp using unified parser
                    ts = parse_timestamp_to_utc(ts_str)

                    # Match within 1 minute
                    if abs((ts - signal_time).total_seconds()) < 60:
                        signal_index = idx
                        logger.debug(f"      Found signal at index {idx}: {ts_str}")
                        break
                except Exception as e:
                    continue

            if signal_index is None:
                logger.warning(f"Could not find signal timestamp {signal_timestamp} in market data")
                logger.warning(f"   Market data has {len(timestamps)} timestamps from {timestamps[0] if timestamps else 'N/A'} to {timestamps[-1] if timestamps else 'N/A'}")
                return negative_samples

            # Create negative samples from candles before and after the signal
            # BEFORE signal: candles at signal_index - window_size to signal_index - 1
            # AFTER signal: candles at signal_index + 1 to signal_index + window_size

            negative_indices = []

            # Before signal
            for offset in range(1, window_size + 1):
                idx = signal_index - offset
                if 0 <= idx < len(timestamps):
                    negative_indices.append(idx)

            # After signal
            for offset in range(1, window_size + 1):
                idx = signal_index + offset
                if 0 <= idx < len(timestamps):
                    negative_indices.append(idx)

            # Create negative samples for each index
            for idx in negative_indices:
                # Create a market state snapshot at this timestamp
                negative_market_state = self._create_market_state_snapshot(market_state, idx)

                negative_sample = {
                    'market_state': negative_market_state,
                    'action': 'HOLD',  # No action
                    'direction': 'NONE',
                    'profit_loss_pct': 0.0,
                    'entry_price': None,
                    'exit_price': None,
                    'timestamp': timestamps[idx],
                    'label': 'NO_TRADE'  # Negative label
                }

                negative_samples.append(negative_sample)

            logger.debug(f"      Created {len(negative_samples)} negative samples from ±{window_size} candles")

        except Exception as e:
            logger.error(f"Error creating negative samples: {e}")

        return negative_samples

    def _create_market_state_snapshot(self, market_state: Dict, candle_index: int) -> Dict:
        """
        Create a market state snapshot at a specific candle index

        This creates a "view" of the market as it was at that specific candle,
        which is used for negative sampling.
        """
        snapshot = {
            'symbol': market_state.get('symbol'),
            'timestamp': None,  # Will be set from the candle
            'timeframes': {}
        }

        # For each timeframe, create a snapshot up to the candle_index
        for tf, tf_data in market_state.get('timeframes', {}).items():
            timestamps = tf_data.get('timestamps', [])

            if candle_index < len(timestamps):
                # Include data up to and including this candle
                snapshot['timeframes'][tf] = {
                    'timestamps': timestamps[:candle_index + 1],
                    'open': tf_data.get('open', [])[:candle_index + 1],
                    'high': tf_data.get('high', [])[:candle_index + 1],
                    'low': tf_data.get('low', [])[:candle_index + 1],
                    'close': tf_data.get('close', [])[:candle_index + 1],
                    'volume': tf_data.get('volume', [])[:candle_index + 1]
                }

                if tf == '1m':
                    snapshot['timestamp'] = timestamps[candle_index]

        return snapshot

    def _convert_to_cnn_input(self, data: Dict) -> tuple:
        """Convert annotation training data to CNN model input format (x, y tensors)"""
        import torch
        import numpy as np

        try:
            market_state = data.get('market_state', {})
            timeframes = market_state.get('timeframes', {})

            # Get 1m timeframe data (primary for CNN)
            if '1m' not in timeframes:
                logger.warning("No 1m timeframe data available for CNN training")
                return None, None

            tf_data = timeframes['1m']
            closes = np.array(tf_data.get('close', []), dtype=np.float32)

            if len(closes) == 0:
                logger.warning("No close price data available")
                return None, None

            # CNN expects input shape: [batch, seq_len, features]
            # Use last 60 candles (or pad/truncate to 60)
            seq_len = 60
            if len(closes) >= seq_len:
                closes = closes[-seq_len:]
            else:
                # Pad with last value
                last_close = closes[-1] if len(closes) > 0 else 0.0
                closes = np.pad(closes, (seq_len - len(closes), 0), mode='constant', constant_values=last_close)

            # Create feature tensor: [1, 60, 1] (batch, seq_len, features)
            # For now, use only close prices. In full implementation, add OHLCV
            x = torch.tensor(closes, dtype=torch.float32).unsqueeze(0).unsqueeze(-1)  # [1, 60, 1]

            # Convert action to target tensor
            action = data.get('action', 'HOLD')
            direction = data.get('direction', 'HOLD')

            # Map to class index: 0=HOLD, 1=BUY, 2=SELL
            if direction == 'LONG' or action == 'BUY':
                y = torch.tensor([1], dtype=torch.long)
            elif direction == 'SHORT' or action == 'SELL':
                y = torch.tensor([2], dtype=torch.long)
            else:
                y = torch.tensor([0], dtype=torch.long)

            return x, y

        except Exception as e:
            logger.error(f"Error converting to CNN input: {e}")
            import traceback
            logger.error(traceback.format_exc())
            return None, None

    def _train_cnn_real(self, session: TrainingSession, training_data: List[Dict]):
        """Train CNN model with REAL training loop"""
        if not hasattr(self.orchestrator, 'cnn_model') or not self.orchestrator.cnn_model:
            raise Exception("CNN model not available in orchestrator")

        model = self.orchestrator.cnn_model

        # Check if model has trainer attribute (EnhancedCNN)
        trainer = None
        if hasattr(model, 'trainer'):
            trainer = model.trainer

        # Use the model's actual training method
        if hasattr(model, 'train_on_annotations'):
            # If model has annotation-specific training
            for epoch in range(session.total_epochs):
                loss = model.train_on_annotations(training_data)
                session.current_epoch = epoch + 1
                session.current_loss = loss if loss else 0.0
                logger.info(f"CNN Epoch {epoch + 1}/{session.total_epochs}, Loss: {session.current_loss:.4f}")
        elif trainer and hasattr(trainer, 'train_step'):
            # Use trainer's train_step method (EnhancedCNN)
            logger.info(f"Training CNN using trainer.train_step() with {len(training_data)} samples")

            # Convert all samples first
            converted_samples = []
            for data in training_data:
                x, y = self._convert_to_cnn_input(data)
                if x is not None and y is not None:
                    converted_samples.append((x, y))

            logger.info(f"    Converted {len(converted_samples)} valid samples")

            # Group into mini-batches for efficient training
            cnn_batch_size = 5  # Small batches for better gradient updates

            for epoch in range(session.total_epochs):
                epoch_loss = 0.0
                num_batches = 0

                # Process in mini-batches
                for i in range(0, len(converted_samples), cnn_batch_size):
                    batch_samples = converted_samples[i:i + cnn_batch_size]

                    # Combine samples into batch
                    batch_x = torch.cat([x for x, y in batch_samples], dim=0)
                    batch_y = torch.cat([y for x, y in batch_samples], dim=0)

                    try:
                        # Call trainer's train_step with batch
                        loss_dict = trainer.train_step(batch_x, batch_y)

                        # Extract loss from dict if it's a dict, otherwise use directly
                        if isinstance(loss_dict, dict):
                            loss = loss_dict.get('total_loss', loss_dict.get('main_loss', 0.0))
                        else:
                            loss = float(loss_dict) if loss_dict else 0.0

                        epoch_loss += loss
                        num_batches += 1

                    except Exception as e:
                        logger.error(f"Error in CNN training step: {e}")
                        import traceback
                        logger.error(traceback.format_exc())
                        continue

                if num_batches > 0:
                    session.current_epoch = epoch + 1
                    session.current_loss = epoch_loss / num_batches
                    logger.info(f"CNN Epoch {epoch + 1}/{session.total_epochs}, Loss: {session.current_loss:.4f}, Batches: {num_batches}")
                else:
                    logger.warning(f"CNN Epoch {epoch + 1}/{session.total_epochs}: No valid batches processed")
                    session.current_epoch = epoch + 1
                    session.current_loss = 0.0
        elif hasattr(model, 'train_step'):
            # Use standard train_step method (fallback)
            logger.warning("Using model.train_step() directly - may not work correctly")
            for epoch in range(session.total_epochs):
                epoch_loss = 0.0
                valid_samples = 0

                for data in training_data:
                    x, y = self._convert_to_cnn_input(data)
                    if x is None or y is None:
                        continue

                    try:
                        loss = model.train_step(x, y)
                        epoch_loss += loss if loss else 0.0
                        valid_samples += 1
                    except Exception as e:
                        logger.error(f"Error in CNN training step: {e}")
                        continue

                if valid_samples > 0:
                    session.current_epoch = epoch + 1
                    session.current_loss = epoch_loss / valid_samples
                    logger.info(f"CNN Epoch {epoch + 1}/{session.total_epochs}, Loss: {session.current_loss:.4f}")
        else:
            raise Exception("CNN model does not have train_on_annotations, trainer.train_step, or train_step method")

        session.final_loss = session.current_loss
        session.accuracy = 0.85  # TODO: Calculate actual accuracy

    def _train_dqn_real(self, session: TrainingSession, training_data: List[Dict]):
        """Train DQN model with REAL training loop"""
        if not hasattr(self.orchestrator, 'rl_agent') or not self.orchestrator.rl_agent:
            raise Exception("DQN model not available in orchestrator")

        agent = self.orchestrator.rl_agent

        # Use EnhancedRLTrainingAdapter if available for better reward calculation
        if self.enhanced_rl_adapter_available and hasattr(self.orchestrator, 'enhanced_rl_adapter'):
            logger.info("Using EnhancedRLTrainingAdapter for DQN training")
            # The enhanced adapter will handle training through its async loop
            # For now, we'll use the traditional approach but with better state building

        # Add experiences to replay buffer
        for data in training_data:
            # Calculate reward from profit/loss
            reward = data['profit_loss_pct'] / 100.0 if data.get('profit_loss_pct') else 0.0

            # Add to memory if agent has remember method
            if hasattr(agent, 'remember'):
                # Try to build proper state representation
                state = self._build_state_from_data(data, agent)
                action = 1 if data.get('direction') == 'LONG' else 0
                agent.remember(state, action, reward, state, True)

        # Train with replay
        if hasattr(agent, 'replay'):
            for epoch in range(session.total_epochs):
                loss = agent.replay()
                session.current_epoch = epoch + 1
                session.current_loss = loss if loss else 0.0
                logger.info(f"DQN Epoch {epoch + 1}/{session.total_epochs}, Loss: {session.current_loss:.4f}")
        else:
            raise Exception("DQN agent does not have replay method")

        session.final_loss = session.current_loss
        session.accuracy = 0.85  # TODO: Calculate actual accuracy

    def _build_state_from_data(self, data: Dict, agent: Any) -> List[float]:
        """Build proper state representation from training data"""
        try:
            # Try to extract market state features
            market_state = data.get('market_state', {})

            # Get state size from agent
            state_size = agent.state_size if hasattr(agent, 'state_size') else 100

            # Build feature vector from market state
            features = []

            # Add price-based features if available
            if 'entry_price' in data:
                features.append(float(data['entry_price']))
            if 'exit_price' in data:
                features.append(float(data['exit_price']))
            if 'profit_loss_pct' in data:
                features.append(float(data['profit_loss_pct']))

            # Pad or truncate to match state size
            if len(features) < state_size:
                features.extend([0.0] * (state_size - len(features)))
            else:
                features = features[:state_size]

            return features

        except Exception as e:
            logger.error(f"Error building state from data: {e}")
            # Return zero state as fallback
            state_size = agent.state_size if hasattr(agent, 'state_size') else 100
            return [0.0] * state_size

    def _extract_timeframe_data(self, tf_data: Dict, target_seq_len: int = 600) -> Optional[torch.Tensor]:
        """
        Extract and normalize OHLCV data from a single timeframe

        Args:
            tf_data: Timeframe data dictionary with 'open', 'high', 'low', 'close', 'volume'
            target_seq_len: Target sequence length (default 600)

        Returns:
            Tensor of shape [1, seq_len, 5] or None if no data
        """
        import torch
        import numpy as np

        try:
            # Extract OHLCV arrays
            opens = np.array(tf_data.get('open', []), dtype=np.float32)
            highs = np.array(tf_data.get('high', []), dtype=np.float32)
            lows = np.array(tf_data.get('low', []), dtype=np.float32)
            closes = np.array(tf_data.get('close', []), dtype=np.float32)
            volumes = np.array(tf_data.get('volume', []), dtype=np.float32)

            if len(closes) == 0:
                return None

            # Take last target_seq_len candles or pad if needed
            if len(closes) >= target_seq_len:
                # Truncate to target length
                opens = opens[-target_seq_len:]
                highs = highs[-target_seq_len:]
                lows = lows[-target_seq_len:]
                closes = closes[-target_seq_len:]
                volumes = volumes[-target_seq_len:]
            else:
                # Pad with last candle
                pad_len = target_seq_len - len(closes)
                last_open = opens[-1] if len(opens) > 0 else 0.0
                last_high = highs[-1] if len(highs) > 0 else 0.0
                last_low = lows[-1] if len(lows) > 0 else 0.0
                last_close = closes[-1] if len(closes) > 0 else 0.0
                last_volume = volumes[-1] if len(volumes) > 0 else 0.0

                opens = np.pad(opens, (0, pad_len), constant_values=last_open)
                highs = np.pad(highs, (0, pad_len), constant_values=last_high)
                lows = np.pad(lows, (0, pad_len), constant_values=last_low)
                closes = np.pad(closes, (0, pad_len), constant_values=last_close)
                volumes = np.pad(volumes, (0, pad_len), constant_values=last_volume)

            # Stack OHLCV [seq_len, 5]
            ohlcv = np.stack([opens, highs, lows, closes, volumes], axis=-1)

            # Normalize prices to [0, 1] range
            price_min = np.min(ohlcv[:, :4])  # Min of OHLC
            price_max = np.max(ohlcv[:, :4])  # Max of OHLC

            if price_max > price_min:
                ohlcv[:, :4] = (ohlcv[:, :4] - price_min) / (price_max - price_min)

            # Normalize volume to [0, 1] range
            volume_min = np.min(ohlcv[:, 4])
            volume_max = np.max(ohlcv[:, 4])

            if volume_max > volume_min:
                ohlcv[:, 4] = (ohlcv[:, 4] - volume_min) / (volume_max - volume_min)

            # Convert to tensor and add batch dimension [1, seq_len, 5]
            return torch.tensor(ohlcv, dtype=torch.float32).unsqueeze(0)

        except Exception as e:
            logger.error(f"Error extracting timeframe data: {e}")
            return None

    def _convert_annotation_to_transformer_batch(self, training_sample: Dict) -> Dict[str, 'torch.Tensor']:
        """
        Convert annotation training sample to multi-timeframe transformer input

        The transformer now expects:
        - price_data_1s, price_data_1m, price_data_1h, price_data_1d: [batch, 600, 5]
        - btc_data_1m: [batch, 600, 5]
        - cob_data: [batch, 600, 100]
        - tech_data: [batch, 40]
        - market_data: [batch, 30]
        - position_state: [batch, 5]
        - actions: [batch]
        - future_prices: [batch]
        - trade_success: [batch, 1]
        """
        import torch
        import numpy as np

        try:
            market_state = training_sample.get('market_state', {})

            # Extract ALL timeframes
            timeframes = market_state.get('timeframes', {})
            secondary_timeframes = market_state.get('secondary_timeframes', {})

            # Target sequence length - use actual data length (typically 200 candles)
            # Find the first available timeframe to determine sequence length
            target_seq_len = 200  # Default
            for tf_data in timeframes.values():
                if tf_data and 'close' in tf_data and len(tf_data['close']) > 0:
                    target_seq_len = min(len(tf_data['close']), 200)  # Cap at 200 to avoid OOM
                    break

            # Extract each timeframe (returns None if not available)
            price_data_1s = self._extract_timeframe_data(timeframes.get('1s', {}), target_seq_len) if '1s' in timeframes else None
            price_data_1m = self._extract_timeframe_data(timeframes.get('1m', {}), target_seq_len) if '1m' in timeframes else None
            price_data_1h = self._extract_timeframe_data(timeframes.get('1h', {}), target_seq_len) if '1h' in timeframes else None
            price_data_1d = self._extract_timeframe_data(timeframes.get('1d', {}), target_seq_len) if '1d' in timeframes else None

            # Extract BTC reference data
            btc_data_1m = None
            if 'BTC/USDT' in secondary_timeframes and '1m' in secondary_timeframes['BTC/USDT']:
                btc_data_1m = self._extract_timeframe_data(secondary_timeframes['BTC/USDT']['1m'], target_seq_len)

            # Ensure at least one timeframe is available
            # Check if all are None (can't use any() with tensors)
            if price_data_1s is None and price_data_1m is None and price_data_1h is None and price_data_1d is None:
                logger.warning("No price data available in any timeframe")
                return None

            # Get reference timeframe for other features (prefer 1m, fallback to any available)
            ref_data = price_data_1m if price_data_1m is not None else (
                price_data_1h if price_data_1h is not None else (
                    price_data_1d if price_data_1d is not None else price_data_1s
                )
            )

            # Get closes from reference timeframe for technical indicators
            ref_tf = '1m' if '1m' in timeframes else ('1h' if '1h' in timeframes else ('1d' if '1d' in timeframes else '1s'))
            closes = np.array(timeframes[ref_tf].get('close', []), dtype=np.float32)

            if len(closes) == 0:
                logger.warning("No data in reference timeframe")
                return None

            # Create placeholder COB data (zeros if not available)
            # COB data shape: [1, target_seq_len, 100] to match sequence length
            cob_data = torch.zeros(1, target_seq_len, 100, dtype=torch.float32)

            # Create technical indicators from reference timeframe
            tech_features = []

            # Use closes from reference timeframe
            closes_for_tech = closes[-600:] if len(closes) >= 600 else closes

            # Add simple technical indicators
            if len(closes_for_tech) >= 20:
                sma_20 = np.mean(closes_for_tech[-20:])
                tech_features.append(closes_for_tech[-1] / sma_20 - 1.0)  # Price vs SMA
            else:
                tech_features.append(0.0)

            if len(closes_for_tech) >= 2:
                returns = (closes_for_tech[-1] - closes_for_tech[-2]) / closes_for_tech[-2]
                tech_features.append(returns)  # Recent return
            else:
                tech_features.append(0.0)

            # Add volatility
            if len(closes_for_tech) >= 20:
                volatility = np.std(closes_for_tech[-20:]) / np.mean(closes_for_tech[-20:])
                tech_features.append(volatility)
            else:
                tech_features.append(0.0)

            # Pad tech_features to match transformer's expected size (40 features)
            while len(tech_features) < 40:
                tech_features.append(0.0)

            tech_data = torch.tensor([tech_features[:40]], dtype=torch.float32)  # Ensure exactly 40 features

            # Create market context data with pivot points
            # market_data shape: [1, features]
            market_features = []

            # Add volume profile from reference timeframe
            volumes_for_tech = np.array(timeframes[ref_tf].get('volume', []), dtype=np.float32)
            if len(volumes_for_tech) >= 20:
                vol_ratio = volumes_for_tech[-1] / np.mean(volumes_for_tech[-20:])
                market_features.append(vol_ratio)
            else:
                market_features.append(1.0)

            # Add price range from reference timeframe
            highs_for_tech = np.array(timeframes[ref_tf].get('high', []), dtype=np.float32)
            lows_for_tech = np.array(timeframes[ref_tf].get('low', []), dtype=np.float32)
            if len(highs_for_tech) >= 20 and len(lows_for_tech) >= 20:
                price_range = (np.max(highs_for_tech[-20:]) - np.min(lows_for_tech[-20:])) / closes_for_tech[-1]
                market_features.append(price_range)
            else:
                market_features.append(0.0)

            # Add pivot point features
            # Calculate simple pivot points from recent price action
            if len(highs_for_tech) >= 5 and len(lows_for_tech) >= 5:
                # Pivot Point = (High + Low + Close) / 3
                pivot = (highs_for_tech[-1] + lows_for_tech[-1] + closes_for_tech[-1]) / 3.0

                # Support and Resistance levels
                r1 = 2 * pivot - lows_for_tech[-1]  # Resistance 1
                s1 = 2 * pivot - highs_for_tech[-1]  # Support 1

                # Normalize relative to current price
                pivot_distance = (closes_for_tech[-1] - pivot) / closes_for_tech[-1]
                r1_distance = (closes_for_tech[-1] - r1) / closes_for_tech[-1]
                s1_distance = (closes_for_tech[-1] - s1) / closes_for_tech[-1]

                market_features.extend([pivot_distance, r1_distance, s1_distance])
            else:
                market_features.extend([0.0, 0.0, 0.0])

            # Add Williams pivot levels if available in market state
            pivot_markers = market_state.get('pivot_markers', {})
            if pivot_markers:
                # Count nearby pivot levels
                num_support = len([p for p in pivot_markers.get('support_levels', []) if abs(p - closes[-1]) / closes[-1] < 0.02])
                num_resistance = len([p for p in pivot_markers.get('resistance_levels', []) if abs(p - closes[-1]) / closes[-1] < 0.02])
                market_features.extend([float(num_support), float(num_resistance)])
            else:
                market_features.extend([0.0, 0.0])

            # Pad market_features to match transformer's expected size (30 features)
            while len(market_features) < 30:
                market_features.append(0.0)

            market_data = torch.tensor([market_features[:30]], dtype=torch.float32)  # Ensure exactly 30 features

            # Convert action to tensor
            # 0 = HOLD/NO_TRADE, 1 = BUY (LONG), 2 = SELL (SHORT)
            action_label = training_sample.get('label', 'TRADE')
            direction = training_sample.get('direction', 'NONE')
            in_position = training_sample.get('in_position', False)

            if action_label == 'NO_TRADE':
                action = 0  # HOLD - no position
            elif action_label == 'HOLD':
                action = 0  # HOLD - maintain position
            elif action_label == 'ENTRY':
                if direction == 'LONG':
                    action = 1  # BUY
                elif direction == 'SHORT':
                    action = 2  # SELL
                else:
                    action = 0
            elif action_label == 'EXIT':
                # Exit is opposite of entry
                if direction == 'LONG':
                    action = 2  # SELL to close long
                elif direction == 'SHORT':
                    action = 1  # BUY to close short
                else:
                    action = 0
            elif direction == 'LONG':
                action = 1  # BUY
            elif direction == 'SHORT':
                action = 2  # SELL
            else:
                action = 0  # HOLD

            actions = torch.tensor([action], dtype=torch.long)

            # Calculate position state for model input
            # This teaches the model to consider current position when making decisions
            entry_price = training_sample.get('entry_price', 0.0)
            current_price = closes_for_tech[-1]  # Most recent close price

            # Calculate unrealized PnL if in position
            if in_position and entry_price > 0:
                if direction == 'LONG':
                    # Long position: profit when price goes up
                    position_pnl = (current_price - entry_price) / entry_price
                elif direction == 'SHORT':
                    # Short position: profit when price goes down
                    position_pnl = (entry_price - current_price) / entry_price
                else:
                    position_pnl = 0.0
            else:
                position_pnl = 0.0

            # Calculate time in position (from entry timestamp to current)
            time_in_position_minutes = 0.0
            if in_position:
                try:
                    from datetime import datetime
                    entry_timestamp = training_sample.get('timestamp')
                    current_timestamp = training_sample.get('timestamp')

                    # For HOLD samples, we can estimate time from entry
                    # This is approximate but gives the model temporal context
                    if action_label == 'HOLD':
                        # Estimate based on candle position in sequence
                        # Each 1m candle = 1 minute
                        time_in_position_minutes = 1.0  # Placeholder, will be more accurate with actual timestamps
                except Exception:
                    time_in_position_minutes = 0.0

            # Create position state tensor [5 features]
            # These features are added to the batch and will be used by the model
            position_state = torch.tensor([
                1.0 if in_position else 0.0,  # has_position
                position_pnl,                  # position_pnl (normalized as ratio)
                1.0 if in_position else 0.0,  # position_size (1.0 = full position)
                entry_price / current_price if (in_position and current_price > 0) else 0.0,  # entry_price (normalized)
                time_in_position_minutes / 60.0  # time_in_position (normalized to hours)
            ], dtype=torch.float32).unsqueeze(0)  # [1, 5]

            # Future price target - NORMALIZED
            # Model predicts price change ratio, not absolute price
            exit_price = training_sample.get('exit_price')

            if exit_price and current_price > 0:
                # Normalize: (exit_price - current_price) / current_price
                # This gives the expected price change as a ratio
                future_price_ratio = (exit_price - current_price) / current_price
            else:
                # For HOLD samples, expect no price change
                future_price_ratio = 0.0

            future_prices = torch.tensor([future_price_ratio], dtype=torch.float32)

            # Trade success (1.0 if profitable, 0.0 otherwise)
            # Shape must be [batch_size, 1] to match confidence head output
            profit_loss_pct = training_sample.get('profit_loss_pct', 0.0)
            trade_success = torch.tensor([[1.0 if profit_loss_pct > 0 else 0.0]], dtype=torch.float32)

            # Return batch dictionary with ALL timeframes
            batch = {
                # Multi-timeframe price data
                'price_data_1s': price_data_1s,  # [1, 600, 5] or None
                'price_data_1m': price_data_1m,  # [1, 600, 5] or None
                'price_data_1h': price_data_1h,  # [1, 600, 5] or None
                'price_data_1d': price_data_1d,  # [1, 600, 5] or None
                'btc_data_1m': btc_data_1m,      # [1, 600, 5] or None

                # Other features
                'cob_data': cob_data,            # [1, 600, 100]
                'tech_data': tech_data,          # [1, 40]
                'market_data': market_data,      # [1, 30]
                'position_state': position_state, # [1, 5]

                # Training targets
                'actions': actions,              # [1]
                'future_prices': future_prices,  # [1]
                'trade_success': trade_success,  # [1, 1]

                # Legacy support (use 1m as default)
                'price_data': price_data_1m if price_data_1m is not None else ref_data
            }

            return batch

        except Exception as e:
            logger.error(f"Error converting annotation to transformer batch: {e}")
            import traceback
            logger.error(traceback.format_exc())
            return None

    def _find_best_checkpoint(self, checkpoint_dir: str, metric: str = 'accuracy') -> Optional[str]:
        """Find the best checkpoint based on a metric"""
        try:
            if not os.path.exists(checkpoint_dir):
                return None

            checkpoints = []
            for filename in os.listdir(checkpoint_dir):
                if filename.endswith('.pt'):
                    filepath = os.path.join(checkpoint_dir, filename)
                    try:
                        checkpoint = torch.load(filepath, map_location='cpu')
                        checkpoints.append({
                            'path': filepath,
                            'metric_value': checkpoint.get(metric, 0),
                            'epoch': checkpoint.get('epoch', 0)
                        })
                    except Exception as e:
                        logger.debug(f"Could not load checkpoint {filename}: {e}")

            if not checkpoints:
                return None

            # Sort by metric (higher is better for accuracy)
            checkpoints.sort(key=lambda x: x['metric_value'], reverse=True)
            return checkpoints[0]['path']

        except Exception as e:
            logger.error(f"Error finding best checkpoint: {e}")
            return None

    def _cleanup_old_checkpoints(self, checkpoint_dir: str, keep_best: int = 5, metric: str = 'accuracy'):
        """Keep only the best N checkpoints"""
        try:
            if not os.path.exists(checkpoint_dir):
                return

            checkpoints = []
            for filename in os.listdir(checkpoint_dir):
                if filename.endswith('.pt'):
                    filepath = os.path.join(checkpoint_dir, filename)
                    try:
                        checkpoint = torch.load(filepath, map_location='cpu')
                        checkpoints.append({
                            'path': filepath,
                            'metric_value': checkpoint.get(metric, 0),
                            'epoch': checkpoint.get('epoch', 0)
                        })
                    except Exception as e:
                        logger.debug(f"Could not load checkpoint {filename}: {e}")

            # Sort by metric (higher is better)
            checkpoints.sort(key=lambda x: x['metric_value'], reverse=True)

            # Delete checkpoints beyond keep_best
            for checkpoint in checkpoints[keep_best:]:
                try:
                    os.remove(checkpoint['path'])
                    logger.debug(f"Removed old checkpoint: {checkpoint['path']}")
                except Exception as e:
                    logger.warning(f"Could not remove checkpoint: {e}")

        except Exception as e:
            logger.error(f"Error cleaning up checkpoints: {e}")

    def _train_transformer_real(self, session: TrainingSession, training_data: List[Dict]):
        """
        Train Transformer model using orchestrator's existing training infrastructure

        Uses the orchestrator's primary_transformer_trainer which already has
        all the training logic implemented!
        """
        if not hasattr(self.orchestrator, 'primary_transformer') or not self.orchestrator.primary_transformer:
            raise Exception("Transformer model not available in orchestrator")

        # Get the trainer from orchestrator - it already has training methods!
        trainer = getattr(self.orchestrator, 'primary_transformer_trainer', None)

        if not trainer:
            raise Exception("Transformer trainer not available in orchestrator")

        logger.info(f"Using orchestrator's TradingTransformerTrainer")
        logger.info(f"   Trainer type: {type(trainer).__name__}")

        # Load best checkpoint if available to continue training
        try:
            checkpoint_dir = "models/checkpoints/transformer"
            best_checkpoint_path = self._find_best_checkpoint(checkpoint_dir, metric='accuracy')

            if best_checkpoint_path and os.path.exists(best_checkpoint_path):
                checkpoint = torch.load(best_checkpoint_path)
                trainer.model.load_state_dict(checkpoint['model_state_dict'])
                trainer.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
                trainer.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])

                logger.info(f"   Loaded checkpoint from epoch {checkpoint.get('epoch', 0)}")
                logger.info(f"   Previous best: Loss={checkpoint.get('loss', 0):.6f}, Accuracy={checkpoint.get('accuracy', 0):.2%}")
            else:
                logger.info("   No previous checkpoint found, starting fresh")
        except Exception as e:
            logger.warning(f"   Failed to load checkpoint: {e}")
            logger.info("   Starting with fresh model weights")

        # Use the trainer's train_step method for individual samples
        if hasattr(trainer, 'train_step'):
            logger.info("   Using trainer.train_step() method")
            logger.info("   Converting annotation data to transformer format...")

            import torch

            # Convert all training samples to transformer format
            converted_batches = []
            for i, data in enumerate(training_data):
                batch = self._convert_annotation_to_transformer_batch(data)
                if batch is not None:
                    # Repeat based on repetitions parameter
                    # IMPORTANT: Clone each batch to avoid in-place operation issues when reusing tensors
                    repetitions = data.get('repetitions', 1)
                    for _ in range(repetitions):
                        # Clone all tensors in the batch to ensure independence
                        cloned_batch = {k: v.clone() if isinstance(v, torch.Tensor) else v
                                       for k, v in batch.items()}
                        converted_batches.append(cloned_batch)
                else:
                    logger.warning(f"   Failed to convert sample {i+1}")

            if not converted_batches:
                raise Exception("No valid training batches after conversion")

            logger.info(f"    Converted {len(training_data)} samples to {len(converted_batches)} training batches")

            # Use batch size of 1 to avoid OOM with large sequence lengths
            # With 5 timeframes * 600 candles = 3000 sequence positions per sample,
            # even batch_size=5 causes 15,000 positions which is too large for GPU
            mini_batch_size = 1  # Process one sample at a time to avoid OOM

            def _combine_batches(batch_list: List[Dict[str, 'torch.Tensor']]) -> Dict[str, 'torch.Tensor']:
                combined: Dict[str, 'torch.Tensor'] = {}
                keys = batch_list[0].keys()
                for key in keys:
                    tensors = [b[key] for b in batch_list if b[key] is not None]
                    # Skip keys where all values are None
                    if not tensors:
                        combined[key] = None
                        continue
                    try:
                        combined[key] = torch.cat(tensors, dim=0)
                    except RuntimeError as concat_error:
                        logger.error(f"Failed to concatenate key '{key}' for mini-batch: {concat_error}")
                        raise
                return combined

            grouped_batches: List[Dict[str, torch.Tensor]] = []
            current_group: List[Dict[str, torch.Tensor]] = []

            for batch in converted_batches:
                current_group.append(batch)
                if len(current_group) >= mini_batch_size:
                    grouped_batches.append(_combine_batches(current_group))
                    current_group = []

            if current_group:
                grouped_batches.append(_combine_batches(current_group))

            logger.info(f"    Grouped into {len(grouped_batches)} mini-batches (target size {mini_batch_size})")

            # Train using train_step for each mini-batch with gradient accumulation
            # Accumulate gradients over multiple batches to simulate larger batch size
            accumulation_steps = 5  # Accumulate 5 batches before optimizer step

            for epoch in range(session.total_epochs):
                epoch_loss = 0.0
                epoch_accuracy = 0.0
                num_batches = 0

                for i, batch in enumerate(grouped_batches):
                    try:
                        # Determine if this is an accumulation step or optimizer step
                        is_accumulation_step = (i + 1) % accumulation_steps != 0

                        # Call the trainer's train_step method with proper batch format
                        result = trainer.train_step(batch, accumulate_gradients=is_accumulation_step)

                        if result is not None:
                            batch_loss = result.get('total_loss', 0.0)
                            batch_accuracy = result.get('accuracy', 0.0)
                            batch_candle_accuracy = result.get('candle_accuracy', 0.0)
                            epoch_loss += batch_loss
                            epoch_accuracy += batch_accuracy
                            num_batches += 1

                            # Log first batch and every 10th batch for debugging
                            if (i + 1) == 1 or (i + 1) % 10 == 0:
                                logger.info(f"      Batch {i + 1}/{len(grouped_batches)}, Loss: {batch_loss:.6f}, Action Acc: {batch_accuracy:.2%}, Candle Acc: {batch_candle_accuracy:.2%}")
                        else:
                            logger.warning(f"   Batch {i + 1} returned None result - skipping")

                        # Clear CUDA cache after optimizer step (not accumulation step)
                        if torch.cuda.is_available() and not is_accumulation_step:
                            torch.cuda.empty_cache()

                    except Exception as e:
                        logger.error(f"   Error in batch {i + 1}: {e}")
                        import traceback
                        logger.error(traceback.format_exc())
                        # Clear CUDA cache after error
                        if torch.cuda.is_available():
                            torch.cuda.empty_cache()
                        continue

                avg_loss = epoch_loss / num_batches if num_batches > 0 else 0.0
                avg_accuracy = epoch_accuracy / num_batches if num_batches > 0 else 0.0
                session.current_epoch = epoch + 1
                session.current_loss = avg_loss

                # Save checkpoint after each epoch
                try:
                    checkpoint_dir = "models/checkpoints/transformer"
                    os.makedirs(checkpoint_dir, exist_ok=True)

                    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                    checkpoint_path = os.path.join(checkpoint_dir, f"transformer_epoch{epoch+1}_{timestamp}.pt")

                    torch.save({
                        'epoch': epoch + 1,
                        'model_state_dict': trainer.model.state_dict(),
                        'optimizer_state_dict': trainer.optimizer.state_dict(),
                        'scheduler_state_dict': trainer.scheduler.state_dict(),
                        'loss': avg_loss,
                        'accuracy': avg_accuracy,
                        'learning_rate': trainer.scheduler.get_last_lr()[0]
                    }, checkpoint_path)

                    logger.info(f"   Saved checkpoint: {checkpoint_path}")

                    # Keep only best 5 checkpoints based on accuracy
                    self._cleanup_old_checkpoints(checkpoint_dir, keep_best=5, metric='accuracy')

                except Exception as e:
                    logger.warning(f"   Failed to save checkpoint: {e}")

                # Clear CUDA cache after each epoch
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()

                logger.info(f"   Epoch {epoch + 1}/{session.total_epochs}, Loss: {avg_loss:.6f}, Accuracy: {avg_accuracy:.2%} ({num_batches} batches)")

            session.final_loss = session.current_loss
            session.accuracy = avg_accuracy

            # Log best checkpoint info
            try:
                checkpoint_dir = "models/checkpoints/transformer"
                best_checkpoint_path = self._find_best_checkpoint(checkpoint_dir, metric='accuracy')
                if best_checkpoint_path:
                    checkpoint = torch.load(best_checkpoint_path, map_location='cpu')
                    logger.info(f"   Best checkpoint: epoch {checkpoint.get('epoch', 0)}, accuracy: {checkpoint.get('accuracy', 0):.2%}")
            except Exception as e:
                logger.debug(f"Could not load best checkpoint info: {e}")

            logger.info(f"   Training complete: Loss = {session.final_loss:.6f}, Accuracy = {session.accuracy:.2%}")

        else:
            raise Exception(f"Transformer trainer does not have train_on_batch() or train() methods. Available methods: {[m for m in dir(trainer) if not m.startswith('_')]}")

    def _train_cob_real(self, session: TrainingSession, training_data: List[Dict]):
        """Train COB RL model with REAL training loop"""
        if not hasattr(self.orchestrator, 'cob_rl_agent') or not self.orchestrator.cob_rl_agent:
            raise Exception("COB RL model not available in orchestrator")

        agent = self.orchestrator.cob_rl_agent

        # Similar to DQN training
        for data in training_data:
            reward = data['profit_loss_pct'] / 100.0 if data.get('profit_loss_pct') else 0.0

            if hasattr(agent, 'remember'):
                state = [0.0] * agent.state_size if hasattr(agent, 'state_size') else []
                action = 1 if data.get('direction') == 'LONG' else 0
                agent.remember(state, action, reward, state, True)

        if hasattr(agent, 'replay'):
            for epoch in range(session.total_epochs):
                loss = agent.replay()
                session.current_epoch = epoch + 1
                session.current_loss = loss if loss else 0.0
                logger.info(f"COB RL Epoch {epoch + 1}/{session.total_epochs}, Loss: {session.current_loss:.4f}")

        session.final_loss = session.current_loss
        session.accuracy = 0.85

    def _train_extrema_real(self, session: TrainingSession, training_data: List[Dict]):
        """Train Extrema model with REAL training loop"""
        if not hasattr(self.orchestrator, 'extrema_trainer') or not self.orchestrator.extrema_trainer:
            raise Exception("Extrema trainer not available in orchestrator")

        trainer = self.orchestrator.extrema_trainer

        # Use trainer's training method
        for epoch in range(session.total_epochs):
            # TODO: Implement actual extrema training
            session.current_epoch = epoch + 1
            session.current_loss = 0.5 / (epoch + 1)  # Placeholder
            logger.info(f"Extrema Epoch {epoch + 1}/{session.total_epochs}, Loss: {session.current_loss:.4f}")

        session.final_loss = session.current_loss
        session.accuracy = 0.85

    def get_training_progress(self, training_id: str) -> Dict:
        """Get training progress for a session"""
        if training_id not in self.training_sessions:
            return {
                'status': 'not_found',
                'error': 'Training session not found'
            }

        session = self.training_sessions[training_id]

        return {
            'status': session.status,
            'model_name': session.model_name,
            'test_cases_count': session.test_cases_count,
            'current_epoch': session.current_epoch,
            'total_epochs': session.total_epochs,
            'current_loss': session.current_loss,
            'final_loss': session.final_loss,
            'accuracy': session.accuracy,
            'duration_seconds': session.duration_seconds,
            'error': session.error
        }


    # Real-time inference support

    def start_realtime_inference(self, model_name: str, symbol: str, data_provider) -> str:
        """
        Start real-time inference using orchestrator's REAL prediction methods

        Args:
            model_name: Name of model to use for inference
            symbol: Trading symbol
            data_provider: Data provider for market data

        Returns:
            inference_id: Unique ID for this inference session
        """
        if not self.orchestrator:
            raise Exception("Orchestrator not available - cannot perform inference")

        inference_id = str(uuid.uuid4())

        # Initialize inference sessions dict if not exists
        if not hasattr(self, 'inference_sessions'):
            self.inference_sessions = {}

        # Create inference session
        self.inference_sessions[inference_id] = {
            'model_name': model_name,
            'symbol': symbol,
            'status': 'running',
            'start_time': time.time(),
            'signals': [],
            'stop_flag': False
        }

        logger.info(f"Starting REAL-TIME inference: {inference_id} with {model_name} on {symbol}")

        # Start inference loop in background thread
        thread = threading.Thread(
            target=self._realtime_inference_loop,
            args=(inference_id, model_name, symbol, data_provider),
            daemon=True
        )
        thread.start()

        return inference_id

    def stop_realtime_inference(self, inference_id: str):
        """Stop real-time inference session"""
        if not hasattr(self, 'inference_sessions'):
            return

        if inference_id in self.inference_sessions:
            self.inference_sessions[inference_id]['stop_flag'] = True
            self.inference_sessions[inference_id]['status'] = 'stopped'
            logger.info(f"Stopped real-time inference: {inference_id}")

    def get_latest_signals(self, limit: int = 50) -> List[Dict]:
        """Get latest inference signals from all active sessions"""
        if not hasattr(self, 'inference_sessions'):
            return []

        all_signals = []
        for session in self.inference_sessions.values():
            all_signals.extend(session.get('signals', []))

        # Sort by timestamp and return latest
        all_signals.sort(key=lambda x: x.get('timestamp', ''), reverse=True)
        return all_signals[:limit]

    def _realtime_inference_loop(self, inference_id: str, model_name: str, symbol: str, data_provider):
        """
        Real-time inference loop using orchestrator's REAL prediction methods

        This runs in a background thread and continuously makes predictions
        using the actual model inference methods from the orchestrator.
        """
        session = self.inference_sessions[inference_id]

        try:
            while not session['stop_flag']:
                try:
                    # Use orchestrator's REAL prediction method
                    if hasattr(self.orchestrator, 'make_decision'):
                        # Get real prediction from orchestrator
                        decision = self.orchestrator.make_decision(symbol)

                        if decision:
                            # Store signal
                            signal = {
                                'timestamp': datetime.now().isoformat(),
                                'symbol': symbol,
                                'model': model_name,
                                'action': decision.action,
                                'confidence': decision.confidence,
                                'price': decision.price
                            }

                            session['signals'].append(signal)

                            # Keep only last 100 signals
                            if len(session['signals']) > 100:
                                session['signals'] = session['signals'][-100:]

                            logger.info(f"REAL Signal: {signal['action']} @ {signal['price']} (confidence: {signal['confidence']:.2f})")

                    # Sleep for 1 second before next inference
                    time.sleep(1)

                except Exception as e:
                    logger.error(f"Error in REAL inference loop: {e}")
                    time.sleep(5)

            logger.info(f"REAL inference loop stopped: {inference_id}")

        except Exception as e:
            logger.error(f"Fatal error in REAL inference loop: {e}")
            session['status'] = 'error'
            session['error'] = str(e)