""" Overnight Training Coordinator This module coordinates comprehensive training for CNN and COB RL models during overnight sessions. It ensures that: 1. Training passes occur on each signal when predictions change 2. Trades are executed and recorded in simulation mode 3. Performance statistics are tracked and logged 4. Models learn from both successful and unsuccessful trades """ import logging import time import threading from datetime import datetime, timedelta from typing import Dict, List, Optional, Any, Tuple from dataclasses import dataclass, field from collections import deque import numpy as np import json import os logger = logging.getLogger(__name__) @dataclass class TrainingSession: """Represents a training session for a model""" model_name: str symbol: str start_time: datetime end_time: Optional[datetime] = None training_samples: int = 0 initial_loss: Optional[float] = None final_loss: Optional[float] = None improvement: Optional[float] = None trades_executed: int = 0 successful_trades: int = 0 total_pnl: float = 0.0 @dataclass class SignalTradeRecord: """Records a signal and its corresponding trade execution""" timestamp: datetime symbol: str signal_action: str signal_confidence: float model_source: str executed: bool = False execution_price: Optional[float] = None trade_pnl: Optional[float] = None training_triggered: bool = False training_loss: Optional[float] = None class OvernightTrainingCoordinator: """ Coordinates comprehensive overnight training for all models """ def __init__(self, orchestrator, data_provider, trading_executor, dashboard=None): self.orchestrator = orchestrator self.data_provider = data_provider self.trading_executor = trading_executor self.dashboard = dashboard # Training configuration self.config = { 'training_on_signal_change': True, # Train when prediction changes 'min_confidence_for_trade': 0.3, # Minimum confidence to execute trade 'max_trades_per_hour': 20, # Rate limiting 'training_batch_size': 32, # Training batch size 'performance_tracking_window': 100, # Number of trades to track for performance 'model_checkpoint_interval': 50, # Save checkpoints every N trades } # State tracking self.is_running = False self.training_thread = None self.last_predictions: Dict[str, Dict[str, Any]] = {} # {symbol: {model: prediction}} self.signal_trade_records: deque = deque(maxlen=1000) self.training_sessions: Dict[str, TrainingSession] = {} # Performance tracking self.performance_stats = { 'total_signals': 0, 'total_trades': 0, 'successful_trades': 0, 'total_pnl': 0.0, 'training_sessions': 0, 'models_trained': set(), 'hourly_stats': deque(maxlen=24) # Last 24 hours } # Rate limiting self.last_trade_time: Dict[str, datetime] = {} self.trades_this_hour: Dict[str, int] = {} self.hour_reset_time = datetime.now().replace(minute=0, second=0, microsecond=0) logger.info("Overnight Training Coordinator initialized") def start_overnight_training(self): """Start the overnight training session""" if self.is_running: logger.warning("Training coordinator already running") return self.is_running = True self.training_thread = threading.Thread(target=self._training_loop, daemon=True) self.training_thread.start() logger.info("🌙 OVERNIGHT TRAINING SESSION STARTED") logger.info("=" * 60) logger.info("Features enabled:") logger.info("✅ CNN training on signal changes") logger.info("✅ COB RL training on market microstructure") logger.info("✅ Trade execution and recording") logger.info("✅ Performance tracking and statistics") logger.info("✅ Model checkpointing") logger.info("=" * 60) def stop_overnight_training(self): """Stop the overnight training session""" self.is_running = False if self.training_thread: self.training_thread.join(timeout=10) # Generate final report self._generate_training_report() logger.info("🌅 OVERNIGHT TRAINING SESSION COMPLETED") def _training_loop(self): """Main training loop that monitors signals and triggers training""" while self.is_running: try: # Reset hourly counters if needed self._reset_hourly_counters() # Process signals from orchestrator self._process_orchestrator_signals() # Check for model training opportunities self._check_training_opportunities() # Update performance statistics self._update_performance_stats() # Sleep briefly to avoid overwhelming the system time.sleep(0.5) except Exception as e: logger.error(f"Error in training loop: {e}") time.sleep(5) def _process_orchestrator_signals(self): """Process signals from the orchestrator and trigger training/trading""" try: # Get recent decisions from orchestrator if not hasattr(self.orchestrator, 'recent_decisions'): return for symbol in self.orchestrator.symbols: if symbol not in self.orchestrator.recent_decisions: continue recent_decisions = self.orchestrator.recent_decisions[symbol] if not recent_decisions: continue # Get the latest decision latest_decision = recent_decisions[-1] # Check if this is a new signal that requires processing if self._is_new_signal_requiring_action(symbol, latest_decision): self._process_new_signal(symbol, latest_decision) except Exception as e: logger.error(f"Error processing orchestrator signals: {e}") def _is_new_signal_requiring_action(self, symbol: str, decision) -> bool: """Check if this signal requires training or trading action""" try: # Get current prediction for comparison current_action = decision.action current_confidence = decision.confidence current_time = decision.timestamp # Check if we have a previous prediction for this symbol if symbol not in self.last_predictions: self.last_predictions[symbol] = {} # Check if prediction has changed significantly last_action = self.last_predictions[symbol].get('action') last_confidence = self.last_predictions[symbol].get('confidence', 0.0) last_time = self.last_predictions[symbol].get('timestamp') # Determine if action is required action_changed = last_action != current_action confidence_changed = abs(current_confidence - last_confidence) > 0.1 time_elapsed = not last_time or (current_time - last_time).total_seconds() > 30 # Update last prediction self.last_predictions[symbol] = { 'action': current_action, 'confidence': current_confidence, 'timestamp': current_time } return action_changed or confidence_changed or time_elapsed except Exception as e: logger.error(f"Error checking if signal requires action: {e}") return False def _process_new_signal(self, symbol: str, decision): """Process a new signal by triggering training and potentially executing trade""" try: signal_record = SignalTradeRecord( timestamp=decision.timestamp, symbol=symbol, signal_action=decision.action, signal_confidence=decision.confidence, model_source=getattr(decision, 'reasoning', {}).get('primary_model', 'orchestrator') ) # 1. Trigger training on signal change if self.config['training_on_signal_change']: training_loss = self._trigger_model_training(symbol, decision) signal_record.training_triggered = True signal_record.training_loss = training_loss # 2. Execute trade if confidence is sufficient if (decision.confidence >= self.config['min_confidence_for_trade'] and decision.action in ['BUY', 'SELL'] and self._can_execute_trade(symbol)): trade_executed, execution_price, trade_pnl = self._execute_signal_trade(symbol, decision) signal_record.executed = trade_executed signal_record.execution_price = execution_price signal_record.trade_pnl = trade_pnl # Update performance stats self.performance_stats['total_trades'] += 1 if trade_pnl and trade_pnl > 0: self.performance_stats['successful_trades'] += 1 if trade_pnl: self.performance_stats['total_pnl'] += trade_pnl # 3. Record the signal self.signal_trade_records.append(signal_record) self.performance_stats['total_signals'] += 1 # 4. Log the action status = "EXECUTED" if signal_record.executed else "SIGNAL_ONLY" logger.info(f"[{status}] {symbol} {decision.action} " f"(conf: {decision.confidence:.3f}, " f"training: {'✅' if signal_record.training_triggered else '❌'}, " f"pnl: {signal_record.trade_pnl:.2f if signal_record.trade_pnl else 'N/A'})") except Exception as e: logger.error(f"Error processing new signal for {symbol}: {e}") def _trigger_model_training(self, symbol: str, decision) -> Optional[float]: """Trigger training for all relevant models""" try: training_losses = [] # 1. Train CNN model if hasattr(self.orchestrator, 'cnn_model') and self.orchestrator.cnn_model: cnn_loss = self._train_cnn_model(symbol, decision) if cnn_loss is not None: training_losses.append(cnn_loss) self.performance_stats['models_trained'].add('CNN') # 2. Train COB RL model if hasattr(self.orchestrator, 'cob_rl_agent') and self.orchestrator.cob_rl_agent: cob_rl_loss = self._train_cob_rl_model(symbol, decision) if cob_rl_loss is not None: training_losses.append(cob_rl_loss) self.performance_stats['models_trained'].add('COB_RL') # 3. Train DQN model if hasattr(self.orchestrator, 'rl_agent') and self.orchestrator.rl_agent: dqn_loss = self._train_dqn_model(symbol, decision) if dqn_loss is not None: training_losses.append(dqn_loss) self.performance_stats['models_trained'].add('DQN') # Return average loss return np.mean(training_losses) if training_losses else None except Exception as e: logger.error(f"Error triggering model training: {e}") return None def _train_cnn_model(self, symbol: str, decision) -> Optional[float]: """Train CNN model on current market data""" try: # Get market data for training df = self.data_provider.get_historical_data(symbol, '1m', limit=100) if df is None or len(df) < 50: return None # Prepare training data features = self._prepare_cnn_features(df) target = self._prepare_cnn_target(decision) if features is None or target is None: return None # Train the model if hasattr(self.orchestrator.cnn_model, 'train_on_batch'): loss = self.orchestrator.cnn_model.train_on_batch(features, target) logger.debug(f"CNN training loss for {symbol}: {loss:.4f}") return loss return None except Exception as e: logger.error(f"Error training CNN model: {e}") return None def _train_cob_rl_model(self, symbol: str, decision) -> Optional[float]: """Train COB RL model on market microstructure data""" try: # Get COB data if available if not hasattr(self.dashboard, 'latest_cob_data') or symbol not in self.dashboard.latest_cob_data: return None cob_data = self.dashboard.latest_cob_data[symbol] # Prepare COB features features = self._prepare_cob_features(cob_data) reward = self._calculate_cob_reward(decision) if features is None: return None # Train the model if hasattr(self.orchestrator.cob_rl_agent, 'train'): loss = self.orchestrator.cob_rl_agent.train(features, reward) logger.debug(f"COB RL training loss for {symbol}: {loss:.4f}") return loss return None except Exception as e: logger.error(f"Error training COB RL model: {e}") return None def _train_dqn_model(self, symbol: str, decision) -> Optional[float]: """Train DQN model on trading decision""" try: # Get state features state_features = self._prepare_dqn_state(symbol) action = self._map_action_to_index(decision.action) reward = decision.confidence # Use confidence as immediate reward if state_features is None: return None # Add experience to replay buffer if hasattr(self.orchestrator.rl_agent, 'remember'): # We'll use a dummy next_state for now next_state = state_features # Simplified done = False self.orchestrator.rl_agent.remember(state_features, action, reward, next_state, done) # Train if we have enough experiences if hasattr(self.orchestrator.rl_agent, 'replay'): loss = self.orchestrator.rl_agent.replay() if loss is not None: logger.debug(f"DQN training loss for {symbol}: {loss:.4f}") return loss return None except Exception as e: logger.error(f"Error training DQN model: {e}") return None def _execute_signal_trade(self, symbol: str, decision) -> Tuple[bool, Optional[float], Optional[float]]: """Execute a trade based on the signal""" try: if not self.trading_executor: return False, None, None # Get current price current_price = self.data_provider.get_current_price(symbol) if not current_price: return False, None, None # Execute the trade success = self.trading_executor.execute_signal( symbol=symbol, action=decision.action, confidence=decision.confidence, current_price=current_price ) if success: # Calculate PnL (simplified - in real implementation this would be more complex) trade_pnl = self._calculate_trade_pnl(symbol, decision.action, current_price) # Update rate limiting self.last_trade_time[symbol] = datetime.now() if symbol not in self.trades_this_hour: self.trades_this_hour[symbol] = 0 self.trades_this_hour[symbol] += 1 return True, current_price, trade_pnl return False, None, None except Exception as e: logger.error(f"Error executing signal trade: {e}") return False, None, None def _can_execute_trade(self, symbol: str) -> bool: """Check if we can execute a trade based on rate limiting""" try: # Check hourly limit if symbol in self.trades_this_hour: if self.trades_this_hour[symbol] >= self.config['max_trades_per_hour']: return False # Check minimum time between trades (30 seconds) if symbol in self.last_trade_time: time_since_last = (datetime.now() - self.last_trade_time[symbol]).total_seconds() if time_since_last < 30: return False return True except Exception as e: logger.error(f"Error checking if can execute trade: {e}") return False def _prepare_cnn_features(self, df) -> Optional[np.ndarray]: """Prepare features for CNN training""" try: # Use OHLCV data as features features = df[['open', 'high', 'low', 'close', 'volume']].values # Normalize features features = (features - features.mean(axis=0)) / (features.std(axis=0) + 1e-8) # Reshape for CNN (add batch and channel dimensions) features = features.reshape(1, features.shape[0], features.shape[1]) return features.astype(np.float32) except Exception as e: logger.error(f"Error preparing CNN features: {e}") return None def _prepare_cnn_target(self, decision) -> Optional[np.ndarray]: """Prepare target for CNN training""" try: # Map action to target action_map = {'BUY': [1, 0, 0], 'SELL': [0, 1, 0], 'HOLD': [0, 0, 1]} target = action_map.get(decision.action, [0, 0, 1]) return np.array([target], dtype=np.float32) except Exception as e: logger.error(f"Error preparing CNN target: {e}") return None def _prepare_cob_features(self, cob_data) -> Optional[np.ndarray]: """Prepare COB features for training""" try: # Extract key COB features features = [] # Order book imbalance imbalance = cob_data.get('stats', {}).get('imbalance', 0) features.append(imbalance) # Bid/Ask liquidity bid_liquidity = cob_data.get('stats', {}).get('bid_liquidity', 0) ask_liquidity = cob_data.get('stats', {}).get('ask_liquidity', 0) features.extend([bid_liquidity, ask_liquidity]) # Spread spread = cob_data.get('stats', {}).get('spread_bps', 0) features.append(spread) # Pad to expected size (2000 features for COB RL) while len(features) < 2000: features.append(0.0) return np.array(features[:2000], dtype=np.float32) except Exception as e: logger.error(f"Error preparing COB features: {e}") return None def _calculate_cob_reward(self, decision) -> float: """Calculate reward for COB RL training""" try: # Use confidence as base reward base_reward = decision.confidence # Adjust based on action if decision.action in ['BUY', 'SELL']: return base_reward else: return base_reward * 0.1 # Lower reward for HOLD except Exception as e: logger.error(f"Error calculating COB reward: {e}") return 0.0 def _prepare_dqn_state(self, symbol: str) -> Optional[np.ndarray]: """Prepare state features for DQN training""" try: # Get market data df = self.data_provider.get_historical_data(symbol, '1m', limit=50) if df is None or len(df) < 10: return None # Prepare basic features features = [] # Price features close_prices = df['close'].values features.extend(close_prices[-10:]) # Last 10 prices # Technical indicators if len(close_prices) >= 20: sma_20 = np.mean(close_prices[-20:]) features.append(sma_20) else: features.append(close_prices[-1]) # Volume features volumes = df['volume'].values features.extend(volumes[-5:]) # Last 5 volumes # Pad to expected size (100 features for DQN) while len(features) < 100: features.append(0.0) return np.array(features[:100], dtype=np.float32) except Exception as e: logger.error(f"Error preparing DQN state: {e}") return None def _map_action_to_index(self, action: str) -> int: """Map action string to index""" action_map = {'BUY': 0, 'SELL': 1, 'HOLD': 2} return action_map.get(action, 2) def _calculate_trade_pnl(self, symbol: str, action: str, price: float) -> float: """Calculate simplified PnL for a trade""" try: # This is a simplified PnL calculation # In a real implementation, this would track actual position changes # Get previous price for comparison df = self.data_provider.get_historical_data(symbol, '1m', limit=2) if df is None or len(df) < 2: return 0.0 prev_price = df['close'].iloc[-2] current_price = price # Calculate price change price_change = (current_price - prev_price) / prev_price # Apply action direction if action == 'BUY': return price_change * 100 # Simplified PnL elif action == 'SELL': return -price_change * 100 # Simplified PnL else: return 0.0 except Exception as e: logger.error(f"Error calculating trade PnL: {e}") return 0.0 def _check_training_opportunities(self): """Check for additional training opportunities""" try: # Check if we should save model checkpoints if (self.performance_stats['total_trades'] > 0 and self.performance_stats['total_trades'] % self.config['model_checkpoint_interval'] == 0): self._save_model_checkpoints() except Exception as e: logger.error(f"Error checking training opportunities: {e}") def _save_model_checkpoints(self): """Save model checkpoints""" try: timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # Save CNN model if hasattr(self.orchestrator, 'cnn_model') and self.orchestrator.cnn_model: if hasattr(self.orchestrator.cnn_model, 'save'): checkpoint_path = f"models/overnight_cnn_{timestamp}.pth" self.orchestrator.cnn_model.save(checkpoint_path) logger.info(f"CNN checkpoint saved: {checkpoint_path}") # Save COB RL model if hasattr(self.orchestrator, 'cob_rl_agent') and self.orchestrator.cob_rl_agent: if hasattr(self.orchestrator.cob_rl_agent, 'save_model'): checkpoint_path = f"models/overnight_cob_rl_{timestamp}.pth" self.orchestrator.cob_rl_agent.save_model(checkpoint_path) logger.info(f"COB RL checkpoint saved: {checkpoint_path}") # Save DQN model if hasattr(self.orchestrator, 'rl_agent') and self.orchestrator.rl_agent: if hasattr(self.orchestrator.rl_agent, 'save'): checkpoint_path = f"models/overnight_dqn_{timestamp}.pth" self.orchestrator.rl_agent.save(checkpoint_path) logger.info(f"DQN checkpoint saved: {checkpoint_path}") except Exception as e: logger.error(f"Error saving model checkpoints: {e}") def _reset_hourly_counters(self): """Reset hourly trade counters""" try: current_hour = datetime.now().replace(minute=0, second=0, microsecond=0) if current_hour > self.hour_reset_time: self.trades_this_hour = {} self.hour_reset_time = current_hour logger.info("Hourly trade counters reset") except Exception as e: logger.error(f"Error resetting hourly counters: {e}") def _update_performance_stats(self): """Update performance statistics""" try: # Update hourly stats every hour current_hour = datetime.now().replace(minute=0, second=0, microsecond=0) # Check if we need to add a new hourly stat if not self.performance_stats['hourly_stats'] or self.performance_stats['hourly_stats'][-1]['hour'] != current_hour: hourly_stat = { 'hour': current_hour, 'signals': 0, 'trades': 0, 'pnl': 0.0, 'models_trained': set() } self.performance_stats['hourly_stats'].append(hourly_stat) except Exception as e: logger.error(f"Error updating performance stats: {e}") def _generate_training_report(self): """Generate a comprehensive training report""" try: logger.info("=" * 80) logger.info("🌅 OVERNIGHT TRAINING SESSION REPORT") logger.info("=" * 80) # Overall statistics logger.info(f"📊 OVERALL STATISTICS:") logger.info(f" Total Signals Processed: {self.performance_stats['total_signals']}") logger.info(f" Total Trades Executed: {self.performance_stats['total_trades']}") logger.info(f" Successful Trades: {self.performance_stats['successful_trades']}") logger.info(f" Success Rate: {(self.performance_stats['successful_trades'] / max(1, self.performance_stats['total_trades']) * 100):.1f}%") logger.info(f" Total P&L: ${self.performance_stats['total_pnl']:.2f}") # Model training statistics logger.info(f"🧠 MODEL TRAINING:") logger.info(f" Models Trained: {', '.join(self.performance_stats['models_trained'])}") logger.info(f" Training Sessions: {len(self.training_sessions)}") # Recent performance if self.signal_trade_records: recent_records = list(self.signal_trade_records)[-20:] # Last 20 records executed_trades = [r for r in recent_records if r.executed] successful_trades = [r for r in executed_trades if r.trade_pnl and r.trade_pnl > 0] logger.info(f"📈 RECENT PERFORMANCE (Last 20 signals):") logger.info(f" Signals: {len(recent_records)}") logger.info(f" Executed: {len(executed_trades)}") logger.info(f" Successful: {len(successful_trades)}") if executed_trades: recent_pnl = sum(r.trade_pnl for r in executed_trades if r.trade_pnl) logger.info(f" Recent P&L: ${recent_pnl:.2f}") logger.info("=" * 80) except Exception as e: logger.error(f"Error generating training report: {e}") def get_performance_summary(self) -> Dict[str, Any]: """Get current performance summary""" try: return { 'total_signals': self.performance_stats['total_signals'], 'total_trades': self.performance_stats['total_trades'], 'successful_trades': self.performance_stats['successful_trades'], 'success_rate': (self.performance_stats['successful_trades'] / max(1, self.performance_stats['total_trades'])), 'total_pnl': self.performance_stats['total_pnl'], 'models_trained': list(self.performance_stats['models_trained']), 'is_running': self.is_running, 'recent_signals': len(self.signal_trade_records) } except Exception as e: logger.error(f"Error getting performance summary: {e}") return {}