#!/usr/bin/env python
"""
Hybrid Training Script - Combining Supervised and Reinforcement Learning

This script provides a hybrid approach that:
1. Performs supervised learning on market data using CNN models
2. Uses reinforcement learning to optimize trading strategies
3. Only uses real market data (never synthetic)

The script enables both approaches to complement each other:
- CNN model learns patterns from historical data (supervised)
- RL agent optimizes actual trading decisions (reinforcement)
"""

import os
import sys
import logging
import argparse
import numpy as np
import torch
import time
import json
import asyncio
import signal
import threading
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter

# Add project root to path if needed
project_root = os.path.dirname(os.path.abspath(__file__))
if project_root not in sys.path:
    sys.path.append(project_root)

# Import configurations
import train_config

# Import key components
from NN.models.cnn_model_pytorch import CNNModelPyTorch
from NN.models.dqn_agent import DQNAgent
from realtime import MultiTimeframeDataInterface, RealTimeChart
from NN.utils.signal_interpreter import SignalInterpreter

# Global variables for graceful shutdown
running = True
training_stats = {
    "supervised": {
        "epochs_completed": 0,
        "best_val_pnl": -float('inf'),
        "best_epoch": 0,
        "best_win_rate": 0
    },
    "reinforcement": {
        "episodes_completed": 0,
        "best_reward": -float('inf'),
        "best_episode": 0,
        "best_win_rate": 0
    },
    "hybrid": {
        "iterations_completed": 0,
        "best_combined_score": -float('inf'),
        "training_started": datetime.now().isoformat(),
        "last_update": datetime.now().isoformat()
    }
}


# Configure signal handler for graceful shutdown
def signal_handler(sig, frame):
    global running
    logging.info("Received interrupt signal. Finishing current training cycle and saving models...")
    running = False

# Register signal handler
signal.signal(signal.SIGINT, signal_handler)


class HybridModel:
    """
    Hybrid model that combines supervised CNN learning with RL-based decision optimization
    """

    def __init__(self, config):
        self.config = config
        self.device = torch.device(config['hardware']['device'])
        self.supervised_model = None
        self.rl_agent = None
        self.data_interface = None
        self.signal_interpreter = None
        self.chart = None

        # Training stats
        self.tensorboard_writer = None
        self.iter_count = 0
        self.supervised_epochs = 0
        self.rl_episodes = 0

        # Initialize logging
        self.logger = logging.getLogger('hybrid_model')

        # Paths
        self.models_dir = Path(config['paths']['models_dir'])
        self.models_dir.mkdir(exist_ok=True, parents=True)

    def initialize(self):
        """Initialize all components of the hybrid model"""
        # Set up TensorBoard
        log_dir = Path(self.config['paths']['tensorboard_dir']) / f"hybrid_{int(time.time())}"
        self.tensorboard_writer = SummaryWriter(log_dir=str(log_dir))
        self.logger.info(f"TensorBoard initialized at {log_dir}")

        # Initialize data interface
        symbol = self.config['market_data']['symbol']
        timeframes = self.config['market_data']['timeframes']
        window_size = self.config['market_data']['window_size']

        self.logger.info(f"Initializing data interface for {symbol} with timeframes {timeframes}")
        self.data_interface = MultiTimeframeDataInterface(
            symbol=symbol,
            timeframes=timeframes
        )

        # Initialize supervised model (CNN)
        self._initialize_supervised_model(window_size)

        # Initialize RL agent
        self._initialize_rl_agent(window_size)

        # Initialize signal interpreter
        self.signal_interpreter = SignalInterpreter(config={
            'buy_threshold': 0.65,
            'sell_threshold': 0.65,
            'hold_threshold': 0.75,
            'trend_filter_enabled': True,
            'volume_filter_enabled': True
        })

        # Initialize chart if visualization is enabled
        if self.config.get('visualization', {}).get('enabled', False):
            self._initialize_chart()

        return True
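
    # The "reference" (lowest) timeframe is selected with the same inline
    # min(...) expression in several methods below. The helper here is a small
    # optional sketch of how that logic could be centralized; it assumes only
    # what those inline calls already assume, namely that the data interface
    # exposes a timeframe_to_seconds mapping.
    def _reference_timeframe(self):
        """Return the configured timeframe with the shortest interval."""
        return min(
            self.config['market_data']['timeframes'],
            key=lambda tf: self.data_interface.timeframe_to_seconds.get(tf, 3600)
        )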

    def _initialize_supervised_model(self, window_size):
        """Initialize the supervised CNN model"""
        try:
            # Get data shape information
            X_train_dict, y_train, X_val_dict, y_val, _, _ = self.data_interface.prepare_training_data(
                window_size=window_size,
                refresh=True
            )

            if X_train_dict is None or y_train is None:
                raise ValueError("Failed to load training data")

            # Get reference timeframe (lowest timeframe)
            reference_tf = min(
                self.config['market_data']['timeframes'],
                key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
            )

            # Get feature count from the data
            num_features = X_train_dict[reference_tf].shape[2]

            # Initialize model
            self.logger.info(f"Initializing CNN model with {num_features} features")
            self.supervised_model = CNNModelPyTorch(
                window_size=window_size,
                num_features=num_features,
                output_size=3,  # BUY/HOLD/SELL
                timeframes=self.config['market_data']['timeframes']
            )

            # Load existing model if available
            model_path = self.models_dir / "supervised_model_best.pt"
            if model_path.exists():
                self.logger.info(f"Loading existing CNN model from {model_path}")
                self.supervised_model.load(str(model_path))
                self.logger.info("CNN model loaded successfully")
            else:
                self.logger.info("No existing CNN model found. Starting with a new model.")

        except Exception as e:
            self.logger.error(f"Error initializing supervised model: {str(e)}")
            import traceback
            self.logger.error(traceback.format_exc())
            raise

    def _initialize_rl_agent(self, window_size):
        """Initialize the RL agent"""
        try:
            # Get data for RL training
            X_train_dict, _, _, _, _, _ = self.data_interface.prepare_training_data(
                window_size=window_size,
                refresh=True
            )

            if X_train_dict is None:
                raise ValueError("Failed to load training data for RL agent")

            # Get reference timeframe features
            reference_tf = min(
                self.config['market_data']['timeframes'],
                key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
            )

            # Calculate state size - this is more complex for RL.
            # For simplicity, the state is the flattened market window plus
            # position info (CNN feature enhancement is still a placeholder).
            state_size = window_size * X_train_dict[reference_tf].shape[2] + 3  # +3 for position, equity, unrealized_pnl

            # Initialize RL agent
            self.logger.info(f"Initializing RL agent with state size {state_size}")
            self.rl_agent = DQNAgent(
                state_size=state_size,
                n_actions=3,  # BUY/HOLD/SELL
                epsilon=1.0,
                epsilon_decay=0.995,
                epsilon_min=0.01,
                learning_rate=self.config['training']['learning_rate'],
                gamma=0.99,
                buffer_size=10000,
                batch_size=self.config['training']['batch_size'],
                device=self.device
            )

            # Load existing agent if available
            agent_path = self.models_dir / "rl_agent_best.pth"
            if agent_path.exists():
                self.logger.info(f"Loading existing RL agent from {agent_path}")
                self.rl_agent.load(str(agent_path))
                self.logger.info("RL agent loaded successfully")
            else:
                self.logger.info("No existing RL agent found. Starting with a new agent.")

        except Exception as e:
            self.logger.error(f"Error initializing RL agent: {str(e)}")
            import traceback
            self.logger.error(traceback.format_exc())
            raise

    def _initialize_chart(self):
        """Initialize the RealTimeChart for visualization"""
        try:
            from realtime import RealTimeChart
            symbol = self.config['market_data']['symbol']
            self.logger.info(f"Initializing RealTimeChart for {symbol}")
            self.chart = RealTimeChart(symbol=symbol)
            # TODO: Start chart server in a background thread
            # (one possible approach is sketched in _start_chart_server below)
        except Exception as e:
            self.logger.error(f"Error initializing chart: {str(e)}")
            self.chart = None
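
    def _start_chart_server(self):
        """Hedged sketch for the TODO in _initialize_chart: run the chart's web
        server in a daemon thread so it does not block the asyncio training loop.

        Assumption: RealTimeChart exposes a blocking entry point, called ``run``
        here purely for illustration; substitute whatever method the realtime
        module actually provides.
        """
        if self.chart is None:
            return
        server_thread = threading.Thread(
            target=self.chart.run,  # assumed method name, see docstring
            daemon=True,  # a daemon thread will not keep the process alive on shutdown
        )
        server_thread.start()
        self.logger.info("Chart server thread started")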

    async def train_hybrid(self, iterations=10, sv_epochs_per_iter=5, rl_episodes_per_iter=2):
        """
        Main hybrid training loop

        Args:
            iterations: Number of hybrid iterations to run
            sv_epochs_per_iter: Number of supervised epochs per iteration
            rl_episodes_per_iter: Number of RL episodes per iteration

        Returns:
            dict: Training statistics
        """
        self.logger.info(f"Starting hybrid training with {iterations} iterations")
        self.logger.info(f"Each iteration includes {sv_epochs_per_iter} supervised epochs and {rl_episodes_per_iter} RL episodes")

        # Training loop
        for iteration in range(iterations):
            if not running:
                self.logger.info("Training stopped by user")
                break

            self.logger.info(f"Iteration {iteration+1}/{iterations}")
            self.iter_count += 1

            # 1. Supervised learning phase
            self.logger.info("Starting supervised learning phase")
            sv_stats = await self.train_supervised(epochs=sv_epochs_per_iter)

            # 2. Reinforcement learning phase
            self.logger.info("Starting reinforcement learning phase")
            rl_stats = await self.train_reinforcement(episodes=rl_episodes_per_iter)

            # 3. Update global training stats
            self._update_training_stats(sv_stats, rl_stats)

            # 4. Save models and stats
            self._save_models_and_stats()

            # 5. Log to TensorBoard
            if self.tensorboard_writer:
                self._log_to_tensorboard(iteration, sv_stats, rl_stats)

        self.logger.info("Hybrid training completed")
        return training_stats

    async def train_supervised(self, epochs=5):
        """
        Run supervised training for a specified number of epochs

        Args:
            epochs: Number of epochs to train

        Returns:
            dict: Training statistics
        """
        # Get fresh data
        window_size = self.config['market_data']['window_size']
        X_train_dict, y_train, X_val_dict, y_val, train_prices, val_prices = self.data_interface.prepare_training_data(
            window_size=window_size,
            refresh=True
        )

        if X_train_dict is None or y_train is None:
            self.logger.error("Failed to load training data")
            return {}

        # Get reference timeframe (lowest timeframe)
        reference_tf = min(
            self.config['market_data']['timeframes'],
            key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
        )

        # Calculate future prices for profitability-focused loss function
        train_future_prices = self.data_interface.get_future_prices(train_prices, n_candles=8)
        val_future_prices = self.data_interface.get_future_prices(val_prices, n_candles=8)

        # For now, we use only the reference timeframe
        X_train = X_train_dict[reference_tf]
        X_val = X_val_dict[reference_tf]

        # Training stats
        stats = {
            "train_losses": [],
            "val_losses": [],
            "train_accuracies": [],
            "val_accuracies": [],
            "train_pnls": [],
            "val_pnls": [],
            "best_val_pnl": -float('inf'),
            "best_epoch": -1
        }

        batch_size = self.config['training']['batch_size']

        # Training loop
        for epoch in range(epochs):
            if not running:
                break

            epoch_start = time.time()

            # Train one epoch
            train_action_loss, train_price_loss, train_acc = self.supervised_model.train_epoch(
                X_train, y_train, train_future_prices, batch_size
            )

            # Evaluate
            val_action_loss, val_price_loss, val_acc = self.supervised_model.evaluate(
                X_val, y_val, val_future_prices
            )

            # Get predictions for PnL calculation
            train_action_probs, _ = self.supervised_model.predict(X_train)
            val_action_probs, _ = self.supervised_model.predict(X_val)

            # Convert probabilities to actions
            train_preds = np.argmax(train_action_probs, axis=1)
            val_preds = np.argmax(val_action_probs, axis=1)

            # Calculate PnL
            train_pnl, train_win_rate, _ = self.data_interface.calculate_pnl(
                train_preds, train_prices, position_size=1.0
            )
            val_pnl, val_win_rate, _ = self.data_interface.calculate_pnl(
                val_preds, val_prices, position_size=1.0
            )

            # Update stats
            stats["train_losses"].append(train_action_loss)
            stats["val_losses"].append(val_action_loss)
            stats["train_accuracies"].append(train_acc)
            stats["val_accuracies"].append(val_acc)
            stats["train_pnls"].append(train_pnl)
            stats["val_pnls"].append(val_pnl)

            # Check if this is the best model
            if val_pnl > stats["best_val_pnl"]:
                stats["best_val_pnl"] = val_pnl
                stats["best_epoch"] = epoch
                stats["best_win_rate"] = val_win_rate

                # Save the best model
                self.supervised_model.save(str(self.models_dir / "supervised_model_best.pt"))

            # Log epoch results
            self.logger.info(f"Supervised Epoch {epoch+1}/{epochs}")
            self.logger.info(f"  Train Loss: {train_action_loss:.4f}, Accuracy: {train_acc:.4f}, PnL: {train_pnl:.4f}")
            self.logger.info(f"  Val Loss: {val_action_loss:.4f}, Accuracy: {val_acc:.4f}, PnL: {val_pnl:.4f}")

            # Log timing
            epoch_time = time.time() - epoch_start
            self.logger.info(f"  Epoch completed in {epoch_time:.2f} seconds")

            # Update global epoch counter
            self.supervised_epochs += 1

            # Small delay to allow for interruption
            await asyncio.sleep(0.1)

        return stats
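
    @staticmethod
    def _pnl_sketch(actions, prices, position_size=1.0):
        """Illustrative sketch only: roughly the kind of calculation that
        data_interface.calculate_pnl performs above. The real implementation
        lives in MultiTimeframeDataInterface and may differ in detail. The
        action encoding assumed here is 0 = BUY, 1 = SELL, 2 = HOLD, matching
        the ``action != 2`` (not HOLD) check used in train_reinforcement.
        """
        pnl = 0.0
        wins = 0
        trades = 0
        for i, action in enumerate(actions[:-1]):
            if action == 2:  # HOLD: no trade on this candle
                continue
            # One-candle holding period: profit is the signed next-step return
            direction = 1.0 if action == 0 else -1.0
            ret = direction * (prices[i + 1] - prices[i]) / prices[i]
            pnl += position_size * ret
            trades += 1
            if ret > 0:
                wins += 1
        win_rate = wins / max(1, trades)
        return pnl, win_rate, trades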

    async def train_reinforcement(self, episodes=2):
        """
        Run reinforcement learning for a specified number of episodes

        Args:
            episodes: Number of episodes to train

        Returns:
            dict: Training statistics
        """
        from NN.train_rl import RLTradingEnvironment

        # Get data for RL environment
        window_size = self.config['market_data']['window_size']

        # Get all timeframes data
        data_dict = self.data_interface.get_multi_timeframe_data(refresh=True)

        if not data_dict:
            self.logger.error("Failed to fetch data for any timeframe")
            return {}

        # Extract key timeframes
        timeframes = self.config['market_data']['timeframes']

        # Extract features from dataframes
        features = {}
        for tf in timeframes:
            if tf in data_dict:
                df = data_dict[tf]
                # Add indicators if not already added
                if 'rsi' not in df.columns:
                    df = self.data_interface.add_indicators(df)

                # Convert to numpy array with close price as the last column
                features[tf] = np.hstack([
                    df.drop(['timestamp', 'close'], axis=1).values,
                    df['close'].values.reshape(-1, 1)
                ])

        # Ensure we have all needed timeframes
        required_tfs = ['1m', '5m', '15m']  # Most common timeframes used by RL
        for tf in required_tfs:
            if tf not in features and tf in timeframes:
                self.logger.error(f"Missing features for timeframe {tf}")
                return {}

        # Create environment with our feature data
        env = RLTradingEnvironment(
            features_1m=features.get('1m'),
            features_1h=features.get('1h', features.get('5m')),  # Use 5m as fallback
            features_1d=features.get('1d', features.get('15m'))  # Use 15m as fallback
        )

        # Training stats
        stats = {
            "rewards": [],
            "win_rates": [],
            "trades": [],
            "best_reward": -float('inf'),
            "best_episode": -1
        }

        # RL training loop
        for episode in range(episodes):
            if not running:
                break

            episode_start = time.time()
            self.logger.info(f"RL Episode {episode+1}/{episodes}")

            # Reset environment
            state = env.reset()
            total_reward = 0
            trades = 0
            wins = 0

            # Run one episode
            done = False
            max_steps = 1000
            step = 0

            while not done and step < max_steps:
                # Use CNN model to enhance state representation if available
                enhanced_state = self._enhance_state_with_cnn(state)

                # Select action using the RL agent
                action = self.rl_agent.act(enhanced_state)

                # Take step in environment
                next_state, reward, done, info = env.step(action)

                # Store in replay buffer
                self.rl_agent.remember(enhanced_state, action, reward, self._enhance_state_with_cnn(next_state), done)

                # Update episode statistics
                total_reward += reward
                state = next_state
                step += 1

                # Track trades and wins
                if action != 2:  # Not HOLD
                    trades += 1
                    if reward > 0:
                        wins += 1

                # Train the agent on a batch of experiences
                if len(self.rl_agent.memory) > self.config['training']['batch_size']:
                    self.rl_agent.replay(self.config['training']['batch_size'])

                # Allow for interruption
                if step % 100 == 0:
                    await asyncio.sleep(0.1)
                    if not running:
                        break

            # Calculate win rate
            win_rate = wins / max(1, trades)

            # Update stats
            stats["rewards"].append(total_reward)
            stats["win_rates"].append(win_rate)
            stats["trades"].append(trades)

            # Check if this is the best agent
            if total_reward > stats["best_reward"]:
                stats["best_reward"] = total_reward
                stats["best_episode"] = episode

                # Save the best agent
                self.rl_agent.save(str(self.models_dir / "rl_agent_best.pth"))

            # Log episode results
            self.logger.info(f"  Reward: {total_reward:.4f}, Win Rate: {win_rate:.4f}, Trades: {trades}")

            # Log timing
            episode_time = time.time() - episode_start
            self.logger.info(f"  Episode completed in {episode_time:.2f} seconds")

            # Update global episode counter
            self.rl_episodes += 1

            # Reduce exploration rate
            self.rl_agent.adjust_epsilon()

            # Small delay to allow for interruption
            await asyncio.sleep(0.1)

        return stats

    def _enhance_state_with_cnn(self, state):
        """
        Enhance the RL state with CNN feature extraction

        Args:
            state: The original state from the environment

        Returns:
            numpy.ndarray: Enhanced state representation
        """
        # This is a placeholder - in a real implementation, you would:
        # 1. Format the state for the CNN
        # 2. Get the CNN's feature representation
        # 3. Combine with the original state features
        # (one hedged possibility is sketched in _enhance_state_with_cnn_sketch below)
        return state
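
    def _enhance_state_with_cnn_sketch(self, state):
        """Hedged sketch of one way the placeholder above could be filled in.

        Assumptions (purely illustrative, not confirmed by the codebase):
        - the last 3 entries of ``state`` are position/equity/unrealized PnL,
          matching the state_size calculation in _initialize_rl_agent;
        - CNNModelPyTorch exposes an intermediate-feature method, called
          ``extract_features`` here, returning an array-like;
        - DQNAgent's state_size would have to be enlarged by the number of
          CNN features before this enhanced state could actually be used.
        """
        if self.supervised_model is None:
            return state

        state = np.asarray(state, dtype=np.float32)
        window_size = self.config['market_data']['window_size']

        # 1. Format the state for the CNN: reshape the flat market window back
        #    into (batch=1, window_size, num_features).
        market_part = state[:-3]
        cnn_input = market_part.reshape(1, window_size, -1)

        # 2. Get the CNN's feature representation (assumed API, see docstring).
        cnn_features = np.asarray(self.supervised_model.extract_features(cnn_input)).reshape(-1)

        # 3. Combine with the original state features.
        return np.concatenate([state, cnn_features])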

    def _update_training_stats(self, sv_stats, rl_stats):
        """Update global training statistics"""
        global training_stats

        # Update supervised stats
        if sv_stats:
            training_stats["supervised"]["epochs_completed"] = self.supervised_epochs

            if "best_val_pnl" in sv_stats and sv_stats["best_val_pnl"] > training_stats["supervised"]["best_val_pnl"]:
                training_stats["supervised"]["best_val_pnl"] = sv_stats["best_val_pnl"]
                training_stats["supervised"]["best_epoch"] = sv_stats["best_epoch"] + training_stats["supervised"]["epochs_completed"] - len(sv_stats["train_losses"])
                training_stats["supervised"]["best_win_rate"] = sv_stats.get("best_win_rate", 0)

        # Update reinforcement stats
        if rl_stats:
            training_stats["reinforcement"]["episodes_completed"] = self.rl_episodes

            if "best_reward" in rl_stats and rl_stats["best_reward"] > training_stats["reinforcement"]["best_reward"]:
                training_stats["reinforcement"]["best_reward"] = rl_stats["best_reward"]
                training_stats["reinforcement"]["best_episode"] = rl_stats["best_episode"] + training_stats["reinforcement"]["episodes_completed"] - len(rl_stats["rewards"])

        # Update hybrid stats
        training_stats["hybrid"]["iterations_completed"] = self.iter_count
        training_stats["hybrid"]["last_update"] = datetime.now().isoformat()

        # Calculate combined score (simple formula, can be adjusted)
        sv_score = training_stats["supervised"]["best_val_pnl"]
        rl_score = training_stats["reinforcement"]["best_reward"]

        combined_score = sv_score * 0.7 + rl_score * 0.3  # Weight supervised more

        if combined_score > training_stats["hybrid"]["best_combined_score"]:
            training_stats["hybrid"]["best_combined_score"] = combined_score

    def _save_models_and_stats(self):
        """Save models and training statistics"""
        # Save training stats
        try:
            stats_file = self.models_dir / "hybrid_training_stats.json"
            with open(stats_file, 'w') as f:
                json.dump(training_stats, f, indent=2)
            self.logger.info(f"Training statistics saved to {stats_file}")
        except Exception as e:
            self.logger.error(f"Error saving training stats: {str(e)}")

        # Models are already saved in their respective training functions

    def _log_to_tensorboard(self, iteration, sv_stats, rl_stats):
        """Log training metrics to TensorBoard"""
        if not self.tensorboard_writer:
            return

        # Log supervised metrics
        if sv_stats and "train_losses" in sv_stats:
            for i, loss in enumerate(sv_stats["train_losses"]):
                step = (iteration * len(sv_stats["train_losses"])) + i
                self.tensorboard_writer.add_scalar('supervised/train_loss', loss, step)
                self.tensorboard_writer.add_scalar('supervised/val_loss', sv_stats["val_losses"][i], step)
                self.tensorboard_writer.add_scalar('supervised/train_accuracy', sv_stats["train_accuracies"][i], step)
                self.tensorboard_writer.add_scalar('supervised/val_accuracy', sv_stats["val_accuracies"][i], step)
                self.tensorboard_writer.add_scalar('supervised/train_pnl', sv_stats["train_pnls"][i], step)
                self.tensorboard_writer.add_scalar('supervised/val_pnl', sv_stats["val_pnls"][i], step)

        # Log reinforcement metrics
        if rl_stats and "rewards" in rl_stats:
            for i, reward in enumerate(rl_stats["rewards"]):
                step = (iteration * len(rl_stats["rewards"])) + i
                self.tensorboard_writer.add_scalar('reinforcement/reward', reward, step)
                self.tensorboard_writer.add_scalar('reinforcement/win_rate', rl_stats["win_rates"][i], step)
                self.tensorboard_writer.add_scalar('reinforcement/trades', rl_stats["trades"][i], step)

        # Log hybrid metrics
        self.tensorboard_writer.add_scalar('hybrid/iterations', self.iter_count, iteration)
        self.tensorboard_writer.add_scalar('hybrid/combined_score', training_stats["hybrid"]["best_combined_score"], iteration)

        # Flush to ensure data is written
        self.tensorboard_writer.flush()


async def main():
    """Main entry point for the hybrid training script"""
    parser = argparse.ArgumentParser(description='Hybrid Training Script')
    parser.add_argument('--iterations', type=int, default=10, help='Number of hybrid iterations to run')
    parser.add_argument('--sv-epochs', type=int, default=5, help='Supervised epochs per iteration')
    parser.add_argument('--rl-episodes', type=int, default=2, help='RL episodes per iteration')
    parser.add_argument('--symbol', type=str, default='BTC/USDT', help='Trading symbol')
    parser.add_argument('--timeframes', type=str, nargs='+', default=['1m', '5m', '15m'], help='Timeframes to use')
    parser.add_argument('--window-size', type=int, default=24, help='Window size for models')
    parser.add_argument('--visualize', action='store_true', help='Enable visualization')
    parser.add_argument('--config', type=str, help='Path to custom configuration file')

    args = parser.parse_args()

    # Load configuration
    if args.config:
        config = train_config.load_config(args.config)
    else:
        # Create custom config from command-line arguments
        custom_config = {
            'market_data': {
                'symbol': args.symbol,
                'timeframes': args.timeframes,
                'window_size': args.window_size
            },
            'visualization': {
                'enabled': args.visualize
            }
        }
        config = train_config.get_config('hybrid', custom_config)

    # Print startup banner
    print("=" * 80)
    print("HYBRID TRAINING SESSION")
    print("Combining supervised learning (CNN) with reinforcement learning (RL)")
    print(f"Symbol: {config['market_data']['symbol']}")
    print(f"Timeframes: {config['market_data']['timeframes']}")
    print(f"Iterations: {args.iterations} (SV epochs: {args.sv_epochs}, RL episodes: {args.rl_episodes})")
    print("Press Ctrl+C to safely stop training and save the models")
    print("=" * 80)

    # Initialize the hybrid model
    hybrid_model = HybridModel(config)
    initialized = hybrid_model.initialize()

    if not initialized:
        print("Failed to initialize hybrid model. Exiting.")
        return 1

    try:
        # Run training
        await hybrid_model.train_hybrid(
            iterations=args.iterations,
            sv_epochs_per_iter=args.sv_epochs,
            rl_episodes_per_iter=args.rl_episodes
        )

        print("Training completed successfully.")
        return 0

    except KeyboardInterrupt:
        print("Training interrupted by user.")
        return 0

    except Exception as e:
        print(f"Error during training: {str(e)}")
        import traceback
        traceback.print_exc()
        return 1


if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code
    sys.exit(asyncio.run(main()))
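
# Example invocation (the filename below is assumed; use whatever name this script is saved under):
#   python hybrid_training.py --iterations 20 --sv-epochs 5 --rl-episodes 2 \
#       --symbol BTC/USDT --timeframes 1m 5m 15m --window-size 24 --visualize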