misc

train_hybrid.py (new file, 731 additions)

@@ -0,0 +1,731 @@
#!/usr/bin/env python
"""
Hybrid Training Script - Combining Supervised and Reinforcement Learning

This script provides a hybrid approach that:
1. Performs supervised learning on market data using CNN models
2. Uses reinforcement learning to optimize trading strategies
3. Only uses real market data (never synthetic)

The script enables both approaches to complement each other:
- CNN model learns patterns from historical data (supervised)
- RL agent optimizes actual trading decisions (reinforcement)
"""

import os
import sys
import logging
import argparse
import numpy as np
import torch
import time
import json
import asyncio
import signal
import threading
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter

# Add project root to path if needed
project_root = os.path.dirname(os.path.abspath(__file__))
if project_root not in sys.path:
    sys.path.append(project_root)

# Import configurations
import train_config

# Import key components
from NN.models.cnn_model_pytorch import CNNModelPyTorch
from NN.models.dqn_agent import DQNAgent
from realtime import MultiTimeframeDataInterface, RealTimeChart
from NN.utils.signal_interpreter import SignalInterpreter

# Global variables for graceful shutdown
running = True
training_stats = {
    "supervised": {
        "epochs_completed": 0,
        "best_val_pnl": -float('inf'),
        "best_epoch": 0,
        "best_win_rate": 0
    },
    "reinforcement": {
        "episodes_completed": 0,
        "best_reward": -float('inf'),
        "best_episode": 0,
        "best_win_rate": 0
    },
    "hybrid": {
        "iterations_completed": 0,
        "best_combined_score": -float('inf'),
        "training_started": datetime.now().isoformat(),
        "last_update": datetime.now().isoformat()
    }
}

# Configure signal handler for graceful shutdown
def signal_handler(sig, frame):
    global running
    logging.info("Received interrupt signal. Finishing current training cycle and saving models...")
    running = False

# Register signal handler
signal.signal(signal.SIGINT, signal_handler)

class HybridModel:
    """
    Hybrid model that combines supervised CNN learning with RL-based decision optimization
    """
    def __init__(self, config):
        self.config = config
        self.device = torch.device(config['hardware']['device'])
        self.supervised_model = None
        self.rl_agent = None
        self.data_interface = None
        self.signal_interpreter = None
        self.chart = None

        # Training stats
        self.tensorboard_writer = None
        self.iter_count = 0
        self.supervised_epochs = 0
        self.rl_episodes = 0

        # Initialize logging
        self.logger = logging.getLogger('hybrid_model')

        # Paths
        self.models_dir = Path(config['paths']['models_dir'])
        self.models_dir.mkdir(exist_ok=True, parents=True)

    def initialize(self):
        """Initialize all components of the hybrid model"""
        # Set up TensorBoard
        log_dir = Path(self.config['paths']['tensorboard_dir']) / f"hybrid_{int(time.time())}"
        self.tensorboard_writer = SummaryWriter(log_dir=str(log_dir))
        self.logger.info(f"TensorBoard initialized at {log_dir}")

        # Initialize data interface
        symbol = self.config['market_data']['symbol']
        timeframes = self.config['market_data']['timeframes']
        window_size = self.config['market_data']['window_size']

        self.logger.info(f"Initializing data interface for {symbol} with timeframes {timeframes}")
        self.data_interface = MultiTimeframeDataInterface(
            symbol=symbol,
            timeframes=timeframes
        )

        # Initialize supervised model (CNN)
        self._initialize_supervised_model(window_size)

        # Initialize RL agent
        self._initialize_rl_agent(window_size)

        # Initialize signal interpreter
        self.signal_interpreter = SignalInterpreter(config={
            'buy_threshold': 0.65,
            'sell_threshold': 0.65,
            'hold_threshold': 0.75,
            'trend_filter_enabled': True,
            'volume_filter_enabled': True
        })

        # Initialize chart if visualization is enabled
        if self.config.get('visualization', {}).get('enabled', False):
            self._initialize_chart()

        return True

    def _initialize_supervised_model(self, window_size):
        """Initialize the supervised CNN model"""
        try:
            # Get data shape information
            X_train_dict, y_train, X_val_dict, y_val, _, _ = self.data_interface.prepare_training_data(
                window_size=window_size,
                refresh=True
            )

            if X_train_dict is None or y_train is None:
                raise ValueError("Failed to load training data")

            # Get reference timeframe (lowest timeframe)
            reference_tf = min(
                self.config['market_data']['timeframes'],
                key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
            )

            # Get feature count from the data
            num_features = X_train_dict[reference_tf].shape[2]

            # Initialize model
            self.logger.info(f"Initializing CNN model with {num_features} features")

            self.supervised_model = CNNModelPyTorch(
                window_size=window_size,
                num_features=num_features,
                output_size=3,  # BUY/HOLD/SELL
                timeframes=self.config['market_data']['timeframes']
            )

            # Load existing model if available
            model_path = self.models_dir / "supervised_model_best.pt"
            if model_path.exists():
                self.logger.info(f"Loading existing CNN model from {model_path}")
                self.supervised_model.load(str(model_path))
                self.logger.info("CNN model loaded successfully")
            else:
                self.logger.info("No existing CNN model found. Starting with a new model.")

        except Exception as e:
            self.logger.error(f"Error initializing supervised model: {str(e)}")
            import traceback
            self.logger.error(traceback.format_exc())
            raise

    def _initialize_rl_agent(self, window_size):
        """Initialize the RL agent"""
        try:
            # Get data for RL training
            X_train_dict, _, _, _, _, _ = self.data_interface.prepare_training_data(
                window_size=window_size,
                refresh=True
            )

            if X_train_dict is None:
                raise ValueError("Failed to load training data for RL agent")

            # Get reference timeframe features
            reference_tf = min(
                self.config['market_data']['timeframes'],
                key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
            )

            # Calculate state size - this is more complex for RL.
            # For now the state is the flattened raw feature window plus position
            # info; a CNN-derived representation could be substituted later via
            # _enhance_state_with_cnn.
            state_size = window_size * X_train_dict[reference_tf].shape[2] + 3  # +3 for position, equity, unrealized_pnl
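            # Worked example (illustrative only): with the script's default
            # window size of 24 and, say, 10 features per candle,
            # state_size = 24 * 10 + 3 = 243.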

            # Initialize RL agent
            self.logger.info(f"Initializing RL agent with state size {state_size}")

            self.rl_agent = DQNAgent(
                state_size=state_size,
                n_actions=3,  # BUY/HOLD/SELL
                epsilon=1.0,
                epsilon_decay=0.995,
                epsilon_min=0.01,
                learning_rate=self.config['training']['learning_rate'],
                gamma=0.99,
                buffer_size=10000,
                batch_size=self.config['training']['batch_size'],
                device=self.device
            )

            # Load existing agent if available
            agent_path = self.models_dir / "rl_agent_best.pth"
            if agent_path.exists():
                self.logger.info(f"Loading existing RL agent from {agent_path}")
                self.rl_agent.load(str(agent_path))
                self.logger.info("RL agent loaded successfully")
            else:
                self.logger.info("No existing RL agent found. Starting with a new agent.")

        except Exception as e:
            self.logger.error(f"Error initializing RL agent: {str(e)}")
            import traceback
            self.logger.error(traceback.format_exc())
            raise

    def _initialize_chart(self):
        """Initialize the RealTimeChart for visualization"""
        try:
            from realtime import RealTimeChart

            symbol = self.config['market_data']['symbol']
            self.logger.info(f"Initializing RealTimeChart for {symbol}")

            self.chart = RealTimeChart(symbol=symbol)

            # TODO: Start chart server in a background thread
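            # Illustrative sketch only (not active code): one way to serve the
            # chart without blocking training would be a daemon thread; the
            # start() method name on RealTimeChart is an assumption here.
            #
            # chart_thread = threading.Thread(target=self.chart.start, daemon=True)
            # chart_thread.start()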

        except Exception as e:
            self.logger.error(f"Error initializing chart: {str(e)}")
            self.chart = None

    async def train_hybrid(self, iterations=10, sv_epochs_per_iter=5, rl_episodes_per_iter=2):
        """
        Main hybrid training loop

        Args:
            iterations: Number of hybrid iterations to run
            sv_epochs_per_iter: Number of supervised epochs per iteration
            rl_episodes_per_iter: Number of RL episodes per iteration

        Returns:
            dict: Training statistics
        """
        self.logger.info(f"Starting hybrid training with {iterations} iterations")
        self.logger.info(f"Each iteration includes {sv_epochs_per_iter} supervised epochs and {rl_episodes_per_iter} RL episodes")

        # Training loop
        for iteration in range(iterations):
            if not running:
                self.logger.info("Training stopped by user")
                break

            self.logger.info(f"Iteration {iteration+1}/{iterations}")
            self.iter_count += 1

            # 1. Supervised learning phase
            self.logger.info("Starting supervised learning phase")
            sv_stats = await self.train_supervised(epochs=sv_epochs_per_iter)

            # 2. Reinforcement learning phase
            self.logger.info("Starting reinforcement learning phase")
            rl_stats = await self.train_reinforcement(episodes=rl_episodes_per_iter)

            # 3. Update global training stats
            self._update_training_stats(sv_stats, rl_stats)

            # 4. Save models and stats
            self._save_models_and_stats()

            # 5. Log to TensorBoard
            if self.tensorboard_writer:
                self._log_to_tensorboard(iteration, sv_stats, rl_stats)

        self.logger.info("Hybrid training completed")
        return training_stats

    async def train_supervised(self, epochs=5):
        """
        Run supervised training for a specified number of epochs

        Args:
            epochs: Number of epochs to train

        Returns:
            dict: Training statistics
        """
        # Get fresh data
        window_size = self.config['market_data']['window_size']
        X_train_dict, y_train, X_val_dict, y_val, train_prices, val_prices = self.data_interface.prepare_training_data(
            window_size=window_size,
            refresh=True
        )

        if X_train_dict is None or y_train is None:
            self.logger.error("Failed to load training data")
            return {}

        # Get reference timeframe (lowest timeframe)
        reference_tf = min(
            self.config['market_data']['timeframes'],
            key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
        )

        # Calculate future prices for profitability-focused loss function
        train_future_prices = self.data_interface.get_future_prices(train_prices, n_candles=8)
        val_future_prices = self.data_interface.get_future_prices(val_prices, n_candles=8)

        # For now, we use only the reference timeframe
        X_train = X_train_dict[reference_tf]
        X_val = X_val_dict[reference_tf]

        # Training stats
        stats = {
            "train_losses": [],
            "val_losses": [],
            "train_accuracies": [],
            "val_accuracies": [],
            "train_pnls": [],
            "val_pnls": [],
            "best_val_pnl": -float('inf'),
            "best_epoch": -1
        }

        batch_size = self.config['training']['batch_size']

        # Training loop
        for epoch in range(epochs):
            if not running:
                break

            epoch_start = time.time()

            # Train one epoch
            train_action_loss, train_price_loss, train_acc = self.supervised_model.train_epoch(
                X_train, y_train, train_future_prices, batch_size
            )

            # Evaluate
            val_action_loss, val_price_loss, val_acc = self.supervised_model.evaluate(
                X_val, y_val, val_future_prices
            )

            # Get predictions for PnL calculation
            train_action_probs, _ = self.supervised_model.predict(X_train)
            val_action_probs, _ = self.supervised_model.predict(X_val)

            # Convert probabilities to actions
            train_preds = np.argmax(train_action_probs, axis=1)
            val_preds = np.argmax(val_action_probs, axis=1)

            # Calculate PnL
            train_pnl, train_win_rate, _ = self.data_interface.calculate_pnl(
                train_preds, train_prices, position_size=1.0
            )
            val_pnl, val_win_rate, _ = self.data_interface.calculate_pnl(
                val_preds, val_prices, position_size=1.0
            )

            # Update stats
            stats["train_losses"].append(train_action_loss)
            stats["val_losses"].append(val_action_loss)
            stats["train_accuracies"].append(train_acc)
            stats["val_accuracies"].append(val_acc)
            stats["train_pnls"].append(train_pnl)
            stats["val_pnls"].append(val_pnl)

            # Check if this is the best model
            if val_pnl > stats["best_val_pnl"]:
                stats["best_val_pnl"] = val_pnl
                stats["best_epoch"] = epoch
                stats["best_win_rate"] = val_win_rate

                # Save the best model
                self.supervised_model.save(str(self.models_dir / "supervised_model_best.pt"))

            # Log epoch results
            self.logger.info(f"Supervised Epoch {epoch+1}/{epochs}")
            self.logger.info(f"  Train Loss: {train_action_loss:.4f}, Accuracy: {train_acc:.4f}, PnL: {train_pnl:.4f}")
            self.logger.info(f"  Val Loss: {val_action_loss:.4f}, Accuracy: {val_acc:.4f}, PnL: {val_pnl:.4f}")

            # Log timing
            epoch_time = time.time() - epoch_start
            self.logger.info(f"  Epoch completed in {epoch_time:.2f} seconds")

            # Update global epoch counter
            self.supervised_epochs += 1

            # Small delay to allow for interruption
            await asyncio.sleep(0.1)

        return stats

    async def train_reinforcement(self, episodes=2):
        """
        Run reinforcement learning for a specified number of episodes

        Args:
            episodes: Number of episodes to train

        Returns:
            dict: Training statistics
        """
        from NN.train_rl import RLTradingEnvironment

        # Get data for RL environment
        window_size = self.config['market_data']['window_size']

        # Get all timeframes data
        data_dict = self.data_interface.get_multi_timeframe_data(refresh=True)

        if not data_dict:
            self.logger.error("Failed to fetch data for any timeframe")
            return {}

        # Extract key timeframes
        timeframes = self.config['market_data']['timeframes']

        # Extract features from dataframes
        features = {}
        for tf in timeframes:
            if tf in data_dict:
                df = data_dict[tf]
                # Add indicators if not already added
                if 'rsi' not in df.columns:
                    df = self.data_interface.add_indicators(df)

                # Convert to numpy array with close price as the last column
                features[tf] = np.hstack([
                    df.drop(['timestamp', 'close'], axis=1).values,
                    df['close'].values.reshape(-1, 1)
                ])

        # Ensure we have all needed timeframes
        required_tfs = ['1m', '5m', '15m']  # Most common timeframes used by RL
        for tf in required_tfs:
            if tf not in features and tf in timeframes:
                self.logger.error(f"Missing features for timeframe {tf}")
                return {}

        # Create environment with our feature data
        env = RLTradingEnvironment(
            features_1m=features.get('1m'),
            features_1h=features.get('1h', features.get('5m')),  # Use 5m as fallback
            features_1d=features.get('1d', features.get('15m'))  # Use 15m as fallback
        )

        # Training stats
        stats = {
            "rewards": [],
            "win_rates": [],
            "trades": [],
            "best_reward": -float('inf'),
            "best_episode": -1
        }

        # RL training loop
        for episode in range(episodes):
            if not running:
                break

            episode_start = time.time()
            self.logger.info(f"RL Episode {episode+1}/{episodes}")

            # Reset environment
            state = env.reset()
            total_reward = 0
            trades = 0
            wins = 0

            # Run one episode
            done = False
            max_steps = 1000
            step = 0

            while not done and step < max_steps:
                # Use CNN model to enhance state representation if available
                enhanced_state = self._enhance_state_with_cnn(state)

                # Select action using the RL agent
                action = self.rl_agent.act(enhanced_state)

                # Take step in environment
                next_state, reward, done, info = env.step(action)

                # Store in replay buffer
                self.rl_agent.remember(enhanced_state, action, reward,
                                       self._enhance_state_with_cnn(next_state), done)

                # Update episode statistics
                total_reward += reward
                state = next_state
                step += 1

                # Track trades and wins
                if action != 2:  # Not HOLD
                    trades += 1
                    if reward > 0:
                        wins += 1

                # Train the agent on a batch of experiences
                if len(self.rl_agent.memory) > self.config['training']['batch_size']:
                    self.rl_agent.replay(self.config['training']['batch_size'])

                # Allow for interruption
                if step % 100 == 0:
                    await asyncio.sleep(0.1)
                    if not running:
                        break

            # Calculate win rate
            win_rate = wins / max(1, trades)

            # Update stats
            stats["rewards"].append(total_reward)
            stats["win_rates"].append(win_rate)
            stats["trades"].append(trades)

            # Check if this is the best agent
            if total_reward > stats["best_reward"]:
                stats["best_reward"] = total_reward
                stats["best_episode"] = episode

                # Save the best agent
                self.rl_agent.save(str(self.models_dir / "rl_agent_best.pth"))

            # Log episode results
            self.logger.info(f"  Reward: {total_reward:.4f}, Win Rate: {win_rate:.4f}, Trades: {trades}")

            # Log timing
            episode_time = time.time() - episode_start
            self.logger.info(f"  Episode completed in {episode_time:.2f} seconds")

            # Update global episode counter
            self.rl_episodes += 1

            # Reduce exploration rate
            self.rl_agent.adjust_epsilon()

            # Small delay to allow for interruption
            await asyncio.sleep(0.1)

        return stats

    def _enhance_state_with_cnn(self, state):
        """
        Enhance the RL state with CNN feature extraction

        Args:
            state: The original state from the environment

        Returns:
            numpy.ndarray: Enhanced state representation
        """
        # This is a placeholder - in a real implementation, you would:
        # 1. Format the state for the CNN
        # 2. Get the CNN's feature representation
        # 3. Combine with the original state features
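        # Illustrative sketch only (not active code); the feature-extraction
        # hook on CNNModelPyTorch is hypothetical, and the state would need to
        # be reshaped to the (window_size, num_features) layout the CNN expects:
        #
        # cnn_features = self.supervised_model.extract_features(state)  # hypothetical method
        # return np.concatenate([state.flatten(), cnn_features.flatten()])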
        return state

    def _update_training_stats(self, sv_stats, rl_stats):
        """Update global training statistics"""
        global training_stats

        # Update supervised stats
        if sv_stats:
            training_stats["supervised"]["epochs_completed"] = self.supervised_epochs
            if "best_val_pnl" in sv_stats and sv_stats["best_val_pnl"] > training_stats["supervised"]["best_val_pnl"]:
                training_stats["supervised"]["best_val_pnl"] = sv_stats["best_val_pnl"]
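                # sv_stats["best_epoch"] is an index within this run only;
                # offsetting by the epochs completed before this run converts
                # it to a global epoch number.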
                training_stats["supervised"]["best_epoch"] = sv_stats["best_epoch"] + training_stats["supervised"]["epochs_completed"] - len(sv_stats["train_losses"])
                training_stats["supervised"]["best_win_rate"] = sv_stats.get("best_win_rate", 0)

        # Update reinforcement stats
        if rl_stats:
            training_stats["reinforcement"]["episodes_completed"] = self.rl_episodes
            if "best_reward" in rl_stats and rl_stats["best_reward"] > training_stats["reinforcement"]["best_reward"]:
                training_stats["reinforcement"]["best_reward"] = rl_stats["best_reward"]
                training_stats["reinforcement"]["best_episode"] = rl_stats["best_episode"] + training_stats["reinforcement"]["episodes_completed"] - len(rl_stats["rewards"])

        # Update hybrid stats
        training_stats["hybrid"]["iterations_completed"] = self.iter_count
        training_stats["hybrid"]["last_update"] = datetime.now().isoformat()

        # Calculate combined score (simple formula, can be adjusted)
        sv_score = training_stats["supervised"]["best_val_pnl"]
        rl_score = training_stats["reinforcement"]["best_reward"]
        combined_score = sv_score * 0.7 + rl_score * 0.3  # Weight supervised more
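        # Worked example with made-up values: best_val_pnl = 10.0 and
        # best_reward = 2.0 give 0.7 * 10.0 + 0.3 * 2.0 = 7.6.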

        if combined_score > training_stats["hybrid"]["best_combined_score"]:
            training_stats["hybrid"]["best_combined_score"] = combined_score

    def _save_models_and_stats(self):
        """Save models and training statistics"""
        # Save training stats
        try:
            stats_file = self.models_dir / "hybrid_training_stats.json"
            with open(stats_file, 'w') as f:
                json.dump(training_stats, f, indent=2)
            self.logger.info(f"Training statistics saved to {stats_file}")
        except Exception as e:
            self.logger.error(f"Error saving training stats: {str(e)}")

        # Models are already saved in their respective training functions

    def _log_to_tensorboard(self, iteration, sv_stats, rl_stats):
        """Log training metrics to TensorBoard"""
        if not self.tensorboard_writer:
            return

        # Log supervised metrics
        if sv_stats and "train_losses" in sv_stats:
            for i, loss in enumerate(sv_stats["train_losses"]):
                step = (iteration * len(sv_stats["train_losses"])) + i
                self.tensorboard_writer.add_scalar('supervised/train_loss', loss, step)
                self.tensorboard_writer.add_scalar('supervised/val_loss', sv_stats["val_losses"][i], step)
                self.tensorboard_writer.add_scalar('supervised/train_accuracy', sv_stats["train_accuracies"][i], step)
                self.tensorboard_writer.add_scalar('supervised/val_accuracy', sv_stats["val_accuracies"][i], step)
                self.tensorboard_writer.add_scalar('supervised/train_pnl', sv_stats["train_pnls"][i], step)
                self.tensorboard_writer.add_scalar('supervised/val_pnl', sv_stats["val_pnls"][i], step)

        # Log reinforcement metrics
        if rl_stats and "rewards" in rl_stats:
            for i, reward in enumerate(rl_stats["rewards"]):
                step = (iteration * len(rl_stats["rewards"])) + i
                self.tensorboard_writer.add_scalar('reinforcement/reward', reward, step)
                self.tensorboard_writer.add_scalar('reinforcement/win_rate', rl_stats["win_rates"][i], step)
                self.tensorboard_writer.add_scalar('reinforcement/trades', rl_stats["trades"][i], step)

        # Log hybrid metrics
        self.tensorboard_writer.add_scalar('hybrid/iterations', self.iter_count, iteration)
        self.tensorboard_writer.add_scalar('hybrid/combined_score', training_stats["hybrid"]["best_combined_score"], iteration)

        # Flush to ensure data is written
        self.tensorboard_writer.flush()

async def main():
    """Main entry point for the hybrid training script"""
    parser = argparse.ArgumentParser(description='Hybrid Training Script')
    parser.add_argument('--iterations', type=int, default=10, help='Number of hybrid iterations to run')
    parser.add_argument('--sv-epochs', type=int, default=5, help='Supervised epochs per iteration')
    parser.add_argument('--rl-episodes', type=int, default=2, help='RL episodes per iteration')
    parser.add_argument('--symbol', type=str, default='BTC/USDT', help='Trading symbol')
    parser.add_argument('--timeframes', type=str, nargs='+', default=['1m', '5m', '15m'], help='Timeframes to use')
    parser.add_argument('--window-size', type=int, default=24, help='Window size for models')
    parser.add_argument('--visualize', action='store_true', help='Enable visualization')
    parser.add_argument('--config', type=str, help='Path to custom configuration file')

    args = parser.parse_args()

    # Load configuration
    if args.config:
        config = train_config.load_config(args.config)
    else:
        # Create custom config from command-line arguments
        custom_config = {
            'market_data': {
                'symbol': args.symbol,
                'timeframes': args.timeframes,
                'window_size': args.window_size
            },
            'visualization': {
                'enabled': args.visualize
            }
        }
        config = train_config.get_config('hybrid', custom_config)

    # Print startup banner
    print("=" * 80)
    print("HYBRID TRAINING SESSION")
    print("Combining supervised learning (CNN) with reinforcement learning (RL)")
    print(f"Symbol: {config['market_data']['symbol']}")
    print(f"Timeframes: {config['market_data']['timeframes']}")
    print(f"Iterations: {args.iterations} (SV epochs: {args.sv_epochs}, RL episodes: {args.rl_episodes})")
    print("Press Ctrl+C to safely stop training and save the models")
    print("=" * 80)

    # Initialize the hybrid model
    hybrid_model = HybridModel(config)
    initialized = hybrid_model.initialize()

    if not initialized:
        print("Failed to initialize hybrid model. Exiting.")
        return 1

    try:
        # Run training
        await hybrid_model.train_hybrid(
            iterations=args.iterations,
            sv_epochs_per_iter=args.sv_epochs,
            rl_episodes_per_iter=args.rl_episodes
        )

        print("Training completed successfully.")
        return 0

    except KeyboardInterrupt:
        print("Training interrupted by user.")
        return 0

    except Exception as e:
        print(f"Error during training: {str(e)}")
        import traceback
        traceback.print_exc()
        return 1


if __name__ == "__main__":
    asyncio.run(main())