#!/usr/bin/env python
"""
Hybrid Training Script with Device Compatibility Fixes

This is a fixed version of the hybrid training script that:
1. Forces CPU use to avoid CUDA/device mismatch errors
2. Adds better error handling and recovery for model initialization
3. Implements direct model movement to CPU

Usage:
    python train_hybrid_fixed.py --iterations 10 --sv-epochs 5 --rl-episodes 2
"""

import os
import sys
import logging
import argparse
import numpy as np
import torch
import time
import json
import asyncio
import signal
import threading
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter
from torch import optim
import torch.nn.functional as F

# Force CPU usage to avoid device mismatch errors
os.environ['CUDA_VISIBLE_DEVICES'] = ''
os.environ['DISABLE_MIXED_PRECISION'] = '1'

# Force PyTorch to use CPU
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
os.environ['PYTORCH_JIT'] = '0'

# Disable CUDA completely in PyTorch
torch.cuda.is_available = lambda: False

# Add project root to path if needed
project_root = os.path.dirname(os.path.abspath(__file__))
if project_root not in sys.path:
    sys.path.append(project_root)

# Import configurations
import train_config

# Import key components
from NN.models.cnn_model_pytorch import CNNModelPyTorch, CNNPyTorch
from NN.models.dqn_agent import DQNAgent
from dataprovider_realtime import MultiTimeframeDataInterface, RealTimeChart
from NN.utils.signal_interpreter import SignalInterpreter

# Configure logging
log_dir = Path("logs")
log_dir.mkdir(exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
log_file = log_dir / f"hybrid_training_{timestamp}.log"

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger('hybrid_training')

# Global variables for graceful shutdown
running = True
training_stats = {
    "supervised": {
        "epochs_completed": 0,
        "best_val_pnl": -float('inf'),
        "best_epoch": 0,
        "best_win_rate": 0
    },
    "reinforcement": {
        "episodes_completed": 0,
        "best_reward": -float('inf'),
        "best_episode": 0,
        "best_win_rate": 0
    },
    "hybrid": {
        "iterations_completed": 0,
        "best_combined_score": -float('inf'),
        "training_started": datetime.now().isoformat(),
        "last_update": datetime.now().isoformat()
    }
}

# Configure signal handler for graceful shutdown
def signal_handler(sig, frame):
    global running
    logger.info("Received interrupt signal. Finishing current training cycle and saving models...")
    running = False
# Register signal handler
signal.signal(signal.SIGINT, signal_handler)


class HybridModel:
    """
    Hybrid model that combines supervised CNN learning with RL-based decision optimization
    """
    def __init__(self, config):
        self.config = config
        # Force CPU for all operations
        config['hardware']['device'] = 'cpu'
        config['hardware']['mixed_precision'] = False
        self.device = torch.device('cpu')
        self.supervised_model = None
        self.rl_agent = None
        self.data_interface = None
        self.signal_interpreter = None
        self.chart = None

        # Training stats
        self.tensorboard_writer = None
        self.iter_count = 0
        self.supervised_epochs = 0
        self.rl_episodes = 0

        # Initialize logging
        self.logger = logging.getLogger('hybrid_model')

        # Paths
        self.models_dir = Path(config['paths']['models_dir'])
        self.models_dir.mkdir(exist_ok=True, parents=True)

    def initialize(self):
        """Initialize all components of the hybrid model"""
        # Set up TensorBoard
        tb_dir = Path(self.config['paths']['tensorboard_dir'])
        tb_dir.mkdir(exist_ok=True, parents=True)
        log_dir = tb_dir / f"hybrid_{timestamp}"
        self.tensorboard_writer = SummaryWriter(log_dir=str(log_dir))
        self.logger.info(f"TensorBoard initialized at {log_dir}")

        # Initialize data interface
        symbol = self.config['market_data']['symbol']
        timeframes = self.config['market_data']['timeframes']
        window_size = self.config['market_data']['window_size']

        self.logger.info(f"Initializing data interface for {symbol} with timeframes {timeframes}")
        self.data_interface = MultiTimeframeDataInterface(
            symbol=symbol,
            timeframes=timeframes
        )

        # Initialize supervised model (CNN)
        self._initialize_supervised_model(window_size)

        # Initialize RL agent
        self._initialize_rl_agent(window_size)

        # Initialize signal interpreter
        self.signal_interpreter = SignalInterpreter(config={
            'buy_threshold': 0.65,
            'sell_threshold': 0.65,
            'hold_threshold': 0.75,
            'trend_filter_enabled': True,
            'volume_filter_enabled': True
        })

        # Initialize chart if visualization is enabled
        if self.config.get('visualization', {}).get('enabled', False):
            self._initialize_chart()

        return True

    def _initialize_supervised_model(self, window_size):
        """Initialize the supervised CNN model"""
        try:
            # Get data shape information
            X_train_dict, y_train, X_val_dict, y_val, _, _ = self.data_interface.prepare_training_data(
                window_size=window_size,
                refresh=True
            )

            if X_train_dict is None or y_train is None:
                raise ValueError("Failed to load training data")

            # Get reference timeframe (lowest timeframe)
            reference_tf = min(
                self.config['market_data']['timeframes'],
                key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
            )

            # Get feature count from the data
            features_per_tf = X_train_dict[reference_tf].shape[2]
            total_features = features_per_tf * len(self.config['market_data']['timeframes'])

            # Initialize model
            self.logger.info(f"Initializing CNN model with {total_features} features")
            self.supervised_model = CNNModelPyTorch(
                window_size=window_size,
                timeframes=self.config['market_data']['timeframes'],
                output_size=3,  # BUY/HOLD/SELL
                num_pairs=1     # Single pair for now
            )

            # Create a new model instance with the correct input shape
            if hasattr(self.supervised_model, 'model'):
                # The underlying model needs to be recreated with the correct input shape
                input_shape = (window_size, total_features)

                # Force CPU device for this model
                self.supervised_model.device = self.device

                # Create a new CNNPyTorch model on the CPU
                new_model = CNNPyTorch(input_shape, self.supervised_model.output_size)
                new_model.device = self.device
                new_model.to(self.device)
                # Make sure class_weights tensor is on CPU
                if hasattr(new_model, 'class_weights'):
                    new_model.class_weights = new_model.class_weights.to(self.device)

                # Replace the model
                self.supervised_model.model = new_model

                # Reinitialize the optimizer
                self.supervised_model.optimizer = optim.Adam(
                    self.supervised_model.model.parameters(),
                    lr=0.0001,
                    weight_decay=0.01
                )

                # Initialize the criterion (missing in the model)
                self.supervised_model.criterion = torch.nn.CrossEntropyLoss()

            # Ensure model is on CPU
            self.supervised_model.device = self.device
            if hasattr(self.supervised_model, 'model'):
                self.supervised_model.model.to(self.device)

            # Load existing model if available and not creating new model
            model_path = self.models_dir / "supervised_model_best.pt"
            if model_path.exists() and not self.config.get('model', {}).get('new_model', False):
                self.logger.info(f"Loading existing CNN model from {model_path}")
                try:
                    self.supervised_model.load(str(model_path))
                    self.logger.info("CNN model loaded successfully")
                except Exception as e:
                    self.logger.error(f"Error loading CNN model: {str(e)}")
                    self.logger.info("Starting with a new CNN model")
            else:
                self.logger.info("Starting with a new CNN model")

        except Exception as e:
            self.logger.error(f"Error initializing supervised model: {str(e)}")
            import traceback
            self.logger.error(traceback.format_exc())
            raise

    def _initialize_rl_agent(self, window_size):
        """Initialize the RL agent"""
        try:
            # Get data for RL training
            X_train_dict, _, _, _, _, _ = self.data_interface.prepare_training_data(
                window_size=window_size,
                refresh=True
            )

            if X_train_dict is None:
                raise ValueError("Failed to load training data for RL agent")

            # Get reference timeframe features
            reference_tf = min(
                self.config['market_data']['timeframes'],
                key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
            )

            # Get feature count from the data
            num_features = X_train_dict[reference_tf].shape[2]

            # Initialize RL agent
            self.logger.info("Initializing RL agent")

            # State shape for DQN agent: (timeframes, window_size, features)
            state_shape = (len(self.config['market_data']['timeframes']), window_size, num_features)

            self.rl_agent = DQNAgent(
                state_shape=state_shape,
                n_actions=3,  # BUY/HOLD/SELL
                epsilon=1.0,
                epsilon_min=0.01,
                epsilon_decay=0.995,
                learning_rate=self.config['training']['learning_rate'],
                gamma=0.95,
                buffer_size=10000,
                batch_size=self.config['training']['batch_size'],
                device=self.device  # Explicitly pass CPU device
            )

            # Explicitly move agent to CPU and force it to stay there
            try:
                # First set the device in the agent itself
                self.rl_agent.device = self.device

                # Force PyTorch to use CPU by setting device on each model
                if hasattr(self.rl_agent, 'policy_net'):
                    self.rl_agent.policy_net.to(self.device)
                    # Force all layers to CPU
                    for parameter in self.rl_agent.policy_net.parameters():
                        parameter.data = parameter.data.to(self.device)

                if hasattr(self.rl_agent, 'target_net'):
                    self.rl_agent.target_net.to(self.device)
                    # Force all layers to CPU
                    for parameter in self.rl_agent.target_net.parameters():
                        parameter.data = parameter.data.to(self.device)

                # Move models to the specified device
                self.rl_agent.move_models_to_device(self.device)
                self.logger.info(f"RL agent models moved to {self.device}")
            except Exception as e:
                self.logger.warning(f"Could not move RL agent models to device: {str(e)}")

            # Load existing agent if available and not creating new model
            agent_path = self.models_dir / "rl_agent_best"
            if os.path.exists(f"{agent_path}_policy.pt") and not self.config.get('model', {}).get('new_model', False):
                self.logger.info(f"Loading existing RL agent from {agent_path}")
RL agent from {agent_path}") try: self.rl_agent.load(str(agent_path)) self.logger.info("RL agent loaded successfully") except Exception as e: self.logger.error(f"Error loading RL agent: {str(e)}") self.logger.info("Starting with a new RL agent") else: self.logger.info("Starting with a new RL agent") # Reset epsilon if training a new model if self.config.get('model', {}).get('new_model', False): if hasattr(self.rl_agent, 'epsilon_start'): self.rl_agent.epsilon = self.rl_agent.epsilon_start self.logger.info(f"New model requested. Reset RL agent epsilon to starting value: {self.rl_agent.epsilon:.2f}") else: # Fallback if epsilon_start isn't defined, assume 1.0 self.rl_agent.epsilon = 1.0 self.logger.info("New model requested. Reset RL agent epsilon to default starting value: 1.00") except Exception as e: self.logger.error(f"Error initializing RL agent: {str(e)}") import traceback self.logger.error(traceback.format_exc()) raise def _initialize_chart(self): """Initialize the RealTimeChart for visualization""" try: symbol = self.config['market_data']['symbol'] self.logger.info(f"Initializing RealTimeChart for {symbol}") self.chart = RealTimeChart(symbol=symbol) # Start chart server in a background thread dashboard_port = self.config.get('visualization', {}).get('port', 8050) self.logger.info(f"Starting web dashboard for {symbol} on port {dashboard_port}") self.chart_thread = threading.Thread( target=lambda: self.chart.run(host='localhost', port=dashboard_port) ) self.chart_thread.daemon = True # Allow the thread to exit when main program exits self.chart_thread.start() self.logger.info(f"Web dashboard started at http://localhost:{dashboard_port}/") # Also start the websocket connection for real-time data self.websocket_thread = threading.Thread( target=lambda: asyncio.run(self.chart.start_websocket()) ) self.websocket_thread.daemon = True self.websocket_thread.start() self.logger.info(f"WebSocket connection started for {symbol}") except Exception as e: self.logger.error(f"Error initializing chart: {str(e)}") import traceback self.logger.error(traceback.format_exc()) self.chart = None def train_hybrid(self, iterations=10, sv_epochs_per_iter=5, rl_episodes_per_iter=2): """ Main hybrid training loop Args: iterations: Number of hybrid iterations to run sv_epochs_per_iter: Number of supervised epochs per iteration rl_episodes_per_iter: Number of RL episodes per iteration Returns: dict: Training statistics """ self.logger.info(f"Starting hybrid training with {iterations} iterations") self.logger.info(f"Each iteration includes {sv_epochs_per_iter} supervised epochs and {rl_episodes_per_iter} RL episodes") # Training loop for iteration in range(iterations): if not running: self.logger.info("Training stopped by user") break self.logger.info(f"Iteration {iteration+1}/{iterations}") self.iter_count += 1 # 1. Supervised learning phase self.logger.info("Starting supervised learning phase") sv_stats = self.train_supervised(epochs=sv_epochs_per_iter) # 2. Reinforcement learning phase self.logger.info("Starting reinforcement learning phase") rl_stats = self.train_reinforcement(episodes=rl_episodes_per_iter) # 3. Update global training stats self._update_training_stats(sv_stats, rl_stats) # 4. Save models and stats self._save_models_and_stats() # 5. 
            if self.tensorboard_writer:
                self._log_to_tensorboard(iteration, sv_stats, rl_stats)

        self.logger.info("Hybrid training completed")
        return training_stats

    def train_supervised(self, epochs=5):
        """Train the supervised CNN model"""
        stats = {
            "epochs": epochs,
            "completed": 0,
            "best_val_pnl": -float('inf'),
            "best_win_rate": 0,
            "final_loss": 0
        }

        self.logger.info(f"Training supervised model for {epochs} epochs")

        try:
            # Prepare training data
            window_size = self.config['market_data']['window_size']
            X_train_dict, y_train, X_val_dict, y_val, _, _ = self.data_interface.prepare_training_data(
                window_size=window_size,
                refresh=True
            )

            # Get reference timeframe for consistency
            reference_tf = min(
                self.config['market_data']['timeframes'],
                key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
            )

            # Check available samples
            min_samples = min(X_train_dict[tf].shape[0] for tf in self.config['market_data']['timeframes'])
            self.logger.info(f"Using {min_samples} samples for training")

            # Get the feature count per timeframe
            features_per_tf = X_train_dict[reference_tf].shape[2]
            total_features = features_per_tf * len(self.config['market_data']['timeframes'])
            self.logger.info(f"Features per timeframe: {features_per_tf}, Total features: {total_features}")

            # Log timeframe data shapes for debugging
            for tf in self.config['market_data']['timeframes']:
                self.logger.info(f"Timeframe {tf} data shape: {X_train_dict[tf].shape}")

            # Prepare concatenated inputs for each sample across all timeframes
            # Shape will be [samples, window_size, features*num_timeframes]
            X_train_combined = np.zeros((min_samples, window_size, total_features))

            # Fill the array with data from all timeframes
            for i in range(min_samples):
                # For each timeframe, stack the features horizontally for the same window
                for tf_idx, tf in enumerate(self.config['market_data']['timeframes']):
                    # Place this timeframe's features at the appropriate position
                    start_idx = tf_idx * features_per_tf
                    end_idx = (tf_idx + 1) * features_per_tf
                    X_train_combined[i, :, start_idx:end_idx] = X_train_dict[tf][i]

            # For validation data - ensure we have validation data by splitting training data if needed
            if X_val_dict is None or y_val is None or min(X_val_dict[tf].shape[0] for tf in self.config['market_data']['timeframes']) == 0:
                # No validation data provided, use a portion of training data
                self.logger.info("No validation data available, using 20% of training data for validation")
                train_size = int(0.8 * min_samples)

                # Split the training data
                X_train_split = X_train_combined[:train_size]
                y_train_split = y_train[:train_size]
                X_val_combined = X_train_combined[train_size:min_samples]
                y_val_np = y_train[train_size:min_samples]

                # Update training data
                X_train_combined = X_train_split
                y_train_np = y_train_split
            else:
                # For validation data
                min_val_samples = min(X_val_dict[tf].shape[0] for tf in self.config['market_data']['timeframes'])
                X_val_combined = np.zeros((min_val_samples, window_size, features_per_tf * len(self.config['market_data']['timeframes'])))

                for i in range(min_val_samples):
                    for tf_idx, tf in enumerate(self.config['market_data']['timeframes']):
                        start_idx = tf_idx * features_per_tf
                        end_idx = (tf_idx + 1) * features_per_tf
                        X_val_combined[i, :, start_idx:end_idx] = X_val_dict[tf][i]

                y_train_np = y_train[:min_samples]
                y_val_np = y_val[:min_val_samples]

            self.logger.info(f"Prepared data: X_train shape: {X_train_combined.shape}, X_val shape: {X_val_combined.shape}")

            # Reset and initialize chart for trading information
            if self.chart:
                # Reset trading stats on the chart
                if hasattr(self.chart, 'positions'):
                    self.chart.positions = []
                if hasattr(self.chart, 'accumulative_pnl'):
                    self.chart.accumulative_pnl = 0.0
                if hasattr(self.chart, 'current_balance'):
                    self.chart.current_balance = 100.0
                if hasattr(self.chart, 'update_trading_info'):
                    self.chart.update_trading_info(
                        action="INIT",
                        prediction=None,
                        price=0.0,
                        timestamp=int(time.time() * 1000)
                    )

            # Create a custom training loop instead of using the model's train method
            # This gives us more control over the process
            self.supervised_model.model.train()

            # History to store metrics
            history = {
                'loss': [],
                'val_loss': [],
                'accuracy': [],
                'val_accuracy': [],
                'val_pnl': []
            }

            # Convert data to tensors
            X_train_tensor = torch.tensor(X_train_combined, dtype=torch.float32).to(self.device)
            y_train_tensor = torch.tensor(y_train_np, dtype=torch.long).to(self.device)
            X_val_tensor = torch.tensor(X_val_combined, dtype=torch.float32).to(self.device)
            y_val_tensor = torch.tensor(y_val_np, dtype=torch.long).to(self.device)

            # Verify that model's feature dimensions match the input data
            if hasattr(self.supervised_model, 'total_features'):
                expected_features = X_train_combined.shape[2]
                if self.supervised_model.total_features != expected_features:
                    self.logger.warning(f"Model features ({self.supervised_model.total_features}) don't match input features ({expected_features})")
                    self.logger.info("Updating model's total_features to match input data")
                    self.supervised_model.total_features = expected_features

                    # Rebuild the layers with correct dimensions
                    if hasattr(self.supervised_model, '_create_layers'):
                        self.supervised_model._create_layers()
                        self.supervised_model.to(self.device)

                        # Reinitialize optimizer after changing the model
                        self.supervised_model.optimizer = optim.Adam(
                            self.supervised_model.parameters(),
                            lr=0.0001,
                            weight_decay=0.01
                        )

            # Create dataloaders
            train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
            train_loader = torch.utils.data.DataLoader(
                train_dataset,
                batch_size=self.config['training']['batch_size'],
                shuffle=True
            )

            val_dataset = torch.utils.data.TensorDataset(X_val_tensor, y_val_tensor)
            val_loader = torch.utils.data.DataLoader(
                val_dataset,
                batch_size=self.config['training']['batch_size']
            )

            # Training loop
            for epoch in range(epochs):
                # Training phase
                self.supervised_model.model.train()
                train_loss = 0.0
                train_correct = 0
                train_total = 0

                for inputs, targets in train_loader:
                    # Zero the parameter gradients
                    self.supervised_model.optimizer.zero_grad()

                    # Forward pass
                    outputs, _ = self.supervised_model.model(inputs)
                    loss = self.supervised_model.criterion(outputs, targets)

                    # Backward pass and optimize
                    loss.backward()
                    self.supervised_model.optimizer.step()

                    # Statistics
                    train_loss += loss.item()
                    _, predicted = torch.max(outputs.data, 1)
                    train_total += targets.size(0)
                    train_correct += (predicted == targets).sum().item()

                # Calculate training metrics
                train_loss = train_loss / len(train_loader)
                train_accuracy = 100 * train_correct / train_total if train_total > 0 else 0

                # Validation phase
                self.supervised_model.model.eval()
                val_loss = 0.0
                val_correct = 0
                val_total = 0
                all_predictions = []
                all_targets = []

                with torch.no_grad():
                    for inputs, targets in val_loader:
                        # Forward pass
                        outputs, _ = self.supervised_model.model(inputs)
                        loss = self.supervised_model.criterion(outputs, targets)

                        # Statistics
                        val_loss += loss.item()
                        _, predicted = torch.max(outputs.data, 1)
                        val_total += targets.size(0)
                        val_correct += (predicted == targets).sum().item()

                        # Store for PnL calculation
                        all_predictions.append(predicted.cpu().numpy())
                        all_targets.append(targets.cpu().numpy())

                # Calculate validation metrics
                val_loss = val_loss / len(val_loader)
                val_accuracy = 100 * val_correct / val_total if val_total > 0 else 0

                # Calculate PnL using the robust DataInterface method
                all_predictions = np.concatenate(all_predictions)

                # We need the corresponding prices for the validation set
                # Fetch the raw prices used for validation data
                val_prices_start_index = len(y_train_np)  # Assuming validation data follows training data
                val_prices_end_index = val_prices_start_index + len(y_val_np)

                # Get prices from the reference timeframe dataframe prepared earlier
                if hasattr(self.data_interface, 'dataframes') and reference_tf in self.data_interface.dataframes:
                    reference_df = self.data_interface.dataframes[reference_tf]
                    # Ensure indices align with the X_val_combined data length
                    # We need prices corresponding to the END of each window in validation
                    price_indices = np.arange(len(X_train_combined) + window_size - 1,
                                              len(X_train_combined) + len(X_val_combined) + window_size - 1)
                    # Clamp indices to be within bounds of the reference dataframe
                    price_indices = np.clip(price_indices, 0, len(reference_df) - 1)

                    if len(price_indices) == len(all_predictions):
                        actual_val_prices = reference_df['close'].iloc[price_indices].values
                        pnl, win_rate, _ = self.data_interface.calculate_pnl(all_predictions, actual_val_prices)
                        self.logger.info(f"PnL calculation (robust) - Trades based on {len(actual_val_prices)} prices. Net PnL: {pnl:.4f}, Win Rate: {win_rate:.2f}")
                    else:
                        self.logger.warning(f"Price indices length ({len(price_indices)}) doesn't match predictions length ({len(all_predictions)}). Cannot calculate robust PnL.")
                        pnl, win_rate = 0.0, 0.0  # Fallback
                else:
                    self.logger.warning("Reference timeframe data not available for robust PnL calculation.")
                    pnl, win_rate = 0.0, 0.0  # Fallback

                # Update history
                history['loss'].append(train_loss)
                history['val_loss'].append(val_loss)
                history['accuracy'].append(train_accuracy)
                history['val_accuracy'].append(val_accuracy)
                history['val_pnl'].append(pnl)

                # Update stats
                stats["completed"] += 1
                stats["final_loss"] = val_loss

                if pnl > stats["best_val_pnl"]:
                    stats["best_val_pnl"] = pnl
                    # Save best model by PnL
                    model_path = self.models_dir / "supervised_model_best.pt"
                    self.supervised_model.save(str(model_path))
                    self.logger.info(f"New best CNN model saved with PnL: {pnl:.2f}")

                if win_rate > stats["best_win_rate"]:
                    stats["best_win_rate"] = win_rate

                # Log epoch results
                self.logger.info(f"Epoch {epoch+1}/{epochs} - Train loss: {train_loss:.4f}, " +
                                 f"Train acc: {train_accuracy:.2f}%, Val loss: {val_loss:.4f}, " +
                                 f"Val acc: {val_accuracy:.2f}%, PnL: {pnl:.2f}, Win rate: {win_rate:.2f}")

                # Log to TensorBoard
                if self.tensorboard_writer:
                    self.tensorboard_writer.add_scalar('SupervisedTrain/Loss', train_loss, self.supervised_epochs + epoch)
                    self.tensorboard_writer.add_scalar('SupervisedTrain/Accuracy', train_accuracy, self.supervised_epochs + epoch)
                    self.tensorboard_writer.add_scalar('SupervisedVal/Loss', val_loss, self.supervised_epochs + epoch)
                    self.tensorboard_writer.add_scalar('SupervisedVal/Accuracy', val_accuracy, self.supervised_epochs + epoch)
                    self.tensorboard_writer.add_scalar('SupervisedVal/PnL', pnl, self.supervised_epochs + epoch)
                    self.tensorboard_writer.add_scalar('SupervisedVal/WinRate', win_rate * 100, self.supervised_epochs + epoch)

                # Update chart with model predictions
                if self.chart and epoch % 2 == 0:  # Update every other epoch
                    # Use the model to make predictions on some validation data for visualization
                    try:
                        # Choose a subset of validation data for visualization
                        viz_size = min(20, len(X_val_tensor))
                        viz_indices = np.random.choice(len(X_val_tensor), viz_size, replace=False)
                        viz_inputs = X_val_tensor[viz_indices]
                        viz_targets = y_val_tensor[viz_indices]

                        # Get predictions
                        self.supervised_model.model.eval()
                        with torch.no_grad():
                            outputs, _ = self.supervised_model.model(viz_inputs)
                            probs = F.softmax(outputs, dim=1)
                            _, predictions = torch.max(probs, 1)

                        # Display last few predictions in the chart
                        for i in range(min(5, viz_size)):
                            timestamp_ms = int(time.time() * 1000) + i * 1000  # Space them out

                            # Get prediction and target
                            pred_idx = predictions[i].item()
                            target_idx = viz_targets[i].item()
                            action_names = ["BUY", "HOLD", "SELL"]
                            pred_action = action_names[pred_idx]

                            # Get confidence
                            confidence = probs[i, pred_idx].item()

                            # Add to chart
                            if hasattr(self.chart, 'latest_price') and self.chart.latest_price is not None:
                                display_price = self.chart.latest_price
                            else:
                                display_price = 20000 + np.random.randn() * 100  # Placeholder price for BTC

                            # Add signal to chart
                            if hasattr(self.chart, 'add_nn_signal'):
                                self.chart.add_nn_signal(
                                    symbol=self.config['market_data']['symbol'],
                                    signal=pred_action,
                                    confidence=confidence,
                                    timestamp=timestamp_ms
                                )

                        # Update trading info
                        if hasattr(self.chart, 'update_trading_info'):
                            self.chart.update_trading_info(
                                action="EPOCH_VIZ",
                                prediction=f"SV Acc: {val_accuracy:.1f}%, PnL: {pnl:.1f}",
                                price=display_price,
                                timestamp=int(time.time() * 1000)
                            )
                    except Exception as e:
                        self.logger.warning(f"Error updating chart during supervised viz: {str(e)}")

            # Update supervised epochs counter
            self.supervised_epochs += epochs

        except Exception as e:
            self.logger.error(f"Error in supervised learning: {str(e)}")
            import traceback
            self.logger.error(traceback.format_exc())

        return stats

    def train_reinforcement(self, episodes=2):
        """Train the RL agent"""
        stats = {
            "episodes": episodes,
            "completed": 0,
            "best_reward": -float('inf'),
            "final_reward": 0,
            "avg_reward": 0
        }

        self.logger.info(f"Training RL agent for {episodes} episodes")

        try:
            # Prepare data for RL training
            window_size = self.config['market_data']['window_size']
            X_train_dict, y_train, _, _, _, _ = self.data_interface.prepare_training_data(
                window_size=window_size,
                refresh=True  # Ensure we get relatively fresh data for this iteration
            )

            if X_train_dict is None or not X_train_dict or y_train is None:
                self.logger.error("Failed to get training data for RL phase.")
                return stats

            # Get reference timeframe
            reference_tf = min(
                self.config['market_data']['timeframes'],
                key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
            )

            # Find minimum length across all timeframes
            min_length = min(len(X_train_dict[tf]) for tf in self.config['market_data']['timeframes'] if X_train_dict[tf] is not None)

            if min_length <= window_size + 1:
                self.logger.error(f"Not enough data samples ({min_length}) for RL training with window size {window_size}.")
                return stats

            self.logger.info(f"Using {min_length} samples from each timeframe for RL training preparation")

            # For DQN we need to reshape data according to state_shape=(timeframes, window_size, features)
            states = []
            num_features = X_train_dict[reference_tf].shape[2]
            num_timeframes = len(self.config['market_data']['timeframes'])

            for i in range(min_length - 1):  # -1 to ensure we have next states
                state = np.zeros((num_timeframes, window_size, num_features), dtype=np.float32)
                valid_state = True
                for tf_idx, tf in enumerate(self.config['market_data']['timeframes']):
                    if X_train_dict[tf] is None or len(X_train_dict[tf]) <= i:
                        valid_state = False
                        break
                    state[tf_idx] = X_train_dict[tf][i]

                if valid_state:
                    states.append(state)
                else:
                    # This should ideally not happen if min_length was calculated correctly
                    self.logger.warning(f"Skipping state preparation at index {i} due to insufficient data in a timeframe.")

            # Get actions from labels (corresponding to the prepared states)
            actions = []
            # Ensure y_train is sliced correctly to match the number of prepared states
            num_states = len(states)
            if len(y_train) >= num_states:
                y_train_sliced = y_train[:num_states]
                for i in range(num_states):
                    # Ensure y_train_sliced[i] is a valid array/list before argmax
                    if isinstance(y_train_sliced[i], (np.ndarray, list)) and len(y_train_sliced[i]) > 0:
                        actions.append(np.argmax(y_train_sliced[i]))
                    else:
                        # Handle cases where y_train_sliced[i] might be invalid
                        self.logger.warning(f"Invalid label found at index {i}, using default action (HOLD=1). Label: {y_train_sliced[i]}")
                        actions.append(1)  # Default to HOLD
            else:
                self.logger.error(f"Mismatch between number of states ({num_states}) and labels ({len(y_train)}). Cannot proceed with RL training.")
                return stats

            self.logger.info(f"Prepared {len(states)} state-action pairs for RL training")

            if not states:
                self.logger.error("No states were prepared for RL training.")
                return stats

            # --- Pre-calculate Supervised Predictions ---
            self.logger.info("Pre-calculating supervised model predictions for RL states...")
            sv_predictions = []
            try:
                self.supervised_model.model.eval()  # Set model to evaluation mode
                with torch.no_grad():
                    # Reshape states for supervised model: [batch, window_size, features*num_timeframes]
                    reshaped_states_list = []
                    for state in states:
                        # state shape: [timeframes, window_size, features]
                        # Target shape: [window_size, features*num_timeframes]
                        reshaped_state = state.transpose(1, 0, 2).reshape(window_size, -1)
                        reshaped_states_list.append(reshaped_state)

                    if reshaped_states_list:
                        reshaped_states_batch = np.array(reshaped_states_list)
                        states_tensor = torch.tensor(reshaped_states_batch, dtype=torch.float32).to(self.device)

                        # Process in batches if necessary to avoid memory issues
                        sv_batch_size = 128
                        num_batches = int(np.ceil(len(states_tensor) / sv_batch_size))
                        for j in range(num_batches):
                            batch_start = j * sv_batch_size
                            batch_end = min((j + 1) * sv_batch_size, len(states_tensor))
                            batch_tensor = states_tensor[batch_start:batch_end]
                            outputs, _ = self.supervised_model.model(batch_tensor)
                            _, predicted_actions = torch.max(outputs.data, 1)
                            sv_predictions.extend(predicted_actions.cpu().numpy())

                self.logger.info(f"Finished pre-calculating {len(sv_predictions)} supervised predictions.")
                if len(sv_predictions) != len(states):
                    self.logger.error(f"Mismatch in supervised predictions ({len(sv_predictions)}) and states ({len(states)}). Aborting RL phase.")
                    return stats
Aborting RL phase.") return stats except Exception as e: self.logger.error(f"Error during supervised prediction pre-calculation: {e}") import traceback self.logger.error(traceback.format_exc()) return stats # Cannot proceed without supervised predictions for consensus # Reset and initialize chart for trading information if self.chart: # Reset trading stats on the chart if hasattr(self.chart, 'positions'): self.chart.positions = [] if hasattr(self.chart, 'accumulative_pnl'): self.chart.accumulative_pnl = 0.0 if hasattr(self.chart, 'current_balance'): self.chart.current_balance = 100.0 if hasattr(self.chart, 'update_trading_info'): self.chart.update_trading_info(action="INIT", prediction=None, price=0.0, timestamp=int(time.time() * 1000)) # Training loop for episode in range(episodes): # --- Check and potentially bump epsilon --- if self.rl_agent.epsilon <= self.rl_agent.epsilon_min + 1e-6: # Check if epsilon is at/near minimum # Bump epsilon slightly to encourage exploration if stuck bump_value = 0.1 self.rl_agent.epsilon = min(self.rl_agent.epsilon_min + bump_value, self.rl_agent.epsilon_start) self.logger.warning(f"RL agent epsilon was at minimum. Bumped to {self.rl_agent.epsilon:.4f} for episode {episode+1}") if not running: self.logger.info("RL training interrupted") break episode_reward = 0 correct_actions = 0 consensus_actions = 0 # Sample a segment of the data # Ensure segment size is reasonable and doesn't exceed available states segment_size = min(200, len(states) -1) # Max 200 steps or available data if segment_size <= 0: self.logger.warning(f"Not enough states ({len(states)}) to form a training segment. Skipping episode {episode+1}.") continue start_idx = np.random.randint(0, len(states) - segment_size) if len(states) > segment_size else 0 end_idx = start_idx + segment_size self.logger.info(f"RL Episode {episode+1}/{episodes}: Training on segment [{start_idx}:{end_idx}]") # Train on segment for i in range(start_idx, end_idx): state = states[i] # Original intended action based on labels true_action = actions[i] # Get RL agent's predicted action rl_pred_action = self.rl_agent.act(state) # Get pre-calculated supervised prediction sv_pred_action = sv_predictions[i] next_state = states[i + 1] # Calculate reward based on price change (standard reward) try: # Ensure indices are valid for X_train_dict if i < len(X_train_dict[reference_tf]) and i+1 < len(X_train_dict[reference_tf]): price_current = X_train_dict[reference_tf][i][-1, -1] # Closing price price_next = X_train_dict[reference_tf][i+1][-1, -1] price_change = (price_next - price_current) / price_current if price_current != 0 else 0 else: price_change = 0 self.logger.warning(f"Index {i} or {i+1} out of bounds for price calculation.") except IndexError: price_change = 0 self.logger.warning(f"IndexError during price calculation at step {i}. 
Using price_change = 0.") except Exception as e: price_change = 0 self.logger.error(f"Unexpected error during price calculation: {e}") # Define standard reward based on the RL agent's action and outcome if rl_pred_action == 0: # Buy reward = price_change * 100 elif rl_pred_action == 2: # Sell reward = -price_change * 100 else: # Hold (action 1) # Penalize holding during significant moves, slightly reward holding in stable periods reward = -abs(price_change) * 50 if abs(price_change) > 0.0005 else abs(price_change) * 10 # --- Apply Consensus Modifier --- consensus_met = (sv_pred_action == rl_pred_action) if not consensus_met and rl_pred_action != 1: # If actions disagree and RL didn't choose HOLD reward -= 5 # REDUCED Penalty for disagreement # self.logger.debug(f"Step {i}: RL ({rl_pred_action}) vs SV ({sv_pred_action}) disagree. Penalty applied.") elif consensus_met and rl_pred_action != 1: consensus_actions += 1 # Count consensus non-hold actions # Check if RL action matches the true label action if rl_pred_action == true_action: correct_actions += 1 # Remember experience (using the true action from labels, but the modified reward) done = (i == end_idx - 1) self.rl_agent.remember(state, true_action, reward, next_state, done) # Replay experiences periodically if i % 10 == 0: self.rl_agent.replay() episode_reward += reward # Update chart with predicted trading information (no actual trades logged here) if self.chart and i % 5 == 0: timestamp_ms = int(time.time() * 1000) action_names = ["BUY", "HOLD", "SELL"] action_name = action_names[rl_pred_action] # Show RL's predicted action # Display price logic... (remains the same) if hasattr(self.chart, 'latest_price') and self.chart.latest_price is not None: display_price = self.chart.latest_price else: display_price = price_current if 'price_current' in locals() else 0 # Add predicted signal to chart if hasattr(self.chart, 'add_nn_signal'): # Indicate consensus in the signal display if possible signal_text = f"{action_name}{'*' if consensus_met else ''}" self.chart.add_nn_signal( symbol=self.config['market_data']['symbol'], signal=signal_text, # Append '*' for consensus confidence=0.7, # Placeholder timestamp=timestamp_ms ) # Update info display if hasattr(self.chart, 'update_trading_info'): consensus_status = "Yes" if consensus_met else "No" info_text = f"RL: {action_name}, SV: {action_names[sv_pred_action]}, Consensus: {consensus_status}" self.chart.update_trading_info( action=action_name, # Still show RL action mainly prediction=info_text, # Add consensus info price=display_price, timestamp=timestamp_ms ) # Calculate accuracy & consensus rate for the episode segment_len = end_idx - start_idx accuracy = (correct_actions / segment_len) * 100 if segment_len > 0 else 0 consensus_rate = (consensus_actions / segment_len) * 100 if segment_len > 0 else 0 # Rate of non-hold consensus actions # Update the chart with final episode metrics if self.chart: # Keep updating the text display if needed if hasattr(self.chart, 'update_trading_info'): self.chart.update_trading_info( action="RL_EP_END", prediction=f"Reward: {episode_reward:.1f}, Acc: {accuracy:.1f}%, Cons: {consensus_rate:.1f}%", price=getattr(self.chart, 'latest_price', 0), timestamp=int(time.time() * 1000) ) # Log results self.logger.info(f"RL Episode {episode+1} - Reward: {episode_reward:.2f}, " + f"Accuracy: {accuracy:.2f}%, Consensus Rate: {consensus_rate:.2f}%, Epsilon: {self.rl_agent.epsilon:.4f}") # Update stats stats["completed"] += 1 stats["final_reward"] = episode_reward 
stats["avg_reward"] = self.rl_agent.avg_reward # Save best model based on reward if episode_reward > stats["best_reward"]: stats["best_reward"] = episode_reward self.rl_agent.save(str(self.models_dir / "rl_agent_best")) self.logger.info(f"New best RL model saved with reward: {episode_reward:.2f}") self.rl_episodes += episodes except Exception as e: self.logger.error(f"Error in reinforcement learning: {str(e)}") import traceback self.logger.error(traceback.format_exc()) return stats def _update_training_stats(self, sv_stats, rl_stats): """Update global training statistics""" global training_stats # Ensure sv_stats has the necessary keys if not isinstance(sv_stats, dict) or "completed" not in sv_stats: self.logger.warning("Supervised training stats missing expected keys, using defaults") sv_stats = { "completed": 0, "best_val_pnl": -float('inf'), "best_win_rate": 0, "final_loss": 0 } # Ensure rl_stats has the necessary keys if not isinstance(rl_stats, dict) or "completed" not in rl_stats: self.logger.warning("RL training stats missing expected keys, using defaults") rl_stats = { "completed": 0, "best_reward": -float('inf'), "final_reward": 0, "avg_reward": 0 } # Update supervised stats training_stats["supervised"]["epochs_completed"] += sv_stats.get("completed", 0) if sv_stats.get("best_val_pnl", -float('inf')) > training_stats["supervised"]["best_val_pnl"]: training_stats["supervised"]["best_val_pnl"] = sv_stats["best_val_pnl"] training_stats["supervised"]["best_epoch"] = self.supervised_epochs if sv_stats.get("best_win_rate", 0) > training_stats["supervised"]["best_win_rate"]: training_stats["supervised"]["best_win_rate"] = sv_stats["best_win_rate"] # Update reinforcement stats training_stats["reinforcement"]["episodes_completed"] += rl_stats.get("completed", 0) if rl_stats.get("best_reward", -float('inf')) > training_stats["reinforcement"]["best_reward"]: training_stats["reinforcement"]["best_reward"] = rl_stats["best_reward"] training_stats["reinforcement"]["best_episode"] = self.rl_episodes # Update hybrid stats training_stats["hybrid"]["iterations_completed"] = self.iter_count # Calculate a combined score (simple weighted average) # Ensure values are valid numbers before calculation sv_pnl_score = training_stats["supervised"]["best_val_pnl"] if isinstance(training_stats["supervised"]["best_val_pnl"], (int, float)) and np.isfinite(training_stats["supervised"]["best_val_pnl"]) else 0.0 rl_reward_score = training_stats["reinforcement"]["best_reward"] if isinstance(training_stats["reinforcement"]["best_reward"], (int, float)) and np.isfinite(training_stats["reinforcement"]["best_reward"]) else 0.0 combined_score = (sv_pnl_score * 0.5) + (rl_reward_score * 0.5) if combined_score > training_stats["hybrid"]["best_combined_score"]: training_stats["hybrid"]["best_combined_score"] = combined_score training_stats["hybrid"]["last_update"] = datetime.now().isoformat() # Log updated stats self.logger.info(f"Updated training stats - Combined score: {combined_score:.2f}") def _save_models_and_stats(self): """Save models and training statistics""" # Save models (Best models are saved within their respective training methods) # Consider saving latest models here if needed # Save stats to JSON stats_path = self.models_dir / f"hybrid_stats_{timestamp}.json" try: with open(stats_path, 'w') as f: # Use a custom JSON encoder for numpy types if necessary json.dump(training_stats, f, indent=2, default=lambda x: float(x) if isinstance(x, (np.float32, np.float64)) else x) # Also save to a consistent filename for 
            latest_path = self.models_dir / "hybrid_stats_latest.json"
            with open(latest_path, 'w') as f:
                json.dump(training_stats, f, indent=2, default=lambda x: float(x) if isinstance(x, (np.float32, np.float64)) else x)

            self.logger.info(f"Saved training stats to {stats_path} and {latest_path}")
        except Exception as e:
            self.logger.error(f"Error saving training stats: {e}")

    def _log_to_tensorboard(self, iteration, sv_stats, rl_stats):
        """Log metrics to TensorBoard"""
        if not self.tensorboard_writer:
            return

        # Ensure stats are dictionaries
        sv_stats = sv_stats or {}
        rl_stats = rl_stats or {}

        # Log supervised metrics
        self.tensorboard_writer.add_scalar('Supervised/FinalLoss_PerIter', sv_stats.get("final_loss", 0), iteration)
        self.tensorboard_writer.add_scalar('Supervised/BestPnL_Overall', training_stats['supervised']['best_val_pnl'], iteration)
        self.tensorboard_writer.add_scalar('Supervised/BestWinRate_Overall', training_stats['supervised']['best_win_rate'], iteration)

        # Log RL metrics
        self.tensorboard_writer.add_scalar('RL/FinalReward_PerIter', rl_stats.get("final_reward", 0), iteration)
        self.tensorboard_writer.add_scalar('RL/BestReward_Overall', training_stats['reinforcement']['best_reward'], iteration)
        self.tensorboard_writer.add_scalar('RL/AvgReward_PerIter', rl_stats.get("avg_reward", 0), iteration)
        self.tensorboard_writer.add_scalar('RL/Epsilon_Current', self.rl_agent.epsilon if self.rl_agent else 0, iteration)

        # Log combined metrics
        combined_score = training_stats['hybrid']['best_combined_score']
        self.tensorboard_writer.add_scalar('Hybrid/CombinedScore_Overall', combined_score, iteration)
        self.tensorboard_writer.add_scalar('Hybrid/Iterations', self.iter_count, iteration)


def main():
    """Main entry point"""
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Hybrid Training with CPU Compatibility Fixes')
    parser.add_argument('--iterations', type=int, default=10, help='Number of hybrid iterations')
    parser.add_argument('--sv-epochs', type=int, default=5, help='Supervised epochs per iteration')
    parser.add_argument('--rl-episodes', type=int, default=2, help='RL episodes per iteration')
    parser.add_argument('--symbol', type=str, default='BTC/USDT', help='Trading symbol')
    parser.add_argument('--timeframes', type=str, default='1m,5m,15m', help='Comma-separated timeframes')
    parser.add_argument('--window', type=int, default=24, help='Window size for input data')
    parser.add_argument('--batch-size', type=int, default=64, help='Batch size for training')
    parser.add_argument('--new-model', action='store_true', help='Start with new models instead of loading existing')
    parser.add_argument('--no-dashboard', action='store_true', help='Disable web dashboard')
    parser.add_argument('--dashboard-port', type=int, default=8050, help='Port for web dashboard')
    args = parser.parse_args()

    # Create custom config
    custom_config = {
        'market_data': {
            'symbol': args.symbol,
            'timeframes': args.timeframes.split(','),
            'window_size': args.window
        },
        'training': {
            'batch_size': args.batch_size,
            'learning_rate': 0.0001,  # Conservative learning rate
        },
        'hardware': {
            'device': 'cpu',           # Force CPU
            'mixed_precision': False   # Disable mixed precision
        },
        'model': {
            'new_model': args.new_model
        },
        'visualization': {
            'enabled': not args.no_dashboard,
            'port': args.dashboard_port
        }
    }

    # Get config from train_config
    config = train_config.get_config('hybrid', custom_config)

    # Save the config for reference
    config_dir = Path('configs')
    config_dir.mkdir(exist_ok=True)
    train_config.save_config(config, f"configs/hybrid_training_{timestamp}.json")
f"configs/hybrid_training_{timestamp}.json") # Initialize the hybrid model model = HybridModel(config) if not model.initialize(): logger.error("Failed to initialize hybrid model") return # Show instructions for the web dashboard if enabled if not args.no_dashboard: dash_url = f"http://localhost:{args.dashboard_port}" logger.info(f"Web dashboard is enabled at {dash_url}") logger.info("You can monitor training progress, see predictions and track PnL in real-time.") logger.info("Press Ctrl+C to gracefully terminate training (models will be saved).") # Run the training stats = model.train_hybrid( iterations=args.iterations, sv_epochs_per_iter=args.sv_epochs, rl_episodes_per_iter=args.rl_episodes ) # Log final results logger.info("Training completed successfully") logger.info(f"Best supervised PnL: {stats['supervised']['best_val_pnl']:.4f}") logger.info(f"Best RL reward: {stats['reinforcement']['best_reward']:.4f}") logger.info(f"Best combined score: {stats['hybrid']['best_combined_score']:.4f}") # Close TensorBoard writer if model.tensorboard_writer: model.tensorboard_writer.close() if __name__ == "__main__": main()