#!/usr/bin/env python
"""
Hybrid Training Script with Device Compatibility Fixes
This is a fixed version of the hybrid training script that:
1. Forces CPU use to avoid CUDA/device mismatch errors
2. Adds better error handling and recovery for model initialization
3. Explicitly moves all models and tensors to the CPU
Usage:
python train_hybrid_fixed.py --iterations 10 --sv-epochs 5 --rl-episodes 2
"""
import os
import sys
import logging
import argparse
import numpy as np
import torch
import time
import json
import asyncio
import signal
import threading
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter
from torch import optim
import torch.nn.functional as F
# Force CPU usage to avoid device mismatch errors
os.environ['CUDA_VISIBLE_DEVICES'] = ''
os.environ['DISABLE_MIXED_PRECISION'] = '1'
# Additional safeguards (the allocator setting below is a no-op once CUDA is hidden)
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
os.environ['PYTORCH_JIT'] = '0'
# Disable CUDA completely in PyTorch
torch.cuda.is_available = lambda: False
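# Optional sanity check (a minimal sketch, safe to leave commented out): with the overrides
# above, CUDA should report as unavailable and new tensors should default to the CPU.
# assert not torch.cuda.is_available()
# assert torch.zeros(1).device.type == 'cpu'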
# Add project root to path if needed
project_root = os.path.dirname(os.path.abspath(__file__))
if project_root not in sys.path:
sys.path.append(project_root)
# Import configurations
import train_config
# Import key components
from NN.models.cnn_model_pytorch import CNNModelPyTorch, CNNPyTorch
from NN.models.dqn_agent import DQNAgent
from dataprovider_realtime import MultiTimeframeDataInterface, RealTimeChart
from NN.utils.signal_interpreter import SignalInterpreter
# Configure logging
log_dir = Path("logs")
log_dir.mkdir(exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
log_file = log_dir / f"hybrid_training_{timestamp}.log"
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler(log_file),
logging.StreamHandler()
]
)
logger = logging.getLogger('hybrid_training')
# Global variables for graceful shutdown
running = True
training_stats = {
"supervised": {
"epochs_completed": 0,
"best_val_pnl": -float('inf'),
"best_epoch": 0,
"best_win_rate": 0
},
"reinforcement": {
"episodes_completed": 0,
"best_reward": -float('inf'),
"best_episode": 0,
"best_win_rate": 0
},
"hybrid": {
"iterations_completed": 0,
"best_combined_score": -float('inf'),
"training_started": datetime.now().isoformat(),
"last_update": datetime.now().isoformat()
}
}
# Configure signal handler for graceful shutdown
def signal_handler(sig, frame):
global running
logger.info("Received interrupt signal. Finishing current training cycle and saving models...")
running = False
# Register signal handler
signal.signal(signal.SIGINT, signal_handler)
class HybridModel:
"""
Hybrid model that combines supervised CNN learning with RL-based decision optimization
"""
def __init__(self, config):
self.config = config
# Force CPU for all operations
config['hardware']['device'] = 'cpu'
config['hardware']['mixed_precision'] = False
self.device = torch.device('cpu')
self.supervised_model = None
self.rl_agent = None
self.data_interface = None
self.signal_interpreter = None
self.chart = None
# Training stats
self.tensorboard_writer = None
self.iter_count = 0
self.supervised_epochs = 0
self.rl_episodes = 0
# Initialize logging
self.logger = logging.getLogger('hybrid_model')
# Paths
self.models_dir = Path(config['paths']['models_dir'])
self.models_dir.mkdir(exist_ok=True, parents=True)
def initialize(self):
"""Initialize all components of the hybrid model"""
# Set up TensorBoard
tb_dir = Path(self.config['paths']['tensorboard_dir'])
tb_dir.mkdir(exist_ok=True, parents=True)
log_dir = tb_dir / f"hybrid_{timestamp}"
self.tensorboard_writer = SummaryWriter(log_dir=str(log_dir))
self.logger.info(f"TensorBoard initialized at {log_dir}")
# Initialize data interface
symbol = self.config['market_data']['symbol']
timeframes = self.config['market_data']['timeframes']
window_size = self.config['market_data']['window_size']
self.logger.info(f"Initializing data interface for {symbol} with timeframes {timeframes}")
self.data_interface = MultiTimeframeDataInterface(
symbol=symbol,
timeframes=timeframes
)
# Initialize supervised model (CNN)
self._initialize_supervised_model(window_size)
# Initialize RL agent
self._initialize_rl_agent(window_size)
# Initialize signal interpreter
self.signal_interpreter = SignalInterpreter(config={
'buy_threshold': 0.65,
'sell_threshold': 0.65,
'hold_threshold': 0.75,
'trend_filter_enabled': True,
'volume_filter_enabled': True
})
# Initialize chart if visualization is enabled
if self.config.get('visualization', {}).get('enabled', False):
self._initialize_chart()
return True
def _initialize_supervised_model(self, window_size):
"""Initialize the supervised CNN model"""
try:
# Get data shape information
X_train_dict, y_train, X_val_dict, y_val, _, _ = self.data_interface.prepare_training_data(
window_size=window_size,
refresh=True
)
if X_train_dict is None or y_train is None:
raise ValueError("Failed to load training data")
# Get reference timeframe (lowest timeframe)
reference_tf = min(
self.config['market_data']['timeframes'],
key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
)
# Get feature count from the data
features_per_tf = X_train_dict[reference_tf].shape[2]
total_features = features_per_tf * len(self.config['market_data']['timeframes'])
# Initialize model
self.logger.info(f"Initializing CNN model with {total_features} features")
self.supervised_model = CNNModelPyTorch(
window_size=window_size,
timeframes=self.config['market_data']['timeframes'],
output_size=3, # BUY/HOLD/SELL
num_pairs=1 # Single pair for now
)
# Create a new model instance with the correct input shape
if hasattr(self.supervised_model, 'model'):
# The underlying model needs to be recreated with the correct input shape
input_shape = (window_size, total_features)
# Force CPU device for this model
self.supervised_model.device = self.device
# Create a new CNNPyTorch model on the CPU
new_model = CNNPyTorch(input_shape, self.supervised_model.output_size)
new_model.device = self.device
new_model.to(self.device)
# Make sure class_weights tensor is on CPU
if hasattr(new_model, 'class_weights'):
new_model.class_weights = new_model.class_weights.to(self.device)
# Replace the model
self.supervised_model.model = new_model
# Reinitialize the optimizer
self.supervised_model.optimizer = optim.Adam(
self.supervised_model.model.parameters(),
lr=0.0001,
weight_decay=0.01
)
# Initialize the criterion (missing in the model)
self.supervised_model.criterion = torch.nn.CrossEntropyLoss()
# Ensure model is on CPU
self.supervised_model.device = self.device
if hasattr(self.supervised_model, 'model'):
self.supervised_model.model.to(self.device)
# Load existing model if available and not creating new model
model_path = self.models_dir / "supervised_model_best.pt"
if model_path.exists() and not self.config.get('model', {}).get('new_model', False):
self.logger.info(f"Loading existing CNN model from {model_path}")
try:
self.supervised_model.load(str(model_path))
self.logger.info("CNN model loaded successfully")
except Exception as e:
self.logger.error(f"Error loading CNN model: {str(e)}")
self.logger.info("Starting with a new CNN model")
else:
self.logger.info("Starting with a new CNN model")
except Exception as e:
self.logger.error(f"Error initializing supervised model: {str(e)}")
import traceback
self.logger.error(traceback.format_exc())
raise
def _initialize_rl_agent(self, window_size):
"""Initialize the RL agent"""
try:
# Get data for RL training
X_train_dict, _, _, _, _, _ = self.data_interface.prepare_training_data(
window_size=window_size,
refresh=True
)
if X_train_dict is None:
raise ValueError("Failed to load training data for RL agent")
# Get reference timeframe features
reference_tf = min(
self.config['market_data']['timeframes'],
key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
)
# Get feature count from the data
num_features = X_train_dict[reference_tf].shape[2]
# Initialize RL agent
self.logger.info(f"Initializing RL agent")
# State shape for DQN agent: (timeframes, window_size, features)
state_shape = (len(self.config['market_data']['timeframes']), window_size, num_features)
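            # Illustrative example (hypothetical values): with timeframes ['1m', '5m', '15m'],
            # window_size=24 and 10 features per candle this gives state_shape == (3, 24, 10),
            # i.e. one stacked observation per decision step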
self.rl_agent = DQNAgent(
state_shape=state_shape,
n_actions=3, # BUY/HOLD/SELL
epsilon=1.0,
epsilon_min=0.01,
epsilon_decay=0.995,
learning_rate=self.config['training']['learning_rate'],
gamma=0.95,
buffer_size=10000,
batch_size=self.config['training']['batch_size'],
device=self.device # Explicitly pass CPU device
)
# Explicitly move agent to CPU and force it to stay there
try:
# First set the device in the agent itself
self.rl_agent.device = self.device
# Force PyTorch to use CPU by setting device on each model
if hasattr(self.rl_agent, 'policy_net'):
self.rl_agent.policy_net.to(self.device)
# Force all layers to CPU
for parameter in self.rl_agent.policy_net.parameters():
parameter.data = parameter.data.to(self.device)
if hasattr(self.rl_agent, 'target_net'):
self.rl_agent.target_net.to(self.device)
# Force all layers to CPU
for parameter in self.rl_agent.target_net.parameters():
parameter.data = parameter.data.to(self.device)
# Move models to the specified device
self.rl_agent.move_models_to_device(self.device)
self.logger.info(f"RL agent models moved to {self.device}")
except Exception as e:
self.logger.warning(f"Could not move RL agent models to device: {str(e)}")
# Load existing agent if available and not creating new model
agent_path = self.models_dir / "rl_agent_best"
if os.path.exists(f"{agent_path}_policy.pt") and not self.config.get('model', {}).get('new_model', False):
self.logger.info(f"Loading existing RL agent from {agent_path}")
try:
self.rl_agent.load(str(agent_path))
self.logger.info("RL agent loaded successfully")
except Exception as e:
self.logger.error(f"Error loading RL agent: {str(e)}")
self.logger.info("Starting with a new RL agent")
else:
self.logger.info("Starting with a new RL agent")
# Reset epsilon if training a new model
if self.config.get('model', {}).get('new_model', False):
if hasattr(self.rl_agent, 'epsilon_start'):
self.rl_agent.epsilon = self.rl_agent.epsilon_start
self.logger.info(f"New model requested. Reset RL agent epsilon to starting value: {self.rl_agent.epsilon:.2f}")
else:
# Fallback if epsilon_start isn't defined, assume 1.0
self.rl_agent.epsilon = 1.0
self.logger.info("New model requested. Reset RL agent epsilon to default starting value: 1.00")
except Exception as e:
self.logger.error(f"Error initializing RL agent: {str(e)}")
import traceback
self.logger.error(traceback.format_exc())
raise
def _initialize_chart(self):
"""Initialize the RealTimeChart for visualization"""
try:
symbol = self.config['market_data']['symbol']
self.logger.info(f"Initializing RealTimeChart for {symbol}")
self.chart = RealTimeChart(symbol=symbol)
# Start chart server in a background thread
dashboard_port = self.config.get('visualization', {}).get('port', 8050)
self.logger.info(f"Starting web dashboard for {symbol} on port {dashboard_port}")
self.chart_thread = threading.Thread(
target=lambda: self.chart.run(host='localhost', port=dashboard_port)
)
self.chart_thread.daemon = True # Allow the thread to exit when main program exits
self.chart_thread.start()
self.logger.info(f"Web dashboard started at http://localhost:{dashboard_port}/")
# Also start the websocket connection for real-time data
self.websocket_thread = threading.Thread(
target=lambda: asyncio.run(self.chart.start_websocket())
)
self.websocket_thread.daemon = True
self.websocket_thread.start()
self.logger.info(f"WebSocket connection started for {symbol}")
except Exception as e:
self.logger.error(f"Error initializing chart: {str(e)}")
import traceback
self.logger.error(traceback.format_exc())
self.chart = None
def train_hybrid(self, iterations=10, sv_epochs_per_iter=5, rl_episodes_per_iter=2):
"""
Main hybrid training loop
Args:
iterations: Number of hybrid iterations to run
sv_epochs_per_iter: Number of supervised epochs per iteration
rl_episodes_per_iter: Number of RL episodes per iteration
Returns:
dict: Training statistics
"""
self.logger.info(f"Starting hybrid training with {iterations} iterations")
self.logger.info(f"Each iteration includes {sv_epochs_per_iter} supervised epochs and {rl_episodes_per_iter} RL episodes")
# Training loop
for iteration in range(iterations):
if not running:
self.logger.info("Training stopped by user")
break
self.logger.info(f"Iteration {iteration+1}/{iterations}")
self.iter_count += 1
# 1. Supervised learning phase
self.logger.info("Starting supervised learning phase")
sv_stats = self.train_supervised(epochs=sv_epochs_per_iter)
# 2. Reinforcement learning phase
self.logger.info("Starting reinforcement learning phase")
rl_stats = self.train_reinforcement(episodes=rl_episodes_per_iter)
# 3. Update global training stats
self._update_training_stats(sv_stats, rl_stats)
# 4. Save models and stats
self._save_models_and_stats()
# 5. Log to TensorBoard
if self.tensorboard_writer:
self._log_to_tensorboard(iteration, sv_stats, rl_stats)
self.logger.info("Hybrid training completed")
return training_stats
def train_supervised(self, epochs=5):
"""Train the supervised CNN model"""
stats = {
"epochs": epochs,
"completed": 0,
"best_val_pnl": -float('inf'),
"best_win_rate": 0,
"final_loss": 0
}
self.logger.info(f"Training supervised model for {epochs} epochs")
try:
# Prepare training data
window_size = self.config['market_data']['window_size']
X_train_dict, y_train, X_val_dict, y_val, _, _ = self.data_interface.prepare_training_data(
window_size=window_size,
refresh=True
)
# Get reference timeframe for consistency
reference_tf = min(
self.config['market_data']['timeframes'],
key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
)
# Check available samples
min_samples = min(X_train_dict[tf].shape[0] for tf in self.config['market_data']['timeframes'])
self.logger.info(f"Using {min_samples} samples for training")
# Get the feature count per timeframe
features_per_tf = X_train_dict[reference_tf].shape[2]
total_features = features_per_tf * len(self.config['market_data']['timeframes'])
self.logger.info(f"Features per timeframe: {features_per_tf}, Total features: {total_features}")
# Log timeframe data shapes for debugging
for tf in self.config['market_data']['timeframes']:
self.logger.info(f"Timeframe {tf} data shape: {X_train_dict[tf].shape}")
# Prepare concatenated inputs for each sample across all timeframes
# Shape will be [samples, window_size, features*num_timeframes]
X_train_combined = np.zeros((min_samples, window_size, total_features))
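            # Example layout (hypothetical numbers): with 3 timeframes and 8 features each,
            # total_features == 24; within each row, columns [0:8] hold the first configured
            # timeframe, [8:16] the second and [16:24] the third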
# Fill the array with data from all timeframes
for i in range(min_samples):
# For each timeframe, stack the features horizontally for the same window
for tf_idx, tf in enumerate(self.config['market_data']['timeframes']):
# Place this timeframe's features at the appropriate position
start_idx = tf_idx * features_per_tf
end_idx = (tf_idx + 1) * features_per_tf
X_train_combined[i, :, start_idx:end_idx] = X_train_dict[tf][i]
# For validation data - ensure we have validation data by splitting training data if needed
if X_val_dict is None or y_val is None or min(X_val_dict[tf].shape[0] for tf in self.config['market_data']['timeframes']) == 0:
# No validation data provided, use a portion of training data
self.logger.info("No validation data available, using 20% of training data for validation")
train_size = int(0.8 * min_samples)
# Split the training data
X_train_split = X_train_combined[:train_size]
y_train_split = y_train[:train_size]
X_val_combined = X_train_combined[train_size:min_samples]
y_val_np = y_train[train_size:min_samples]
# Update training data
X_train_combined = X_train_split
y_train_np = y_train_split
else:
# For validation data
min_val_samples = min(X_val_dict[tf].shape[0] for tf in self.config['market_data']['timeframes'])
X_val_combined = np.zeros((min_val_samples, window_size, features_per_tf * len(self.config['market_data']['timeframes'])))
for i in range(min_val_samples):
for tf_idx, tf in enumerate(self.config['market_data']['timeframes']):
start_idx = tf_idx * features_per_tf
end_idx = (tf_idx + 1) * features_per_tf
X_val_combined[i, :, start_idx:end_idx] = X_val_dict[tf][i]
y_train_np = y_train[:min_samples]
y_val_np = y_val[:min_val_samples]
self.logger.info(f"Prepared data: X_train shape: {X_train_combined.shape}, X_val shape: {X_val_combined.shape}")
# Reset and initialize chart for trading information
if self.chart:
# Reset trading stats on the chart
if hasattr(self.chart, 'positions'):
self.chart.positions = []
if hasattr(self.chart, 'accumulative_pnl'):
self.chart.accumulative_pnl = 0.0
if hasattr(self.chart, 'current_balance'):
self.chart.current_balance = 100.0
if hasattr(self.chart, 'update_trading_info'):
self.chart.update_trading_info(
action="INIT",
prediction=None,
price=0.0,
timestamp=int(time.time() * 1000)
)
# Create a custom training loop instead of using the model's train method
# This gives us more control over the process
self.supervised_model.model.train()
# History to store metrics
history = {
'loss': [],
'val_loss': [],
'accuracy': [],
'val_accuracy': [],
'val_pnl': []
}
# Convert data to tensors
X_train_tensor = torch.tensor(X_train_combined, dtype=torch.float32).to(self.device)
y_train_tensor = torch.tensor(y_train_np, dtype=torch.long).to(self.device)
X_val_tensor = torch.tensor(X_val_combined, dtype=torch.float32).to(self.device)
y_val_tensor = torch.tensor(y_val_np, dtype=torch.long).to(self.device)
# Verify that model's feature dimensions match the input data
if hasattr(self.supervised_model, 'total_features'):
expected_features = X_train_combined.shape[2]
if self.supervised_model.total_features != expected_features:
self.logger.warning(f"Model features ({self.supervised_model.total_features}) don't match input features ({expected_features})")
self.logger.info(f"Updating model's total_features to match input data")
self.supervised_model.total_features = expected_features
# Rebuild the layers with correct dimensions
if hasattr(self.supervised_model, '_create_layers'):
self.supervised_model._create_layers()
self.supervised_model.to(self.device)
# Reinitialize optimizer after changing the model
self.supervised_model.optimizer = optim.Adam(
self.supervised_model.parameters(),
lr=0.0001,
weight_decay=0.01
)
# Create dataloaders
train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = torch.utils.data.DataLoader(
train_dataset,
batch_size=self.config['training']['batch_size'],
shuffle=True
)
val_dataset = torch.utils.data.TensorDataset(X_val_tensor, y_val_tensor)
val_loader = torch.utils.data.DataLoader(
val_dataset,
batch_size=self.config['training']['batch_size']
)
# Training loop
for epoch in range(epochs):
# Training phase
self.supervised_model.model.train()
train_loss = 0.0
train_correct = 0
train_total = 0
for inputs, targets in train_loader:
# Zero the parameter gradients
self.supervised_model.optimizer.zero_grad()
# Forward pass
outputs, _ = self.supervised_model.model(inputs)
loss = self.supervised_model.criterion(outputs, targets)
# Backward pass and optimize
loss.backward()
self.supervised_model.optimizer.step()
# Statistics
train_loss += loss.item()
_, predicted = torch.max(outputs.data, 1)
train_total += targets.size(0)
train_correct += (predicted == targets).sum().item()
# Calculate training metrics
train_loss = train_loss / len(train_loader)
train_accuracy = 100 * train_correct / train_total if train_total > 0 else 0
# Validation phase
self.supervised_model.model.eval()
val_loss = 0.0
val_correct = 0
val_total = 0
all_predictions = []
all_targets = []
with torch.no_grad():
for inputs, targets in val_loader:
# Forward pass
outputs, _ = self.supervised_model.model(inputs)
loss = self.supervised_model.criterion(outputs, targets)
# Statistics
val_loss += loss.item()
_, predicted = torch.max(outputs.data, 1)
val_total += targets.size(0)
val_correct += (predicted == targets).sum().item()
# Store for PnL calculation
all_predictions.append(predicted.cpu().numpy())
all_targets.append(targets.cpu().numpy())
# Calculate validation metrics
val_loss = val_loss / len(val_loader)
val_accuracy = 100 * val_correct / val_total if val_total > 0 else 0
# Calculate PnL using the robust DataInterface method
all_predictions = np.concatenate(all_predictions)
                # We need the closing prices that line up with each validation window; the
                # corresponding row indices are derived from the combined train/val lengths below
# Get prices from the reference timeframe dataframe prepared earlier
if hasattr(self.data_interface, 'dataframes') and reference_tf in self.data_interface.dataframes:
reference_df = self.data_interface.dataframes[reference_tf]
# Ensure indices align with the X_val_combined data length
# We need prices corresponding to the END of each window in validation
                    price_indices = np.arange(len(X_train_combined) + window_size - 1, len(X_train_combined) + len(X_val_combined) + window_size - 1)
# Clamp indices to be within bounds of the reference dataframe
price_indices = np.clip(price_indices, 0, len(reference_df) - 1)
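                    # Worked example (hypothetical sizes): with 800 training windows, 200 validation
                    # windows and window_size=24, price_indices spans rows 823 through 1022 of the
                    # reference dataframe (200 values), one end-of-window closing price per validation sample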
if len(price_indices) == len(all_predictions):
actual_val_prices = reference_df['close'].iloc[price_indices].values
pnl, win_rate, _ = self.data_interface.calculate_pnl(all_predictions, actual_val_prices)
self.logger.info(f"PnL calculation (robust) - Trades based on {len(actual_val_prices)} prices. Net PnL: {pnl:.4f}, Win Rate: {win_rate:.2f}")
else:
self.logger.warning(f"Price indices length ({len(price_indices)}) doesn't match predictions length ({len(all_predictions)}). Cannot calculate robust PnL.")
pnl, win_rate = 0.0, 0.0 # Fallback
else:
self.logger.warning("Reference timeframe data not available for robust PnL calculation.")
pnl, win_rate = 0.0, 0.0 # Fallback
# Update history
history['loss'].append(train_loss)
history['val_loss'].append(val_loss)
history['accuracy'].append(train_accuracy)
history['val_accuracy'].append(val_accuracy)
history['val_pnl'].append(pnl)
# Update stats
stats["completed"] += 1
stats["final_loss"] = val_loss
if pnl > stats["best_val_pnl"]:
stats["best_val_pnl"] = pnl
# Save best model by PnL
model_path = self.models_dir / "supervised_model_best.pt"
self.supervised_model.save(str(model_path))
self.logger.info(f"New best CNN model saved with PnL: {pnl:.2f}")
if win_rate > stats["best_win_rate"]:
stats["best_win_rate"] = win_rate
# Log epoch results
self.logger.info(f"Epoch {epoch+1}/{epochs} - Train loss: {train_loss:.4f}, " +
f"Train acc: {train_accuracy:.2f}%, Val loss: {val_loss:.4f}, " +
f"Val acc: {val_accuracy:.2f}%, PnL: {pnl:.2f}, Win rate: {win_rate:.2f}")
# Log to TensorBoard
if self.tensorboard_writer:
self.tensorboard_writer.add_scalar('SupervisedTrain/Loss', train_loss, self.supervised_epochs + epoch)
self.tensorboard_writer.add_scalar('SupervisedTrain/Accuracy', train_accuracy, self.supervised_epochs + epoch)
self.tensorboard_writer.add_scalar('SupervisedVal/Loss', val_loss, self.supervised_epochs + epoch)
self.tensorboard_writer.add_scalar('SupervisedVal/Accuracy', val_accuracy, self.supervised_epochs + epoch)
self.tensorboard_writer.add_scalar('SupervisedVal/PnL', pnl, self.supervised_epochs + epoch)
self.tensorboard_writer.add_scalar('SupervisedVal/WinRate', win_rate * 100, self.supervised_epochs + epoch)
# Update chart with model predictions
if self.chart and epoch % 2 == 0: # Update every other epoch
# Use the model to make predictions on some validation data for visualization
try:
# Choose a subset of validation data for visualization
viz_size = min(20, len(X_val_tensor))
viz_indices = np.random.choice(len(X_val_tensor), viz_size, replace=False)
viz_inputs = X_val_tensor[viz_indices]
viz_targets = y_val_tensor[viz_indices]
# Get predictions
self.supervised_model.model.eval()
with torch.no_grad():
outputs, _ = self.supervised_model.model(viz_inputs)
probs = F.softmax(outputs, dim=1)
_, predictions = torch.max(probs, 1)
# Display last few predictions in the chart
for i in range(min(5, viz_size)):
timestamp_ms = int(time.time() * 1000) + i * 1000 # Space them out
# Get prediction and target
pred_idx = predictions[i].item()
target_idx = viz_targets[i].item()
action_names = ["BUY", "HOLD", "SELL"]
pred_action = action_names[pred_idx]
# Get confidence
confidence = probs[i, pred_idx].item()
# Add to chart
if hasattr(self.chart, 'latest_price') and self.chart.latest_price is not None:
display_price = self.chart.latest_price
else:
display_price = 20000 + np.random.randn() * 100 # Placeholder price for BTC
# Add signal to chart
if hasattr(self.chart, 'add_nn_signal'):
self.chart.add_nn_signal(
symbol=self.config['market_data']['symbol'],
signal=pred_action,
confidence=confidence,
timestamp=timestamp_ms
)
# Update trading info
if hasattr(self.chart, 'update_trading_info'):
self.chart.update_trading_info(
action="EPOCH_VIZ",
prediction=f"SV Acc: {val_accuracy:.1f}%, PnL: {pnl:.1f}",
price=display_price,
timestamp=int(time.time() * 1000)
)
except Exception as e:
self.logger.warning(f"Error updating chart during supervised viz: {str(e)}")
# Update supervised epochs counter
self.supervised_epochs += epochs
except Exception as e:
self.logger.error(f"Error in supervised learning: {str(e)}")
import traceback
self.logger.error(traceback.format_exc())
return stats
def train_reinforcement(self, episodes=2):
"""Train the RL agent"""
stats = {
"episodes": episodes,
"completed": 0,
"best_reward": -float('inf'),
"final_reward": 0,
"avg_reward": 0
}
self.logger.info(f"Training RL agent for {episodes} episodes")
try:
# Prepare data for RL training
window_size = self.config['market_data']['window_size']
X_train_dict, y_train, _, _, _, _ = self.data_interface.prepare_training_data(
window_size=window_size,
refresh=True # Ensure we get relatively fresh data for this iteration
)
if X_train_dict is None or not X_train_dict or y_train is None:
self.logger.error("Failed to get training data for RL phase.")
return stats
# Get reference timeframe
reference_tf = min(
self.config['market_data']['timeframes'],
key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
)
# Find minimum length across all timeframes
min_length = min(len(X_train_dict[tf]) for tf in self.config['market_data']['timeframes'] if X_train_dict[tf] is not None)
if min_length <= window_size + 1:
self.logger.error(f"Not enough data samples ({min_length}) for RL training with window size {window_size}.")
return stats
self.logger.info(f"Using {min_length} samples from each timeframe for RL training preparation")
# For DQN we need to reshape data according to state_shape=(timeframes, window_size, features)
states = []
num_features = X_train_dict[reference_tf].shape[2]
num_timeframes = len(self.config['market_data']['timeframes'])
for i in range(min_length - 1): # -1 to ensure we have next states
state = np.zeros((num_timeframes, window_size, num_features), dtype=np.float32)
valid_state = True
for tf_idx, tf in enumerate(self.config['market_data']['timeframes']):
if X_train_dict[tf] is None or len(X_train_dict[tf]) <= i:
valid_state = False
break
state[tf_idx] = X_train_dict[tf][i]
if valid_state:
states.append(state)
else:
# This should ideally not happen if min_length was calculated correctly
self.logger.warning(f"Skipping state preparation at index {i} due to insufficient data in a timeframe.")
# Get actions from labels (corresponding to the prepared states)
actions = []
# Ensure y_train is sliced correctly to match the number of prepared states
num_states = len(states)
if len(y_train) >= num_states:
y_train_sliced = y_train[:num_states]
for i in range(num_states):
# Ensure y_train_sliced[i] is a valid array/list before argmax
if isinstance(y_train_sliced[i], (np.ndarray, list)) and len(y_train_sliced[i]) > 0:
actions.append(np.argmax(y_train_sliced[i]))
else:
# Handle cases where y_train_sliced[i] might be invalid
self.logger.warning(f"Invalid label found at index {i}, using default action (HOLD=1). Label: {y_train_sliced[i]}")
actions.append(1) # Default to HOLD
else:
self.logger.error(f"Mismatch between number of states ({num_states}) and labels ({len(y_train)}). Cannot proceed with RL training.")
return stats
self.logger.info(f"Prepared {len(states)} state-action pairs for RL training")
if not states:
self.logger.error("No states were prepared for RL training.")
return stats
# --- Pre-calculate Supervised Predictions ---
self.logger.info("Pre-calculating supervised model predictions for RL states...")
sv_predictions = []
try:
self.supervised_model.model.eval() # Set model to evaluation mode
with torch.no_grad():
# Reshape states for supervised model: [batch, window_size, features*num_timeframes]
reshaped_states_list = []
for state in states:
# state shape: [timeframes, window_size, features]
# Target shape: [window_size, features*num_timeframes]
reshaped_state = state.transpose(1, 0, 2).reshape(window_size, -1)
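                        # e.g. (hypothetical shapes): (3, 24, F) -> transpose(1, 0, 2) -> (24, 3, F)
                        # -> reshape(24, -1) -> (24, 3*F); the per-row column blocks match the
                        # per-timeframe layout used for supervised training above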
reshaped_states_list.append(reshaped_state)
if reshaped_states_list:
reshaped_states_batch = np.array(reshaped_states_list)
states_tensor = torch.tensor(reshaped_states_batch, dtype=torch.float32).to(self.device)
# Process in batches if necessary to avoid memory issues
sv_batch_size = 128
num_batches = int(np.ceil(len(states_tensor) / sv_batch_size))
for j in range(num_batches):
batch_start = j * sv_batch_size
batch_end = min((j + 1) * sv_batch_size, len(states_tensor))
batch_tensor = states_tensor[batch_start:batch_end]
outputs, _ = self.supervised_model.model(batch_tensor)
_, predicted_actions = torch.max(outputs.data, 1)
sv_predictions.extend(predicted_actions.cpu().numpy())
self.logger.info(f"Finished pre-calculating {len(sv_predictions)} supervised predictions.")
if len(sv_predictions) != len(states):
self.logger.error(f"Mismatch in supervised predictions ({len(sv_predictions)}) and states ({len(states)}). Aborting RL phase.")
return stats
except Exception as e:
self.logger.error(f"Error during supervised prediction pre-calculation: {e}")
import traceback
self.logger.error(traceback.format_exc())
return stats # Cannot proceed without supervised predictions for consensus
# Reset and initialize chart for trading information
if self.chart:
# Reset trading stats on the chart
if hasattr(self.chart, 'positions'):
self.chart.positions = []
if hasattr(self.chart, 'accumulative_pnl'):
self.chart.accumulative_pnl = 0.0
if hasattr(self.chart, 'current_balance'):
self.chart.current_balance = 100.0
if hasattr(self.chart, 'update_trading_info'):
self.chart.update_trading_info(action="INIT", prediction=None, price=0.0, timestamp=int(time.time() * 1000))
# Training loop
for episode in range(episodes):
                # --- Check and potentially bump epsilon ---
                if self.rl_agent.epsilon <= self.rl_agent.epsilon_min + 1e-6:  # Check if epsilon is at/near minimum
                    # Bump epsilon slightly to encourage exploration if stuck
                    bump_value = 0.1
                    epsilon_start = getattr(self.rl_agent, 'epsilon_start', 1.0)  # Fall back to 1.0 if the agent has no epsilon_start
                    self.rl_agent.epsilon = min(self.rl_agent.epsilon_min + bump_value, epsilon_start)
                    self.logger.warning(f"RL agent epsilon was at minimum. Bumped to {self.rl_agent.epsilon:.4f} for episode {episode+1}")
if not running:
self.logger.info("RL training interrupted")
break
episode_reward = 0
correct_actions = 0
consensus_actions = 0
# Sample a segment of the data
# Ensure segment size is reasonable and doesn't exceed available states
segment_size = min(200, len(states) -1) # Max 200 steps or available data
if segment_size <= 0:
self.logger.warning(f"Not enough states ({len(states)}) to form a training segment. Skipping episode {episode+1}.")
continue
start_idx = np.random.randint(0, len(states) - segment_size) if len(states) > segment_size else 0
end_idx = start_idx + segment_size
self.logger.info(f"RL Episode {episode+1}/{episodes}: Training on segment [{start_idx}:{end_idx}]")
# Train on segment
for i in range(start_idx, end_idx):
state = states[i]
# Original intended action based on labels
true_action = actions[i]
# Get RL agent's predicted action
rl_pred_action = self.rl_agent.act(state)
# Get pre-calculated supervised prediction
sv_pred_action = sv_predictions[i]
next_state = states[i + 1]
# Calculate reward based on price change (standard reward)
try:
# Ensure indices are valid for X_train_dict
if i < len(X_train_dict[reference_tf]) and i+1 < len(X_train_dict[reference_tf]):
price_current = X_train_dict[reference_tf][i][-1, -1] # Closing price
price_next = X_train_dict[reference_tf][i+1][-1, -1]
price_change = (price_next - price_current) / price_current if price_current != 0 else 0
else:
price_change = 0
self.logger.warning(f"Index {i} or {i+1} out of bounds for price calculation.")
except IndexError:
price_change = 0
self.logger.warning(f"IndexError during price calculation at step {i}. Using price_change = 0.")
except Exception as e:
price_change = 0
self.logger.error(f"Unexpected error during price calculation: {e}")
# Define standard reward based on the RL agent's action and outcome
if rl_pred_action == 0: # Buy
reward = price_change * 100
elif rl_pred_action == 2: # Sell
reward = -price_change * 100
else: # Hold (action 1)
# Penalize holding during significant moves, slightly reward holding in stable periods
reward = -abs(price_change) * 50 if abs(price_change) > 0.0005 else abs(price_change) * 10
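                    # Reward example (hypothetical step): price_change = +0.002 (+0.2%) yields
                    # BUY -> +0.2, SELL -> -0.2, HOLD -> -0.1 (|change| > 0.0005, so holding is penalized)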
# --- Apply Consensus Modifier ---
consensus_met = (sv_pred_action == rl_pred_action)
if not consensus_met and rl_pred_action != 1: # If actions disagree and RL didn't choose HOLD
reward -= 5 # REDUCED Penalty for disagreement
# self.logger.debug(f"Step {i}: RL ({rl_pred_action}) vs SV ({sv_pred_action}) disagree. Penalty applied.")
elif consensus_met and rl_pred_action != 1:
consensus_actions += 1 # Count consensus non-hold actions
# Check if RL action matches the true label action
if rl_pred_action == true_action:
correct_actions += 1
# Remember experience (using the true action from labels, but the modified reward)
done = (i == end_idx - 1)
self.rl_agent.remember(state, true_action, reward, next_state, done)
# Replay experiences periodically
if i % 10 == 0:
self.rl_agent.replay()
episode_reward += reward
# Update chart with predicted trading information (no actual trades logged here)
if self.chart and i % 5 == 0:
timestamp_ms = int(time.time() * 1000)
action_names = ["BUY", "HOLD", "SELL"]
action_name = action_names[rl_pred_action] # Show RL's predicted action
# Display price logic... (remains the same)
if hasattr(self.chart, 'latest_price') and self.chart.latest_price is not None:
display_price = self.chart.latest_price
else:
display_price = price_current if 'price_current' in locals() else 0
# Add predicted signal to chart
if hasattr(self.chart, 'add_nn_signal'):
# Indicate consensus in the signal display if possible
signal_text = f"{action_name}{'*' if consensus_met else ''}"
self.chart.add_nn_signal(
symbol=self.config['market_data']['symbol'],
signal=signal_text, # Append '*' for consensus
confidence=0.7, # Placeholder
timestamp=timestamp_ms
)
# Update info display
if hasattr(self.chart, 'update_trading_info'):
consensus_status = "Yes" if consensus_met else "No"
info_text = f"RL: {action_name}, SV: {action_names[sv_pred_action]}, Consensus: {consensus_status}"
self.chart.update_trading_info(
action=action_name, # Still show RL action mainly
prediction=info_text, # Add consensus info
price=display_price,
timestamp=timestamp_ms
)
# Calculate accuracy & consensus rate for the episode
segment_len = end_idx - start_idx
accuracy = (correct_actions / segment_len) * 100 if segment_len > 0 else 0
consensus_rate = (consensus_actions / segment_len) * 100 if segment_len > 0 else 0 # Rate of non-hold consensus actions
# Update the chart with final episode metrics
if self.chart:
# Keep updating the text display if needed
if hasattr(self.chart, 'update_trading_info'):
self.chart.update_trading_info(
action="RL_EP_END",
prediction=f"Reward: {episode_reward:.1f}, Acc: {accuracy:.1f}%, Cons: {consensus_rate:.1f}%",
price=getattr(self.chart, 'latest_price', 0),
timestamp=int(time.time() * 1000)
)
# Log results
self.logger.info(f"RL Episode {episode+1} - Reward: {episode_reward:.2f}, " +
f"Accuracy: {accuracy:.2f}%, Consensus Rate: {consensus_rate:.2f}%, Epsilon: {self.rl_agent.epsilon:.4f}")
# Update stats
stats["completed"] += 1
stats["final_reward"] = episode_reward
stats["avg_reward"] = self.rl_agent.avg_reward
# Save best model based on reward
if episode_reward > stats["best_reward"]:
stats["best_reward"] = episode_reward
self.rl_agent.save(str(self.models_dir / "rl_agent_best"))
self.logger.info(f"New best RL model saved with reward: {episode_reward:.2f}")
self.rl_episodes += episodes
except Exception as e:
self.logger.error(f"Error in reinforcement learning: {str(e)}")
import traceback
self.logger.error(traceback.format_exc())
return stats
def _update_training_stats(self, sv_stats, rl_stats):
"""Update global training statistics"""
global training_stats
# Ensure sv_stats has the necessary keys
if not isinstance(sv_stats, dict) or "completed" not in sv_stats:
self.logger.warning("Supervised training stats missing expected keys, using defaults")
sv_stats = {
"completed": 0,
"best_val_pnl": -float('inf'),
"best_win_rate": 0,
"final_loss": 0
}
# Ensure rl_stats has the necessary keys
if not isinstance(rl_stats, dict) or "completed" not in rl_stats:
self.logger.warning("RL training stats missing expected keys, using defaults")
rl_stats = {
"completed": 0,
"best_reward": -float('inf'),
"final_reward": 0,
"avg_reward": 0
}
# Update supervised stats
training_stats["supervised"]["epochs_completed"] += sv_stats.get("completed", 0)
if sv_stats.get("best_val_pnl", -float('inf')) > training_stats["supervised"]["best_val_pnl"]:
training_stats["supervised"]["best_val_pnl"] = sv_stats["best_val_pnl"]
training_stats["supervised"]["best_epoch"] = self.supervised_epochs
if sv_stats.get("best_win_rate", 0) > training_stats["supervised"]["best_win_rate"]:
training_stats["supervised"]["best_win_rate"] = sv_stats["best_win_rate"]
# Update reinforcement stats
training_stats["reinforcement"]["episodes_completed"] += rl_stats.get("completed", 0)
if rl_stats.get("best_reward", -float('inf')) > training_stats["reinforcement"]["best_reward"]:
training_stats["reinforcement"]["best_reward"] = rl_stats["best_reward"]
training_stats["reinforcement"]["best_episode"] = self.rl_episodes
# Update hybrid stats
training_stats["hybrid"]["iterations_completed"] = self.iter_count
# Calculate a combined score (simple weighted average)
# Ensure values are valid numbers before calculation
sv_pnl_score = training_stats["supervised"]["best_val_pnl"] if isinstance(training_stats["supervised"]["best_val_pnl"], (int, float)) and np.isfinite(training_stats["supervised"]["best_val_pnl"]) else 0.0
rl_reward_score = training_stats["reinforcement"]["best_reward"] if isinstance(training_stats["reinforcement"]["best_reward"], (int, float)) and np.isfinite(training_stats["reinforcement"]["best_reward"]) else 0.0
combined_score = (sv_pnl_score * 0.5) + (rl_reward_score * 0.5)
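        # e.g. (hypothetical values): best_val_pnl = 4.0 and best_reward = 120.0 give
        # combined_score = 0.5 * 4.0 + 0.5 * 120.0 = 62.0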
if combined_score > training_stats["hybrid"]["best_combined_score"]:
training_stats["hybrid"]["best_combined_score"] = combined_score
training_stats["hybrid"]["last_update"] = datetime.now().isoformat()
# Log updated stats
self.logger.info(f"Updated training stats - Combined score: {combined_score:.2f}")
def _save_models_and_stats(self):
"""Save models and training statistics"""
# Save models (Best models are saved within their respective training methods)
# Consider saving latest models here if needed
# Save stats to JSON
stats_path = self.models_dir / f"hybrid_stats_{timestamp}.json"
try:
with open(stats_path, 'w') as f:
# Use a custom JSON encoder for numpy types if necessary
json.dump(training_stats, f, indent=2, default=lambda x: float(x) if isinstance(x, (np.float32, np.float64)) else x)
# Also save to a consistent filename for easy access
latest_path = self.models_dir / "hybrid_stats_latest.json"
with open(latest_path, 'w') as f:
json.dump(training_stats, f, indent=2, default=lambda x: float(x) if isinstance(x, (np.float32, np.float64)) else x)
self.logger.info(f"Saved training stats to {stats_path} and {latest_path}")
except Exception as e:
self.logger.error(f"Error saving training stats: {e}")
def _log_to_tensorboard(self, iteration, sv_stats, rl_stats):
"""Log metrics to TensorBoard"""
if not self.tensorboard_writer:
return
# Ensure stats are dictionaries
sv_stats = sv_stats or {}
rl_stats = rl_stats or {}
# Log supervised metrics
self.tensorboard_writer.add_scalar('Supervised/FinalLoss_PerIter', sv_stats.get("final_loss", 0), iteration)
self.tensorboard_writer.add_scalar('Supervised/BestPnL_Overall', training_stats['supervised']['best_val_pnl'], iteration)
self.tensorboard_writer.add_scalar('Supervised/BestWinRate_Overall', training_stats['supervised']['best_win_rate'], iteration)
# Log RL metrics
self.tensorboard_writer.add_scalar('RL/FinalReward_PerIter', rl_stats.get("final_reward", 0), iteration)
self.tensorboard_writer.add_scalar('RL/BestReward_Overall', training_stats['reinforcement']['best_reward'], iteration)
self.tensorboard_writer.add_scalar('RL/AvgReward_PerIter', rl_stats.get("avg_reward", 0), iteration)
self.tensorboard_writer.add_scalar('RL/Epsilon_Current', self.rl_agent.epsilon if self.rl_agent else 0, iteration)
# Log combined metrics
combined_score = training_stats['hybrid']['best_combined_score']
self.tensorboard_writer.add_scalar('Hybrid/CombinedScore_Overall', combined_score, iteration)
self.tensorboard_writer.add_scalar('Hybrid/Iterations', self.iter_count, iteration)
def main():
"""Main entry point"""
# Parse command line arguments
parser = argparse.ArgumentParser(description='Hybrid Training with CPU Compatibility Fixes')
parser.add_argument('--iterations', type=int, default=10, help='Number of hybrid iterations')
parser.add_argument('--sv-epochs', type=int, default=5, help='Supervised epochs per iteration')
parser.add_argument('--rl-episodes', type=int, default=2, help='RL episodes per iteration')
parser.add_argument('--symbol', type=str, default='BTC/USDT', help='Trading symbol')
parser.add_argument('--timeframes', type=str, default='1m,5m,15m', help='Comma-separated timeframes')
parser.add_argument('--window', type=int, default=24, help='Window size for input data')
parser.add_argument('--batch-size', type=int, default=64, help='Batch size for training')
parser.add_argument('--new-model', action='store_true', help='Start with new models instead of loading existing')
parser.add_argument('--no-dashboard', action='store_true', help='Disable web dashboard')
parser.add_argument('--dashboard-port', type=int, default=8050, help='Port for web dashboard')
args = parser.parse_args()
# Create custom config
custom_config = {
'market_data': {
'symbol': args.symbol,
'timeframes': args.timeframes.split(','),
'window_size': args.window
},
'training': {
'batch_size': args.batch_size,
'learning_rate': 0.0001, # Conservative learning rate
},
'hardware': {
'device': 'cpu', # Force CPU
'mixed_precision': False # Disable mixed precision
},
'model': {
'new_model': args.new_model
},
'visualization': {
'enabled': not args.no_dashboard,
'port': args.dashboard_port
}
}
# Get config from train_config
config = train_config.get_config('hybrid', custom_config)
# Save the config for reference
config_dir = Path('configs')
config_dir.mkdir(exist_ok=True)
train_config.save_config(config, f"configs/hybrid_training_{timestamp}.json")
# Initialize the hybrid model
model = HybridModel(config)
if not model.initialize():
logger.error("Failed to initialize hybrid model")
return
# Show instructions for the web dashboard if enabled
if not args.no_dashboard:
dash_url = f"http://localhost:{args.dashboard_port}"
logger.info(f"Web dashboard is enabled at {dash_url}")
logger.info("You can monitor training progress, see predictions and track PnL in real-time.")
logger.info("Press Ctrl+C to gracefully terminate training (models will be saved).")
# Run the training
stats = model.train_hybrid(
iterations=args.iterations,
sv_epochs_per_iter=args.sv_epochs,
rl_episodes_per_iter=args.rl_episodes
)
# Log final results
logger.info("Training completed successfully")
logger.info(f"Best supervised PnL: {stats['supervised']['best_val_pnl']:.4f}")
logger.info(f"Best RL reward: {stats['reinforcement']['best_reward']:.4f}")
logger.info(f"Best combined score: {stats['hybrid']['best_combined_score']:.4f}")
# Close TensorBoard writer
if model.tensorboard_writer:
model.tensorboard_writer.close()
if __name__ == "__main__":
main()