#!/usr/bin/env python
"""
Hybrid Training Script with Device Compatibility Fixes

This is a fixed version of the hybrid training script that:
1. Forces CPU use to avoid CUDA/device mismatch errors
2. Adds better error handling and recovery for model initialization
3. Moves models directly to the CPU after construction

Usage:
    python train_hybrid_fixed.py --iterations 10 --sv-epochs 5 --rl-episodes 2
"""

import os
import sys
import logging
import argparse
import asyncio
import json
import signal
import threading
import time
from datetime import datetime
from pathlib import Path

import numpy as np
import torch
import torch.nn.functional as F
from torch import optim
from torch.utils.tensorboard import SummaryWriter
import matplotlib.pyplot as plt

# Force CPU usage to avoid device mismatch errors.
# These variables are read before any CUDA context is created (PyTorch
# initializes CUDA lazily), so setting them here is still effective.
os.environ['CUDA_VISIBLE_DEVICES'] = ''
os.environ['DISABLE_MIXED_PRECISION'] = '1'
# Limit allocator fragmentation and disable the JIT to keep CPU behaviour predictable
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
os.environ['PYTORCH_JIT'] = '0'
# Disable CUDA completely in PyTorch
torch.cuda.is_available = lambda: False
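
# Note: overriding torch.cuda.is_available() is a monkey-patch; it only helps
# code paths that call that function. Any code that requests an explicit CUDA
# device would still fail, which is why the models below are also moved to the
# CPU explicitly.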

# Add project root to path if needed
project_root = os.path.dirname(os.path.abspath(__file__))
if project_root not in sys.path:
    sys.path.append(project_root)

# Import configurations
import train_config

# Import key components
from NN.models.cnn_model_pytorch import CNNModelPyTorch, CNNPyTorch
from NN.models.dqn_agent import DQNAgent
from dataprovider_realtime import MultiTimeframeDataInterface, RealTimeChart
from NN.utils.signal_interpreter import SignalInterpreter

# Configure logging
log_dir = Path("logs")
log_dir.mkdir(exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
log_file = log_dir / f"hybrid_training_{timestamp}.log"

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger('hybrid_training')

# Global variables for graceful shutdown
running = True
training_stats = {
    "supervised": {
        "epochs_completed": 0,
        "best_val_pnl": -float('inf'),
        "best_epoch": 0,
        "best_win_rate": 0
    },
    "reinforcement": {
        "episodes_completed": 0,
        "best_reward": -float('inf'),
        "best_episode": 0,
        "best_win_rate": 0
    },
    "hybrid": {
        "iterations_completed": 0,
        "best_combined_score": -float('inf'),
        "training_started": datetime.now().isoformat(),
        "last_update": datetime.now().isoformat()
    }
}
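
# training_stats is mutated in place by HybridModel during training and is
# periodically serialized to JSON by _save_models_and_stats().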

# Configure signal handler for graceful shutdown
def signal_handler(sig, frame):
    global running
    logger.info("Received interrupt signal. Finishing current training cycle and saving models...")
    running = False

# Register signal handler
signal.signal(signal.SIGINT, signal_handler)


class HybridModel:
    """
    Hybrid model that combines supervised CNN learning with RL-based decision optimization
    """

    def __init__(self, config):
        self.config = config

        # Force CPU for all operations
        config['hardware']['device'] = 'cpu'
        config['hardware']['mixed_precision'] = False

        self.device = torch.device('cpu')
        self.supervised_model = None
        self.rl_agent = None
        self.data_interface = None
        self.signal_interpreter = None
        self.chart = None

        # Training stats
        self.tensorboard_writer = None
        self.iter_count = 0
        self.supervised_epochs = 0
        self.rl_episodes = 0

        # Initialize logging
        self.logger = logging.getLogger('hybrid_model')

        # Paths
        self.models_dir = Path(config['paths']['models_dir'])
        self.models_dir.mkdir(exist_ok=True, parents=True)

    def initialize(self):
        """Initialize all components of the hybrid model"""
        # Set up TensorBoard
        tb_dir = Path(self.config['paths']['tensorboard_dir'])
        tb_dir.mkdir(exist_ok=True, parents=True)
        log_dir = tb_dir / f"hybrid_{timestamp}"
        self.tensorboard_writer = SummaryWriter(log_dir=str(log_dir))
        self.logger.info(f"TensorBoard initialized at {log_dir}")

        # Initialize data interface
        symbol = self.config['market_data']['symbol']
        timeframes = self.config['market_data']['timeframes']
        window_size = self.config['market_data']['window_size']

        self.logger.info(f"Initializing data interface for {symbol} with timeframes {timeframes}")
        self.data_interface = MultiTimeframeDataInterface(
            symbol=symbol,
            timeframes=timeframes
        )

        # Initialize supervised model (CNN)
        self._initialize_supervised_model(window_size)

        # Initialize RL agent
        self._initialize_rl_agent(window_size)

        # Initialize signal interpreter
        self.signal_interpreter = SignalInterpreter(config={
            'buy_threshold': 0.65,
            'sell_threshold': 0.65,
            'hold_threshold': 0.75,
            'trend_filter_enabled': True,
            'volume_filter_enabled': True
        })

        # Initialize chart if visualization is enabled
        if self.config.get('visualization', {}).get('enabled', False):
            self._initialize_chart()

        return True

    def _initialize_supervised_model(self, window_size):
        """Initialize the supervised CNN model"""
        try:
            # Get data shape information
            X_train_dict, y_train, X_val_dict, y_val, _, _ = self.data_interface.prepare_training_data(
                window_size=window_size,
                refresh=True
            )

            if X_train_dict is None or y_train is None:
                raise ValueError("Failed to load training data")

            # Get reference timeframe (lowest timeframe)
            reference_tf = min(
                self.config['market_data']['timeframes'],
                key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
            )

            # Get feature count from the data
            features_per_tf = X_train_dict[reference_tf].shape[2]
            total_features = features_per_tf * len(self.config['market_data']['timeframes'])

            # Initialize model
            self.logger.info(f"Initializing CNN model with {total_features} features")

            self.supervised_model = CNNModelPyTorch(
                window_size=window_size,
                timeframes=self.config['market_data']['timeframes'],
                output_size=3,  # BUY/HOLD/SELL
                num_pairs=1  # Single pair for now
            )

            # Create a new model instance with the correct input shape
            if hasattr(self.supervised_model, 'model'):
                # The underlying model needs to be recreated with the correct input shape
                input_shape = (window_size, total_features)
                # Force CPU device for this model
                self.supervised_model.device = self.device

                # Create a new CNNPyTorch model on the CPU
                new_model = CNNPyTorch(input_shape, self.supervised_model.output_size)
                new_model.device = self.device
                new_model.to(self.device)

                # Make sure the class_weights tensor is on the CPU as well
                if hasattr(new_model, 'class_weights'):
                    new_model.class_weights = new_model.class_weights.to(self.device)

                # Replace the model
                self.supervised_model.model = new_model

                # Reinitialize the optimizer for the new parameters
                self.supervised_model.optimizer = optim.Adam(
                    self.supervised_model.model.parameters(),
                    lr=0.0001,
                    weight_decay=0.01
                )

                # Initialize the criterion (missing in the model)
                self.supervised_model.criterion = torch.nn.CrossEntropyLoss()

            # Ensure the model is on the CPU
            self.supervised_model.device = self.device
            if hasattr(self.supervised_model, 'model'):
                self.supervised_model.model.to(self.device)

            # Load an existing model if available and not explicitly starting fresh
            model_path = self.models_dir / "supervised_model_best.pt"
            if model_path.exists() and not self.config.get('model', {}).get('new_model', False):
                self.logger.info(f"Loading existing CNN model from {model_path}")
                try:
                    self.supervised_model.load(str(model_path))
                    self.logger.info("CNN model loaded successfully")
                except Exception as e:
                    self.logger.error(f"Error loading CNN model: {str(e)}")
                    self.logger.info("Starting with a new CNN model")
            else:
                self.logger.info("Starting with a new CNN model")

        except Exception as e:
            self.logger.error(f"Error initializing supervised model: {str(e)}")
            import traceback
            self.logger.error(traceback.format_exc())
            raise

    def _initialize_rl_agent(self, window_size):
        """Initialize the RL agent"""
        try:
            # Get data for RL training
            X_train_dict, _, _, _, _, _ = self.data_interface.prepare_training_data(
                window_size=window_size,
                refresh=True
            )

            if X_train_dict is None:
                raise ValueError("Failed to load training data for RL agent")

            # Get reference timeframe features
            reference_tf = min(
                self.config['market_data']['timeframes'],
                key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
            )

            # Get feature count from the data
            num_features = X_train_dict[reference_tf].shape[2]

            # Initialize RL agent
            self.logger.info("Initializing RL agent")

            # State shape for DQN agent: (timeframes, window_size, features)
            state_shape = (len(self.config['market_data']['timeframes']), window_size, num_features)
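            # Illustrative example (actual values depend on the configured data):
            # with timeframes ['1m', '5m', '15m'], window_size=24 and, say, 10
            # features per candle, each DQN state is a (3, 24, 10) array.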

            self.rl_agent = DQNAgent(
                state_shape=state_shape,
                n_actions=3,  # BUY/HOLD/SELL
                epsilon=1.0,
                epsilon_min=0.01,
                epsilon_decay=0.995,
                learning_rate=self.config['training']['learning_rate'],
                gamma=0.95,
                buffer_size=10000,
                batch_size=self.config['training']['batch_size'],
                device=self.device  # Explicitly pass the CPU device
            )

            # Explicitly move the agent to the CPU and force it to stay there
            try:
                # First set the device in the agent itself
                self.rl_agent.device = self.device

                # Force PyTorch to use the CPU by setting the device on each network
                if hasattr(self.rl_agent, 'policy_net'):
                    self.rl_agent.policy_net.to(self.device)
                    # Force all layers to the CPU
                    for parameter in self.rl_agent.policy_net.parameters():
                        parameter.data = parameter.data.to(self.device)

                if hasattr(self.rl_agent, 'target_net'):
                    self.rl_agent.target_net.to(self.device)
                    # Force all layers to the CPU
                    for parameter in self.rl_agent.target_net.parameters():
                        parameter.data = parameter.data.to(self.device)

                # Move models to the specified device
                self.rl_agent.move_models_to_device(self.device)
                self.logger.info(f"RL agent models moved to {self.device}")
            except Exception as e:
                self.logger.warning(f"Could not move RL agent models to device: {str(e)}")

            # Load an existing agent if available and not explicitly starting fresh
            agent_path = self.models_dir / "rl_agent_best"
            if os.path.exists(f"{agent_path}_policy.pt") and not self.config.get('model', {}).get('new_model', False):
                self.logger.info(f"Loading existing RL agent from {agent_path}")
                try:
                    self.rl_agent.load(str(agent_path))
                    self.logger.info("RL agent loaded successfully")
                except Exception as e:
                    self.logger.error(f"Error loading RL agent: {str(e)}")
                    self.logger.info("Starting with a new RL agent")
            else:
                self.logger.info("Starting with a new RL agent")

            # Reset epsilon if training a new model
            if self.config.get('model', {}).get('new_model', False):
                if hasattr(self.rl_agent, 'epsilon_start'):
                    self.rl_agent.epsilon = self.rl_agent.epsilon_start
                    self.logger.info(f"New model requested. Reset RL agent epsilon to starting value: {self.rl_agent.epsilon:.2f}")
                else:
                    # Fallback if epsilon_start isn't defined; assume 1.0
                    self.rl_agent.epsilon = 1.0
                    self.logger.info("New model requested. Reset RL agent epsilon to default starting value: 1.00")

        except Exception as e:
            self.logger.error(f"Error initializing RL agent: {str(e)}")
            import traceback
            self.logger.error(traceback.format_exc())
            raise

    def _initialize_chart(self):
        """Initialize the RealTimeChart for visualization"""
        try:
            symbol = self.config['market_data']['symbol']
            self.logger.info(f"Initializing RealTimeChart for {symbol}")

            self.chart = RealTimeChart(symbol=symbol)

            # Start the chart server in a background thread
            dashboard_port = self.config.get('visualization', {}).get('port', 8050)
            self.logger.info(f"Starting web dashboard for {symbol} on port {dashboard_port}")
            self.chart_thread = threading.Thread(
                target=lambda: self.chart.run(host='localhost', port=dashboard_port)
            )
            self.chart_thread.daemon = True  # Allow the thread to exit when the main program exits
            self.chart_thread.start()
            self.logger.info(f"Web dashboard started at http://localhost:{dashboard_port}/")

            # Also start the websocket connection for real-time data
            self.websocket_thread = threading.Thread(
                target=lambda: asyncio.run(self.chart.start_websocket())
            )
            self.websocket_thread.daemon = True
            self.websocket_thread.start()
            self.logger.info(f"WebSocket connection started for {symbol}")

        except Exception as e:
            self.logger.error(f"Error initializing chart: {str(e)}")
            import traceback
            self.logger.error(traceback.format_exc())
            self.chart = None

    def train_hybrid(self, iterations=10, sv_epochs_per_iter=5, rl_episodes_per_iter=2):
        """
        Main hybrid training loop

        Args:
            iterations: Number of hybrid iterations to run
            sv_epochs_per_iter: Number of supervised epochs per iteration
            rl_episodes_per_iter: Number of RL episodes per iteration

        Returns:
            dict: Training statistics
        """
        self.logger.info(f"Starting hybrid training with {iterations} iterations")
        self.logger.info(f"Each iteration includes {sv_epochs_per_iter} supervised epochs and {rl_episodes_per_iter} RL episodes")

        # Training loop
        for iteration in range(iterations):
            if not running:
                self.logger.info("Training stopped by user")
                break

            self.logger.info(f"Iteration {iteration+1}/{iterations}")
            self.iter_count += 1

            # 1. Supervised learning phase
            self.logger.info("Starting supervised learning phase")
            sv_stats = self.train_supervised(epochs=sv_epochs_per_iter)

            # 2. Reinforcement learning phase
            self.logger.info("Starting reinforcement learning phase")
            rl_stats = self.train_reinforcement(episodes=rl_episodes_per_iter)

            # 3. Update global training stats
            self._update_training_stats(sv_stats, rl_stats)

            # 4. Save models and stats
            self._save_models_and_stats()

            # 5. Log to TensorBoard
            if self.tensorboard_writer:
                self._log_to_tensorboard(iteration, sv_stats, rl_stats)

        self.logger.info("Hybrid training completed")
        return training_stats

    def train_supervised(self, epochs=5):
        """Train the supervised CNN model"""
        stats = {
            "epochs": epochs,
            "completed": 0,
            "best_val_pnl": -float('inf'),
            "best_win_rate": 0,
            "final_loss": 0
        }

        self.logger.info(f"Training supervised model for {epochs} epochs")

        try:
            # Prepare training data
            window_size = self.config['market_data']['window_size']
            X_train_dict, y_train, X_val_dict, y_val, _, _ = self.data_interface.prepare_training_data(
                window_size=window_size,
                refresh=True
            )

            # Get reference timeframe for consistency
            reference_tf = min(
                self.config['market_data']['timeframes'],
                key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
            )

            # Check available samples
            min_samples = min(X_train_dict[tf].shape[0] for tf in self.config['market_data']['timeframes'])
            self.logger.info(f"Using {min_samples} samples for training")

            # Get the feature count per timeframe
            features_per_tf = X_train_dict[reference_tf].shape[2]
            total_features = features_per_tf * len(self.config['market_data']['timeframes'])
            self.logger.info(f"Features per timeframe: {features_per_tf}, Total features: {total_features}")

            # Log timeframe data shapes for debugging
            for tf in self.config['market_data']['timeframes']:
                self.logger.info(f"Timeframe {tf} data shape: {X_train_dict[tf].shape}")

            # Prepare concatenated inputs for each sample across all timeframes
            # Shape will be [samples, window_size, features*num_timeframes]
            X_train_combined = np.zeros((min_samples, window_size, total_features))

            # Fill the array with data from all timeframes
            for i in range(min_samples):
                # For each timeframe, stack the features horizontally for the same window
                for tf_idx, tf in enumerate(self.config['market_data']['timeframes']):
                    # Place this timeframe's features at the appropriate position
                    start_idx = tf_idx * features_per_tf
                    end_idx = (tf_idx + 1) * features_per_tf
                    X_train_combined[i, :, start_idx:end_idx] = X_train_dict[tf][i]
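
            # The loop above stacks timeframes along the feature axis; the same
            # layout could be produced in vectorized form with:
            #   X_train_combined = np.concatenate(
            #       [X_train_dict[tf][:min_samples] for tf in self.config['market_data']['timeframes']],
            #       axis=2)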

            # Validation data: if none was provided, carve it out of the training data
            if X_val_dict is None or y_val is None or min(X_val_dict[tf].shape[0] for tf in self.config['market_data']['timeframes']) == 0:
                # No validation data provided, use a portion of training data
                self.logger.info("No validation data available, using 20% of training data for validation")
                train_size = int(0.8 * min_samples)

                # Split the training data
                X_train_split = X_train_combined[:train_size]
                y_train_split = y_train[:train_size]

                X_val_combined = X_train_combined[train_size:min_samples]
                y_val_np = y_train[train_size:min_samples]

                # Update training data
                X_train_combined = X_train_split
                y_train_np = y_train_split
            else:
                # Build the combined validation array the same way as the training array
                min_val_samples = min(X_val_dict[tf].shape[0] for tf in self.config['market_data']['timeframes'])
                X_val_combined = np.zeros((min_val_samples, window_size, total_features))

                for i in range(min_val_samples):
                    for tf_idx, tf in enumerate(self.config['market_data']['timeframes']):
                        start_idx = tf_idx * features_per_tf
                        end_idx = (tf_idx + 1) * features_per_tf
                        X_val_combined[i, :, start_idx:end_idx] = X_val_dict[tf][i]

                y_train_np = y_train[:min_samples]
                y_val_np = y_val[:min_val_samples]

            self.logger.info(f"Prepared data: X_train shape: {X_train_combined.shape}, X_val shape: {X_val_combined.shape}")

            # Reset and initialize the chart for trading information
            if self.chart:
                # Reset trading stats on the chart
                if hasattr(self.chart, 'positions'):
                    self.chart.positions = []

                if hasattr(self.chart, 'accumulative_pnl'):
                    self.chart.accumulative_pnl = 0.0

                if hasattr(self.chart, 'current_balance'):
                    self.chart.current_balance = 100.0

                if hasattr(self.chart, 'update_trading_info'):
                    self.chart.update_trading_info(
                        action="INIT",
                        prediction=None,
                        price=0.0,
                        timestamp=int(time.time() * 1000)
                    )

            # Use a custom training loop instead of the model's train method;
            # this gives us more control over the process
            self.supervised_model.model.train()

            # History to store metrics
            history = {
                'loss': [],
                'val_loss': [],
                'accuracy': [],
                'val_accuracy': [],
                'val_pnl': []
            }

            # Convert data to tensors
            X_train_tensor = torch.tensor(X_train_combined, dtype=torch.float32).to(self.device)
            y_train_tensor = torch.tensor(y_train_np, dtype=torch.long).to(self.device)
            X_val_tensor = torch.tensor(X_val_combined, dtype=torch.float32).to(self.device)
            y_val_tensor = torch.tensor(y_val_np, dtype=torch.long).to(self.device)

            # Verify that the model's feature dimensions match the input data
            if hasattr(self.supervised_model, 'total_features'):
                expected_features = X_train_combined.shape[2]
                if self.supervised_model.total_features != expected_features:
                    self.logger.warning(f"Model features ({self.supervised_model.total_features}) don't match input features ({expected_features})")
                    self.logger.info("Updating model's total_features to match input data")
                    self.supervised_model.total_features = expected_features
                    # Rebuild the layers with correct dimensions
                    if hasattr(self.supervised_model, '_create_layers'):
                        self.supervised_model._create_layers()
                        self.supervised_model.to(self.device)
                        # Reinitialize the optimizer after changing the model
                        self.supervised_model.optimizer = optim.Adam(
                            self.supervised_model.parameters(),
                            lr=0.0001,
                            weight_decay=0.01
                        )
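
            # Note: _create_layers() above is assumed (from CNNModelPyTorch's
            # interface) to rebuild the network using the updated total_features
            # attribute; if that helper is absent, only the mismatch warning is
            # logged and training proceeds with the original layers.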

            # Create dataloaders
            train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
            train_loader = torch.utils.data.DataLoader(
                train_dataset,
                batch_size=self.config['training']['batch_size'],
                shuffle=True
            )

            val_dataset = torch.utils.data.TensorDataset(X_val_tensor, y_val_tensor)
            val_loader = torch.utils.data.DataLoader(
                val_dataset,
                batch_size=self.config['training']['batch_size']
            )

            # Training loop
            for epoch in range(epochs):
                # Training phase
                self.supervised_model.model.train()
                train_loss = 0.0
                train_correct = 0
                train_total = 0

                for inputs, targets in train_loader:
                    # Zero the parameter gradients
                    self.supervised_model.optimizer.zero_grad()

                    # Forward pass
                    outputs, _ = self.supervised_model.model(inputs)
                    loss = self.supervised_model.criterion(outputs, targets)

                    # Backward pass and optimize
                    loss.backward()
                    self.supervised_model.optimizer.step()

                    # Statistics
                    train_loss += loss.item()
                    _, predicted = torch.max(outputs.data, 1)
                    train_total += targets.size(0)
                    train_correct += (predicted == targets).sum().item()

                # Calculate training metrics
                train_loss = train_loss / len(train_loader)
                train_accuracy = 100 * train_correct / train_total if train_total > 0 else 0

                # Validation phase
                self.supervised_model.model.eval()
                val_loss = 0.0
                val_correct = 0
                val_total = 0
                all_predictions = []
                all_targets = []

                with torch.no_grad():
                    for inputs, targets in val_loader:
                        # Forward pass
                        outputs, _ = self.supervised_model.model(inputs)
                        loss = self.supervised_model.criterion(outputs, targets)

                        # Statistics
                        val_loss += loss.item()
                        _, predicted = torch.max(outputs.data, 1)
                        val_total += targets.size(0)
                        val_correct += (predicted == targets).sum().item()

                        # Store for PnL calculation
                        all_predictions.append(predicted.cpu().numpy())
                        all_targets.append(targets.cpu().numpy())

                # Calculate validation metrics
                val_loss = val_loss / len(val_loader)
                val_accuracy = 100 * val_correct / val_total if val_total > 0 else 0

                # Calculate PnL using the robust DataInterface method
                all_predictions = np.concatenate(all_predictions)
                # We need the corresponding prices for the validation set;
                # fetch the raw prices that back the validation windows
                val_prices_start_index = len(y_train_np)  # Assuming validation data follows training data
                val_prices_end_index = val_prices_start_index + len(y_val_np)
                # Get prices from the reference timeframe dataframe prepared earlier
                if hasattr(self.data_interface, 'dataframes') and reference_tf in self.data_interface.dataframes:
                    reference_df = self.data_interface.dataframes[reference_tf]
                    # Ensure indices align with the X_val_combined data length;
                    # we need the price at the END of each validation window
                    price_indices = np.arange(len(X_train_combined) + window_size - 1,
                                              len(X_train_combined) + len(X_val_combined) + window_size - 1)
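                    # For illustration: with 100 combined training samples, window_size=24
                    # and 20 validation samples, this selects rows 123..142 of the
                    # reference dataframe, i.e. the closing bar of each validation window.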

                    # Clamp indices to be within bounds of the reference dataframe
                    price_indices = np.clip(price_indices, 0, len(reference_df) - 1)

                    if len(price_indices) == len(all_predictions):
                        actual_val_prices = reference_df['close'].iloc[price_indices].values
                        pnl, win_rate, _ = self.data_interface.calculate_pnl(all_predictions, actual_val_prices)
                        self.logger.info(f"PnL calculation (robust) - Trades based on {len(actual_val_prices)} prices. Net PnL: {pnl:.4f}, Win Rate: {win_rate:.2f}")
                    else:
                        self.logger.warning(f"Price indices length ({len(price_indices)}) doesn't match predictions length ({len(all_predictions)}). Cannot calculate robust PnL.")
                        pnl, win_rate = 0.0, 0.0  # Fallback
                else:
                    self.logger.warning("Reference timeframe data not available for robust PnL calculation.")
                    pnl, win_rate = 0.0, 0.0  # Fallback

                # Update history
                history['loss'].append(train_loss)
                history['val_loss'].append(val_loss)
                history['accuracy'].append(train_accuracy)
                history['val_accuracy'].append(val_accuracy)
                history['val_pnl'].append(pnl)

                # Update stats
                stats["completed"] += 1
                stats["final_loss"] = val_loss

                if pnl > stats["best_val_pnl"]:
                    stats["best_val_pnl"] = pnl
                    # Save best model by PnL
                    model_path = self.models_dir / "supervised_model_best.pt"
                    self.supervised_model.save(str(model_path))
                    self.logger.info(f"New best CNN model saved with PnL: {pnl:.2f}")

                if win_rate > stats["best_win_rate"]:
                    stats["best_win_rate"] = win_rate

                # Log epoch results
                self.logger.info(f"Epoch {epoch+1}/{epochs} - Train loss: {train_loss:.4f}, " +
                                 f"Train acc: {train_accuracy:.2f}%, Val loss: {val_loss:.4f}, " +
                                 f"Val acc: {val_accuracy:.2f}%, PnL: {pnl:.2f}, Win rate: {win_rate:.2f}")

                # Log to TensorBoard
                if self.tensorboard_writer:
                    self.tensorboard_writer.add_scalar('SupervisedTrain/Loss', train_loss, self.supervised_epochs + epoch)
                    self.tensorboard_writer.add_scalar('SupervisedTrain/Accuracy', train_accuracy, self.supervised_epochs + epoch)
                    self.tensorboard_writer.add_scalar('SupervisedVal/Loss', val_loss, self.supervised_epochs + epoch)
                    self.tensorboard_writer.add_scalar('SupervisedVal/Accuracy', val_accuracy, self.supervised_epochs + epoch)
                    self.tensorboard_writer.add_scalar('SupervisedVal/PnL', pnl, self.supervised_epochs + epoch)
                    self.tensorboard_writer.add_scalar('SupervisedVal/WinRate', win_rate * 100, self.supervised_epochs + epoch)

                # Update the chart with model predictions
                if self.chart and epoch % 2 == 0:  # Update every other epoch
                    # Use the model to make predictions on some validation data for visualization
                    try:
                        # Choose a subset of validation data for visualization
                        viz_size = min(20, len(X_val_tensor))
                        viz_indices = np.random.choice(len(X_val_tensor), viz_size, replace=False)
                        viz_inputs = X_val_tensor[viz_indices]
                        viz_targets = y_val_tensor[viz_indices]

                        # Get predictions
                        self.supervised_model.model.eval()
                        with torch.no_grad():
                            outputs, _ = self.supervised_model.model(viz_inputs)
                            probs = F.softmax(outputs, dim=1)
                            _, predictions = torch.max(probs, 1)

                        # Display the last few predictions in the chart
                        for i in range(min(5, viz_size)):
                            timestamp_ms = int(time.time() * 1000) + i * 1000  # Space them out

                            # Get prediction and target
                            pred_idx = predictions[i].item()
                            target_idx = viz_targets[i].item()
                            action_names = ["BUY", "HOLD", "SELL"]
                            pred_action = action_names[pred_idx]

                            # Get confidence
                            confidence = probs[i, pred_idx].item()

                            # Pick a price to display alongside the signal
                            if hasattr(self.chart, 'latest_price') and self.chart.latest_price is not None:
                                display_price = self.chart.latest_price
                            else:
                                display_price = 20000 + np.random.randn() * 100  # Placeholder price for BTC

                            # Add signal to chart
                            if hasattr(self.chart, 'add_nn_signal'):
                                self.chart.add_nn_signal(
                                    symbol=self.config['market_data']['symbol'],
                                    signal=pred_action,
                                    confidence=confidence,
                                    timestamp=timestamp_ms
                                )

                        # Update trading info
                        if hasattr(self.chart, 'update_trading_info'):
                            self.chart.update_trading_info(
                                action="EPOCH_VIZ",
                                prediction=f"SV Acc: {val_accuracy:.1f}%, PnL: {pnl:.1f}",
                                price=display_price,
                                timestamp=int(time.time() * 1000)
                            )
                    except Exception as e:
                        self.logger.warning(f"Error updating chart during supervised viz: {str(e)}")

            # Update supervised epochs counter
            self.supervised_epochs += epochs

        except Exception as e:
            self.logger.error(f"Error in supervised learning: {str(e)}")
            import traceback
            self.logger.error(traceback.format_exc())

        return stats

    def train_reinforcement(self, episodes=2):
        """Train the RL agent"""
        stats = {
            "episodes": episodes,
            "completed": 0,
            "best_reward": -float('inf'),
            "final_reward": 0,
            "avg_reward": 0
        }

        self.logger.info(f"Training RL agent for {episodes} episodes")

        try:
            # Prepare data for RL training
            window_size = self.config['market_data']['window_size']
            X_train_dict, y_train, _, _, _, _ = self.data_interface.prepare_training_data(
                window_size=window_size,
                refresh=True  # Ensure we get relatively fresh data for this iteration
            )

            if X_train_dict is None or not X_train_dict or y_train is None:
                self.logger.error("Failed to get training data for RL phase.")
                return stats

            # Get reference timeframe
            reference_tf = min(
                self.config['market_data']['timeframes'],
                key=lambda x: self.data_interface.timeframe_to_seconds.get(x, 3600)
            )

            # Find the minimum length across all timeframes
            min_length = min(len(X_train_dict[tf]) for tf in self.config['market_data']['timeframes'] if X_train_dict[tf] is not None)
            if min_length <= window_size + 1:
                self.logger.error(f"Not enough data samples ({min_length}) for RL training with window size {window_size}.")
                return stats
            self.logger.info(f"Using {min_length} samples from each timeframe for RL training preparation")

            # For DQN we need to reshape data according to state_shape=(timeframes, window_size, features)
            states = []
            num_features = X_train_dict[reference_tf].shape[2]
            num_timeframes = len(self.config['market_data']['timeframes'])

            for i in range(min_length - 1):  # -1 to ensure we have next states
                state = np.zeros((num_timeframes, window_size, num_features), dtype=np.float32)
                valid_state = True
                for tf_idx, tf in enumerate(self.config['market_data']['timeframes']):
                    if X_train_dict[tf] is None or len(X_train_dict[tf]) <= i:
                        valid_state = False
                        break
                    state[tf_idx] = X_train_dict[tf][i]
                if valid_state:
                    states.append(state)
                else:
                    # This should ideally not happen if min_length was calculated correctly
                    self.logger.warning(f"Skipping state preparation at index {i} due to insufficient data in a timeframe.")

            # Get actions from labels (corresponding to the prepared states)
            actions = []
            # Ensure y_train is sliced correctly to match the number of prepared states
            num_states = len(states)
            if len(y_train) >= num_states:
                y_train_sliced = y_train[:num_states]
                for i in range(num_states):
                    # Ensure y_train_sliced[i] is a valid array/list before argmax
                    if isinstance(y_train_sliced[i], (np.ndarray, list)) and len(y_train_sliced[i]) > 0:
                        actions.append(np.argmax(y_train_sliced[i]))
                    else:
                        # Handle cases where y_train_sliced[i] might be invalid
                        self.logger.warning(f"Invalid label found at index {i}, using default action (HOLD=1). Label: {y_train_sliced[i]}")
                        actions.append(1)  # Default to HOLD
            else:
                self.logger.error(f"Mismatch between number of states ({num_states}) and labels ({len(y_train)}). Cannot proceed with RL training.")
                return stats

            self.logger.info(f"Prepared {len(states)} state-action pairs for RL training")
            if not states:
                self.logger.error("No states were prepared for RL training.")
                return stats

            # --- Pre-calculate supervised predictions ---
            self.logger.info("Pre-calculating supervised model predictions for RL states...")
            sv_predictions = []
            try:
                self.supervised_model.model.eval()  # Set model to evaluation mode
                with torch.no_grad():
                    # Reshape states for the supervised model: [batch, window_size, features*num_timeframes]
                    reshaped_states_list = []
                    for state in states:
                        # state shape: [timeframes, window_size, features]
                        # Target shape: [window_size, features*num_timeframes]
                        reshaped_state = state.transpose(1, 0, 2).reshape(window_size, -1)
                        reshaped_states_list.append(reshaped_state)
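
                    # The transpose/reshape maps (timeframes, window, features) to
                    # (window, timeframes*features), so each window row carries the
                    # per-timeframe feature blocks in the same order the supervised
                    # model was trained on.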

                    if reshaped_states_list:
                        reshaped_states_batch = np.array(reshaped_states_list)
                        states_tensor = torch.tensor(reshaped_states_batch, dtype=torch.float32).to(self.device)

                        # Process in batches if necessary to avoid memory issues
                        sv_batch_size = 128
                        num_batches = int(np.ceil(len(states_tensor) / sv_batch_size))
                        for j in range(num_batches):
                            batch_start = j * sv_batch_size
                            batch_end = min((j + 1) * sv_batch_size, len(states_tensor))
                            batch_tensor = states_tensor[batch_start:batch_end]

                            outputs, _ = self.supervised_model.model(batch_tensor)
                            _, predicted_actions = torch.max(outputs.data, 1)
                            sv_predictions.extend(predicted_actions.cpu().numpy())

                self.logger.info(f"Finished pre-calculating {len(sv_predictions)} supervised predictions.")
                if len(sv_predictions) != len(states):
                    self.logger.error(f"Mismatch in supervised predictions ({len(sv_predictions)}) and states ({len(states)}). Aborting RL phase.")
                    return stats
            except Exception as e:
                self.logger.error(f"Error during supervised prediction pre-calculation: {e}")
                import traceback
                self.logger.error(traceback.format_exc())
                return stats  # Cannot proceed without supervised predictions for consensus

            # Reset and initialize the chart for trading information
            if self.chart:
                # Reset trading stats on the chart
                if hasattr(self.chart, 'positions'):
                    self.chart.positions = []
                if hasattr(self.chart, 'accumulative_pnl'):
                    self.chart.accumulative_pnl = 0.0
                if hasattr(self.chart, 'current_balance'):
                    self.chart.current_balance = 100.0
                if hasattr(self.chart, 'update_trading_info'):
                    self.chart.update_trading_info(action="INIT", prediction=None, price=0.0, timestamp=int(time.time() * 1000))

            # Training loop
            for episode in range(episodes):
                # --- Check and potentially bump epsilon ---
                if self.rl_agent.epsilon <= self.rl_agent.epsilon_min + 1e-6:  # Epsilon is at/near its minimum
                    # Bump epsilon slightly to encourage exploration if stuck
                    bump_value = 0.1
                    self.rl_agent.epsilon = min(self.rl_agent.epsilon_min + bump_value, self.rl_agent.epsilon_start)
                    self.logger.warning(f"RL agent epsilon was at minimum. Bumped to {self.rl_agent.epsilon:.4f} for episode {episode+1}")

                if not running:
                    self.logger.info("RL training interrupted")
                    break

                episode_reward = 0
                correct_actions = 0
                consensus_actions = 0

                # Sample a segment of the data; keep the segment size reasonable
                # and within the number of available states
                segment_size = min(200, len(states) - 1)  # Max 200 steps or available data
                if segment_size <= 0:
                    self.logger.warning(f"Not enough states ({len(states)}) to form a training segment. Skipping episode {episode+1}.")
                    continue

                start_idx = np.random.randint(0, len(states) - segment_size) if len(states) > segment_size else 0
                end_idx = start_idx + segment_size

                self.logger.info(f"RL Episode {episode+1}/{episodes}: Training on segment [{start_idx}:{end_idx}]")

                # Train on segment
                for i in range(start_idx, end_idx):
                    state = states[i]
                    # Original intended action based on labels
                    true_action = actions[i]
                    # Get the RL agent's predicted action
                    rl_pred_action = self.rl_agent.act(state)
                    # Get the pre-calculated supervised prediction
                    sv_pred_action = sv_predictions[i]

                    next_state = states[i + 1]

                    # Calculate reward based on price change (standard reward).
                    # Assumes the last feature of the last row of each window is the closing price.
                    try:
                        # Ensure indices are valid for X_train_dict
                        if i < len(X_train_dict[reference_tf]) and i + 1 < len(X_train_dict[reference_tf]):
                            price_current = X_train_dict[reference_tf][i][-1, -1]  # Closing price
                            price_next = X_train_dict[reference_tf][i + 1][-1, -1]
                            price_change = (price_next - price_current) / price_current if price_current != 0 else 0
                        else:
                            price_change = 0
                            self.logger.warning(f"Index {i} or {i+1} out of bounds for price calculation.")

                    except IndexError:
                        price_change = 0
                        self.logger.warning(f"IndexError during price calculation at step {i}. Using price_change = 0.")
                    except Exception as e:
                        price_change = 0
                        self.logger.error(f"Unexpected error during price calculation: {e}")

                    # Define the standard reward based on the RL agent's action and outcome
                    if rl_pred_action == 0:  # Buy
                        reward = price_change * 100
                    elif rl_pred_action == 2:  # Sell
                        reward = -price_change * 100
                    else:  # Hold (action 1)
                        # Penalize holding during significant moves, slightly reward holding in stable periods
                        reward = -abs(price_change) * 50 if abs(price_change) > 0.0005 else abs(price_change) * 10

                    # --- Apply consensus modifier ---
                    consensus_met = (sv_pred_action == rl_pred_action)
                    if not consensus_met and rl_pred_action != 1:  # Actions disagree and RL didn't choose HOLD
                        reward -= 5  # Reduced penalty for disagreement
                        # self.logger.debug(f"Step {i}: RL ({rl_pred_action}) vs SV ({sv_pred_action}) disagree. Penalty applied.")
                    elif consensus_met and rl_pred_action != 1:
                        consensus_actions += 1  # Count consensus non-hold actions
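
                    # Worked example (illustrative numbers): a +0.2% move with a BUY
                    # gives reward = 0.002 * 100 = 0.2; a HOLD through that same move
                    # gives -0.002 * 50 = -0.1; a non-HOLD action that disagrees with
                    # the supervised prediction loses a further 5.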

                    # Check if the RL action matches the true label action
                    if rl_pred_action == true_action:
                        correct_actions += 1

                    # Remember the experience (using the true action from the labels, but the modified reward)
                    done = (i == end_idx - 1)
                    self.rl_agent.remember(state, true_action, reward, next_state, done)

                    # Replay experiences periodically
                    if i % 10 == 0:
                        self.rl_agent.replay()

                    episode_reward += reward

                    # Update the chart with predicted trading information (no actual trades logged here)
                    if self.chart and i % 5 == 0:
                        timestamp_ms = int(time.time() * 1000)
                        action_names = ["BUY", "HOLD", "SELL"]
                        action_name = action_names[rl_pred_action]  # Show the RL agent's predicted action

                        # Pick a price to display alongside the signal
                        if hasattr(self.chart, 'latest_price') and self.chart.latest_price is not None:
                            display_price = self.chart.latest_price
                        else:
                            display_price = price_current if 'price_current' in locals() else 0

                        # Add the predicted signal to the chart
                        if hasattr(self.chart, 'add_nn_signal'):
                            # Indicate consensus in the signal display if possible
                            signal_text = f"{action_name}{'*' if consensus_met else ''}"
                            self.chart.add_nn_signal(
                                symbol=self.config['market_data']['symbol'],
                                signal=signal_text,  # '*' marks consensus
                                confidence=0.7,  # Placeholder
                                timestamp=timestamp_ms
                            )

                        # Update the info display
                        if hasattr(self.chart, 'update_trading_info'):
                            consensus_status = "Yes" if consensus_met else "No"
                            info_text = f"RL: {action_name}, SV: {action_names[sv_pred_action]}, Consensus: {consensus_status}"
                            self.chart.update_trading_info(
                                action=action_name,  # Still show the RL action mainly
                                prediction=info_text,  # Add consensus info
                                price=display_price,
                                timestamp=timestamp_ms
                            )

                # Calculate accuracy & consensus rate for the episode
                segment_len = end_idx - start_idx
                accuracy = (correct_actions / segment_len) * 100 if segment_len > 0 else 0
                consensus_rate = (consensus_actions / segment_len) * 100 if segment_len > 0 else 0  # Rate of non-hold consensus actions

                # Update the chart with final episode metrics
                if self.chart:
                    # Keep updating the text display if needed
                    if hasattr(self.chart, 'update_trading_info'):
                        self.chart.update_trading_info(
                            action="RL_EP_END",
                            prediction=f"Reward: {episode_reward:.1f}, Acc: {accuracy:.1f}%, Cons: {consensus_rate:.1f}%",
                            price=getattr(self.chart, 'latest_price', 0),
                            timestamp=int(time.time() * 1000)
                        )

                # Log results
                self.logger.info(f"RL Episode {episode+1} - Reward: {episode_reward:.2f}, " +
                                 f"Accuracy: {accuracy:.2f}%, Consensus Rate: {consensus_rate:.2f}%, Epsilon: {self.rl_agent.epsilon:.4f}")

                # Update stats
                stats["completed"] += 1
                stats["final_reward"] = episode_reward
                stats["avg_reward"] = self.rl_agent.avg_reward

                # Save the best model based on reward
                if episode_reward > stats["best_reward"]:
                    stats["best_reward"] = episode_reward
                    self.rl_agent.save(str(self.models_dir / "rl_agent_best"))
                    self.logger.info(f"New best RL model saved with reward: {episode_reward:.2f}")

            self.rl_episodes += episodes

        except Exception as e:
            self.logger.error(f"Error in reinforcement learning: {str(e)}")
            import traceback
            self.logger.error(traceback.format_exc())

        return stats

    def _update_training_stats(self, sv_stats, rl_stats):
        """Update global training statistics"""
        global training_stats

        # Ensure sv_stats has the necessary keys
        if not isinstance(sv_stats, dict) or "completed" not in sv_stats:
            self.logger.warning("Supervised training stats missing expected keys, using defaults")
            sv_stats = {
                "completed": 0,
                "best_val_pnl": -float('inf'),
                "best_win_rate": 0,
                "final_loss": 0
            }

        # Ensure rl_stats has the necessary keys
        if not isinstance(rl_stats, dict) or "completed" not in rl_stats:
            self.logger.warning("RL training stats missing expected keys, using defaults")
            rl_stats = {
                "completed": 0,
                "best_reward": -float('inf'),
                "final_reward": 0,
                "avg_reward": 0
            }

        # Update supervised stats
        training_stats["supervised"]["epochs_completed"] += sv_stats.get("completed", 0)
        if sv_stats.get("best_val_pnl", -float('inf')) > training_stats["supervised"]["best_val_pnl"]:
            training_stats["supervised"]["best_val_pnl"] = sv_stats["best_val_pnl"]
            training_stats["supervised"]["best_epoch"] = self.supervised_epochs
        if sv_stats.get("best_win_rate", 0) > training_stats["supervised"]["best_win_rate"]:
            training_stats["supervised"]["best_win_rate"] = sv_stats["best_win_rate"]

        # Update reinforcement stats
        training_stats["reinforcement"]["episodes_completed"] += rl_stats.get("completed", 0)
        if rl_stats.get("best_reward", -float('inf')) > training_stats["reinforcement"]["best_reward"]:
            training_stats["reinforcement"]["best_reward"] = rl_stats["best_reward"]
            training_stats["reinforcement"]["best_episode"] = self.rl_episodes

        # Update hybrid stats
        training_stats["hybrid"]["iterations_completed"] = self.iter_count

        # Calculate a combined score (simple equal-weight average);
        # treat non-finite values as 0.0 before combining
        sv_pnl = training_stats["supervised"]["best_val_pnl"]
        rl_reward = training_stats["reinforcement"]["best_reward"]
        sv_pnl_score = sv_pnl if isinstance(sv_pnl, (int, float)) and np.isfinite(sv_pnl) else 0.0
        rl_reward_score = rl_reward if isinstance(rl_reward, (int, float)) and np.isfinite(rl_reward) else 0.0

        combined_score = (sv_pnl_score * 0.5) + (rl_reward_score * 0.5)

        if combined_score > training_stats["hybrid"]["best_combined_score"]:
            training_stats["hybrid"]["best_combined_score"] = combined_score

        training_stats["hybrid"]["last_update"] = datetime.now().isoformat()

        # Log updated stats
        self.logger.info(f"Updated training stats - Combined score: {combined_score:.2f}")

    def _save_models_and_stats(self):
        """Save models and training statistics"""
        # The best models are saved within their respective training methods;
        # consider also saving the latest models here if needed.

        # Save stats to JSON
        stats_path = self.models_dir / f"hybrid_stats_{timestamp}.json"
        try:
            with open(stats_path, 'w') as f:
                # Convert numpy float types so the stats serialize cleanly
                json.dump(training_stats, f, indent=2,
                          default=lambda x: float(x) if isinstance(x, (np.float32, np.float64)) else x)

            # Also save to a consistent filename for easy access
            latest_path = self.models_dir / "hybrid_stats_latest.json"
            with open(latest_path, 'w') as f:
                json.dump(training_stats, f, indent=2,
                          default=lambda x: float(x) if isinstance(x, (np.float32, np.float64)) else x)

            self.logger.info(f"Saved training stats to {stats_path} and {latest_path}")
        except Exception as e:
            self.logger.error(f"Error saving training stats: {e}")

    def _log_to_tensorboard(self, iteration, sv_stats, rl_stats):
        """Log metrics to TensorBoard"""
        if not self.tensorboard_writer:
            return

        # Ensure the stats are dictionaries
        sv_stats = sv_stats or {}
        rl_stats = rl_stats or {}

        # Log supervised metrics
        self.tensorboard_writer.add_scalar('Supervised/FinalLoss_PerIter', sv_stats.get("final_loss", 0), iteration)
        self.tensorboard_writer.add_scalar('Supervised/BestPnL_Overall', training_stats['supervised']['best_val_pnl'], iteration)
        self.tensorboard_writer.add_scalar('Supervised/BestWinRate_Overall', training_stats['supervised']['best_win_rate'], iteration)

        # Log RL metrics
        self.tensorboard_writer.add_scalar('RL/FinalReward_PerIter', rl_stats.get("final_reward", 0), iteration)
        self.tensorboard_writer.add_scalar('RL/BestReward_Overall', training_stats['reinforcement']['best_reward'], iteration)
        self.tensorboard_writer.add_scalar('RL/AvgReward_PerIter', rl_stats.get("avg_reward", 0), iteration)
        self.tensorboard_writer.add_scalar('RL/Epsilon_Current', self.rl_agent.epsilon if self.rl_agent else 0, iteration)

        # Log combined metrics
        combined_score = training_stats['hybrid']['best_combined_score']
        self.tensorboard_writer.add_scalar('Hybrid/CombinedScore_Overall', combined_score, iteration)
        self.tensorboard_writer.add_scalar('Hybrid/Iterations', self.iter_count, iteration)


def main():
    """Main entry point"""
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Hybrid Training with CPU Compatibility Fixes')
    parser.add_argument('--iterations', type=int, default=10, help='Number of hybrid iterations')
    parser.add_argument('--sv-epochs', type=int, default=5, help='Supervised epochs per iteration')
    parser.add_argument('--rl-episodes', type=int, default=2, help='RL episodes per iteration')
    parser.add_argument('--symbol', type=str, default='BTC/USDT', help='Trading symbol')
    parser.add_argument('--timeframes', type=str, default='1m,5m,15m', help='Comma-separated timeframes')
    parser.add_argument('--window', type=int, default=24, help='Window size for input data')
    parser.add_argument('--batch-size', type=int, default=64, help='Batch size for training')
    parser.add_argument('--new-model', action='store_true', help='Start with new models instead of loading existing ones')
    parser.add_argument('--no-dashboard', action='store_true', help='Disable the web dashboard')
    parser.add_argument('--dashboard-port', type=int, default=8050, help='Port for the web dashboard')
    args = parser.parse_args()

    # Create custom config
    custom_config = {
        'market_data': {
            'symbol': args.symbol,
            'timeframes': args.timeframes.split(','),
            'window_size': args.window
        },
        'training': {
            'batch_size': args.batch_size,
            'learning_rate': 0.0001,  # Conservative learning rate
        },
        'hardware': {
            'device': 'cpu',  # Force CPU
            'mixed_precision': False  # Disable mixed precision
        },
        'model': {
            'new_model': args.new_model
        },
        'visualization': {
            'enabled': not args.no_dashboard,
            'port': args.dashboard_port
        }
    }

    # Get the merged config from train_config
    config = train_config.get_config('hybrid', custom_config)

    # Save the config for reference
    config_dir = Path('configs')
    config_dir.mkdir(exist_ok=True)
    train_config.save_config(config, f"configs/hybrid_training_{timestamp}.json")

    # Initialize the hybrid model
    model = HybridModel(config)
    if not model.initialize():
        logger.error("Failed to initialize hybrid model")
        return

    # Show instructions for the web dashboard if enabled
    if not args.no_dashboard:
        dash_url = f"http://localhost:{args.dashboard_port}"
        logger.info(f"Web dashboard is enabled at {dash_url}")
        logger.info("You can monitor training progress, see predictions and track PnL in real time.")
        logger.info("Press Ctrl+C to gracefully terminate training (models will be saved).")

    # Run the training
    stats = model.train_hybrid(
        iterations=args.iterations,
        sv_epochs_per_iter=args.sv_epochs,
        rl_episodes_per_iter=args.rl_episodes
    )

    # Log final results
    logger.info("Training completed successfully")
    logger.info(f"Best supervised PnL: {stats['supervised']['best_val_pnl']:.4f}")
    logger.info(f"Best RL reward: {stats['reinforcement']['best_reward']:.4f}")
    logger.info(f"Best combined score: {stats['hybrid']['best_combined_score']:.4f}")

    # Close the TensorBoard writer
    if model.tensorboard_writer:
        model.tensorboard_writer.close()


if __name__ == "__main__":
    main()