new__training

Author: Dobromir Popov
Date: 2025-05-24 02:42:11 +03:00
parent b181d11923
commit ef71160282
10 changed files with 1613 additions and 190 deletions

training/cnn_trainer.py (new file, 519 lines)

@@ -0,0 +1,519 @@
"""
CNN Training Pipeline - Scalping Pattern Recognition
Comprehensive training pipeline for multi-timeframe CNN models:
- Automated data generation and preprocessing
- Training with validation and early stopping
- Memory-efficient batch processing
- Model evaluation and metrics
"""
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd
import logging
from typing import Dict, List, Tuple, Optional
import time
from pathlib import Path
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
# Add project imports
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from core.config import get_config
from core.data_provider import DataProvider
from models.cnn.scalping_cnn import MultiTimeframeCNN, ScalpingDataGenerator
logger = logging.getLogger(__name__)
class TradingDataset(Dataset):
"""PyTorch dataset for trading data"""
def __init__(self, features: np.ndarray, labels: np.ndarray, metadata: Optional[Dict] = None):
self.features = torch.FloatTensor(features)
self.labels = torch.FloatTensor(labels)
self.metadata = metadata or {}
def __len__(self):
return len(self.features)
def __getitem__(self, idx):
return self.features[idx], self.labels[idx]
class CNNTrainer:
"""
CNN Training Pipeline for Scalping
"""
def __init__(self, data_provider: DataProvider, config: Optional[Dict] = None):
self.data_provider = data_provider
self.config = config or get_config()
# Training parameters
self.learning_rate = 1e-4
self.batch_size = 64
self.num_epochs = 100
self.patience = 15
self.validation_split = 0.2
# Data parameters
self.timeframes = ['1s', '1m', '5m', '1h']
self.window_size = 20
self.num_samples = 20000
# Model parameters
self.n_timeframes = len(self.timeframes)
self.n_features = 26 # Number of technical indicators
self.n_classes = 3 # BUY, SELL, HOLD
# Device
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Initialize data generator
self.data_generator = ScalpingDataGenerator(data_provider, self.window_size)
# Training state
self.model = None
self.train_losses = []
self.val_losses = []
self.train_accuracies = []
self.val_accuracies = []
logger.info(f"CNNTrainer initialized with {self.n_timeframes} timeframes, {self.n_features} features")
def prepare_data(self, symbols: List[str]) -> Tuple[DataLoader, DataLoader, Dict]:
"""Prepare training and validation data"""
logger.info("Preparing training data...")
all_features = []
all_labels = []
all_metadata = {'symbols': []}
# Generate data for each symbol
for symbol in symbols:
logger.info(f"Generating data for {symbol}...")
features, labels, metadata = self.data_generator.generate_training_cases(
symbol, self.timeframes, self.num_samples // len(symbols)
)
if features is not None and labels is not None:
all_features.append(features)
all_labels.append(labels)
all_metadata['symbols'].extend([symbol] * len(features))
logger.info(f"Generated {len(features)} samples for {symbol}")
# Update feature count from the first symbol's actual data
if len(all_features) == 1:
actual_features = features.shape[-1]
if actual_features != self.n_features:
logger.info(f"Updating feature count from {self.n_features} to {actual_features}")
self.n_features = actual_features
else:
logger.warning(f"No data generated for {symbol}")
if not all_features:
raise ValueError("No training data generated")
# Combine all data
combined_features = np.concatenate(all_features, axis=0)
combined_labels = np.concatenate(all_labels, axis=0)
logger.info(f"Total dataset: {len(combined_features)} samples")
logger.info(f"Features shape: {combined_features.shape}")
logger.info(f"Labels shape: {combined_labels.shape}")
# Split into train/validation
X_train, X_val, y_train, y_val = train_test_split(
combined_features, combined_labels,
test_size=self.validation_split,
stratify=np.argmax(combined_labels, axis=1),
random_state=42
)
# Create datasets
train_dataset = TradingDataset(X_train, y_train)
val_dataset = TradingDataset(X_val, y_val)
# Create data loaders
train_loader = DataLoader(
train_dataset,
batch_size=self.batch_size,
shuffle=True,
num_workers=0, # Set to 0 to avoid multiprocessing issues
pin_memory=True if torch.cuda.is_available() else False
)
val_loader = DataLoader(
val_dataset,
batch_size=self.batch_size,
shuffle=False,
num_workers=0,
pin_memory=True if torch.cuda.is_available() else False
)
# Prepare metadata for return
dataset_info = {
'train_size': len(train_dataset),
'val_size': len(val_dataset),
'feature_shape': combined_features.shape[1:],
'label_distribution': {
'train': np.bincount(np.argmax(y_train, axis=1)),
'val': np.bincount(np.argmax(y_val, axis=1))
}
}
logger.info(f"Train samples: {dataset_info['train_size']}")
logger.info(f"Validation samples: {dataset_info['val_size']}")
logger.info(f"Train label distribution: {dataset_info['label_distribution']['train']}")
logger.info(f"Val label distribution: {dataset_info['label_distribution']['val']}")
return train_loader, val_loader, dataset_info
def create_model(self) -> MultiTimeframeCNN:
"""Create and initialize the CNN model"""
model = MultiTimeframeCNN(
n_timeframes=self.n_timeframes,
window_size=self.window_size,
n_features=self.n_features,
n_classes=self.n_classes
)
model.to(self.device)
# Log model info
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
logger.info(f"Model created with {total_params:,} total parameters")
logger.info(f"Trainable parameters: {trainable_params:,}")
logger.info(f"Estimated memory usage: {model.get_memory_usage()}MB")
return model
def train_epoch(self, model: nn.Module, train_loader: DataLoader,
optimizer: optim.Optimizer, criterion: nn.Module) -> Tuple[float, float]:
"""Train for one epoch"""
model.train()
total_loss = 0.0
correct_predictions = 0
total_predictions = 0
for batch_idx, (features, labels) in enumerate(train_loader):
features = features.to(self.device)
labels = labels.to(self.device)
# Zero gradients
optimizer.zero_grad()
# Forward pass
predictions = model(features)
# Calculate loss (multi-task loss)
action_loss = criterion(predictions['action'], labels)
# Additional losses for auxiliary tasks
confidence_loss = torch.mean(torch.abs(predictions['confidence'] - 0.5))  # Regularize confidence toward 0.5 to discourage overconfident outputs
# Total loss
total_loss_batch = action_loss + 0.1 * confidence_loss
# Backward pass
total_loss_batch.backward()
# Gradient clipping
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
# Update weights
optimizer.step()
# Track metrics
total_loss += total_loss_batch.item()
# Calculate accuracy
pred_classes = torch.argmax(predictions['action'], dim=1)
true_classes = torch.argmax(labels, dim=1)
correct_predictions += (pred_classes == true_classes).sum().item()
total_predictions += labels.size(0)
# Log progress
if batch_idx % 100 == 0:
logger.debug(f"Batch {batch_idx}/{len(train_loader)}, Loss: {total_loss_batch.item():.4f}")
avg_loss = total_loss / len(train_loader)
accuracy = correct_predictions / total_predictions
return avg_loss, accuracy
def validate_epoch(self, model: nn.Module, val_loader: DataLoader,
criterion: nn.Module) -> Tuple[float, float, Dict]:
"""Validate for one epoch"""
model.eval()
total_loss = 0.0
correct_predictions = 0
total_predictions = 0
all_predictions = []
all_labels = []
all_confidences = []
with torch.no_grad():
for features, labels in val_loader:
features = features.to(self.device)
labels = labels.to(self.device)
# Forward pass
predictions = model(features)
# Calculate loss
loss = criterion(predictions['action'], labels)
total_loss += loss.item()
# Track predictions
pred_classes = torch.argmax(predictions['action'], dim=1)
true_classes = torch.argmax(labels, dim=1)
correct_predictions += (pred_classes == true_classes).sum().item()
total_predictions += labels.size(0)
# Store for detailed analysis
all_predictions.extend(pred_classes.cpu().numpy())
all_labels.extend(true_classes.cpu().numpy())
all_confidences.extend(predictions['confidence'].cpu().numpy())
avg_loss = total_loss / len(val_loader)
accuracy = correct_predictions / total_predictions
# Additional metrics
metrics = {
'predictions': np.array(all_predictions),
'labels': np.array(all_labels),
'confidences': np.array(all_confidences),
'accuracy_by_class': {},
'avg_confidence': np.mean(all_confidences)
}
# Calculate per-class accuracy
for class_idx in range(self.n_classes):
class_mask = metrics['labels'] == class_idx
if np.sum(class_mask) > 0:
class_accuracy = np.mean(metrics['predictions'][class_mask] == metrics['labels'][class_mask])
metrics['accuracy_by_class'][class_idx] = class_accuracy
return avg_loss, accuracy, metrics
def train(self, symbols: List[str], save_path: Optional[str] = None) -> Dict:
"""Train the CNN model"""
logger.info("Starting CNN training...")
# Prepare data first to get actual feature count
train_loader, val_loader, dataset_info = self.prepare_data(symbols)
# Create model with correct feature count
self.model = self.create_model()
# Setup training
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
optimizer, mode='min', factor=0.5, patience=5, verbose=True
)
# Training state
best_val_loss = float('inf')
best_val_accuracy = 0.0
patience_counter = 0
start_time = time.time()
# Training loop
for epoch in range(self.num_epochs):
epoch_start_time = time.time()
# Train
train_loss, train_accuracy = self.train_epoch(
self.model, train_loader, optimizer, criterion
)
# Validate
val_loss, val_accuracy, val_metrics = self.validate_epoch(
self.model, val_loader, criterion
)
# Update learning rate
scheduler.step(val_loss)
# Track metrics
self.train_losses.append(train_loss)
self.val_losses.append(val_loss)
self.train_accuracies.append(train_accuracy)
self.val_accuracies.append(val_accuracy)
# Check for improvement
if val_loss < best_val_loss:
best_val_loss = val_loss
best_val_accuracy = val_accuracy
patience_counter = 0
# Save best model
if save_path:
best_path = save_path.replace('.pt', '_best.pt')
self.model.save(best_path)
logger.info(f"New best model saved: {best_path}")
else:
patience_counter += 1
# Log progress
epoch_time = time.time() - epoch_start_time
logger.info(
f"Epoch {epoch+1}/{self.num_epochs} - "
f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f} - "
f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f} - "
f"Time: {epoch_time:.2f}s"
)
# Detailed validation metrics every 10 epochs
if (epoch + 1) % 10 == 0:
logger.info(f"Class accuracies: {val_metrics['accuracy_by_class']}")
logger.info(f"Average confidence: {val_metrics['avg_confidence']:.4f}")
# Early stopping
if patience_counter >= self.patience:
logger.info(f"Early stopping triggered after {epoch+1} epochs")
break
# Training complete
total_time = time.time() - start_time
logger.info(f"Training completed in {total_time:.2f} seconds")
logger.info(f"Best validation loss: {best_val_loss:.4f}")
logger.info(f"Best validation accuracy: {best_val_accuracy:.4f}")
# Save final model
if save_path:
self.model.save(save_path)
logger.info(f"Final model saved: {save_path}")
# Prepare training results
results = {
'best_val_loss': best_val_loss,
'best_val_accuracy': best_val_accuracy,
'total_epochs': epoch + 1,
'total_time': total_time,
'train_losses': self.train_losses,
'val_losses': self.val_losses,
'train_accuracies': self.train_accuracies,
'val_accuracies': self.val_accuracies,
'dataset_info': dataset_info,
'final_metrics': val_metrics
}
return results
def evaluate_model(self, test_symbols: List[str]) -> Dict:
"""Evaluate trained model on test data"""
if self.model is None:
raise ValueError("Model not trained yet")
logger.info("Evaluating model...")
# Generate test data
test_features = []
test_labels = []
for symbol in test_symbols:
features, labels, _ = self.data_generator.generate_training_cases(
symbol, self.timeframes, 5000
)
if features is not None:
test_features.append(features)
test_labels.append(labels)
if not test_features:
raise ValueError("No test data generated")
test_features = np.concatenate(test_features, axis=0)
test_labels = np.concatenate(test_labels, axis=0)
# Create test loader
test_dataset = TradingDataset(test_features, test_labels)
test_loader = DataLoader(test_dataset, batch_size=self.batch_size, shuffle=False)
# Evaluate
criterion = nn.CrossEntropyLoss()
test_loss, test_accuracy, test_metrics = self.validate_epoch(
self.model, test_loader, criterion
)
# Generate classification report
class_names = ['BUY', 'SELL', 'HOLD']
classification_rep = classification_report(
test_metrics['labels'],
test_metrics['predictions'],
target_names=class_names,
output_dict=True
)
# Confusion matrix
conf_matrix = confusion_matrix(
test_metrics['labels'],
test_metrics['predictions']
)
evaluation_results = {
'test_loss': test_loss,
'test_accuracy': test_accuracy,
'classification_report': classification_rep,
'confusion_matrix': conf_matrix,
'class_accuracies': test_metrics['accuracy_by_class'],
'avg_confidence': test_metrics['avg_confidence']
}
logger.info(f"Test accuracy: {test_accuracy:.4f}")
logger.info(f"Test loss: {test_loss:.4f}")
return evaluation_results
def plot_training_history(self, save_path: Optional[str] = None):
"""Plot training history"""
if not self.train_losses:
logger.warning("No training history to plot")
return
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
# Loss plot
epochs = range(1, len(self.train_losses) + 1)
ax1.plot(epochs, self.train_losses, 'b-', label='Training Loss')
ax1.plot(epochs, self.val_losses, 'r-', label='Validation Loss')
ax1.set_title('Training and Validation Loss')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax1.legend()
ax1.grid(True)
# Accuracy plot
ax2.plot(epochs, self.train_accuracies, 'b-', label='Training Accuracy')
ax2.plot(epochs, self.val_accuracies, 'r-', label='Validation Accuracy')
ax2.set_title('Training and Validation Accuracy')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Accuracy')
ax2.legend()
ax2.grid(True)
plt.tight_layout()
if save_path:
plt.savefig(save_path, dpi=300, bbox_inches='tight')
logger.info(f"Training history plot saved: {save_path}")
plt.show()
# Export
__all__ = ['CNNTrainer', 'TradingDataset']
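
For reference, a minimal usage sketch of the CNN pipeline above. The DataProvider construction, symbols, and file paths are assumptions for illustration and are not part of this commit:

# Hypothetical driver script for CNNTrainer; all arguments below are illustrative.
import logging
from core.data_provider import DataProvider
from training.cnn_trainer import CNNTrainer

logging.basicConfig(level=logging.INFO)

data_provider = DataProvider()  # assumed default construction
trainer = CNNTrainer(data_provider)
# Train on one or more symbols and keep the best checkpoint alongside the final model
results = trainer.train(['ETH/USDT'], save_path='models/cnn_scalping.pt')
trainer.plot_training_history('results/cnn_training.png')
# Evaluate on a held-out symbol (illustrative choice)
evaluation = trainer.evaluate_model(['BTC/USDT'])
print(results['best_val_accuracy'], evaluation['test_accuracy'])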

training/rl_trainer.py (new file, 483 lines)

@@ -0,0 +1,483 @@
"""
RL Training Pipeline - Scalping Agent Training
Comprehensive training pipeline for scalping RL agents:
- Environment setup and management
- Agent training with experience replay
- Performance tracking and evaluation
- Memory-efficient training loops
"""
import torch
import numpy as np
import pandas as pd
import logging
from typing import Dict, List, Tuple, Optional, Any
import time
from pathlib import Path
import matplotlib.pyplot as plt
from collections import deque
import random
# Add project imports
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from core.config import get_config
from core.data_provider import DataProvider
from models.rl.scalping_agent import ScalpingEnvironment, ScalpingRLAgent
logger = logging.getLogger(__name__)
class RLTrainer:
"""
RL Training Pipeline for Scalping
"""
def __init__(self, data_provider: DataProvider, config: Optional[Dict] = None):
self.data_provider = data_provider
self.config = config or get_config()
# Training parameters
self.num_episodes = 1000
self.max_steps_per_episode = 1000
self.training_frequency = 4 # Train every N steps
self.evaluation_frequency = 50 # Evaluate every N episodes
self.save_frequency = 100 # Save model every N episodes
# Environment parameters
self.symbols = ['ETH/USDT']
self.initial_balance = 1000.0
self.max_position_size = 0.1
# Agent parameters (will be set when we know state dimension)
self.state_dim = None
self.action_dim = 3 # BUY, SELL, HOLD
self.learning_rate = 1e-4
self.memory_size = 50000
# Device
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Training state
self.environment = None
self.agent = None
self.episode_rewards = []
self.episode_lengths = []
self.episode_balances = []
self.episode_trades = []
self.training_losses = []
# Performance tracking
self.best_reward = -float('inf')
self.best_balance = 0.0
self.win_rates = []
self.avg_rewards = []
logger.info(f"RLTrainer initialized for symbols: {self.symbols}")
def setup_environment_and_agent(self) -> Tuple[ScalpingEnvironment, ScalpingRLAgent]:
"""Setup trading environment and RL agent"""
logger.info("Setting up environment and agent...")
# Create environment
environment = ScalpingEnvironment(
data_provider=self.data_provider,
symbol=self.symbols[0],
initial_balance=self.initial_balance,
max_position_size=self.max_position_size
)
# Get state dimension by resetting environment
initial_state = environment.reset()
if initial_state is None:
raise ValueError("Could not get initial state from environment")
self.state_dim = len(initial_state)
logger.info(f"State dimension: {self.state_dim}")
# Create agent
agent = ScalpingRLAgent(
state_dim=self.state_dim,
action_dim=self.action_dim,
learning_rate=self.learning_rate,
memory_size=self.memory_size
)
return environment, agent
def run_episode(self, episode_num: int, training: bool = True) -> Dict:
"""Run a single episode"""
state = self.environment.reset()
if state is None:
return {'error': 'Could not reset environment'}
episode_reward = 0.0
episode_loss = 0.0
step_count = 0
trades_made = 0
# Episode loop
for step in range(self.max_steps_per_episode):
# Select action
action = self.agent.act(state, training=training)
# Execute action in environment
next_state, reward, done, info = self.environment.step(action, step)
if next_state is None:
break
# Store experience if training
if training:
# Determine if this is a high-priority experience
priority = (abs(reward) > 0.1 or
info.get('trade_info', {}).get('executed', False))
self.agent.remember(state, action, reward, next_state, done, priority)
# Train agent
if step % self.training_frequency == 0 and len(self.agent.memory) > self.agent.batch_size:
loss = self.agent.replay()
if loss is not None:
episode_loss += loss
# Update state
state = next_state
episode_reward += reward
step_count += 1
# Track trades
if info.get('trade_info', {}).get('executed', False):
trades_made += 1
if done:
break
# Episode results
final_balance = info.get('balance', self.initial_balance)
total_fees = info.get('total_fees', 0.0)
episode_results = {
'episode': episode_num,
'reward': episode_reward,
'steps': step_count,
'balance': final_balance,
'trades': trades_made,
'fees': total_fees,
'pnl': final_balance - self.initial_balance,
'pnl_percentage': (final_balance - self.initial_balance) / self.initial_balance * 100,
'avg_loss': episode_loss / max(step_count // self.training_frequency, 1) if training else 0
}
return episode_results
def evaluate_agent(self, num_episodes: int = 10) -> Dict:
"""Evaluate agent performance"""
logger.info(f"Evaluating agent over {num_episodes} episodes...")
evaluation_results = []
total_reward = 0.0
total_balance = 0.0
total_trades = 0
winning_episodes = 0
# Set agent to evaluation mode
original_epsilon = self.agent.epsilon
self.agent.epsilon = 0.0 # No exploration during evaluation
for episode in range(num_episodes):
results = self.run_episode(episode, training=False)
evaluation_results.append(results)
total_reward += results['reward']
total_balance += results['balance']
total_trades += results['trades']
if results['pnl'] > 0:
winning_episodes += 1
# Restore original epsilon
self.agent.epsilon = original_epsilon
# Calculate summary statistics
avg_reward = total_reward / num_episodes
avg_balance = total_balance / num_episodes
avg_trades = total_trades / num_episodes
win_rate = winning_episodes / num_episodes
evaluation_summary = {
'num_episodes': num_episodes,
'avg_reward': avg_reward,
'avg_balance': avg_balance,
'avg_pnl': avg_balance - self.initial_balance,
'avg_pnl_percentage': (avg_balance - self.initial_balance) / self.initial_balance * 100,
'avg_trades': avg_trades,
'win_rate': win_rate,
'results': evaluation_results
}
logger.info(f"Evaluation complete - Avg Reward: {avg_reward:.4f}, Win Rate: {win_rate:.2%}")
return evaluation_summary
def train(self, save_path: Optional[str] = None) -> Dict:
"""Train the RL agent"""
logger.info("Starting RL agent training...")
# Setup environment and agent
self.environment, self.agent = self.setup_environment_and_agent()
# Training state
start_time = time.time()
best_eval_reward = -float('inf')
# Training loop
for episode in range(self.num_episodes):
episode_start_time = time.time()
# Run training episode
results = self.run_episode(episode, training=True)
# Track metrics
self.episode_rewards.append(results['reward'])
self.episode_lengths.append(results['steps'])
self.episode_balances.append(results['balance'])
self.episode_trades.append(results['trades'])
if results.get('avg_loss', 0) > 0:
self.training_losses.append(results['avg_loss'])
# Update best metrics
if results['reward'] > self.best_reward:
self.best_reward = results['reward']
if results['balance'] > self.best_balance:
self.best_balance = results['balance']
# Calculate running averages
recent_rewards = self.episode_rewards[-100:] # Last 100 episodes
recent_balances = self.episode_balances[-100:]
avg_reward = np.mean(recent_rewards)
avg_balance = np.mean(recent_balances)
self.avg_rewards.append(avg_reward)
# Log progress
episode_time = time.time() - episode_start_time
if episode % 10 == 0:
logger.info(
f"Episode {episode}/{self.num_episodes} - "
f"Reward: {results['reward']:.4f}, Balance: ${results['balance']:.2f}, "
f"Trades: {results['trades']}, PnL: {results['pnl_percentage']:.2f}%, "
f"Epsilon: {self.agent.epsilon:.3f}, Time: {episode_time:.2f}s"
)
# Evaluation
if episode % self.evaluation_frequency == 0 and episode > 0:
eval_results = self.evaluate_agent(num_episodes=5)
# Track win rate
self.win_rates.append(eval_results['win_rate'])
logger.info(
f"Evaluation - Avg Reward: {eval_results['avg_reward']:.4f}, "
f"Win Rate: {eval_results['win_rate']:.2%}, "
f"Avg PnL: {eval_results['avg_pnl_percentage']:.2f}%"
)
# Save best model
if eval_results['avg_reward'] > best_eval_reward:
best_eval_reward = eval_results['avg_reward']
if save_path:
best_path = save_path.replace('.pt', '_best.pt')
self.agent.save(best_path)
logger.info(f"New best model saved: {best_path}")
# Save checkpoint
if episode % self.save_frequency == 0 and episode > 0 and save_path:
checkpoint_path = save_path.replace('.pt', f'_checkpoint_{episode}.pt')
self.agent.save(checkpoint_path)
logger.info(f"Checkpoint saved: {checkpoint_path}")
# Training complete
total_time = time.time() - start_time
logger.info(f"Training completed in {total_time:.2f} seconds")
# Final evaluation
final_eval = self.evaluate_agent(num_episodes=20)
# Save final model
if save_path:
self.agent.save(save_path)
logger.info(f"Final model saved: {save_path}")
# Prepare training results
training_results = {
'total_episodes': self.num_episodes,
'total_time': total_time,
'best_reward': self.best_reward,
'best_balance': self.best_balance,
'final_evaluation': final_eval,
'episode_rewards': self.episode_rewards,
'episode_balances': self.episode_balances,
'episode_trades': self.episode_trades,
'training_losses': self.training_losses,
'avg_rewards': self.avg_rewards,
'win_rates': self.win_rates,
'agent_config': {
'state_dim': self.state_dim,
'action_dim': self.action_dim,
'learning_rate': self.learning_rate,
'epsilon_final': self.agent.epsilon
}
}
return training_results
def backtest_agent(self, agent_path: str, test_episodes: int = 50) -> Dict:
"""Backtest trained agent"""
logger.info(f"Backtesting agent from {agent_path}...")
# Setup environment and agent
self.environment, self.agent = self.setup_environment_and_agent()
# Load trained agent
self.agent.load(agent_path)
# Run backtest
backtest_results = self.evaluate_agent(test_episodes)
# Additional analysis
results = backtest_results['results']
pnls = [r['pnl_percentage'] for r in results]
rewards = [r['reward'] for r in results]
trades = [r['trades'] for r in results]
analysis = {
'total_episodes': test_episodes,
'avg_pnl': np.mean(pnls),
'std_pnl': np.std(pnls),
'max_pnl': np.max(pnls),
'min_pnl': np.min(pnls),
'avg_reward': np.mean(rewards),
'avg_trades': np.mean(trades),
'win_rate': backtest_results['win_rate'],
'profit_factor': np.sum([p for p in pnls if p > 0]) / abs(np.sum([p for p in pnls if p < 0])) if any(p < 0 for p in pnls) else float('inf'),
'sharpe_ratio': np.mean(pnls) / np.std(pnls) if np.std(pnls) > 0 else 0,
'max_drawdown': self._calculate_max_drawdown(pnls)
}
logger.info(f"Backtest complete - Win Rate: {analysis['win_rate']:.2%}, Avg PnL: {analysis['avg_pnl']:.2f}%")
return {
'backtest_results': backtest_results,
'analysis': analysis
}
def _calculate_max_drawdown(self, pnls: List[float]) -> float:
"""Calculate maximum drawdown"""
cumulative = np.cumsum(pnls)
running_max = np.maximum.accumulate(cumulative)
drawdowns = running_max - cumulative
return np.max(drawdowns) if len(drawdowns) > 0 else 0.0
def plot_training_progress(self, save_path: Optional[str] = None):
"""Plot training progress"""
if not self.episode_rewards:
logger.warning("No training data to plot")
return
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
episodes = range(1, len(self.episode_rewards) + 1)
# Episode rewards
ax1.plot(episodes, self.episode_rewards, alpha=0.6, label='Episode Reward')
if self.avg_rewards:
ax1.plot(episodes, self.avg_rewards, 'r-', label='Avg Reward (100 episodes)')
ax1.set_title('Training Rewards')
ax1.set_xlabel('Episode')
ax1.set_ylabel('Reward')
ax1.legend()
ax1.grid(True)
# Episode balances
ax2.plot(episodes, self.episode_balances, alpha=0.6, label='Episode Balance')
ax2.axhline(y=self.initial_balance, color='r', linestyle='--', label='Initial Balance')
ax2.set_title('Portfolio Balance')
ax2.set_xlabel('Episode')
ax2.set_ylabel('Balance ($)')
ax2.legend()
ax2.grid(True)
# Training losses
if self.training_losses:
loss_episodes = np.linspace(1, len(self.episode_rewards), len(self.training_losses))
ax3.plot(loss_episodes, self.training_losses, 'g-', alpha=0.8)
ax3.set_title('Training Loss')
ax3.set_xlabel('Episode')
ax3.set_ylabel('Loss')
ax3.grid(True)
# Win rates
if self.win_rates:
eval_episodes = np.arange(self.evaluation_frequency,
len(self.episode_rewards) + 1,
self.evaluation_frequency)[:len(self.win_rates)]
ax4.plot(eval_episodes, self.win_rates, 'purple', marker='o')
ax4.set_title('Win Rate')
ax4.set_xlabel('Episode')
ax4.set_ylabel('Win Rate')
ax4.grid(True)
ax4.set_ylim(0, 1)
plt.tight_layout()
if save_path:
plt.savefig(save_path, dpi=300, bbox_inches='tight')
logger.info(f"Training progress plot saved: {save_path}")
plt.show()
class HybridTrainer:
"""
Hybrid training pipeline combining CNN and RL
"""
def __init__(self, data_provider: DataProvider):
self.data_provider = data_provider
self.cnn_trainer = None
self.rl_trainer = None
def train_hybrid(self, symbols: List[str], cnn_save_path: str, rl_save_path: str) -> Dict:
"""Train CNN first, then RL with CNN features"""
logger.info("Starting hybrid CNN + RL training...")
# Phase 1: Train CNN
logger.info("Phase 1: Training CNN...")
from training.cnn_trainer import CNNTrainer
self.cnn_trainer = CNNTrainer(self.data_provider)
cnn_results = self.cnn_trainer.train(symbols, cnn_save_path)
# Phase 2: Train RL
logger.info("Phase 2: Training RL...")
self.rl_trainer = RLTrainer(self.data_provider)
rl_results = self.rl_trainer.train(rl_save_path)
# Combine results
hybrid_results = {
'cnn_results': cnn_results,
'rl_results': rl_results,
'total_time': cnn_results['total_time'] + rl_results['total_time']
}
logger.info("Hybrid training completed!")
return hybrid_results
# Export
__all__ = ['RLTrainer', 'HybridTrainer']
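
Similarly, a minimal sketch of how the RL and hybrid pipelines might be driven; the DataProvider construction and all paths are assumptions for illustration:

# Hypothetical driver for RLTrainer / HybridTrainer; arguments are illustrative.
import logging
from core.data_provider import DataProvider
from training.rl_trainer import RLTrainer, HybridTrainer

logging.basicConfig(level=logging.INFO)

data_provider = DataProvider()  # assumed default construction
rl_trainer = RLTrainer(data_provider)
rl_results = rl_trainer.train(save_path='models/rl_scalping.pt')
rl_trainer.plot_training_progress('results/rl_training.png')
# Backtest the best checkpoint saved during training
backtest = rl_trainer.backtest_agent('models/rl_scalping_best.pt', test_episodes=50)

# Or run both stages end to end:
hybrid = HybridTrainer(data_provider)
hybrid_results = hybrid.train_hybrid(['ETH/USDT'], 'models/cnn_scalping.pt', 'models/rl_scalping.pt')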