new__training

2025-05-24 02:42:11 +03:00
parent b181d11923
commit ef71160282
10 changed files with 1613 additions and 190 deletions
--- a/training/cnn_trainer.py
+++ b/training/cnn_trainer.py
@@ -0,0 +1,519 @@
+"""
+CNN Training Pipeline - Scalping Pattern Recognition
+
+Comprehensive training pipeline for multi-timeframe CNN models:
+- Automated data generation and preprocessing
+- Training with validation and early stopping
+- Memory-efficient batch processing
+- Model evaluation and metrics
+"""
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, Dataset
+import numpy as np
+import pandas as pd
+import logging
+from typing import Dict, List, Tuple, Optional
+import time
+from pathlib import Path
+from sklearn.metrics import classification_report, confusion_matrix
+from sklearn.model_selection import train_test_split
+import matplotlib.pyplot as plt
+
+# Add project imports
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from core.config import get_config
+from core.data_provider import DataProvider
+from models.cnn.scalping_cnn import MultiTimeframeCNN, ScalpingDataGenerator
+
+logger = logging.getLogger(__name__)
+
+class TradingDataset(Dataset):
+    """In-memory PyTorch dataset of feature arrays and their labels.
+
+    Both arrays are converted to ``torch.FloatTensor`` once, at
+    construction time; indexing afterwards is a plain tensor lookup.
+    """
+
+    def __init__(self, features: np.ndarray, labels: np.ndarray, metadata: Optional[Dict] = None):
+        # A missing (or falsy) metadata argument is stored as an empty dict.
+        self.metadata = metadata if metadata else {}
+        self.features = torch.FloatTensor(features)
+        self.labels = torch.FloatTensor(labels)
+
+    def __len__(self):
+        """Return the number of samples, i.e. the length of ``features``."""
+        sample_count = len(self.features)
+        return sample_count
+
+    def __getitem__(self, idx):
+        """Return the ``(features, labels)`` pair stored at position ``idx``."""
+        sample = self.features[idx]
+        target = self.labels[idx]
+        return sample, target
+
+class CNNTrainer:
+    """
+    CNN Training Pipeline for Scalping
+    """
+    
+    def __init__(self, data_provider: DataProvider, config: Optional[Dict] = None):
+        """Store dependencies and initialise hyper-parameters and history.
+
+        Args:
+            data_provider: Market data source; also handed to the
+                ``ScalpingDataGenerator`` created here.
+            config: Optional configuration mapping. ``get_config()`` is
+                used when it is omitted or falsy.
+        """
+        self.data_provider = data_provider
+        self.config = config if config else get_config()
+
+        # Optimisation schedule
+        self.learning_rate = 1e-4
+        self.batch_size = 64
+        self.num_epochs = 100
+        self.patience = 15           # epochs without improvement before early stop
+        self.validation_split = 0.2  # fraction of samples held out for validation
+
+        # Input data layout
+        self.timeframes = ['1s', '1m', '5m', '1h']
+        self.window_size = 20
+        self.num_samples = 20000
+
+        # Network dimensions
+        self.n_timeframes = len(self.timeframes)
+        self.n_features = 26  # technical indicators; prepare_data() may overwrite this
+        self.n_classes = 3    # BUY, SELL, HOLD
+
+        # Run on the GPU when CUDA is available, otherwise on the CPU
+        if torch.cuda.is_available():
+            self.device = torch.device('cuda')
+        else:
+            self.device = torch.device('cpu')
+
+        # Produces the training cases consumed by prepare_data()/evaluate_model()
+        self.data_generator = ScalpingDataGenerator(data_provider, self.window_size)
+
+        # Model handle and per-epoch history, filled in by train()
+        self.model = None
+        self.train_losses, self.val_losses = [], []
+        self.train_accuracies, self.val_accuracies = [], []
+
+        logger.info(f"CNNTrainer initialized with {self.n_timeframes} timeframes, {self.n_features} features")
+    
+    def prepare_data(self, symbols: List[str]) -> Tuple[DataLoader, DataLoader, Dict]:
+        """Generate samples for ``symbols`` and build train/validation loaders.
+
+        The sample budget ``self.num_samples`` is divided evenly between the
+        symbols. Symbols for which the generator returns ``None`` features or
+        labels are skipped with a warning. ``self.n_features`` is updated from
+        the last axis of the first successfully generated feature array.
+
+        Args:
+            symbols: Symbols to generate training cases for.
+
+        Returns:
+            ``(train_loader, val_loader, dataset_info)`` where ``dataset_info``
+            holds the split sizes, the per-sample feature shape and the
+            per-class label counts of both splits.
+
+        Raises:
+            ValueError: If no symbol produced any data.
+        """
+        logger.info("Preparing training data...")
+
+        all_features = []
+        all_labels = []
+
+        # Loop-invariant per-symbol budget; max() keeps an empty symbol list
+        # from dividing by zero so it reaches the ValueError below instead.
+        samples_per_symbol = self.num_samples // max(len(symbols), 1)
+
+        # Generate data for each symbol
+        for symbol in symbols:
+            logger.info(f"Generating data for {symbol}...")
+
+            features, labels, _ = self.data_generator.generate_training_cases(
+                symbol, self.timeframes, samples_per_symbol
+            )
+
+            if features is None or labels is None:
+                logger.warning(f"No data generated for {symbol}")
+                continue
+
+            all_features.append(features)
+            all_labels.append(labels)
+            logger.info(f"Generated {len(features)} samples for {symbol}")
+
+            # The first successful symbol defines the real feature count
+            if len(all_features) == 1:
+                actual_features = features.shape[-1]
+                if actual_features != self.n_features:
+                    logger.info(f"Updating feature count from {self.n_features} to {actual_features}")
+                    self.n_features = actual_features
+
+        if not all_features:
+            raise ValueError("No training data generated")
+
+        # Combine all data
+        combined_features = np.concatenate(all_features, axis=0)
+        combined_labels = np.concatenate(all_labels, axis=0)
+
+        logger.info(f"Total dataset: {len(combined_features)} samples")
+        logger.info(f"Features shape: {combined_features.shape}")
+        logger.info(f"Labels shape: {combined_labels.shape}")
+
+        # Split into train/validation, stratified on the arg-max class of
+        # each label row (labels are treated as one-hot / score vectors).
+        X_train, X_val, y_train, y_val = train_test_split(
+            combined_features, combined_labels,
+            test_size=self.validation_split,
+            stratify=np.argmax(combined_labels, axis=1),
+            random_state=42
+        )
+
+        # Create datasets
+        train_dataset = TradingDataset(X_train, y_train)
+        val_dataset = TradingDataset(X_val, y_val)
+
+        # Create data loaders (num_workers=0 avoids multiprocessing issues)
+        pin_memory = torch.cuda.is_available()
+        train_loader = DataLoader(
+            train_dataset,
+            batch_size=self.batch_size,
+            shuffle=True,
+            num_workers=0,
+            pin_memory=pin_memory
+        )
+
+        val_loader = DataLoader(
+            val_dataset,
+            batch_size=self.batch_size,
+            shuffle=False,
+            num_workers=0,
+            pin_memory=pin_memory
+        )
+
+        # minlength keeps both count vectors at least n_classes long even
+        # when the highest-numbered class is missing from a split.
+        dataset_info = {
+            'train_size': len(train_dataset),
+            'val_size': len(val_dataset),
+            'feature_shape': combined_features.shape[1:],
+            'label_distribution': {
+                'train': np.bincount(np.argmax(y_train, axis=1), minlength=self.n_classes),
+                'val': np.bincount(np.argmax(y_val, axis=1), minlength=self.n_classes)
+            }
+        }
+
+        logger.info(f"Train samples: {dataset_info['train_size']}")
+        logger.info(f"Validation samples: {dataset_info['val_size']}")
+        logger.info(f"Train label distribution: {dataset_info['label_distribution']['train']}")
+        logger.info(f"Val label distribution: {dataset_info['label_distribution']['val']}")
+
+        return train_loader, val_loader, dataset_info
+    
+    def create_model(self) -> MultiTimeframeCNN:
+        """Build a ``MultiTimeframeCNN`` from the trainer's dimensions.
+
+        The model is moved to ``self.device`` and its parameter counts and
+        self-reported memory usage are logged.
+
+        Returns:
+            The newly constructed model.
+        """
+        dimensions = {
+            'n_timeframes': self.n_timeframes,
+            'window_size': self.window_size,
+            'n_features': self.n_features,
+            'n_classes': self.n_classes,
+        }
+        model = MultiTimeframeCNN(**dimensions)
+        model.to(self.device)
+
+        # Tally all parameters and the subset that receives gradients
+        total_params = 0
+        trainable_params = 0
+        for param in model.parameters():
+            count = param.numel()
+            total_params += count
+            if param.requires_grad:
+                trainable_params += count
+
+        logger.info(f"Model created with {total_params:,} total parameters")
+        logger.info(f"Trainable parameters: {trainable_params:,}")
+        logger.info(f"Estimated memory usage: {model.get_memory_usage()}MB")
+
+        return model
+    
+    def train_epoch(self, model: nn.Module, train_loader: DataLoader, 
+                   optimizer: optim.Optimizer, criterion: nn.Module) -> Tuple[float, float]:
+        """Run one optimisation pass over ``train_loader``.
+
+        The model is expected to return a mapping with ``'action'`` and
+        ``'confidence'`` entries. The batch loss is ``criterion`` applied to
+        the action output plus 0.1 times the mean absolute distance of the
+        confidence output from 0.5. Gradients are clipped to norm 1.0.
+
+        Returns:
+            ``(mean loss per batch, fraction of correctly classified samples)``.
+        """
+        model.train()
+        running_loss = 0.0
+        n_correct = 0
+        n_seen = 0
+        n_batches = len(train_loader)
+
+        for step, batch in enumerate(train_loader):
+            inputs, targets = batch
+            inputs = inputs.to(self.device)
+            targets = targets.to(self.device)
+
+            optimizer.zero_grad()
+            outputs = model(inputs)
+
+            action_term = criterion(outputs['action'], targets)
+            # NOTE(review): minimising this term pulls confidence towards 0.5;
+            # confirm that this is the intended auxiliary objective.
+            confidence_term = torch.abs(outputs['confidence'] - 0.5).mean()
+            batch_loss = action_term + 0.1 * confidence_term
+
+            batch_loss.backward()
+            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
+            optimizer.step()
+
+            batch_loss_value = batch_loss.item()
+            running_loss += batch_loss_value
+
+            # Predicted vs. true class: arg-max over the class axis of each
+            hits = outputs['action'].argmax(dim=1) == targets.argmax(dim=1)
+            n_correct += hits.sum().item()
+            n_seen += targets.size(0)
+
+            if step % 100 == 0:
+                logger.debug(f"Batch {step}/{n_batches}, Loss: {batch_loss_value:.4f}")
+
+        return running_loss / n_batches, n_correct / n_seen
+    
+    def validate_epoch(self, model: nn.Module, val_loader: DataLoader, 
+                      criterion: nn.Module) -> Tuple[float, float, Dict]:
+        """Evaluate ``model`` on ``val_loader`` without gradient tracking.
+
+        Returns:
+            ``(mean loss per batch, overall accuracy, metrics)``. ``metrics``
+            contains the predicted and true class indices, the raw confidence
+            outputs, their mean, and the accuracy of every class that occurs
+            among the true labels.
+        """
+        model.eval()
+        running_loss = 0.0
+        n_correct = 0
+        n_seen = 0
+
+        predicted_all = []
+        actual_all = []
+        confidence_all = []
+
+        with torch.no_grad():
+            for inputs, targets in val_loader:
+                inputs = inputs.to(self.device)
+                targets = targets.to(self.device)
+
+                outputs = model(inputs)
+                running_loss += criterion(outputs['action'], targets).item()
+
+                predicted = outputs['action'].argmax(dim=1)
+                actual = targets.argmax(dim=1)
+                n_correct += (predicted == actual).sum().item()
+                n_seen += targets.size(0)
+
+                # Keep per-sample results for the detailed metrics below
+                predicted_all.extend(predicted.cpu().numpy())
+                actual_all.extend(actual.cpu().numpy())
+                confidence_all.extend(outputs['confidence'].cpu().numpy())
+
+        avg_loss = running_loss / len(val_loader)
+        accuracy = n_correct / n_seen
+
+        predicted_arr = np.array(predicted_all)
+        actual_arr = np.array(actual_all)
+
+        # Accuracy restricted to the samples of each class; classes with no
+        # samples are left out of the mapping.
+        per_class = {}
+        for class_idx in range(self.n_classes):
+            members = actual_arr == class_idx
+            if np.sum(members) > 0:
+                per_class[class_idx] = np.mean(predicted_arr[members] == actual_arr[members])
+
+        metrics = {
+            'predictions': predicted_arr,
+            'labels': actual_arr,
+            'confidences': np.array(confidence_all),
+            'accuracy_by_class': per_class,
+            'avg_confidence': np.mean(confidence_all)
+        }
+
+        return avg_loss, accuracy, metrics
+    
+    def train(self, symbols: List[str], save_path: Optional[str] = None) -> Dict:
+        """Train the CNN with early stopping and learning-rate reduction.
+
+        Data is prepared first so the model is created with the feature
+        count found in the data. Each epoch runs ``train_epoch`` and
+        ``validate_epoch``; the learning rate is halved after 5 epochs
+        without validation-loss improvement and training stops after
+        ``self.patience`` such epochs.
+
+        Args:
+            symbols: Symbols to generate training data for.
+            save_path: Optional path for the final model. The best model
+                (lowest validation loss) is written next to it with
+                ``_best`` inserted before the file extension.
+
+        Returns:
+            A dict with the best validation loss/accuracy, the number of
+            epochs run, elapsed time, per-epoch histories, the dataset
+            info and the validation metrics of the last epoch (an empty
+            dict if no epoch ran).
+        """
+        logger.info("Starting CNN training...")
+
+        # Prepare data first to get actual feature count
+        train_loader, val_loader, dataset_info = self.prepare_data(symbols)
+
+        # Create model with correct feature count
+        self.model = self.create_model()
+
+        # Setup training. The scheduler's `verbose` flag is deprecated in
+        # recent PyTorch releases, so LR changes are logged explicitly below.
+        criterion = nn.CrossEntropyLoss()
+        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
+        scheduler = optim.lr_scheduler.ReduceLROnPlateau(
+            optimizer, mode='min', factor=0.5, patience=5
+        )
+
+        # Build the best-model path from the real extension. A plain
+        # str.replace('.pt', ...) would touch every '.pt' in the path and
+        # leave paths with another extension unchanged, so the final save
+        # would overwrite the best checkpoint.
+        best_path = None
+        if save_path:
+            path_root, path_ext = os.path.splitext(save_path)
+            best_path = f"{path_root}_best{path_ext}"
+
+        # Training state
+        best_val_loss = float('inf')
+        best_val_accuracy = 0.0
+        patience_counter = 0
+        epochs_run = 0
+        val_metrics: Dict = {}  # stays empty when num_epochs is 0
+        start_time = time.time()
+
+        # Training loop
+        for epoch in range(self.num_epochs):
+            epochs_run = epoch + 1
+            epoch_start_time = time.time()
+
+            # Train
+            train_loss, train_accuracy = self.train_epoch(
+                self.model, train_loader, optimizer, criterion
+            )
+
+            # Validate
+            val_loss, val_accuracy, val_metrics = self.validate_epoch(
+                self.model, val_loader, criterion
+            )
+
+            # Update learning rate and report any reduction
+            lr_before = optimizer.param_groups[0]['lr']
+            scheduler.step(val_loss)
+            lr_after = optimizer.param_groups[0]['lr']
+            if lr_after != lr_before:
+                logger.info(f"Learning rate reduced from {lr_before:.2e} to {lr_after:.2e}")
+
+            # Track metrics
+            self.train_losses.append(train_loss)
+            self.val_losses.append(val_loss)
+            self.train_accuracies.append(train_accuracy)
+            self.val_accuracies.append(val_accuracy)
+
+            # Check for improvement
+            if val_loss < best_val_loss:
+                best_val_loss = val_loss
+                best_val_accuracy = val_accuracy
+                patience_counter = 0
+
+                # Save best model
+                if best_path:
+                    self.model.save(best_path)
+                    logger.info(f"New best model saved: {best_path}")
+            else:
+                patience_counter += 1
+
+            # Log progress
+            epoch_time = time.time() - epoch_start_time
+            logger.info(
+                f"Epoch {epoch+1}/{self.num_epochs} - "
+                f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f} - "
+                f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f} - "
+                f"Time: {epoch_time:.2f}s"
+            )
+
+            # Detailed validation metrics every 10 epochs
+            if (epoch + 1) % 10 == 0:
+                logger.info(f"Class accuracies: {val_metrics['accuracy_by_class']}")
+                logger.info(f"Average confidence: {val_metrics['avg_confidence']:.4f}")
+
+            # Early stopping
+            if patience_counter >= self.patience:
+                logger.info(f"Early stopping triggered after {epoch+1} epochs")
+                break
+
+        # Training complete
+        total_time = time.time() - start_time
+        logger.info(f"Training completed in {total_time:.2f} seconds")
+        logger.info(f"Best validation loss: {best_val_loss:.4f}")
+        logger.info(f"Best validation accuracy: {best_val_accuracy:.4f}")
+
+        # Save final model (the state after the last epoch, not the best one)
+        if save_path:
+            self.model.save(save_path)
+            logger.info(f"Final model saved: {save_path}")
+
+        # Prepare training results
+        results = {
+            'best_val_loss': best_val_loss,
+            'best_val_accuracy': best_val_accuracy,
+            'total_epochs': epochs_run,
+            'total_time': total_time,
+            'train_losses': self.train_losses,
+            'val_losses': self.val_losses,
+            'train_accuracies': self.train_accuracies,
+            'val_accuracies': self.val_accuracies,
+            'dataset_info': dataset_info,
+            'final_metrics': val_metrics
+        }
+
+        return results
+    
+    def evaluate_model(self, test_symbols: List[str]) -> Dict:
+        """Evaluate the trained model on freshly generated test data.
+
+        For every symbol the data generator is asked for training cases
+        (passing 5000 as its third argument); symbols that return ``None``
+        features or labels are skipped. The combined data is scored with
+        ``validate_epoch`` and summarised with a classification report and
+        a confusion matrix.
+
+        Args:
+            test_symbols: Symbols to generate test data for.
+
+        Returns:
+            A dict with test loss, accuracy, classification report (as a
+            dict), confusion matrix, per-class accuracies and the average
+            confidence.
+
+        Raises:
+            ValueError: If no model has been trained or no test data
+                could be generated.
+        """
+        if self.model is None:
+            raise ValueError("Model not trained yet")
+
+        logger.info("Evaluating model...")
+
+        # Generate test data
+        test_features = []
+        test_labels = []
+
+        for symbol in test_symbols:
+            features, labels, _ = self.data_generator.generate_training_cases(
+                symbol, self.timeframes, 5000
+            )
+            # Require both arrays, mirroring the check in prepare_data()
+            if features is not None and labels is not None:
+                test_features.append(features)
+                test_labels.append(labels)
+
+        if not test_features:
+            raise ValueError("No test data generated")
+
+        test_features = np.concatenate(test_features, axis=0)
+        test_labels = np.concatenate(test_labels, axis=0)
+
+        # Create test loader
+        test_dataset = TradingDataset(test_features, test_labels)
+        test_loader = DataLoader(test_dataset, batch_size=self.batch_size, shuffle=False)
+
+        # Evaluate
+        criterion = nn.CrossEntropyLoss()
+        test_loss, test_accuracy, test_metrics = self.validate_epoch(
+            self.model, test_loader, criterion
+        )
+
+        # Pin the class indices explicitly. Without `labels`, scikit-learn
+        # infers the classes from the data, so a class that is absent from
+        # both labels and predictions makes `target_names` mismatch (error)
+        # and changes the confusion-matrix shape.
+        class_names = ['BUY', 'SELL', 'HOLD']
+        class_indices = list(range(len(class_names)))
+
+        classification_rep = classification_report(
+            test_metrics['labels'],
+            test_metrics['predictions'],
+            labels=class_indices,
+            target_names=class_names,
+            output_dict=True,
+            zero_division=0
+        )
+
+        # Confusion matrix
+        conf_matrix = confusion_matrix(
+            test_metrics['labels'],
+            test_metrics['predictions'],
+            labels=class_indices
+        )
+
+        evaluation_results = {
+            'test_loss': test_loss,
+            'test_accuracy': test_accuracy,
+            'classification_report': classification_rep,
+            'confusion_matrix': conf_matrix,
+            'class_accuracies': test_metrics['accuracy_by_class'],
+            'avg_confidence': test_metrics['avg_confidence']
+        }
+
+        logger.info(f"Test accuracy: {test_accuracy:.4f}")
+        logger.info(f"Test loss: {test_loss:.4f}")
+
+        return evaluation_results
+    
+    def plot_training_history(self, save_path: Optional[str] = None):
+        """Plot loss and accuracy curves recorded during training.
+
+        Draws two side-by-side panels (loss, accuracy) with the training
+        series in blue and the validation series in red, optionally saves
+        the figure to ``save_path`` and then shows it. Logs a warning and
+        returns when no history has been recorded.
+        """
+        if not self.train_losses:
+            logger.warning("No training history to plot")
+            return
+
+        fig, axes = plt.subplots(1, 2, figsize=(12, 4))
+        epochs = range(1, len(self.train_losses) + 1)
+
+        # (train series, validation series, train label, validation label,
+        #  panel title, y-axis label) for each panel, left to right
+        panels = (
+            (self.train_losses, self.val_losses,
+             'Training Loss', 'Validation Loss',
+             'Training and Validation Loss', 'Loss'),
+            (self.train_accuracies, self.val_accuracies,
+             'Training Accuracy', 'Validation Accuracy',
+             'Training and Validation Accuracy', 'Accuracy'),
+        )
+
+        for ax, (train_series, val_series, train_label, val_label, title, y_label) in zip(axes, panels):
+            ax.plot(epochs, train_series, 'b-', label=train_label)
+            ax.plot(epochs, val_series, 'r-', label=val_label)
+            ax.set_title(title)
+            ax.set_xlabel('Epoch')
+            ax.set_ylabel(y_label)
+            ax.legend()
+            ax.grid(True)
+
+        plt.tight_layout()
+
+        if save_path:
+            plt.savefig(save_path, dpi=300, bbox_inches='tight')
+            logger.info(f"Training history plot saved: {save_path}")
+
+        plt.show()
+
+# Export
+__all__ = ['CNNTrainer', 'TradingDataset'] 
--- a/training/rl_trainer.py
+++ b/training/rl_trainer.py
@@ -0,0 +1,483 @@
+"""
+RL Training Pipeline - Scalping Agent Training
+
+Comprehensive training pipeline for scalping RL agents:
+- Environment setup and management
+- Agent training with experience replay
+- Performance tracking and evaluation
+- Memory-efficient training loops
+"""
+
+import torch
+import numpy as np
+import pandas as pd
+import logging
+from typing import Dict, List, Tuple, Optional, Any
+import time
+from pathlib import Path
+import matplotlib.pyplot as plt
+from collections import deque
+import random
+
+# Add project imports
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from core.config import get_config
+from core.data_provider import DataProvider
+from models.rl.scalping_agent import ScalpingEnvironment, ScalpingRLAgent
+
+logger = logging.getLogger(__name__)
+
+class RLTrainer:
+    """
+    RL Training Pipeline for Scalping
+    """
+    
+    def __init__(self, data_provider: DataProvider, config: Optional[Dict] = None):
+        """Store dependencies and initialise training settings and history.
+
+        Args:
+            data_provider: Market data source passed on to the environment.
+            config: Optional configuration mapping. ``get_config()`` is
+                used when it is omitted or falsy.
+        """
+        self.data_provider = data_provider
+        self.config = config if config else get_config()
+
+        # Episode schedule
+        self.num_episodes = 1000
+        self.max_steps_per_episode = 1000
+        self.training_frequency = 4     # replay every N environment steps
+        self.evaluation_frequency = 50  # evaluate every N episodes
+        self.save_frequency = 100       # checkpoint every N episodes
+
+        # Trading environment settings
+        self.symbols = ['ETH/USDT']
+        self.initial_balance = 1000.0
+        self.max_position_size = 0.1
+
+        # Agent settings; state_dim is filled in by
+        # setup_environment_and_agent() once a state has been observed
+        self.state_dim = None
+        self.action_dim = 3  # BUY, SELL, HOLD
+        self.learning_rate = 1e-4
+        self.memory_size = 50000
+
+        # Run on the GPU when CUDA is available, otherwise on the CPU
+        if torch.cuda.is_available():
+            self.device = torch.device('cuda')
+        else:
+            self.device = torch.device('cpu')
+
+        # Live objects and per-episode history
+        self.environment = None
+        self.agent = None
+        self.episode_rewards, self.episode_lengths = [], []
+        self.episode_balances, self.episode_trades = [], []
+        self.training_losses = []
+
+        # Best-so-far and aggregate performance figures
+        self.best_reward = -float('inf')
+        self.best_balance = 0.0
+        self.win_rates = []
+        self.avg_rewards = []
+
+        logger.info(f"RLTrainer initialized for symbols: {self.symbols}")
+    
+    def setup_environment_and_agent(self) -> Tuple[ScalpingEnvironment, ScalpingRLAgent]:
+        """Create the trading environment and an agent sized to its state.
+
+        The environment is built for the first configured symbol and reset
+        once; the length of the returned state becomes ``self.state_dim``.
+
+        Returns:
+            ``(environment, agent)``.
+
+        Raises:
+            ValueError: If the environment's ``reset()`` returns ``None``.
+        """
+        logger.info("Setting up environment and agent...")
+
+        env = ScalpingEnvironment(
+            data_provider=self.data_provider,
+            symbol=self.symbols[0],
+            initial_balance=self.initial_balance,
+            max_position_size=self.max_position_size
+        )
+
+        # One reset is enough to learn how long a state vector is
+        probe_state = env.reset()
+        if probe_state is None:
+            raise ValueError("Could not get initial state from environment")
+
+        self.state_dim = len(probe_state)
+        logger.info(f"State dimension: {self.state_dim}")
+
+        agent_settings = {
+            'state_dim': self.state_dim,
+            'action_dim': self.action_dim,
+            'learning_rate': self.learning_rate,
+            'memory_size': self.memory_size,
+        }
+        return env, ScalpingRLAgent(**agent_settings)
+    
+    def run_episode(self, episode_num: int, training: bool = True) -> Dict:
+        """Run a single episode and summarise it.
+
+        Args:
+            episode_num: Episode index, echoed in the result.
+            training: When true, experiences are stored in the agent's
+                memory and the agent replays every
+                ``self.training_frequency`` steps once its memory holds
+                more than ``batch_size`` experiences.
+
+        Returns:
+            A dict with reward, step count, final balance, trade count,
+            fees, PnL (absolute and percent) and ``avg_loss`` (mean loss
+            over the replay updates actually performed; 0 when not
+            training). If the environment cannot be reset the dict is
+            ``{'error': 'Could not reset environment'}`` instead.
+        """
+        state = self.environment.reset()
+        if state is None:
+            return {'error': 'Could not reset environment'}
+
+        episode_reward = 0.0
+        episode_loss = 0.0
+        replay_updates = 0  # replay calls that actually returned a loss
+        step_count = 0
+        trades_made = 0
+        info: Dict = {}  # stays empty if the loop body never runs
+
+        # Episode loop
+        for step in range(self.max_steps_per_episode):
+            # Select action
+            action = self.agent.act(state, training=training)
+
+            # Execute action in environment
+            next_state, reward, done, info = self.environment.step(action, step)
+
+            # A missing next state ends the episode without counting the step
+            if next_state is None:
+                break
+
+            trade_executed = info.get('trade_info', {}).get('executed', False)
+
+            # Store experience if training
+            if training:
+                # Large rewards and executed trades are flagged as priority
+                priority = abs(reward) > 0.1 or trade_executed
+
+                self.agent.remember(state, action, reward, next_state, done, priority)
+
+                # Train agent
+                if step % self.training_frequency == 0 and len(self.agent.memory) > self.agent.batch_size:
+                    loss = self.agent.replay()
+                    if loss is not None:
+                        episode_loss += loss
+                        replay_updates += 1
+
+            # Update state
+            state = next_state
+            episode_reward += reward
+            step_count += 1
+
+            # Track trades
+            if trade_executed:
+                trades_made += 1
+
+            if done:
+                break
+
+        # Episode results, taken from the info dict of the last step
+        final_balance = info.get('balance', self.initial_balance)
+        total_fees = info.get('total_fees', 0.0)
+        pnl = final_balance - self.initial_balance
+
+        episode_results = {
+            'episode': episode_num,
+            'reward': episode_reward,
+            'steps': step_count,
+            'balance': final_balance,
+            'trades': trades_made,
+            'fees': total_fees,
+            'pnl': pnl,
+            'pnl_percentage': pnl / self.initial_balance * 100,
+            # Average over real updates; dividing by steps // frequency
+            # under-reports the loss while the replay memory is warming up.
+            'avg_loss': episode_loss / max(replay_updates, 1) if training else 0
+        }
+
+        return episode_results
+    
+    def evaluate_agent(self, num_episodes: int = 10) -> Dict:
+        """Run evaluation episodes with exploration disabled.
+
+        The agent's ``epsilon`` is set to 0.0 for the duration of the
+        evaluation and restored afterwards, even if an episode raises.
+
+        Args:
+            num_episodes: Number of non-training episodes to run.
+
+        Returns:
+            A dict with average reward, balance, PnL (absolute and
+            percent), trades, the win rate (share of episodes with
+            positive PnL) and the per-episode results.
+
+        Raises:
+            ValueError: If ``num_episodes`` is not positive.
+            RuntimeError: If an episode reports an error instead of
+                results.
+        """
+        if num_episodes <= 0:
+            raise ValueError("num_episodes must be positive")
+
+        logger.info(f"Evaluating agent over {num_episodes} episodes...")
+
+        evaluation_results = []
+        total_reward = 0.0
+        total_balance = 0.0
+        total_trades = 0
+        winning_episodes = 0
+
+        # Set agent to evaluation mode
+        original_epsilon = self.agent.epsilon
+        self.agent.epsilon = 0.0  # No exploration during evaluation
+
+        try:
+            for episode in range(num_episodes):
+                results = self.run_episode(episode, training=False)
+                # run_episode signals a failed reset with an 'error' entry
+                if 'error' in results:
+                    raise RuntimeError(f"Evaluation episode {episode} failed: {results['error']}")
+                evaluation_results.append(results)
+
+                total_reward += results['reward']
+                total_balance += results['balance']
+                total_trades += results['trades']
+
+                if results['pnl'] > 0:
+                    winning_episodes += 1
+        finally:
+            # Restore original epsilon so training continues to explore
+            self.agent.epsilon = original_epsilon
+
+        # Calculate summary statistics
+        avg_reward = total_reward / num_episodes
+        avg_balance = total_balance / num_episodes
+        avg_trades = total_trades / num_episodes
+        win_rate = winning_episodes / num_episodes
+
+        evaluation_summary = {
+            'num_episodes': num_episodes,
+            'avg_reward': avg_reward,
+            'avg_balance': avg_balance,
+            'avg_pnl': avg_balance - self.initial_balance,
+            'avg_pnl_percentage': (avg_balance - self.initial_balance) / self.initial_balance * 100,
+            'avg_trades': avg_trades,
+            'win_rate': win_rate,
+            'results': evaluation_results
+        }
+
+        logger.info(f"Evaluation complete - Avg Reward: {avg_reward:.4f}, Win Rate: {win_rate:.2%}")
+
+        return evaluation_summary
+    
+    def train(self, save_path: Optional[str] = None) -> Dict:
+        """Train the RL agent for ``self.num_episodes`` episodes.
+
+        Every ``self.evaluation_frequency`` episodes the agent is
+        evaluated over 5 episodes and, when its average reward improves,
+        saved as the best model. Every ``self.save_frequency`` episodes a
+        checkpoint is written. A 20-episode evaluation runs at the end.
+
+        Args:
+            save_path: Optional path for the final model. Best-model and
+                checkpoint files are written next to it with ``_best`` /
+                ``_checkpoint_<episode>`` inserted before the extension.
+
+        Returns:
+            A dict with episode histories, best reward/balance, the final
+            evaluation summary and the agent configuration.
+
+        Raises:
+            RuntimeError: If a training episode reports an error instead
+                of results.
+        """
+        logger.info("Starting RL agent training...")
+
+        # Setup environment and agent
+        self.environment, self.agent = self.setup_environment_and_agent()
+
+        # Split on the real extension. str.replace('.pt', ...) would touch
+        # every '.pt' in the path and do nothing for other extensions, so
+        # best/checkpoint files would collide with the final model.
+        path_root, path_ext = os.path.splitext(save_path) if save_path else ('', '')
+
+        # Training state
+        start_time = time.time()
+        best_eval_reward = -float('inf')
+
+        # Training loop
+        for episode in range(self.num_episodes):
+            episode_start_time = time.time()
+
+            # Run training episode
+            results = self.run_episode(episode, training=True)
+            # run_episode signals a failed reset with an 'error' entry
+            if 'error' in results:
+                raise RuntimeError(f"Training episode {episode} failed: {results['error']}")
+
+            # Track metrics
+            self.episode_rewards.append(results['reward'])
+            self.episode_lengths.append(results['steps'])
+            self.episode_balances.append(results['balance'])
+            self.episode_trades.append(results['trades'])
+
+            if results.get('avg_loss', 0) > 0:
+                self.training_losses.append(results['avg_loss'])
+
+            # Update best metrics
+            if results['reward'] > self.best_reward:
+                self.best_reward = results['reward']
+
+            if results['balance'] > self.best_balance:
+                self.best_balance = results['balance']
+
+            # Running average of the reward over the last 100 episodes
+            self.avg_rewards.append(np.mean(self.episode_rewards[-100:]))
+
+            # Log progress
+            episode_time = time.time() - episode_start_time
+
+            if episode % 10 == 0:
+                logger.info(
+                    f"Episode {episode}/{self.num_episodes} - "
+                    f"Reward: {results['reward']:.4f}, Balance: ${results['balance']:.2f}, "
+                    f"Trades: {results['trades']}, PnL: {results['pnl_percentage']:.2f}%, "
+                    f"Epsilon: {self.agent.epsilon:.3f}, Time: {episode_time:.2f}s"
+                )
+
+            # Evaluation
+            if episode % self.evaluation_frequency == 0 and episode > 0:
+                eval_results = self.evaluate_agent(num_episodes=5)
+
+                # Track win rate
+                self.win_rates.append(eval_results['win_rate'])
+
+                logger.info(
+                    f"Evaluation - Avg Reward: {eval_results['avg_reward']:.4f}, "
+                    f"Win Rate: {eval_results['win_rate']:.2%}, "
+                    f"Avg PnL: {eval_results['avg_pnl_percentage']:.2f}%"
+                )
+
+                # Save best model
+                if eval_results['avg_reward'] > best_eval_reward:
+                    best_eval_reward = eval_results['avg_reward']
+                    if save_path:
+                        best_path = f"{path_root}_best{path_ext}"
+                        self.agent.save(best_path)
+                        logger.info(f"New best model saved: {best_path}")
+
+            # Save checkpoint
+            if episode % self.save_frequency == 0 and episode > 0 and save_path:
+                checkpoint_path = f"{path_root}_checkpoint_{episode}{path_ext}"
+                self.agent.save(checkpoint_path)
+                logger.info(f"Checkpoint saved: {checkpoint_path}")
+
+        # Training complete
+        total_time = time.time() - start_time
+        logger.info(f"Training completed in {total_time:.2f} seconds")
+
+        # Final evaluation
+        final_eval = self.evaluate_agent(num_episodes=20)
+
+        # Save final model
+        if save_path:
+            self.agent.save(save_path)
+            logger.info(f"Final model saved: {save_path}")
+
+        # Prepare training results
+        training_results = {
+            'total_episodes': self.num_episodes,
+            'total_time': total_time,
+            'best_reward': self.best_reward,
+            'best_balance': self.best_balance,
+            'final_evaluation': final_eval,
+            'episode_rewards': self.episode_rewards,
+            'episode_balances': self.episode_balances,
+            'episode_trades': self.episode_trades,
+            'training_losses': self.training_losses,
+            'avg_rewards': self.avg_rewards,
+            'win_rates': self.win_rates,
+            'agent_config': {
+                'state_dim': self.state_dim,
+                'action_dim': self.action_dim,
+                'learning_rate': self.learning_rate,
+                'epsilon_final': self.agent.epsilon
+            }
+        }
+
+        return training_results
+    
+    def backtest_agent(self, agent_path: str, test_episodes: int = 50) -> Dict:
+        """Backtest trained agent"""
+        logger.info(f"Backtesting agent from {agent_path}...")
+        
+        # Setup environment and agent
+        self.environment, self.agent = self.setup_environment_and_agent()
+        
+        # Load trained agent
+        self.agent.load(agent_path)
+        
+        # Run backtest
+        backtest_results = self.evaluate_agent(test_episodes)
+        
+        # Additional analysis
+        results = backtest_results['results']
+        pnls = [r['pnl_percentage'] for r in results]
+        rewards = [r['reward'] for r in results]
+        trades = [r['trades'] for r in results]
+        
+        analysis = {
+            'total_episodes': test_episodes,
+            'avg_pnl': np.mean(pnls),
+            'std_pnl': np.std(pnls),
+            'max_pnl': np.max(pnls),
+            'min_pnl': np.min(pnls),
+            'avg_reward': np.mean(rewards),
+            'avg_trades': np.mean(trades),
+            'win_rate': backtest_results['win_rate'],
+            'profit_factor': np.sum([p for p in pnls if p > 0]) / abs(np.sum([p for p in pnls if p < 0])) if any(p < 0 for p in pnls) else float('inf'),
+            'sharpe_ratio': np.mean(pnls) / np.std(pnls) if np.std(pnls) > 0 else 0,
+            'max_drawdown': self._calculate_max_drawdown(pnls)
+        }
+        
+        logger.info(f"Backtest complete - Win Rate: {analysis['win_rate']:.2%}, Avg PnL: {analysis['avg_pnl']:.2f}%")
+        
+        return {
+            'backtest_results': backtest_results,
+            'analysis': analysis
+        }
+    
+    def _calculate_max_drawdown(self, pnls: List[float]) -> float:
+        """Calculate maximum drawdown"""
+        cumulative = np.cumsum(pnls)
+        running_max = np.maximum.accumulate(cumulative)
+        drawdowns = running_max - cumulative
+        return np.max(drawdowns) if len(drawdowns) > 0 else 0.0
+    
+    def plot_training_progress(self, save_path: Optional[str] = None):
+        """Plot training progress"""
+        if not self.episode_rewards:
+            logger.warning("No training data to plot")
+            return
+        
+        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
+        
+        episodes = range(1, len(self.episode_rewards) + 1)
+        
+        # Episode rewards
+        ax1.plot(episodes, self.episode_rewards, alpha=0.6, label='Episode Reward')
+        if self.avg_rewards:
+            ax1.plot(episodes, self.avg_rewards, 'r-', label='Avg Reward (100 episodes)')
+        ax1.set_title('Training Rewards')
+        ax1.set_xlabel('Episode')
+        ax1.set_ylabel('Reward')
+        ax1.legend()
+        ax1.grid(True)
+        
+        # Episode balances
+        ax2.plot(episodes, self.episode_balances, alpha=0.6, label='Episode Balance')
+        ax2.axhline(y=self.initial_balance, color='r', linestyle='--', label='Initial Balance')
+        ax2.set_title('Portfolio Balance')
+        ax2.set_xlabel('Episode')
+        ax2.set_ylabel('Balance ($)')
+        ax2.legend()
+        ax2.grid(True)
+        
+        # Training losses
+        if self.training_losses:
+            loss_episodes = np.linspace(1, len(self.episode_rewards), len(self.training_losses))
+            ax3.plot(loss_episodes, self.training_losses, 'g-', alpha=0.8)
+            ax3.set_title('Training Loss')
+            ax3.set_xlabel('Episode')
+            ax3.set_ylabel('Loss')
+            ax3.grid(True)
+        
+        # Win rates
+        if self.win_rates:
+            eval_episodes = np.arange(self.evaluation_frequency, 
+                                    len(self.episode_rewards) + 1, 
+                                    self.evaluation_frequency)[:len(self.win_rates)]
+            ax4.plot(eval_episodes, self.win_rates, 'purple', marker='o')
+            ax4.set_title('Win Rate')
+            ax4.set_xlabel('Episode')
+            ax4.set_ylabel('Win Rate')
+            ax4.grid(True)
+            ax4.set_ylim(0, 1)
+        
+        plt.tight_layout()
+        
+        if save_path:
+            plt.savefig(save_path, dpi=300, bbox_inches='tight')
+            logger.info(f"Training progress plot saved: {save_path}")
+        
+        plt.show()
+
+class HybridTrainer:
+    """
+    Hybrid training pipeline combining CNN and RL
+    """
+    
+    def __init__(self, data_provider: DataProvider):
+        self.data_provider = data_provider
+        self.cnn_trainer = None
+        self.rl_trainer = None
+        
+    def train_hybrid(self, symbols: List[str], cnn_save_path: str, rl_save_path: str) -> Dict:
+        """Train CNN first, then RL with CNN features"""
+        logger.info("Starting hybrid CNN + RL training...")
+        
+        # Phase 1: Train CNN
+        logger.info("Phase 1: Training CNN...")
+        from training.cnn_trainer import CNNTrainer
+        
+        self.cnn_trainer = CNNTrainer(self.data_provider)
+        cnn_results = self.cnn_trainer.train(symbols, cnn_save_path)
+        
+        # Phase 2: Train RL
+        logger.info("Phase 2: Training RL...")
+        self.rl_trainer = RLTrainer(self.data_provider)
+        rl_results = self.rl_trainer.train(rl_save_path)
+        
+        # Combine results
+        hybrid_results = {
+            'cnn_results': cnn_results,
+            'rl_results': rl_results,
+            'total_time': cnn_results['total_time'] + rl_results['total_time']
+        }
+        
+        logger.info("Hybrid training completed!")
+        return hybrid_results
+
+# Public API: the names exported by a wildcard import of this module
+__all__ = ['RLTrainer', 'HybridTrainer']