trying to fix training
This commit is contained in:
parent 2255a8363a
commit ebbc0ed2d7
.vscode/launch.json (vendored): 20 lines changed
@@ -108,22 +108,25 @@
             "name": "NN Training Pipeline",
             "type": "python",
             "request": "launch",
-            "program": "-m",
+            "module": "NN.realtime_main",
             "args": [
-                "NN.realtime-main",
                 "--mode",
                 "train",
                 "--model-type",
                 "cnn",
                 "--framework",
                 "pytorch",
                 "--symbol",
                 "BTC/USDT",
                 "--timeframes",
                 "1m", "5m", "1h", "4h",
                 "--epochs",
                 "100",
-                "--batch_size",
-                "64",
-                "--window_size",
-                "30",
-                "--output_size",
-                "10",
+                "--batch-size",
+                "32",
+                "--window-size",
+                "20",
+                "--output-size",
+                "3"
             ],
             "console": "integratedTerminal",
@@ -132,6 +135,7 @@
                 "PYTHONUNBUFFERED": "1",
                 "TF_CPP_MIN_LOG_LEVEL": "2"
             },
+            "pythonArgs": ["-c", "import sys; sys.path.append('f:/projects/gogo2')"],
             "postDebugTask": "Start TensorBoard"
         },
         {
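For reference, the updated launch entry should be equivalent to the following command line (run from the repo root f:/projects/gogo2 so the NN package is importable):

python -m NN.realtime_main --mode train --model-type cnn --framework pytorch --symbol BTC/USDT --timeframes 1m 5m 1h 4h --epochs 100 --batch-size 32 --window-size 20 --output-size 3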
NN/models/cnn_model_pytorch.py

@@ -178,6 +178,148 @@ class CNNModelPyTorch:

         logger.info(f"Model built successfully with {sum(p.numel() for p in self.model.parameters())} parameters")

+    def train_epoch(self, X_train, y_train, batch_size=32):
+        """Train for one epoch and return loss and accuracy"""
+        # Convert to PyTorch tensors
+        X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(self.device)
+        if self.output_size == 1:
+            y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(self.device)
+        else:
+            y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(self.device)
+
+        # Create DataLoader
+        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
+        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
+
+        self.model.train()
+        running_loss = 0.0
+        correct = 0
+        total = 0
+
+        for inputs, targets in train_loader:
+            # Zero gradients
+            self.optimizer.zero_grad()
+
+            # Forward pass
+            outputs = self.model(inputs)
+
+            # Calculate loss
+            if self.output_size == 1:
+                loss = self.criterion(outputs, targets.unsqueeze(1))
+            else:
+                loss = self.criterion(outputs, targets)
+
+            # Backward pass and optimize
+            loss.backward()
+            self.optimizer.step()
+
+            # Statistics
+            running_loss += loss.item()
+            if self.output_size > 1:
+                _, predicted = torch.max(outputs, 1)
+                total += targets.size(0)
+                correct += (predicted == targets).sum().item()
+
+        epoch_loss = running_loss / len(train_loader)
+        epoch_acc = correct / total if total > 0 else 0
+
+        return epoch_loss, epoch_acc
+
+    def evaluate(self, X_val, y_val):
+        """Evaluate on validation data and return loss and accuracy"""
+        X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
+        if self.output_size == 1:
+            y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
+        else:
+            y_val_tensor = torch.tensor(y_val, dtype=torch.long).to(self.device)
+
+        val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
+        val_loader = DataLoader(val_dataset, batch_size=32)
+
+        self.model.eval()
+        val_loss = 0.0
+        correct = 0
+        total = 0
+
+        with torch.no_grad():
+            for inputs, targets in val_loader:
+                # Forward pass
+                outputs = self.model(inputs)
+
+                # Calculate loss
+                if self.output_size == 1:
+                    loss = self.criterion(outputs, targets.unsqueeze(1))
+                else:
+                    loss = self.criterion(outputs, targets)
+
+                val_loss += loss.item()
+
+                # Calculate accuracy
+                if self.output_size > 1:
+                    _, predicted = torch.max(outputs, 1)
+                    total += targets.size(0)
+                    correct += (predicted == targets).sum().item()
+
+        return val_loss / len(val_loader), correct / total if total > 0 else 0
+
+    def predict(self, X):
+        """Make predictions on input data"""
+        self.model.eval()
+        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
+
+        with torch.no_grad():
+            outputs = self.model(X_tensor)
+            if self.output_size > 1:
+                _, predicted = torch.max(outputs, 1)
+                return predicted.cpu().numpy()
+            else:
+                return outputs.cpu().numpy()
+
+    def predict_next_candles(self, X, n_candles=3):
+        """
+        Predict the next n candles for each timeframe.
+
+        Args:
+            X: Input data of shape [batch_size, window_size, features]
+            n_candles: Number of future candles to predict
+
+        Returns:
+            Dictionary of predictions for each timeframe
+        """
+        self.model.eval()
+        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
+
+        with torch.no_grad():
+            # Get the last window of data
+            last_window = X_tensor[-1:]  # [1, window_size, features]
+
+            # Initialize predictions
+            predictions = {}
+
+            # For each timeframe, predict next n candles
+            for i, tf in enumerate(self.timeframes):
+                # Extract features for this timeframe
+                tf_features = last_window[:, :, i*5:(i+1)*5]  # [1, window_size, 5]
+
+                # Predict next n candles
+                tf_predictions = []
+                current_window = tf_features
+
+                for _ in range(n_candles):
+                    # Get prediction for next candle
+                    output = self.model(current_window)
+                    tf_predictions.append(output.cpu().numpy())
+
+                    # Update window for next prediction
+                    current_window = torch.cat([
+                        current_window[:, 1:, :],
+                        output.unsqueeze(1)
+                    ], dim=1)
+
+                predictions[tf] = np.concatenate(tf_predictions, axis=0)
+
+        return predictions
+
     def train(self, X_train, y_train, X_val=None, y_val=None, batch_size=32, epochs=100):
         """
         Train the CNN model.
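A minimal sketch of how the new per-epoch API is meant to be driven (dummy data and shapes are illustrative only, not from the commit; num_features follows the 5-per-timeframe OHLCV convention used elsewhere in this diff):

import numpy as np
from NN.models.cnn_model_pytorch import CNNModelPyTorch

timeframes = ['1m', '5m', '1h', '4h']
model = CNNModelPyTorch(window_size=20, num_features=5 * len(timeframes),
                        output_size=3, timeframes=timeframes)

# Dummy data purely for illustration: [n_samples, window_size, features]
X = np.random.rand(500, 20, 5 * len(timeframes)).astype(np.float32)
y = np.random.randint(0, 3, size=500)

for epoch in range(5):
    train_loss, train_acc = model.train_epoch(X[:400], y[:400], batch_size=32)
    val_loss, val_acc = model.evaluate(X[400:], y[400:])
    print(f"epoch {epoch}: train {train_loss:.4f}/{train_acc:.2f} "
          f"val {val_loss:.4f}/{val_acc:.2f}")

One caveat worth flagging: predict_next_candles() slides the window forward by concatenating the model's output along the time axis, which only lines up if the model emits as many values per step as the per-timeframe slice has features (5); with output_size=3 that torch.cat would fail on the feature dimension.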
@@ -259,7 +401,7 @@ class CNNModelPyTorch:

             # Validation phase
             if val_loader is not None:
-                val_loss, val_acc = self._validate(val_loader)
+                val_loss, val_acc = self.evaluate(X_val, y_val)

                 logger.info(f"Epoch {epoch+1}/{epochs} - "
                             f"loss: {epoch_loss:.4f} - acc: {epoch_acc:.4f} - "
@@ -281,51 +423,12 @@ class CNNModelPyTorch:
         logger.info("Training completed")
         return self.history

-    def _validate(self, val_loader):
-        """Validate the model using the validation set"""
-        self.model.eval()
-        val_loss = 0.0
-        correct = 0
-        total = 0
-
-        with torch.no_grad():
-            for inputs, targets in val_loader:
-                # Forward pass
-                outputs = self.model(inputs)
-
-                # Calculate loss
-                if self.output_size == 1:
-                    loss = self.criterion(outputs, targets.unsqueeze(1))
-                else:
-                    loss = self.criterion(outputs, targets)
-
-                val_loss += loss.item()
-
-                # Calculate accuracy
-                if self.output_size > 1:
-                    _, predicted = torch.max(outputs, 1)
-                    total += targets.size(0)
-                    correct += (predicted == targets).sum().item()
-
-        return val_loss / len(val_loader), correct / total if total > 0 else 0
-
-    def evaluate(self, X_test, y_test):
+    def evaluate_metrics(self, X_test, y_test):
         """
-        Evaluate the model on test data.
-
-        Args:
-            X_test: Test input data
-            y_test: Test target data
-
-        Returns:
-            dict: Evaluation metrics
+        Calculate and return comprehensive evaluation metrics as dict
         """
         logger.info(f"Evaluating model on {len(X_test)} samples")

         # Convert to PyTorch tensors
         X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(self.device)

         # Get predictions
         self.model.eval()
         with torch.no_grad():
             y_pred = self.model(X_test_tensor)
@@ -336,70 +439,15 @@ class CNNModelPyTorch:
         else:
             y_pred_class = (y_pred.cpu().numpy() > 0.5).astype(int).flatten()

         # Calculate metrics
-        if self.output_size > 1:
-            accuracy = accuracy_score(y_test, y_pred_class)
-            precision = precision_score(y_test, y_pred_class, average='weighted')
-            recall = recall_score(y_test, y_pred_class, average='weighted')
-            f1 = f1_score(y_test, y_pred_class, average='weighted')
-
-            metrics = {
-                'accuracy': accuracy,
-                'precision': precision,
-                'recall': recall,
-                'f1_score': f1
-            }
-        else:
-            accuracy = accuracy_score(y_test, y_pred_class)
-            precision = precision_score(y_test, y_pred_class)
-            recall = recall_score(y_test, y_pred_class)
-            f1 = f1_score(y_test, y_pred_class)
-
-            metrics = {
-                'accuracy': accuracy,
-                'precision': precision,
-                'recall': recall,
-                'f1_score': f1
-            }
+        metrics = {
+            'accuracy': accuracy_score(y_test, y_pred_class),
+            'precision': precision_score(y_test, y_pred_class, average='weighted', zero_division=0),
+            'recall': recall_score(y_test, y_pred_class, average='weighted', zero_division=0),
+            'f1_score': f1_score(y_test, y_pred_class, average='weighted', zero_division=0)
+        }

         logger.info(f"Evaluation metrics: {metrics}")
         return metrics

-    def predict(self, X):
-        """
-        Make predictions with the model.
-
-        Args:
-            X: Input data
-
-        Returns:
-            Predictions
-        """
-        # Convert to PyTorch tensor
-        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
-
-        # Get predictions
-        self.model.eval()
-        with torch.no_grad():
-            predictions = self.model(X_tensor)
-
-        if self.output_size > 1:
-            # Multi-class classification
-            probs = predictions.cpu().numpy()
-            _, class_preds = torch.max(predictions, 1)
-            class_preds = class_preds.cpu().numpy()
-            return class_preds, probs
-        else:
-            # Binary classification or regression
-            preds = predictions.cpu().numpy()
-            if self.output_size == 1:
-                # Binary classification
-                class_preds = (preds > 0.5).astype(int)
-                return class_preds.flatten(), preds.flatten()
-            else:
-                # Regression
-                return preds.flatten(), None

     def save(self, filepath):
         """
         Save the model to a file.
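The zero_division=0 arguments in evaluate_metrics() matter whenever a class never appears among the predictions, which is easy to hit early in training if the model collapses onto a single action. A small illustration of the behaviour being relied on (plain scikit-learn, nothing project-specific):

from sklearn.metrics import precision_score

y_true = [0, 1, 2]
y_pred = [1, 1, 1]  # model only ever predicts HOLD
# Without zero_division=0, sklearn emits UndefinedMetricWarning for the
# unpredicted classes; with it, their precision is silently pinned to 0.
print(precision_score(y_true, y_pred, average='weighted', zero_division=0))  # ≈ 0.111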
@@ -151,35 +151,59 @@ def main():
     logger.info("Neural Network Trading System finished successfully")

 def train(data_interface, model, args):
-    """Train the model using the data interface"""
+    """Enhanced training with performance tracking"""
+    from torch.utils.tensorboard import SummaryWriter
+
     logger.info("Starting training mode...")
+    writer = SummaryWriter(log_dir=f"runs/{args.model_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}")

     try:
         # Prepare training data
         logger.info("Preparing training data...")
-        X_train, y_train, X_val, y_val = data_interface.prepare_training_data()
+        best_val_acc = 0

-        # Train the model
-        logger.info("Training model...")
-        model.train(
-            X_train, y_train,
-            X_val, y_val,
-            batch_size=args.batch_size,
-            epochs=args.epochs
-        )
+        for epoch in range(args.epochs):
+            # Refresh data every few epochs
+            if epoch % 3 == 0:
+                X_train, y_train, X_val, y_val = data_interface.prepare_training_data(refresh=True)
+            else:
+                X_train, y_train, X_val, y_val = data_interface.prepare_training_data()

-        # Save the model
+            # Train for one epoch
+            train_loss, train_acc = model.train_epoch(
+                X_train, y_train,
+                batch_size=args.batch_size
+            )
+
+            # Validate
+            val_loss, val_acc = model.evaluate(X_val, y_val)
+
+            # Log metrics
+            writer.add_scalar('Loss/Train', train_loss, epoch)
+            writer.add_scalar('Accuracy/Train', train_acc, epoch)
+            writer.add_scalar('Loss/Validation', val_loss, epoch)
+            writer.add_scalar('Accuracy/Validation', val_acc, epoch)
+
+            # Save best model
+            if val_acc > best_val_acc:
+                best_val_acc = val_acc
+                model_path = os.path.join(
+                    'models',
+                    f"{args.model_type}_best_{args.symbol.replace('/', '_')}.pt"
+                )
+                model.save(model_path)
+                logger.info(f"New best model saved with val_acc: {val_acc:.2f}")
+
+            logger.info(f"Epoch {epoch+1}/{args.epochs} - "
+                        f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.2f} - "
+                        f"Val Loss: {val_loss:.4f}, Acc: {val_acc:.2f}")
+
+        # Save final model
         model_path = os.path.join(
             'models',
-            f"{args.model_type}_{args.symbol.replace('/', '_')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+            f"{args.model_type}_final_{args.symbol.replace('/', '_')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pt"
         )
         logger.info(f"Saving model to {model_path}...")
         model.save(model_path)

-        # Evaluate the model
-        logger.info("Evaluating model...")
-        metrics = model.evaluate(X_val, y_val)
-        logger.info(f"Evaluation metrics: {metrics}")
+        logger.info(f"Training Complete - Best Val Accuracy: {best_val_acc:.2f}")

     except Exception as e:
         logger.error(f"Error in training mode: {str(e)}")
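Since the writer logs under runs/<model-type>_<timestamp>, the scalars above can be inspected with the stock TensorBoard CLI (presumably what the launch config's "Start TensorBoard" post-debug task wraps):

tensorboard --logdir runs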
NN/realtime_main.py

@@ -1,10 +1,9 @@
 #!/usr/bin/env python3
 """
-Neural Network Trading System Main Module
+Neural Network Trading System Main Module - PyTorch Version

 This module serves as the main entry point for the NN trading system,
-coordinating data flow between different components and implementing
-training and inference pipelines.
+using PyTorch exclusively for all model operations.
 """

 import os
@@ -12,200 +11,259 @@ import sys
 import logging
 import argparse
 from datetime import datetime
+from torch.utils.tensorboard import SummaryWriter
+import numpy as np

 # Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.StreamHandler(),
-        logging.FileHandler(os.path.join('logs', f'nn_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'))
-    ]
-)
-
 logger = logging.getLogger('NN')
+logger.setLevel(logging.INFO)

-# Create logs directory if it doesn't exist
-os.makedirs('logs', exist_ok=True)
+try:
+    # Create logs directory if it doesn't exist
+    os.makedirs('logs', exist_ok=True)
+
+    # Try setting up file logging
+    log_file = os.path.join('logs', f'nn_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log')
+    fh = logging.FileHandler(log_file)
+    fh.setLevel(logging.INFO)
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    fh.setFormatter(formatter)
+    logger.addHandler(fh)
+    logger.info(f"Logging to file: {log_file}")
+except Exception as e:
+    logger.warning(f"Failed to setup file logging: {str(e)}. Falling back to console logging only.")
+
+# Always setup console logging
+ch = logging.StreamHandler()
+ch.setLevel(logging.INFO)
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ch.setFormatter(formatter)
+logger.addHandler(ch)

 def parse_arguments():
     """Parse command line arguments"""
     parser = argparse.ArgumentParser(description='Neural Network Trading System')

     parser.add_argument('--mode', type=str, choices=['train', 'predict', 'realtime'], default='train',
                         help='Mode to run (train, predict, realtime)')
-    parser.add_argument('--symbol', type=str, default='BTC/USD',
-                        help='Main trading pair symbol (default: BTC/USD)')
-    parser.add_argument('--context-pairs', type=str, nargs='*', default=[],
-                        help='Additional context trading pairs')
-    parser.add_argument('--timeframes', type=str, nargs='+', default=['5m', '15m', '1h'],
-                        help='Timeframes to use (default: 5m,15m,1h)')
-    parser.add_argument('--window-size', type=int, default=30,
-                        help='Window size for input data (default: 30)')
-    parser.add_argument('--output-size', type=int, default=5,
-                        help='Output size (1=up/down, 3=BUY/HOLD/SELL, 5=with extrema)')
+    parser.add_argument('--symbol', type=str, default='BTC/USDT',
+                        help='Trading pair symbol')
+    parser.add_argument('--timeframes', type=str, nargs='+', default=['1s', '1m', '5m', '1h', '4h'],
+                        help='Timeframes to use (include 1s for ticks)')
+    parser.add_argument('--window-size', type=int, default=20,
+                        help='Window size for input data')
+    parser.add_argument('--output-size', type=int, default=3,
+                        help='Output size (1 for binary, 3 for BUY/HOLD/SELL)')
     parser.add_argument('--batch-size', type=int, default=32,
                         help='Batch size for training')
-    parser.add_argument('--epochs', type=int, default=100,
+    parser.add_argument('--epochs', type=int, default=10,
                         help='Number of epochs for training')
     parser.add_argument('--model-type', type=str, choices=['cnn', 'transformer', 'moe'], default='cnn',
                         help='Model type to use')
-    parser.add_argument('--framework', type=str, choices=['tensorflow', 'pytorch'], default='pytorch',
-                        help='Deep learning framework to use')

     return parser.parse_args()
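A quick sanity check of the new argument defaults (hypothetical interactive session; parse_arguments reads sys.argv, so with no CLI arguments every default applies):

from NN.realtime_main import parse_arguments

args = parse_arguments()  # no CLI arguments given
assert args.symbol == 'BTC/USDT'
assert args.timeframes == ['1s', '1m', '5m', '1h', '4h']
assert (args.window_size, args.output_size) == (20, 3)
assert (args.batch_size, args.epochs) == (32, 10)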
 def main():
     """Main entry point for the NN trading system"""
     # Parse arguments
     args = parse_arguments()

     logger.info(f"Starting NN Trading System in {args.mode} mode")
-    logger.info(f"Main Symbol: {args.symbol}")
-    if args.context_pairs:
-        logger.info(f"Context Pairs: {args.context_pairs}")
-    logger.info(f"Timeframes: {args.timeframes}")
-    logger.info(f"Window Size: {args.window_size}")
-    logger.info(f"Output Size: {args.output_size} (1=up/down, 3=BUY/HOLD/SELL, 5=with extrema)")
-    logger.info(f"Model Type: {args.model_type}")
-    logger.info(f"Framework: {args.framework}")
+    logger.info(f"Configuration: Symbol={args.symbol}, Timeframes={args.timeframes}")

-    # Import the appropriate modules based on the framework
-    if args.framework == 'pytorch':
-        try:
-            import torch
-            logger.info(f"Using PyTorch {torch.__version__}")
-
-            # Import PyTorch-based modules
-            from NN.utils.multi_data_interface import MultiDataInterface
-
-            if args.model_type == 'cnn':
-                from NN.models.cnn_model_pytorch import CNNModelPyTorch as Model
-            elif args.model_type == 'transformer':
-                from NN.models.transformer_model_pytorch import TransformerModelPyTorchWrapper as Model
-            elif args.model_type == 'moe':
-                from NN.models.transformer_model_pytorch import MixtureOfExpertsModelPyTorch as Model
-            else:
-                logger.error(f"Unknown model type: {args.model_type}")
-                return
-
-        except ImportError as e:
-            logger.error(f"Failed to import PyTorch modules: {str(e)}")
-            logger.error("Please make sure PyTorch is installed or use the TensorFlow framework.")
-            return
-
-    elif args.framework == 'tensorflow':
-        try:
-            import tensorflow as tf
-            logger.info(f"Using TensorFlow {tf.__version__}")
-
-            # Import TensorFlow-based modules
-            from NN.utils.multi_data_interface import MultiDataInterface
-
-            if args.model_type == 'cnn':
-                from NN.models.cnn_model import CNNModel as Model
-            elif args.model_type == 'transformer':
-                from NN.models.transformer_model import TransformerModel as Model
-            elif args.model_type == 'moe':
-                from NN.models.transformer_model import MixtureOfExpertsModel as Model
-            else:
-                logger.error(f"Unknown model type: {args.model_type}")
-                return
-
-        except ImportError as e:
-            logger.error(f"Failed to import TensorFlow modules: {str(e)}")
-            logger.error("Please make sure TensorFlow is installed or use the PyTorch framework.")
-            return
-    else:
-        logger.error(f"Unknown framework: {args.framework}")
+    try:
+        import torch
+        from NN.utils.data_interface import DataInterface
+
+        # Import appropriate PyTorch model
+        if args.model_type == 'cnn':
+            from NN.models.cnn_model_pytorch import CNNModelPyTorch as Model
+        elif args.model_type == 'transformer':
+            from NN.models.transformer_model_pytorch import TransformerModelPyTorchWrapper as Model
+        elif args.model_type == 'moe':
+            from NN.models.transformer_model_pytorch import MixtureOfExpertsModelPyTorch as Model
+        else:
+            logger.error(f"Unknown model type: {args.model_type}")
+            return
+
+    except ImportError as e:
+        logger.error(f"Failed to import PyTorch modules: {str(e)}")
+        logger.error("Please make sure PyTorch is installed")
         return
     # Initialize data interface
     try:
         logger.info("Initializing data interface...")
-        data_interface = MultiDataInterface(
+        data_interface = DataInterface(
             symbol=args.symbol,
-            timeframes=args.timeframes,
-            window_size=args.window_size,
-            output_size=args.output_size
+            timeframes=args.timeframes
         )

+        # Verify data interface by fetching initial data
+        logger.info("Verifying data interface...")
+        X_sample, y_sample, _, _, _, _ = data_interface.prepare_training_data(refresh=True)
+        if X_sample is None or y_sample is None:
+            logger.error("Failed to prepare initial training data")
+            return
+
+        logger.info(f"Data interface verified - X shape: {X_sample.shape}, y shape: {y_sample.shape}")
+
     except Exception as e:
         logger.error(f"Failed to initialize data interface: {str(e)}")
         return

     # Initialize model
     try:
         logger.info(f"Initializing {args.model_type.upper()} model...")
-        # Calculate actual feature count (OHLCV per timeframe)
-        num_features = 5 * len(args.timeframes)
+        # Calculate total number of features across all timeframes
+        num_features = data_interface.get_feature_count()
+        logger.info(f"Initializing model with {num_features} features")

         model = Model(
             window_size=args.window_size,
             num_features=num_features,
             output_size=args.output_size,
             timeframes=args.timeframes
         )

+        # Ensure model is on the correct device
+        if torch.cuda.is_available():
+            model.model = model.model.cuda()
+            logger.info("Model moved to CUDA device")
     except Exception as e:
         logger.error(f"Failed to initialize model: {str(e)}")
         return

-    # Execute the requested mode
+    # Execute requested mode
     if args.mode == 'train':
         train(data_interface, model, args)
     elif args.mode == 'predict':
         predict(data_interface, model, args)
     elif args.mode == 'realtime':
         realtime(data_interface, model, args)
     else:
         logger.error(f"Unknown mode: {args.mode}")
         return

     logger.info("Neural Network Trading System finished successfully")
 def train(data_interface, model, args):
-    """Train the model using the data interface"""
+    """Enhanced training with performance tracking and retrospective fine-tuning"""
     logger.info("Starting training mode...")
+    writer = SummaryWriter()

     try:
         # Prepare training data
         logger.info("Preparing training data...")
-        X, y, _ = data_interface.prepare_nn_input(
-            timeframes=args.timeframes,
-            n_candles=1000,
-            window_size=args.window_size
-        )
-        logger.info(f"Training data shape: {X.shape}")
-        logger.info(f"Target data shape: {y.shape}")
+        best_val_acc = 0
+        best_val_pnl = float('-inf')
+        best_win_rate = 0

-        # Split into train/validation sets (80/20)
-        split_idx = int(len(X) * 0.8)
-        X_train, y_train = X[:split_idx], y[:split_idx]
-        X_val, y_val = X[split_idx:], y[split_idx:]
+        logger.info("Verifying data interface...")
+        X_sample, y_sample, _, _, _, _ = data_interface.prepare_training_data(refresh=True)
+        logger.info(f"Data validation - X shape: {X_sample.shape}, y shape: {y_sample.shape}")

-        # Train the model
-        logger.info("Training model...")
-        model.train(
-            X_train, y_train,
-            X_val, y_val,
-            batch_size=args.batch_size,
-            epochs=args.epochs
-        )
+        for epoch in range(args.epochs):
+            # More frequent refresh for shorter timeframes
+            if '1s' in args.timeframes:
+                refresh = True  # Always refresh for tick data
+                refresh_interval = 30  # 30 seconds for tick data
+            else:
+                refresh = epoch % 1 == 0  # Refresh every epoch
+                refresh_interval = 120  # 2 minutes for other timeframes

-        # Save the model
-        model_path = os.path.join(
-            'models',
-            f"{args.model_type}_{args.symbol.replace('/', '_')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
-        )
-        logger.info(f"Saving model to {model_path}...")
-        model.save(model_path)
+            logger.info(f"\nStarting epoch {epoch+1}/{args.epochs}")
+            X_train, y_train, X_val, y_val, train_prices, val_prices = data_interface.prepare_training_data(
+                refresh=refresh,
+                refresh_interval=refresh_interval
+            )
+            logger.info(f"Training data - X shape: {X_train.shape}, y shape: {y_train.shape}")
+            logger.info(f"Validation data - X shape: {X_val.shape}, y shape: {y_val.shape}")

-        # Evaluate the model
-        logger.info("Evaluating model...")
-        metrics = model.evaluate(X_val, y_val)
-        logger.info(f"Evaluation metrics: {metrics}")
+            # Train and validate
+            try:
+                train_loss, train_acc = model.train_epoch(X_train, y_train, args.batch_size)
+                val_loss, val_acc = model.evaluate(X_val, y_val)
+
+                # Get predictions for PnL calculation
+                train_preds = model.predict(X_train)
+                val_preds = model.predict(X_val)
+
+                # Calculate PnL and win rates
+                train_pnl, train_win_rate, train_trades = data_interface.calculate_pnl(
+                    train_preds, train_prices, position_size=1.0
+                )
+                val_pnl, val_win_rate, val_trades = data_interface.calculate_pnl(
+                    val_preds, val_prices, position_size=1.0
+                )
+
+                # Monitor action distribution
+                train_actions = np.bincount(train_preds, minlength=3)
+                val_actions = np.bincount(val_preds, minlength=3)
+
+                # Log metrics
+                writer.add_scalar('Loss/train', train_loss, epoch)
+                writer.add_scalar('Accuracy/train', train_acc, epoch)
+                writer.add_scalar('Loss/val', val_loss, epoch)
+                writer.add_scalar('Accuracy/val', val_acc, epoch)
+                writer.add_scalar('PnL/train', train_pnl, epoch)
+                writer.add_scalar('PnL/val', val_pnl, epoch)
+                writer.add_scalar('WinRate/train', train_win_rate, epoch)
+                writer.add_scalar('WinRate/val', val_win_rate, epoch)
+
+                # Log action distribution
+                for i, action in enumerate(['SELL', 'HOLD', 'BUY']):
+                    writer.add_scalar(f'Actions/train_{action}', train_actions[i], epoch)
+                    writer.add_scalar(f'Actions/val_{action}', val_actions[i], epoch)
+
+                # Save best model based on validation PnL
+                if val_pnl > best_val_pnl:
+                    best_val_pnl = val_pnl
+                    best_val_acc = val_acc
+                    best_win_rate = val_win_rate
+                    model.save(f"models/{args.model_type}_best.pt")
+
+                # Log detailed metrics
+                logger.info(f"Epoch {epoch+1}/{args.epochs} - "
+                            f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.2f}, "
+                            f"PnL: {train_pnl:.2%}, Win Rate: {train_win_rate:.2%} - "
+                            f"Val Loss: {val_loss:.4f}, Acc: {val_acc:.2f}, "
+                            f"PnL: {val_pnl:.2%}, Win Rate: {val_win_rate:.2%}")
+
+                # Log action distribution
+                logger.info("Action Distribution:")
+                for i, action in enumerate(['SELL', 'HOLD', 'BUY']):
+                    logger.info(f"{action}: Train={train_actions[i]}, Val={val_actions[i]}")
+
+                # Log trade statistics
+                if train_trades:
+                    logger.info(f"Training trades: {len(train_trades)}")
+                    logger.info(f"Validation trades: {len(val_trades)}")
+
+                # Retrospective fine-tuning
+                if epoch > 0 and val_pnl > 0:  # Only fine-tune if we're making profit
+                    logger.info("Performing retrospective fine-tuning...")
+
+                    # Get predictions for next few candles
+                    next_candles = model.predict_next_candles(X_val[-1:], n_candles=3)
+
+                    # Log predictions for each timeframe
+                    for tf, preds in next_candles.items():
+                        logger.info(f"Next 3 candles for {tf}:")
+                        for i, pred in enumerate(preds):
+                            action = ['SELL', 'HOLD', 'BUY'][np.argmax(pred)]
+                            confidence = np.max(pred)
+                            logger.info(f"Candle {i+1}: {action} (confidence: {confidence:.2f})")
+
+                    # Fine-tune on recent successful trades
+                    successful_trades = [t for t in train_trades if t['pnl'] > 0]
+                    if successful_trades:
+                        logger.info(f"Fine-tuning on {len(successful_trades)} successful trades")
+                        # TODO: Implement fine-tuning logic here
+
+            except Exception as e:
+                logger.error(f"Error during epoch {epoch+1}: {str(e)}")
+                continue
+
+        # Save final model
+        model.save(f"models/{args.model_type}_final_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pt")
+        logger.info("Training complete. Best validation metrics:")
+        logger.info(f"Accuracy: {best_val_acc:.2f}")
+        logger.info(f"PnL: {best_val_pnl:.2%}")
+        logger.info(f"Win Rate: {best_win_rate:.2%}")

     except Exception as e:
-        logger.error(f"Error in training mode: {str(e)}")
-        return
+        logger.error(f"Error in training: {str(e)}")
 def predict(data_interface, model, args):
     """Make predictions using the trained model"""
@@ -240,14 +298,12 @@ def predict(data_interface, model, args):

     except Exception as e:
         logger.error(f"Error in prediction mode: {str(e)}")
-        return

 def realtime(data_interface, model, args):
     """Run the model in real-time mode"""
     logger.info("Starting real-time mode...")

     try:
         # Import realtime analyzer
         from NN.utils.realtime_analyzer import RealtimeAnalyzer

         # Load the latest model
@@ -279,7 +335,6 @@ def realtime(data_interface, model, args):

     except Exception as e:
         logger.error(f"Error in real-time mode: {str(e)}")
-        return

 if __name__ == "__main__":
     main()
requirements.txt

@@ -1,22 +1,8 @@
-# Main dependencies
-numpy>=1.19.5
-pandas>=1.3.0
-matplotlib>=3.4.2
-scikit-learn>=0.24.2
-
-# PyTorch (primary framework)
-torch
-torchvision
-
-# TensorFlow (optional)
-# tensorflow>=2.5.0
-# tensorflow-addons>=0.13.0
-
-# Additional dependencies
-plotly
-h5py
-tqdm
-pyyaml
-tensorboard
-ccxt
-requests
+torch>=2.0.0
+scikit-learn>=1.0.0
+pandas>=2.0.0
+numpy>=1.24.0
+websockets>=10.0
+plotly>=5.18.0
+tqdm>=4.0.0  # For progress bars
+tensorboard>=2.0.0  # For visualization
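The slimmed-down list drops matplotlib, h5py, pyyaml, ccxt and requests, so anything still importing those will need them restored. Assuming this file is the project's requirements.txt, installation is unchanged:

pip install -r requirements.txt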
NN/utils/data_interface.py

@@ -224,12 +224,14 @@ class DataInterface:
         for tf in timeframes:
             if tf in dfs:
                 X, y, ts = self._create_features(dfs[tf], window_size)
-                features.append(X)
-                if len(targets) == 0:  # Only need targets from one timeframe
-                    targets = y
-                    timestamps = ts
+                if X is not None and y is not None:
+                    features.append(X)
+                    if len(targets) == 0:  # Only need targets from one timeframe
+                        targets = y
+                        timestamps = ts

         if not features:
             logger.error("Failed to create features for any timeframe")
             return None, None, None

         # Stack features from all timeframes along the time dimension
@@ -250,6 +252,9 @@ class DataInterface:
         X = np.nan_to_num(X, nan=0.0, posinf=1.0, neginf=-1.0)
         X = np.clip(X, -1e6, 1e6)  # Clip extreme values

+        # Log data shapes for debugging
+        logger.info(f"Prepared input data - X shape: {X.shape}, y shape: {np.array(targets).shape}")
+
         return X, targets, timestamps

     def _create_features(self, df, window_size):
@@ -304,7 +309,13 @@ class DataInterface:

         for i in range(len(ohlcv_scaled) - window_size):
             # Input: window_size candles of OHLCV data
-            X.append(ohlcv_scaled[i:i+window_size])
+            window = ohlcv_scaled[i:i+window_size]
+
+            # Validate window data
+            if np.any(np.isnan(window)) or np.any(np.isinf(window)):
+                continue
+
+            X.append(window)

             # Target: binary classification - price goes up (1) or down (0)
             # 1 if close price increases in the next candle, 0 otherwise
@@ -314,7 +325,18 @@ class DataInterface:
             # Store timestamp for reference
             timestamps.append(df['timestamp'].iloc[i+window_size])

-        return np.array(X), np.array(y), np.array(timestamps)
+        if not X:
+            logger.error("No valid windows created")
+            return None, None, None
+
+        X = np.array(X)
+        y = np.array(y)
+        timestamps = np.array(timestamps)
+
+        # Log shapes for debugging
+        logger.info(f"Created features - X shape: {X.shape}, y shape: {y.shape}")
+
+        return X, y, timestamps
     def generate_training_dataset(self, timeframes=None, n_candles=1000, window_size=20):
         """
@@ -388,6 +410,95 @@ class DataInterface:
         # OHLCV (5 features) per timeframe
         return 5 * len(self.timeframes)

+    def calculate_pnl(self, predictions, actual_prices, position_size=1.0):
+        """
+        Calculate PnL based on predictions and actual price movements.
+
+        Args:
+            predictions (np.array): Model predictions (0: sell, 1: hold, 2: buy)
+            actual_prices (np.array): Actual price data
+            position_size (float): Size of the position to trade
+
+        Returns:
+            tuple: (total_pnl, win_rate, trade_history)
+        """
+        if len(predictions) != len(actual_prices) - 1:
+            logger.error("Predictions and prices length mismatch")
+            return 0.0, 0.0, []
+
+        pnl = 0.0
+        trades = 0
+        wins = 0
+        trade_history = []
+
+        for i in range(len(predictions)):
+            pred = predictions[i]
+            current_price = actual_prices[i]
+            next_price = actual_prices[i + 1]
+
+            # Calculate price change percentage
+            price_change = (next_price - current_price) / current_price
+
+            # Calculate PnL based on prediction
+            if pred == 2:  # Buy
+                trade_pnl = price_change * position_size
+                trades += 1
+                if trade_pnl > 0:
+                    wins += 1
+                trade_history.append({
+                    'type': 'buy',
+                    'price': current_price,
+                    'pnl': trade_pnl,
+                    'timestamp': self.dataframes[self.timeframes[0]]['timestamp'].iloc[i]
+                })
+            elif pred == 0:  # Sell
+                trade_pnl = -price_change * position_size
+                trades += 1
+                if trade_pnl > 0:
+                    wins += 1
+                trade_history.append({
+                    'type': 'sell',
+                    'price': current_price,
+                    'pnl': trade_pnl,
+                    'timestamp': self.dataframes[self.timeframes[0]]['timestamp'].iloc[i]
+                })
+
+            pnl += trade_pnl if pred in [0, 2] else 0
+
+        win_rate = wins / trades if trades > 0 else 0.0
+        return pnl, win_rate, trade_history
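A hypothetical sanity check of the PnL convention (note the length contract: n predictions require n+1 prices; di is assumed to be a DataInterface with data already loaded, since the trade log pulls timestamps from di.dataframes):

import numpy as np

preds = np.array([2, 1, 0])                      # BUY, HOLD, SELL
prices = np.array([100.0, 101.0, 101.0, 100.0])  # one more price than preds

# BUY at 100 gains +1%; HOLD is ignored; SELL at 101 gains ~+0.99% as price falls
pnl, win_rate, history = di.calculate_pnl(preds, prices, position_size=1.0)
# pnl ≈ 0.0199, win_rate == 1.0, len(history) == 2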
+    def prepare_training_data(self, refresh=False, refresh_interval=300):
+        """
+        Prepare training and validation data with optional refresh.
+
+        Args:
+            refresh (bool): Whether to force refresh data
+            refresh_interval (int): Minimum seconds between refreshes
+
+        Returns:
+            tuple: (X_train, y_train, X_val, y_val, train_prices, val_prices) numpy arrays
+        """
+        current_time = datetime.now()
+        if refresh or (current_time - getattr(self, 'last_refresh', datetime.min)).total_seconds() > refresh_interval:
+            logger.info("Refreshing training data...")
+            for tf in self.timeframes:
+                self.get_historical_data(timeframe=tf, n_candles=1000, use_cache=False)
+            self.last_refresh = current_time
+
+        # Get all data
+        X, y, _ = self.prepare_nn_input()
+        if X is None:
+            return None, None, None, None, None, None
+
+        # Get price data for PnL calculation
+        prices = self.dataframes[self.timeframes[0]]['close'].values
+
+        # Split into train/validation (80/20)
+        split_idx = int(len(X) * 0.8)
+        return (X[:split_idx], y[:split_idx], X[split_idx:], y[split_idx:],
+                prices[:split_idx], prices[split_idx:])

     def prepare_realtime_input(self, timeframe='1h', n_candles=30, window_size=20):
         """
         Prepare a single input sample from the most recent data for real-time inference.
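Callers must now unpack six values rather than four; a sketch matching the training loop earlier in this commit:

X_train, y_train, X_val, y_val, train_prices, val_prices = \
    data_interface.prepare_training_data(refresh=True, refresh_interval=30)

The chronological 80/20 split (no shuffling) keeps the validation set strictly in the future of the training set, which is the right call for time-series data.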
@@ -41,6 +41,7 @@ now let's run our "NN Training Pipeline" debug config. for now we start with sin
 python -c "import sys; sys.path.append('f:/projects/gogo2'); from NN.realtime_main import main; main()" --mode train --model-type cnn --framework pytorch
 python -c "import sys; sys.path.append('f:/projects/gogo2'); from NN.realtime_main import main; main()" --mode train --model-type cnn --framework pytorch --epochs 1000
 python -c "import sys; sys.path.append('f:/projects/gogo2'); from NN.realtime_main import main; main()" --mode train --model-type cnn --framework pytorch --symbol BTC/USDT --timeframes 1m 5m 1h 4h --epochs 10 --batch-size 32 --window-size 20 --output-size 3
+python -c "import sys; sys.path.append('f:/projects/gogo2'); from NN.realtime_main import main; main()" --mode train --model-type cnn --framework pytorch --epochs 10 --symbol BTC/USDT --timeframes 1s 1m 1h 1d --batch-size 32 --window-size 20 --output-size 3

 python NN/realtime-main.py --mode train --model-type cnn --framework pytorch --symbol BTC/USDT --timeframes 1m 5m 1h 4h --epochs 10 --batch-size 32 --window-size 20 --output-size 3