Multi-pair inputs (WIP); training now working?

This commit is contained in:
Dobromir Popov
2025-03-29 01:56:45 +02:00
parent b313f70cd7
commit 0b2000e3e7
9 changed files with 686 additions and 37 deletions

View File

@@ -17,16 +17,15 @@ logger = logging.getLogger(__name__)
class DataInterface:
"""
Handles data collection, processing, and preparation for neural network models.
- This class is responsible for:
- 1. Fetching historical data
- 2. Preprocessing data for neural network input
- 3. Generating training datasets
- 4. Handling real-time data integration
+ Enhanced Data Interface supporting:
+ - Multiple trading pairs (up to 3)
+ - Multiple timeframes per pair (1s, 1m, 1h, 1d + custom)
+ - Technical indicators (up to 20)
+ - Cross-timeframe normalization
+ - Real-time tick streaming
"""
- def __init__(self, symbol="BTC/USDT", timeframes=None, data_dir="NN/data"):
+ def __init__(self, symbol=None, timeframes=None, data_dir="NN/data"):
"""
Initialize the data interface.
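For orientation, a minimal usage sketch of the enhanced interface. The import path is a guess (the diff does not show the module's location), and the multi-timeframe feature builder is called here by a placeholder name:

# Hypothetical import path, inferred from the NN/data default:
# from NN.utils.data_interface import DataInterface

di = DataInterface(symbol="BTC/USDT", timeframes=["1m", "1h", "1d"])
print(di.get_feature_count())  # 5 OHLCV features per timeframe -> 15

# The builder shown below returns X shaped [samples, window, 5 * n_timeframes].
# Its public name is outside this hunk, so this call is illustrative only:
# X, y, ts = di.prepare_nn_input(timeframes=di.timeframes, window_size=20)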
@@ -157,9 +156,9 @@ class DataInterface:
else:
cycle = np.sin(i / 24 * np.pi) * 0.01 # Daily cycle
- # Calculate price change with random walk + cycles
- price_change = price * (drift + volatility * np.random.randn() + cycle)
- price += price_change
+ # Calculate price change with random walk + cycles (clamped to prevent overflow)
+ price_change = price * np.clip(drift + volatility * np.random.randn() + cycle, -0.1, 0.1)
+ price = np.clip(price + price_change, 1000, 100000) # Keep price in reasonable range
# Generate OHLC from the price
open_price = price
@@ -171,8 +170,8 @@
high_price = max(high_price, open_price, close_price)
low_price = min(low_price, open_price, close_price)
- # Generate volume (higher for larger price movements)
- volume = abs(price_change) * (10000 + 5000 * np.random.rand())
+ # Generate volume (higher for larger price movements) with safe calculation
+ volume = 10000 + 5000 * np.random.rand() + abs(price_change) / price * 10000
prices.append((open_price, high_price, low_price, close_price))
volumes.append(volume)
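The two np.clip calls above bound both the per-step return and the price itself, which is what keeps the synthetic walk from overflowing. A self-contained sketch of the same technique (toy parameters, not the repo's defaults):

import numpy as np

rng = np.random.default_rng(0)
price, drift, volatility = 50000.0, 0.0001, 0.01
prices = []
for i in range(10000):
    cycle = np.sin(i / 24 * np.pi) * 0.01                # daily cycle, as in the diff
    step = drift + volatility * rng.standard_normal() + cycle
    price_change = price * np.clip(step, -0.1, 0.1)      # cap any single move at 10%
    price = np.clip(price + price_change, 1000, 100000)  # keep price in a sane band
    prices.append(price)
assert np.isfinite(prices).all()  # an unclamped walk can blow up; this one cannot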
@@ -217,19 +216,41 @@ class DataInterface:
logger.error("No data available for feature creation")
return None, None, None
- # For simplicity, we'll use just one timeframe for now
- # In a more complex implementation, we would merge multiple timeframes
- primary_tf = timeframes[0]
- if primary_tf not in dfs:
- logger.error(f"Primary timeframe {primary_tf} not available")
+ # Create features for each timeframe
+ features = []
+ targets = []
+ timestamps = []
+ for tf in timeframes:
+ if tf in dfs:
+ X, y, ts = self._create_features(dfs[tf], window_size)
+ features.append(X)
+ if len(targets) == 0: # Only need targets from one timeframe
+ targets = y
+ timestamps = ts
+ if not features:
return None, None, None
+ # Stack features from all timeframes along the channel (feature) dimension
+ # Reshape each timeframe's features to [samples, window, 1, features]
+ reshaped_features = [f.reshape(f.shape[0], f.shape[1], 1, f.shape[2])
+ for f in features]
+ # Concatenate along the channel dimension
+ X = np.concatenate(reshaped_features, axis=2)
+ # Reshape to [samples, window, features*timeframes]
+ X = X.reshape(X.shape[0], X.shape[1], -1)
- df = dfs[primary_tf]
+ # Validate data
+ if np.any(np.isnan(X)) or np.any(np.isinf(X)):
+ logger.error("Generated features contain NaN or infinite values")
+ return None, None, None
+ # Ensure all values are finite and normalized
+ X = np.nan_to_num(X, nan=0.0, posinf=1.0, neginf=-1.0)
+ X = np.clip(X, -1e6, 1e6) # Clip extreme values
- # Create features
- X, y, timestamps = self._create_features(df, window_size)
- return X, y, timestamps
+ return X, targets, timestamps
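The reshape-and-concatenate above interleaves per-timeframe channels into the feature axis. A toy shape check makes the layout concrete (synthetic arrays; note it assumes every timeframe yields the same number of samples, which the loop above does not guarantee when the timeframes' histories differ in length):

import numpy as np

# 3 timeframes, 4 samples, window 20, 5 OHLCV features each
features = [np.random.rand(4, 20, 5) for _ in range(3)]
reshaped = [f.reshape(f.shape[0], f.shape[1], 1, f.shape[2]) for f in features]
X = np.concatenate(reshaped, axis=2)       # -> (4, 20, 3, 5)
X = X.reshape(X.shape[0], X.shape[1], -1)  # -> (4, 20, 15): [samples, window, features*timeframes]
print(X.shape)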
def _create_features(self, df, window_size):
"""
@@ -248,9 +269,28 @@ class DataInterface:
# Extract OHLCV columns
ohlcv = df[['open', 'high', 'low', 'close', 'volume']].values
- # Scale the data
- scaler = MinMaxScaler()
- ohlcv_scaled = scaler.fit_transform(ohlcv)
+ # Validate data before scaling
+ if np.any(np.isnan(ohlcv)) or np.any(np.isinf(ohlcv)):
+ logger.error("Input data contains NaN or infinite values")
+ return None, None, None
+ # Handle potential constant columns (avoid division by zero in the scaler)
+ ohlcv = np.nan_to_num(ohlcv, nan=0.0)
+ ranges = np.ptp(ohlcv, axis=0)
+ for i in range(len(ranges)):
+ if ranges[i] == 0: # Constant column
+ ohlcv[:, i] = 1 if i == 3 else 0 # Set close to 1, others to 0
+ # Scale the data with safety checks
+ try:
+ scaler = MinMaxScaler()
+ ohlcv_scaled = scaler.fit_transform(ohlcv)
+ if np.any(np.isnan(ohlcv_scaled)) or np.any(np.isinf(ohlcv_scaled)):
+ logger.error("Scaling produced invalid values")
+ return None, None, None
+ except Exception as e:
+ logger.error(f"Scaling failed: {str(e)}")
+ return None, None, None
# Store the scaler for later use
timeframe = next((tf for tf in self.timeframes if self.dataframes.get(tf) is not None and
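In isolation, the constant-column guard above looks like this on toy data (note that recent scikit-learn versions already map zero-range columns safely inside MinMaxScaler, so the guard is belt-and-braces rather than strictly required):

import numpy as np
from sklearn.preprocessing import MinMaxScaler

ohlcv = np.random.rand(100, 5) * 100
ohlcv[:, 4] = 42.0                        # constant volume column
ranges = np.ptp(ohlcv, axis=0)            # peak-to-peak range per column
for i in range(len(ranges)):
    if ranges[i] == 0:                    # constant column
        ohlcv[:, i] = 1 if i == 3 else 0  # close -> 1, others -> 0, as in the diff
scaled = MinMaxScaler().fit_transform(ohlcv)
assert np.isfinite(scaled).all()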
@@ -343,6 +383,11 @@ class DataInterface:
logger.info(f"Dataset generated and saved: {dataset_name}")
return dataset_info
+ def get_feature_count(self):
+ """Get the number of features per input sample"""
+ # OHLCV (5 features) per timeframe
+ return 5 * len(self.timeframes)
def prepare_realtime_input(self, timeframe='1h', n_candles=30, window_size=20):
"""
Prepare a single input sample from the most recent data for real-time inference.
@@ -387,4 +432,4 @@ class DataInterface:
# Get timestamp of the most recent candle
timestamp = df['timestamp'].iloc[-1]
- return X, timestamp
+ return X, timestamp

View File

@@ -0,0 +1,123 @@
"""
Enhanced Data Interface with additional NN trading parameters
"""
from typing import List, Optional, Tuple
import numpy as np
import pandas as pd
from datetime import datetime
from .data_interface import DataInterface
class MultiDataInterface(DataInterface):
"""
Enhanced data interface that supports window_size and output_size parameters
for neural network trading models.
"""
def __init__(self, symbol: str,
timeframes: List[str],
window_size: int = 20,
output_size: int = 3,
data_dir: str = "NN/data"):
"""
Initialize with window_size and output_size for NN predictions.
"""
super().__init__(symbol, timeframes, data_dir)
self.window_size = window_size
self.output_size = output_size
self.scalers = {} # Store scalers for each timeframe
self.min_window_threshold = 100 # Minimum candles needed for training
def get_feature_count(self) -> int:
"""
Get number of features (OHLCV) for NN input.
Note: deliberately overrides the parent's 5 * len(self.timeframes),
since this interface trains on single-timeframe windows (see
prepare_training_data below).
"""
return 5 # open, high, low, close, volume
def prepare_training_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""Prepare training data with windowed sequences"""
# Get historical data for primary timeframe
primary_tf = self.timeframes[0]
df = self.get_historical_data(timeframe=primary_tf,
n_candles=self.min_window_threshold + 1000)
if df is None or len(df) < self.min_window_threshold:
raise ValueError(f"Insufficient data for training. Need at least {self.min_window_threshold} candles")
# Prepare OHLCV sequences
ohlcv = df[['open', 'high', 'low', 'close', 'volume']].values
# Create sequences and labels
X = []
y = []
for i in range(len(ohlcv) - self.window_size - self.output_size):
# Input sequence
seq = ohlcv[i:i+self.window_size]
X.append(seq)
# Output target (price movement direction); start the slice one candle
# early so np.diff always yields output_size changes (a slice of length
# output_size gives an empty diff when output_size == 1)
close_prices = ohlcv[i+self.window_size-1:i+self.window_size+self.output_size, 3] # Close prices
price_changes = np.diff(close_prices) / close_prices[:-1] # Relative changes
if self.output_size == 1:
# Binary classification (up/down)
label = 1 if price_changes[0] > 0 else 0
elif self.output_size == 3:
# 3-class classification (buy/hold/sell); the 0.002 threshold is a 0.2%
# move, which is only meaningful against relative price changes
if price_changes[0] > 0.002: # Significant rise
label = 0 # Buy
elif price_changes[0] < -0.002: # Significant drop
label = 2 # Sell
else:
label = 1 # Hold
else:
raise ValueError(f"Unsupported output_size: {self.output_size}")
y.append(label)
# Convert to numpy arrays
X = np.array(X)
y = np.array(y)
# Split into train/validation (80/20)
split_idx = int(0.8 * len(X))
X_train, y_train = X[:split_idx], y[:split_idx]
X_val, y_val = X[split_idx:], y[split_idx:]
return X_train, y_train, X_val, y_val
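A toy run of the windowing and labeling on synthetic candles (uses the off-by-one-safe slice from above; data and seed are arbitrary):

import numpy as np

rng = np.random.default_rng(1)
ohlcv = np.abs(rng.normal(100, 1, size=(50, 5)))  # 50 fake candles
window_size, output_size = 20, 3
X, y = [], []
for i in range(len(ohlcv) - window_size - output_size):
    X.append(ohlcv[i:i + window_size])
    closes = ohlcv[i + window_size - 1:i + window_size + output_size, 3]
    rel = np.diff(closes) / closes[:-1]  # relative changes
    y.append(0 if rel[0] > 0.002 else 2 if rel[0] < -0.002 else 1)
print(np.array(X).shape, np.bincount(y, minlength=3))  # (27, 20, 5) and buy/hold/sell counts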
def prepare_prediction_data(self) -> np.ndarray:
"""Prepare most recent window for predictions"""
primary_tf = self.timeframes[0]
df = self.get_historical_data(timeframe=primary_tf,
n_candles=self.window_size,
use_cache=False)
if df is None or len(df) < self.window_size:
raise ValueError(f"Need at least {self.window_size} candles for prediction")
ohlcv = df[['open', 'high', 'low', 'close', 'volume']].values[-self.window_size:]
return np.array([ohlcv]) # Add batch dimension
def process_predictions(self, predictions: np.ndarray):
"""Convert prediction probabilities to trading signals"""
signals = []
for pred in predictions:
if self.output_size == 1:
signal = "BUY" if pred[0] > 0.5 else "SELL"
confidence = np.abs(pred[0] - 0.5) * 2 # Convert to 0-1 scale
elif self.output_size == 3:
action_idx = np.argmax(pred)
signal = ["BUY", "HOLD", "SELL"][action_idx]
confidence = pred[action_idx]
else:
signal = "HOLD"
confidence = 0.0
signals.append({
'action': signal,
'confidence': confidence,
'timestamp': datetime.now().isoformat()
})
return signals
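The 3-class branch of process_predictions, replicated standalone with a dummy softmax batch:

import numpy as np

preds = np.array([[0.7, 0.2, 0.1],    # argmax 0 -> BUY,  confidence 0.7
                  [0.1, 0.8, 0.1],    # argmax 1 -> HOLD, confidence 0.8
                  [0.2, 0.1, 0.7]])   # argmax 2 -> SELL, confidence 0.7
for pred in preds:
    idx = int(np.argmax(pred))
    print(["BUY", "HOLD", "SELL"][idx], float(pred[idx]))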

View File

@@ -0,0 +1,182 @@
"""
Realtime Analyzer for Neural Network Trading System
This module implements real-time analysis of market data using trained neural network models.
"""
import logging
import time
import numpy as np
from threading import Thread
from queue import Queue
from datetime import datetime
logger = logging.getLogger(__name__)
class RealtimeAnalyzer:
"""
Handles real-time analysis of market data using trained neural network models.
Features:
- Connects to real-time data sources (websockets)
- Processes incoming data through the neural network
- Generates trading signals
- Manages risk and position sizing
- Logs all trading decisions
"""
def __init__(self, data_interface, model, symbol="BTC/USDT", timeframes=None):
"""
Initialize the realtime analyzer.
Args:
data_interface (DataInterface): Preconfigured data interface
model: Trained neural network model
symbol (str): Trading pair symbol
timeframes (list): List of timeframes to monitor
"""
self.data_interface = data_interface
self.model = model
self.symbol = symbol
self.timeframes = timeframes or ['1h']
self.running = False
self.data_queue = Queue() # Unbounded: filled roughly once per second but drained only once per prediction_interval
self.prediction_interval = 60 # Seconds between predictions
logger.info(f"RealtimeAnalyzer initialized for {symbol}")
def start(self):
"""Start the realtime analysis process."""
if self.running:
logger.warning("Realtime analyzer already running")
return
self.running = True
# Start data collection thread
self.data_thread = Thread(target=self._collect_data, daemon=True)
self.data_thread.start()
# Start analysis thread
self.analysis_thread = Thread(target=self._analyze_data, daemon=True)
self.analysis_thread.start()
logger.info("Realtime analysis started")
def stop(self):
"""Stop the realtime analysis process."""
self.running = False
if hasattr(self, 'data_thread'):
self.data_thread.join(timeout=1)
if hasattr(self, 'analysis_thread'):
self.analysis_thread.join(timeout=1)
logger.info("Realtime analysis stopped")
def _collect_data(self):
"""Thread function for collecting real-time data."""
logger.info("Starting data collection thread")
# In a real implementation, this would connect to websockets/API
# For now, we'll simulate data collection from the data interface
while self.running:
try:
# Get latest data for each timeframe
for timeframe in self.timeframes:
# Get recent data (simulating real-time updates)
X, timestamp = self.data_interface.prepare_realtime_input(
timeframe=timeframe,
n_candles=30,
# Assumes the interface exposes window_size (e.g. MultiDataInterface;
# the base DataInterface in this commit does not set that attribute)
window_size=self.data_interface.window_size
)
if X is not None:
self.data_queue.put({
'timeframe': timeframe,
'data': X,
'timestamp': timestamp
})
# Throttle data collection
time.sleep(1)
except Exception as e:
logger.error(f"Error in data collection: {str(e)}")
time.sleep(5) # Wait before retrying
def _analyze_data(self):
"""Thread function for analyzing data and generating signals."""
logger.info("Starting analysis thread")
last_prediction_time = 0
while self.running:
try:
current_time = time.time()
# Only make predictions at the specified interval
if current_time - last_prediction_time < self.prediction_interval:
time.sleep(0.1)
continue
# Get latest data from queue
if not self.data_queue.empty():
data_item = self.data_queue.get()
# Make prediction
prediction = self.model.predict(data_item['data'])
# Process prediction
self._process_prediction(
prediction=prediction,
timeframe=data_item['timeframe'],
timestamp=data_item['timestamp']
)
last_prediction_time = current_time
time.sleep(0.1)
except Exception as e:
logger.error(f"Error in analysis: {str(e)}")
time.sleep(1) # Wait before retrying
def _process_prediction(self, prediction, timeframe, timestamp):
"""
Process model prediction and generate trading signals.
Args:
prediction: Model prediction output
timeframe (str): Timeframe the prediction is for
timestamp: Timestamp of the prediction
"""
# Convert prediction to trading signal
signal = self._prediction_to_signal(prediction)
# Log the signal
logger.info(
f"Signal generated - Timeframe: {timeframe}, "
f"Timestamp: {timestamp}, "
f"Signal: {signal}"
)
# In a real implementation, we would execute trades here
# For now, we'll just log the signals
def _prediction_to_signal(self, prediction):
"""
Convert model prediction to trading signal.
Args:
prediction: Model prediction output
Returns:
str: Trading signal (BUY, SELL, HOLD)
"""
# Simple threshold-based signal generation
if len(prediction.shape) == 1:
# Binary classification
return "BUY" if prediction[0] > 0.5 else "SELL"
else:
# Multi-class classification (3 outputs); class order matches the
# training labels in MultiDataInterface (0=Buy, 1=Hold, 2=Sell)
class_idx = np.argmax(prediction)
return ["BUY", "HOLD", "SELL"][class_idx]