added 5 min bom data to CNN. respecting port

2025-06-24 20:38:59 +03:00
parent 1d09b3778e
commit 6a4a73ff0b
3 changed files with 507 additions and 86 deletions
--- a/core/data_provider.py
+++ b/core/data_provider.py
@@ -142,6 +142,16 @@ class DataProvider:
            binance_symbol = symbol.replace('/', '').upper()
            self.tick_buffers[binance_symbol] = deque(maxlen=self.buffer_size)
        
+        # BOM (Book of Market) data caching - 1s resolution for last 5 minutes
+        self.bom_cache_duration = 300  # 5 minutes in seconds
+        self.bom_feature_count = 120   # Number of BOM features per timestamp
+        self.bom_data_cache: Dict[str, deque] = {}  # {symbol: deque of (timestamp, bom_features)}
+        
+        # Initialize BOM cache for each symbol
+        for symbol in self.symbols:
+            # Store 300 seconds worth of 1s BOM data
+            self.bom_data_cache[symbol] = deque(maxlen=self.bom_cache_duration)
+        
        # Initialize tick aggregator for raw tick processing
        binance_symbols = [symbol.replace('/', '').upper() for symbol in self.symbols]
        self.tick_aggregator = RealTimeTickAggregator(symbols=binance_symbols)
@@ -2083,4 +2093,363 @@ class DataProvider:
            'distribution_stats': self.distribution_stats.copy(),
            'buffer_sizes': {symbol: len(buffer) for symbol, buffer in self.tick_buffers.items()},
            'tick_aggregator': aggregator_stats
-        } 
+        } 
+    
+    def update_bom_cache(self, symbol: str, bom_features: List[float], cob_integration=None):
+        """
+        Update BOM cache with latest features for a symbol
+        
+        Args:
+            symbol: Trading symbol (e.g., 'ETH/USDT')
+            bom_features: List of BOM features (should be 120 features)
+            cob_integration: Optional COB integration instance for real BOM data
+        """
+        try:
+            current_time = datetime.now()
+            
+            # Ensure we have exactly 120 features
+            if len(bom_features) != self.bom_feature_count:
+                if len(bom_features) > self.bom_feature_count:
+                    bom_features = bom_features[:self.bom_feature_count]
+                else:
+                    bom_features.extend([0.0] * (self.bom_feature_count - len(bom_features)))
+            
+            # Convert to numpy array for efficient storage
+            bom_array = np.array(bom_features, dtype=np.float32)
+            
+            # Add timestamp and features to cache
+            with self.data_lock:
+                self.bom_data_cache[symbol].append((current_time, bom_array))
+            
+            logger.debug(f"Updated BOM cache for {symbol}: {len(self.bom_data_cache[symbol])} timestamps cached")
+            
+        except Exception as e:
+            logger.error(f"Error updating BOM cache for {symbol}: {e}")
+    
+    def get_bom_matrix_for_cnn(self, symbol: str, sequence_length: int = 50) -> Optional[np.ndarray]:
+        """
+        Get BOM matrix for CNN input from cached 1s data
+        
+        Args:
+            symbol: Trading symbol (e.g., 'ETH/USDT')
+            sequence_length: Required sequence length (default 50)
+            
+        Returns:
+            np.ndarray: BOM matrix of shape (sequence_length, 120) or None if insufficient data
+        """
+        try:
+            with self.data_lock:
+                if symbol not in self.bom_data_cache or len(self.bom_data_cache[symbol]) == 0:
+                    logger.warning(f"No BOM data cached for {symbol}")
+                    return None
+                
+                # Get recent data
+                cached_data = list(self.bom_data_cache[symbol])
+                
+                if len(cached_data) < sequence_length:
+                    logger.warning(f"Insufficient BOM data for {symbol}: {len(cached_data)} < {sequence_length}")
+                    # Pad with zeros if we don't have enough data
+                    bom_matrix = np.zeros((sequence_length, self.bom_feature_count), dtype=np.float32)
+                    
+                    # Fill available data at the end
+                    for i, (timestamp, features) in enumerate(cached_data):
+                        if i < sequence_length:
+                            bom_matrix[sequence_length - len(cached_data) + i] = features
+                    
+                    return bom_matrix
+                
+                # Take the most recent sequence_length samples
+                recent_data = cached_data[-sequence_length:]
+                
+                # Create matrix
+                bom_matrix = np.zeros((sequence_length, self.bom_feature_count), dtype=np.float32)
+                for i, (timestamp, features) in enumerate(recent_data):
+                    bom_matrix[i] = features
+                
+                logger.debug(f"Retrieved BOM matrix for {symbol}: shape={bom_matrix.shape}")
+                return bom_matrix
+                
+        except Exception as e:
+            logger.error(f"Error getting BOM matrix for {symbol}: {e}")
+            return None
+    
+    def generate_synthetic_bom_features(self, symbol: str) -> List[float]:
+        """
+        Generate synthetic BOM features when real COB data is not available
+        
+        This creates realistic-looking order book features based on current market data
+        """
+        try:
+            features = []
+            
+            # Get current price for context
+            current_price = self.get_current_price(symbol)
+            if current_price is None:
+                current_price = 3000.0  # Fallback price
+            
+            # === 1. CONSOLIDATED ORDER BOOK DATA (40 features) ===
+            # Top 10 bid levels (price offset + volume)
+            for i in range(10):
+                price_offset = -0.001 * (i + 1) * (1 + np.random.normal(0, 0.1))  # Negative for bids
+                volume_normalized = np.random.exponential(0.5) * (1.0 - i * 0.1)  # Decreasing with depth
+                features.extend([price_offset, volume_normalized])
+            
+            # Top 10 ask levels (price offset + volume)
+            for i in range(10):
+                price_offset = 0.001 * (i + 1) * (1 + np.random.normal(0, 0.1))   # Positive for asks
+                volume_normalized = np.random.exponential(0.5) * (1.0 - i * 0.1)  # Decreasing with depth
+                features.extend([price_offset, volume_normalized])
+            
+            # === 2. VOLUME PROFILE FEATURES (30 features) ===
+            # Top 10 volume levels (buy%, sell%, total volume)
+            for i in range(10):
+                buy_percent = 0.3 + np.random.normal(0, 0.2)  # Around 30-70% buy
+                buy_percent = max(0.0, min(1.0, buy_percent))
+                sell_percent = 1.0 - buy_percent
+                total_volume = np.random.exponential(1.0) * (1.0 - i * 0.05)
+                features.extend([buy_percent, sell_percent, total_volume])
+            
+            # === 3. ORDER FLOW INTENSITY (25 features) ===
+            # Aggressive order flow
+            features.extend([
+                0.5 + np.random.normal(0, 0.1),  # Aggressive buy ratio
+                0.5 + np.random.normal(0, 0.1),  # Aggressive sell ratio
+                0.4 + np.random.normal(0, 0.1),  # Buy volume ratio
+                0.4 + np.random.normal(0, 0.1),  # Sell volume ratio
+                np.random.exponential(100),      # Avg aggressive buy size
+                np.random.exponential(100),      # Avg aggressive sell size
+            ])
+            
+            # Block trade detection
+            features.extend([
+                0.1 + np.random.exponential(0.05),  # Large trade ratio
+                0.2 + np.random.exponential(0.1),   # Large trade volume ratio
+                np.random.exponential(1000),        # Avg large trade size
+            ])
+            
+            # Flow velocity metrics
+            features.extend([
+                1.0 + np.random.normal(0, 0.2),     # Avg time delta
+                0.1 + np.random.exponential(0.05),  # Time velocity variance
+                0.5 + np.random.normal(0, 0.1),     # Trade clustering
+            ])
+            
+            # Institutional activity indicators
+            features.extend([
+                0.05 + np.random.exponential(0.02), # Iceberg detection
+                0.3 + np.random.normal(0, 0.1),     # Hidden order ratio
+                0.2 + np.random.normal(0, 0.05),    # Smart money flow
+                0.1 + np.random.exponential(0.03),  # Algorithmic activity
+            ])
+            
+            # Market maker behavior
+            features.extend([
+                0.6 + np.random.normal(0, 0.1),     # MM provision ratio
+                0.4 + np.random.normal(0, 0.1),     # MM take ratio
+                0.02 + np.random.normal(0, 0.005),  # Spread tightening
+                1.0 + np.random.normal(0, 0.2),     # Quote update frequency
+                0.8 + np.random.normal(0, 0.1),     # Quote stability
+            ])
+            
+            # === 4. MARKET MICROSTRUCTURE SIGNALS (25 features) ===
+            # Order book pressure
+            features.extend([
+                0.5 + np.random.normal(0, 0.1),     # Bid pressure
+                0.5 + np.random.normal(0, 0.1),     # Ask pressure
+                0.0 + np.random.normal(0, 0.05),    # Pressure imbalance
+                1.0 + np.random.normal(0, 0.2),     # Pressure intensity
+                0.5 + np.random.normal(0, 0.1),     # Depth stability
+            ])
+            
+            # Price level concentration
+            features.extend([
+                0.3 + np.random.normal(0, 0.1),     # Bid concentration
+                0.3 + np.random.normal(0, 0.1),     # Ask concentration
+                0.8 + np.random.normal(0, 0.1),     # Top level dominance
+                0.2 + np.random.normal(0, 0.05),    # Fragmentation index
+                0.6 + np.random.normal(0, 0.1),     # Liquidity clustering
+            ])
+            
+            # Temporal dynamics
+            features.extend([
+                0.1 + np.random.normal(0, 0.02),    # Volatility factor
+                1.0 + np.random.normal(0, 0.1),     # Momentum factor
+                0.0 + np.random.normal(0, 0.05),    # Mean reversion
+                0.5 + np.random.normal(0, 0.1),     # Trend alignment
+                0.8 + np.random.normal(0, 0.1),     # Pattern consistency
+            ])
+            
+            # Exchange-specific patterns
+            features.extend([
+                0.4 + np.random.normal(0, 0.1),     # Cross-exchange correlation
+                0.3 + np.random.normal(0, 0.1),     # Exchange arbitrage
+                0.2 + np.random.normal(0, 0.05),    # Latency patterns
+                0.8 + np.random.normal(0, 0.1),     # Sync quality
+                0.6 + np.random.normal(0, 0.1),     # Data freshness
+            ])
+            
+            # Ensure exactly 120 features
+            if len(features) > 120:
+                features = features[:120]
+            elif len(features) < 120:
+                features.extend([0.0] * (120 - len(features)))
+            
+            # Clamp all values to reasonable ranges
+            features = [max(-5.0, min(5.0, f)) for f in features]
+            
+            return features
+            
+        except Exception as e:
+            logger.error(f"Error generating synthetic BOM features for {symbol}: {e}")
+            return [0.0] * 120
+    
+    def start_bom_cache_updates(self, cob_integration=None):
+        """
+        Start background updates of BOM cache every second
+        
+        Args:
+            cob_integration: Optional COB integration instance for real data
+        """
+        try:
+            def update_loop():
+                while self.is_streaming:
+                    try:
+                        for symbol in self.symbols:
+                            if cob_integration:
+                                # Try to get real BOM features from COB integration
+                                try:
+                                    bom_features = self._extract_real_bom_features(symbol, cob_integration)
+                                    if bom_features:
+                                        self.update_bom_cache(symbol, bom_features, cob_integration)
+                                    else:
+                                        # Fallback to synthetic
+                                        synthetic_features = self.generate_synthetic_bom_features(symbol)
+                                        self.update_bom_cache(symbol, synthetic_features)
+                                except Exception as e:
+                                    logger.warning(f"Error getting real BOM features for {symbol}: {e}")
+                                    synthetic_features = self.generate_synthetic_bom_features(symbol)
+                                    self.update_bom_cache(symbol, synthetic_features)
+                            else:
+                                # Generate synthetic BOM features
+                                synthetic_features = self.generate_synthetic_bom_features(symbol)
+                                self.update_bom_cache(symbol, synthetic_features)
+                        
+                        time.sleep(1.0)  # Update every second
+                        
+                    except Exception as e:
+                        logger.error(f"Error in BOM cache update loop: {e}")
+                        time.sleep(5.0)  # Wait longer on error
+            
+            # Start background thread
+            bom_thread = Thread(target=update_loop, daemon=True)
+            bom_thread.start()
+            
+            logger.info("Started BOM cache updates (1s resolution)")
+            
+        except Exception as e:
+            logger.error(f"Error starting BOM cache updates: {e}")
+    
+    def _extract_real_bom_features(self, symbol: str, cob_integration) -> Optional[List[float]]:
+        """Extract real BOM features from COB integration"""
+        try:
+            features = []
+            
+            # Get consolidated order book
+            if hasattr(cob_integration, 'get_consolidated_orderbook'):
+                cob_snapshot = cob_integration.get_consolidated_orderbook(symbol)
+                if cob_snapshot:
+                    # Extract order book features (40 features)
+                    features.extend(self._extract_orderbook_features(cob_snapshot))
+                else:
+                    features.extend([0.0] * 40)
+            else:
+                features.extend([0.0] * 40)
+            
+            # Get volume profile features (30 features)
+            if hasattr(cob_integration, 'get_session_volume_profile'):
+                volume_profile = cob_integration.get_session_volume_profile(symbol)
+                if volume_profile:
+                    features.extend(self._extract_volume_profile_features(volume_profile))
+                else:
+                    features.extend([0.0] * 30)
+            else:
+                features.extend([0.0] * 30)
+            
+            # Add flow and microstructure features (50 features)
+            features.extend(self._extract_flow_microstructure_features(symbol, cob_integration))
+            
+            # Ensure exactly 120 features
+            if len(features) > 120:
+                features = features[:120]
+            elif len(features) < 120:
+                features.extend([0.0] * (120 - len(features)))
+            
+            return features
+            
+        except Exception as e:
+            logger.warning(f"Error extracting real BOM features for {symbol}: {e}")
+            return None
+    
+    def _extract_orderbook_features(self, cob_snapshot) -> List[float]:
+        """Extract order book features from COB snapshot"""
+        features = []
+        
+        try:
+            # Top 10 bid levels
+            for i in range(10):
+                if i < len(cob_snapshot.consolidated_bids):
+                    level = cob_snapshot.consolidated_bids[i]
+                    price_offset = (level.price - cob_snapshot.volume_weighted_mid) / cob_snapshot.volume_weighted_mid
+                    volume_normalized = level.total_volume_usd / 1000000
+                    features.extend([price_offset, volume_normalized])
+                else:
+                    features.extend([0.0, 0.0])
+            
+            # Top 10 ask levels
+            for i in range(10):
+                if i < len(cob_snapshot.consolidated_asks):
+                    level = cob_snapshot.consolidated_asks[i]
+                    price_offset = (level.price - cob_snapshot.volume_weighted_mid) / cob_snapshot.volume_weighted_mid
+                    volume_normalized = level.total_volume_usd / 1000000
+                    features.extend([price_offset, volume_normalized])
+                else:
+                    features.extend([0.0, 0.0])
+                    
+        except Exception as e:
+            logger.warning(f"Error extracting order book features: {e}")
+            features = [0.0] * 40
+        
+        return features[:40]
+    
+    def _extract_volume_profile_features(self, volume_profile) -> List[float]:
+        """Extract volume profile features"""
+        features = []
+        
+        try:
+            if 'data' in volume_profile:
+                svp_data = volume_profile['data']
+                top_levels = sorted(svp_data, key=lambda x: x.get('total_volume', 0), reverse=True)[:10]
+                
+                for level in top_levels:
+                    buy_percent = level.get('buy_percent', 50.0) / 100.0
+                    sell_percent = level.get('sell_percent', 50.0) / 100.0
+                    total_volume = level.get('total_volume', 0.0) / 1000000
+                    features.extend([buy_percent, sell_percent, total_volume])
+                
+                # Pad to 30 features
+                while len(features) < 30:
+                    features.extend([0.5, 0.5, 0.0])
+                    
+        except Exception as e:
+            logger.warning(f"Error extracting volume profile features: {e}")
+            features = [0.0] * 30
+        
+        return features[:30]
+    
+    def _extract_flow_microstructure_features(self, symbol: str, cob_integration) -> List[float]:
+        """Extract flow and microstructure features"""
+        try:
+            # For now, return synthetic features since full implementation would be complex
+            return self.generate_synthetic_bom_features(symbol)[70:]  # Last 50 features
+        except:
+            return [0.0] * 50