working with errors

Dobromir Popov
2025-07-20 01:52:36 +03:00
parent 92919cb1ef
commit 469269e809
7 changed files with 1237 additions and 149 deletions


@@ -196,11 +196,39 @@ class DataProvider:
# Load existing pivot bounds from cache
self._load_all_pivot_bounds()
# Centralized data collection for models and dashboard
self.cob_data_cache: Dict[str, deque] = {} # COB data for models
self.training_data_cache: Dict[str, deque] = {} # Training data for models
self.model_data_subscribers: Dict[str, List[Callable]] = {} # Model-specific data callbacks
# Callbacks for data distribution
self.cob_data_callbacks: List[Callable] = [] # COB data callbacks
self.training_data_callbacks: List[Callable] = [] # Training data callbacks
self.model_prediction_callbacks: List[Callable] = [] # Model prediction callbacks
# Initialize data caches
for symbol in self.symbols:
binance_symbol = symbol.replace('/', '').upper()
self.cob_data_cache[binance_symbol] = deque(maxlen=300) # 5 minutes of COB data
self.training_data_cache[binance_symbol] = deque(maxlen=1000) # Training data buffer
# Data collection threads
self.data_collection_active = False
# COB data collection
self.cob_collection_active = False
self.cob_collection_thread = None
# Training data collection
self.training_data_collection_active = False
self.training_data_thread = None
logger.info(f"DataProvider initialized for symbols: {self.symbols}")
logger.info(f"Timeframes: {self.timeframes}")
logger.info("Centralized data distribution enabled")
logger.info("Pivot-based normalization system enabled")
logger.info("Williams Market Structure integration enabled")
logger.info("COB and training data collection enabled")
# Rate limiting
self.last_request_time = {}
@@ -2559,4 +2587,591 @@ class DataProvider:
if attempt < self.max_retries - 1:
time.sleep(5 * (attempt + 1))
return None
return None
# ===== CENTRALIZED DATA COLLECTION METHODS =====
def start_centralized_data_collection(self):
"""Start all centralized data collection processes"""
logger.info("Starting centralized data collection for all models and dashboard")
# Start COB data collection
self.start_cob_data_collection()
# Start training data collection
self.start_training_data_collection()
logger.info("All centralized data collection processes started")
def stop_centralized_data_collection(self):
"""Stop all centralized data collection processes"""
logger.info("Stopping centralized data collection")
# Stop COB collection
self.cob_collection_active = False
if self.cob_collection_thread and self.cob_collection_thread.is_alive():
self.cob_collection_thread.join(timeout=5)
# Stop training data collection
self.training_data_collection_active = False
if self.training_data_thread and self.training_data_thread.is_alive():
self.training_data_thread.join(timeout=5)
logger.info("Centralized data collection stopped")
def start_cob_data_collection(self):
"""Start COB (Consolidated Order Book) data collection"""
if self.cob_collection_active:
logger.warning("COB data collection already active")
return
self.cob_collection_active = True
self.cob_collection_thread = Thread(target=self._cob_collection_worker, daemon=True)
self.cob_collection_thread.start()
logger.info("COB data collection started")
def _cob_collection_worker(self):
"""Worker thread for COB data collection"""
import requests
import time
import threading
logger.info("COB data collection worker started")
# Use separate threads for each symbol to achieve higher update frequency
def collect_symbol_data(symbol):
while self.cob_collection_active:
try:
self._collect_cob_data_for_symbol(symbol)
                    # Sleep briefly: 0.016s is ~62 polls/sec per symbol (~120/sec across
                    # two symbols). Note: polling the public REST depth endpoint this fast
                    # can easily exceed Binance request-weight limits; a depth websocket
                    # stream is the usual way to sustain this update rate.
                    time.sleep(0.016)
except Exception as e:
logger.error(f"Error collecting COB data for {symbol}: {e}")
time.sleep(1) # Short recovery time
# Start a thread for each symbol
threads = []
for symbol in self.symbols:
thread = threading.Thread(target=collect_symbol_data, args=(symbol,), daemon=True)
thread.start()
threads.append(thread)
# Keep the main thread alive
while self.cob_collection_active:
time.sleep(1)
# Join threads when collection is stopped
for thread in threads:
thread.join(timeout=1)
def _collect_cob_data_for_symbol(self, symbol: str):
"""Collect COB data for a specific symbol using Binance REST API"""
try:
import requests
# Convert symbol format
binance_symbol = symbol.replace('/', '').upper()
# Get order book data
url = f"https://api.binance.com/api/v3/depth"
params = {
'symbol': binance_symbol,
'limit': 100 # Get top 100 levels
}
response = requests.get(url, params=params, timeout=5)
if response.status_code == 200:
order_book = response.json()
# Process and cache the data
cob_snapshot = self._process_order_book_data(symbol, order_book)
# Store in cache (ensure cache exists)
if binance_symbol not in self.cob_data_cache:
self.cob_data_cache[binance_symbol] = deque(maxlen=300)
self.cob_data_cache[binance_symbol].append(cob_snapshot)
# Distribute to COB data subscribers
self._distribute_cob_data(symbol, cob_snapshot)
else:
logger.debug(f"Failed to fetch COB data for {symbol}: {response.status_code}")
except Exception as e:
logger.debug(f"Error collecting COB data for {symbol}: {e}")
def _process_order_book_data(self, symbol: str, order_book: dict) -> dict:
"""Process raw order book data into structured COB snapshot with multi-timeframe imbalance metrics"""
try:
bids = [[float(price), float(qty)] for price, qty in order_book.get('bids', [])]
asks = [[float(price), float(qty)] for price, qty in order_book.get('asks', [])]
# Calculate statistics
total_bid_volume = sum(qty for _, qty in bids)
total_ask_volume = sum(qty for _, qty in asks)
best_bid = bids[0][0] if bids else 0
best_ask = asks[0][0] if asks else 0
mid_price = (best_bid + best_ask) / 2 if best_bid and best_ask else 0
spread = best_ask - best_bid if best_bid and best_ask else 0
spread_bps = (spread / mid_price * 10000) if mid_price > 0 else 0
# Calculate current imbalance
imbalance = (total_bid_volume - total_ask_volume) / (total_bid_volume + total_ask_volume) if (total_bid_volume + total_ask_volume) > 0 else 0
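            # Worked example with made-up numbers: total_bid_volume=120 and total_ask_volume=80
            # give imbalance = (120 - 80) / 200 = 0.2 (bid-heavy). The value is bounded in
            # [-1, 1], with 0 meaning a perfectly balanced book.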
# Calculate multi-timeframe imbalances
binance_symbol = symbol.replace('/', '').upper()
# Initialize imbalance metrics
imbalance_1s = imbalance # Current imbalance is 1s
imbalance_5s = imbalance # Default to current if not enough history
imbalance_15s = imbalance
imbalance_60s = imbalance
# Calculate historical imbalances if we have enough data
if binance_symbol in self.cob_data_cache:
cache = self.cob_data_cache[binance_symbol]
now = datetime.now()
# Get snapshots for different timeframes
snapshots_5s = [s for s in cache if (now - s['timestamp']).total_seconds() <= 5]
snapshots_15s = [s for s in cache if (now - s['timestamp']).total_seconds() <= 15]
snapshots_60s = [s for s in cache if (now - s['timestamp']).total_seconds() <= 60]
# Calculate imbalances for each timeframe
if snapshots_5s:
bid_vol_5s = sum(s['stats']['bid_liquidity'] for s in snapshots_5s)
ask_vol_5s = sum(s['stats']['ask_liquidity'] for s in snapshots_5s)
total_vol_5s = bid_vol_5s + ask_vol_5s
imbalance_5s = (bid_vol_5s - ask_vol_5s) / total_vol_5s if total_vol_5s > 0 else 0
if snapshots_15s:
bid_vol_15s = sum(s['stats']['bid_liquidity'] for s in snapshots_15s)
ask_vol_15s = sum(s['stats']['ask_liquidity'] for s in snapshots_15s)
total_vol_15s = bid_vol_15s + ask_vol_15s
imbalance_15s = (bid_vol_15s - ask_vol_15s) / total_vol_15s if total_vol_15s > 0 else 0
if snapshots_60s:
bid_vol_60s = sum(s['stats']['bid_liquidity'] for s in snapshots_60s)
ask_vol_60s = sum(s['stats']['ask_liquidity'] for s in snapshots_60s)
total_vol_60s = bid_vol_60s + ask_vol_60s
imbalance_60s = (bid_vol_60s - ask_vol_60s) / total_vol_60s if total_vol_60s > 0 else 0
cob_snapshot = {
'symbol': symbol,
'timestamp': datetime.now(),
'bids': bids[:20], # Top 20 levels
'asks': asks[:20], # Top 20 levels
'stats': {
'best_bid': best_bid,
'best_ask': best_ask,
'mid_price': mid_price,
'spread': spread,
'spread_bps': spread_bps,
'bid_liquidity': total_bid_volume,
'ask_liquidity': total_ask_volume,
'total_liquidity': total_bid_volume + total_ask_volume,
'imbalance': imbalance,
'imbalance_1s': imbalance_1s,
'imbalance_5s': imbalance_5s,
'imbalance_15s': imbalance_15s,
'imbalance_60s': imbalance_60s,
'levels': len(bids) + len(asks)
}
}
return cob_snapshot
except Exception as e:
logger.error(f"Error processing order book data for {symbol}: {e}")
return {}
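    # Illustrative check of the snapshot schema above (hypothetical values; 'provider'
    # stands for a constructed DataProvider instance and is not defined in this file):
    #
    #   raw = {'bids': [['100.0', '2.0']], 'asks': [['100.2', '1.0']]}
    #   snap = provider._process_order_book_data('ETH/USDT', raw)
    #   snap['stats']['mid_price']   # -> 100.1
    #   snap['stats']['spread_bps']  # -> 0.2 / 100.1 * 10000, about 19.98
    #   snap['stats']['imbalance']   # -> (2.0 - 1.0) / 3.0, about 0.333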
def start_training_data_collection(self):
"""Start training data collection for models"""
if self.training_data_collection_active:
logger.warning("Training data collection already active")
return
self.training_data_collection_active = True
self.training_data_thread = Thread(target=self._training_data_collection_worker, daemon=True)
self.training_data_thread.start()
logger.info("Training data collection started")
def _training_data_collection_worker(self):
"""Worker thread for training data collection"""
import time
logger.info("Training data collection worker started")
while self.training_data_collection_active:
try:
# Collect training data for all symbols
for symbol in self.symbols:
training_sample = self._collect_training_sample(symbol)
if training_sample:
binance_symbol = symbol.replace('/', '').upper()
self.training_data_cache[binance_symbol].append(training_sample)
# Distribute to training data subscribers
self._distribute_training_data(symbol, training_sample)
# Sleep for 5 seconds between collections
time.sleep(5)
except Exception as e:
logger.error(f"Error in training data collection worker: {e}")
time.sleep(10) # Wait longer on error
def _collect_training_sample(self, symbol: str) -> Optional[dict]:
"""Collect a training sample for a specific symbol"""
try:
# Get recent market data
recent_data = self.get_historical_data(symbol, '1m', limit=100)
if recent_data is None or len(recent_data) < 50:
return None
# Get recent ticks
recent_ticks = self.get_recent_ticks(symbol, count=100)
if len(recent_ticks) < 10:
return None
# Get COB data
binance_symbol = symbol.replace('/', '').upper()
            recent_cob = list(self.cob_data_cache.get(binance_symbol, []))[-10:]  # .get already handles missing symbols
# Create training sample
training_sample = {
'symbol': symbol,
'timestamp': datetime.now(),
'ohlcv_data': recent_data.tail(50).to_dict('records'),
'tick_data': [
{
'price': tick.price,
'volume': tick.volume,
'timestamp': tick.timestamp
} for tick in recent_ticks[-50:]
],
'cob_data': recent_cob,
'features': self._extract_training_features(symbol, recent_data, recent_ticks, recent_cob)
}
return training_sample
except Exception as e:
logger.error(f"Error collecting training sample for {symbol}: {e}")
return None
def _extract_training_features(self, symbol: str, ohlcv_data: pd.DataFrame,
recent_ticks: List[MarketTick], recent_cob: List[dict]) -> dict:
"""Extract features for training from various data sources"""
try:
features = {}
# OHLCV features
if len(ohlcv_data) > 0:
latest = ohlcv_data.iloc[-1]
features.update({
'price': latest['close'],
'volume': latest['volume'],
'price_change_1m': (latest['close'] - ohlcv_data.iloc[-2]['close']) / ohlcv_data.iloc[-2]['close'] if len(ohlcv_data) > 1 else 0,
'volume_ratio': latest['volume'] / ohlcv_data['volume'].mean() if len(ohlcv_data) > 1 else 1,
'volatility': ohlcv_data['close'].pct_change().std() if len(ohlcv_data) > 1 else 0
})
# Tick features
if recent_ticks:
tick_prices = [tick.price for tick in recent_ticks]
tick_volumes = [tick.volume for tick in recent_ticks]
features.update({
'tick_price_std': np.std(tick_prices) if len(tick_prices) > 1 else 0,
'tick_volume_mean': np.mean(tick_volumes),
'tick_count': len(recent_ticks)
})
# COB features
if recent_cob:
latest_cob = recent_cob[-1]
if 'stats' in latest_cob:
stats = latest_cob['stats']
features.update({
'spread_bps': stats.get('spread_bps', 0),
'imbalance': stats.get('imbalance', 0),
'liquidity': stats.get('total_liquidity', 0),
'cob_levels': stats.get('levels', 0)
})
return features
except Exception as e:
logger.error(f"Error extracting training features for {symbol}: {e}")
return {}
# ===== SUBSCRIPTION METHODS FOR MODELS AND DASHBOARD =====
def subscribe_to_cob_data(self, callback: Callable[[str, dict], None]) -> str:
"""Subscribe to COB data updates"""
subscriber_id = str(uuid.uuid4())
self.cob_data_callbacks.append(callback)
logger.info(f"COB data subscriber added: {subscriber_id}")
return subscriber_id
def subscribe_to_training_data(self, callback: Callable[[str, dict], None]) -> str:
"""Subscribe to training data updates"""
subscriber_id = str(uuid.uuid4())
self.training_data_callbacks.append(callback)
logger.info(f"Training data subscriber added: {subscriber_id}")
return subscriber_id
def subscribe_to_model_predictions(self, callback: Callable[[str, dict], None]) -> str:
"""Subscribe to model prediction updates"""
subscriber_id = str(uuid.uuid4())
self.model_prediction_callbacks.append(callback)
logger.info(f"Model prediction subscriber added: {subscriber_id}")
return subscriber_id
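    # Hedged usage sketch for the subscribe_* API above (the names 'provider' and
    # 'on_cob' are illustrative, not part of this class). Note that the returned ids
    # are informational only here: callbacks live in plain lists, so there is no
    # unsubscribe path yet.
    #
    #   def on_cob(symbol: str, snapshot: dict) -> None:
    #       imb = snapshot['stats']['imbalance_5s']
    #       ...  # feed the model's feature pipeline
    #
    #   cob_id = provider.subscribe_to_cob_data(on_cob)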
def _distribute_cob_data(self, symbol: str, cob_snapshot: dict):
"""Distribute COB data to all subscribers"""
for callback in self.cob_data_callbacks:
try:
                # Pass the callback and args explicitly: a loop-level lambda would
                # late-bind `callback`, so every spawned thread could end up calling
                # the last subscriber in the list.
                Thread(target=callback, args=(symbol, cob_snapshot), daemon=True).start()
except Exception as e:
logger.error(f"Error distributing COB data: {e}")
def _distribute_training_data(self, symbol: str, training_sample: dict):
"""Distribute training data to all subscribers"""
for callback in self.training_data_callbacks:
try:
                Thread(target=callback, args=(symbol, training_sample), daemon=True).start()  # args=... avoids the late-binding lambda pitfall
except Exception as e:
logger.error(f"Error distributing training data: {e}")
def _distribute_model_predictions(self, symbol: str, prediction: dict):
"""Distribute model predictions to all subscribers"""
for callback in self.model_prediction_callbacks:
try:
                Thread(target=callback, args=(symbol, prediction), daemon=True).start()  # args=... avoids the late-binding lambda pitfall
except Exception as e:
logger.error(f"Error distributing model prediction: {e}")
# ===== DATA ACCESS METHODS FOR MODELS AND DASHBOARD =====
def get_cob_data(self, symbol: str, count: int = 50) -> List[dict]:
"""Get recent COB data for a symbol"""
binance_symbol = symbol.replace('/', '').upper()
if binance_symbol in self.cob_data_cache:
return list(self.cob_data_cache[binance_symbol])[-count:]
return []
def get_training_data(self, symbol: str, count: int = 100) -> List[dict]:
"""Get recent training data for a symbol"""
binance_symbol = symbol.replace('/', '').upper()
if binance_symbol in self.training_data_cache:
return list(self.training_data_cache[binance_symbol])[-count:]
return []
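    # Pull-based alternative to the callbacks (sketch; window sizes are arbitrary):
    #
    #   cob_window = provider.get_cob_data('ETH/USDT', count=60)    # ~last minute of snapshots
    #   batch = provider.get_training_data('ETH/USDT', count=32)    # recent training samples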
def collect_cob_data(self, symbol: str) -> dict:
"""
Collect Consolidated Order Book (COB) data for a symbol using REST API
This centralized method collects COB data for all consumers (models, dashboard, etc.)
"""
try:
import requests
import time
# Use Binance REST API for order book data
            binance_symbol = symbol.replace('/', '').upper()  # normalize like the rest of the class
url = f"https://api.binance.com/api/v3/depth?symbol={binance_symbol}&limit=500"
response = requests.get(url, timeout=5)
if response.status_code == 200:
data = response.json()
# Process order book data
bids = [[float(price), float(qty)] for price, qty in data.get('bids', [])]
asks = [[float(price), float(qty)] for price, qty in data.get('asks', [])]
# Calculate mid price
best_bid = bids[0][0] if bids else 0
best_ask = asks[0][0] if asks else 0
mid_price = (best_bid + best_ask) / 2 if best_bid and best_ask else 0
# Calculate order book stats
bid_liquidity = sum(qty for _, qty in bids[:20])
ask_liquidity = sum(qty for _, qty in asks[:20])
total_liquidity = bid_liquidity + ask_liquidity
# Calculate imbalance
imbalance = (bid_liquidity - ask_liquidity) / total_liquidity if total_liquidity > 0 else 0
# Calculate spread in basis points
spread = (best_ask - best_bid) / mid_price * 10000 if mid_price > 0 else 0
# Create COB snapshot
cob_snapshot = {
'symbol': symbol,
                    'timestamp': datetime.now(),  # datetime, not epoch ms, so snapshots mix safely with _process_order_book_data entries in the shared cache
'bids': bids[:50], # Limit to top 50 levels
'asks': asks[:50], # Limit to top 50 levels
'stats': {
'mid_price': mid_price,
'best_bid': best_bid,
'best_ask': best_ask,
'bid_liquidity': bid_liquidity,
'ask_liquidity': ask_liquidity,
'total_liquidity': total_liquidity,
'imbalance': imbalance,
'spread_bps': spread
}
}
                # Store under the Binance-formatted key so the readers
                # (get_latest_cob_data / get_cob_data) can find it
                with self.subscriber_lock:
                    if not hasattr(self, 'cob_data_cache'):
                        self.cob_data_cache = {}
                    if binance_symbol not in self.cob_data_cache:
                        # deque(maxlen=300) keeps ~5 minutes of 1s data and evicts automatically
                        self.cob_data_cache[binance_symbol] = deque(maxlen=300)
                    self.cob_data_cache[binance_symbol].append(cob_snapshot)
# Notify subscribers
self._notify_cob_subscribers(symbol, cob_snapshot)
return cob_snapshot
else:
logger.warning(f"Failed to fetch COB data for {symbol}: {response.status_code}")
return {}
except Exception as e:
logger.debug(f"Error collecting COB data for {symbol}: {e}")
return {}
def start_cob_collection(self):
"""
Start COB data collection in background thread
"""
try:
import threading
import time
def cob_collector():
"""Collect COB data using REST API calls"""
logger.info("Starting centralized COB data collection")
while True:
try:
                    # Collect data for every configured symbol rather than a hardcoded pair
                    for symbol in self.symbols:
                        self.collect_cob_data(symbol)
# Sleep for 1 second between collections
time.sleep(1)
except Exception as e:
logger.debug(f"Error in COB collection: {e}")
time.sleep(5) # Wait longer on error
# Start collector in background thread
if not hasattr(self, '_cob_thread_started') or not self._cob_thread_started:
cob_thread = threading.Thread(target=cob_collector, daemon=True)
cob_thread.start()
self._cob_thread_started = True
logger.info("Centralized COB data collection started")
except Exception as e:
logger.error(f"Error starting COB collection: {e}")
def _notify_cob_subscribers(self, symbol: str, cob_snapshot: dict):
"""Notify subscribers of new COB data"""
with self.subscriber_lock:
if not hasattr(self, 'cob_subscribers'):
self.cob_subscribers = {}
# Notify all subscribers for this symbol
for subscriber_id, callback in self.cob_subscribers.items():
try:
callback(symbol, cob_snapshot)
except Exception as e:
logger.debug(f"Error notifying COB subscriber {subscriber_id}: {e}")
def subscribe_to_cob(self, callback) -> str:
"""Subscribe to COB data updates"""
with self.subscriber_lock:
if not hasattr(self, 'cob_subscribers'):
self.cob_subscribers = {}
subscriber_id = str(uuid.uuid4())
self.cob_subscribers[subscriber_id] = callback
# Start collection if not already started
self.start_cob_collection()
return subscriber_id
def get_latest_cob_data(self, symbol: str) -> dict:
"""Get latest COB data for a symbol"""
with self.subscriber_lock:
# Convert symbol to Binance format for cache lookup
binance_symbol = symbol.replace('/', '').upper()
logger.debug(f"Getting COB data for {symbol} (binance: {binance_symbol})")
if not hasattr(self, 'cob_data_cache'):
logger.debug("COB data cache not initialized")
return {}
if binance_symbol not in self.cob_data_cache:
logger.debug(f"Symbol {binance_symbol} not in COB cache. Available: {list(self.cob_data_cache.keys())}")
return {}
if not self.cob_data_cache[binance_symbol]:
logger.debug(f"COB cache for {binance_symbol} is empty")
return {}
latest_data = self.cob_data_cache[binance_symbol][-1]
logger.debug(f"Latest COB data type for {binance_symbol}: {type(latest_data)}")
return latest_data
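    # NOTE: the get_cob_data below shadows the earlier definition of the same name
    # higher up (in a class body the later def wins), so this locked variant is the
    # one actually bound on the class.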
def get_cob_data(self, symbol: str, count: int = 50) -> List[dict]:
"""Get recent COB data for a symbol"""
with self.subscriber_lock:
# Convert symbol to Binance format for cache lookup
binance_symbol = symbol.replace('/', '').upper()
if not hasattr(self, 'cob_data_cache') or binance_symbol not in self.cob_data_cache:
return []
# Return the most recent 'count' snapshots
return list(self.cob_data_cache[binance_symbol])[-count:]
def get_data_summary(self) -> dict:
"""Get summary of all collected data"""
summary = {
'symbols': self.symbols,
'subscribers': {
'tick_subscribers': len(self.subscribers),
'cob_subscribers': len(self.cob_data_callbacks),
'training_subscribers': len(self.training_data_callbacks),
'prediction_subscribers': len(self.model_prediction_callbacks)
},
'data_counts': {},
'collection_status': {
'cob_collection': self.cob_collection_active,
'training_collection': self.training_data_collection_active,
'streaming': self.is_streaming
}
}
# Add data counts for each symbol
for symbol in self.symbols:
binance_symbol = symbol.replace('/', '').upper()
summary['data_counts'][symbol] = {
'ticks': len(self.tick_buffers.get(binance_symbol, [])),
'cob_snapshots': len(self.cob_data_cache.get(binance_symbol, [])),
'training_samples': len(self.training_data_cache.get(binance_symbol, []))
}
return summary
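# Hedged end-to-end sketch (module level, after the class). Assumes DataProvider()
# is constructible with defaults; adjust construction to the project's real config.
if __name__ == "__main__":
    import time
    from pprint import pprint

    provider = DataProvider()
    provider.start_centralized_data_collection()
    time.sleep(10)  # let the REST pollers warm the caches
    pprint(provider.get_data_summary())
    provider.stop_centralized_data_collection()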