#!/usr/bin/env python3
"""
Compare COB data quality between DataProvider and COBIntegration

This test compares:
1. DataProvider COB collection (used in our test)
2. COBIntegration direct access (used in cob_realtime_dashboard.py)

To understand why cob_realtime_dashboard.py gets more stable data.
"""

import asyncio
import logging
import time
from collections import deque
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from core.data_provider import DataProvider, MarketTick
from core.config import get_config

# Try to import COBIntegration like cob_realtime_dashboard does
try:
    from core.cob_integration import COBIntegration
    COB_INTEGRATION_AVAILABLE = True
except ImportError:
    COB_INTEGRATION_AVAILABLE = False

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class COBComparisonTester:
    """Collect COB updates from DataProvider and COBIntegration side by side.

    Both sources are started, left running for a fixed duration while their
    callbacks buffer every update, then stopped. The buffered data is
    summarised in the log and plotted to a PNG file.
    """

    def __init__(self, symbol='ETH/USDT', duration_seconds=15):
        """Create the tester and instantiate both data sources.

        Args:
            symbol: Symbol handed to COBIntegration and used in log messages
                and the plot filename.
            duration_seconds: How long the collection loop runs.
        """
        self.symbol = symbol
        self.duration = timedelta(seconds=duration_seconds)

        # Data storage for both methods
        self.dp_ticks = deque()  # MarketTick objects built from DataProvider updates
        self.cob_data = deque()  # dict records built from COBIntegration updates

        # Initialize DataProvider (method 1)
        logger.info("Initializing DataProvider...")
        self.data_provider = DataProvider()
        self.dp_cob_received = 0

        # Initialize COBIntegration (method 2)
        self.cob_integration = None
        self.cob_received = 0
        if COB_INTEGRATION_AVAILABLE:
            logger.info("Initializing COBIntegration...")
            self.cob_integration = COBIntegration(symbols=[self.symbol])
        else:
            logger.warning("COBIntegration not available - will only test DataProvider")

        self.start_time = None
        self.subscriber_id = None

    def _dp_cob_callback(self, symbol: str, cob_data: dict):
        """Callback for DataProvider COB data.

        Every call is counted. Updates that carry a positive
        ``stats['mid_price']`` are wrapped in a synthetic ``MarketTick`` and
        buffered in ``self.dp_ticks``; all other updates are dropped.
        """
        self.dp_cob_received += 1

        stats = cob_data.get('stats') if 'stats' in cob_data else None
        if stats is None or 'mid_price' not in stats:
            return
        mid_price = stats['mid_price']
        if not mid_price > 0:
            return

        synthetic_tick = MarketTick(
            symbol=symbol,
            timestamp=cob_data.get('timestamp', datetime.now()),
            price=mid_price,
            volume=cob_data.get('stats', {}).get('total_volume', 0),
            quantity=0,
            side='dp_cob',
            trade_id=f"dp_{self.dp_cob_received}",
            is_buyer_maker=False,
            raw_data=cob_data
        )
        self.dp_ticks.append(synthetic_tick)

        # Only log every 20th update to keep the output readable.
        if self.dp_cob_received % 20 == 0:
            logger.info(f"[DataProvider] Update #{self.dp_cob_received}: {symbol} @ ${mid_price:.2f}")

    def _cob_integration_callback(self, symbol: str, data: dict):
        """Callback for COBIntegration data.

        Every update is buffered unfiltered in ``self.cob_data`` together
        with the local receive time.
        """
        self.cob_received += 1

        # Store COBIntegration data directly
        cob_record = {
            'symbol': symbol,
            'timestamp': datetime.now(),
            'data': data,
            'source': 'cob_integration'
        }
        self.cob_data.append(cob_record)

        # Only log every 20th update to keep the output readable.
        if self.cob_received % 20 == 0:
            stats = data.get('stats', {})
            mid_price = stats.get('mid_price', 0)
            logger.info(f"[COBIntegration] Update #{self.cob_received}: {symbol} @ ${mid_price:.2f}")

    async def _start_sources(self):
        """Start both data sources, logging (not raising) start-up failures."""
        # Start DataProvider COB collection
        try:
            logger.info("Starting DataProvider COB collection...")
            self.data_provider.start_cob_collection()
            self.data_provider.subscribe_to_cob(self._dp_cob_callback)
            await self.data_provider.start_real_time_streaming()
            logger.info("DataProvider streaming started")
        except Exception as e:
            logger.error(f"Failed to start DataProvider: {e}")

        # Start COBIntegration if available
        if self.cob_integration:
            try:
                logger.info("Starting COBIntegration...")
                self.cob_integration.add_dashboard_callback(self._cob_integration_callback)
                await self.cob_integration.start()
                logger.info("COBIntegration started")
            except Exception as e:
                logger.error(f"Failed to start COBIntegration: {e}")

    async def _stop_sources(self):
        """Stop both data sources independently.

        Each source has its own ``try`` so that a failure while stopping the
        DataProvider cannot prevent COBIntegration from being stopped.
        """
        try:
            await self.data_provider.stop_real_time_streaming()
        except Exception as e:
            logger.error(f"Error stopping data collection: {e}")

        if self.cob_integration:
            try:
                await self.cob_integration.stop()
            except Exception as e:
                logger.error(f"Error stopping data collection: {e}")

    async def run_comparison_test(self):
        """Run the comparison test.

        Starts both sources, collects for ``self.duration`` while logging the
        buffer sizes once per second, stops the sources, then analyses and
        plots what was collected. Start-up and shutdown errors are logged
        and do not abort the run.
        """
        logger.info(f"Starting COB comparison test for {self.symbol} for {self.duration.total_seconds()} seconds...")

        try:
            await self._start_sources()

            # Collect data for specified duration
            self.start_time = datetime.now()
            while datetime.now() - self.start_time < self.duration:
                await asyncio.sleep(1)
                logger.info(f"DataProvider: {len(self.dp_ticks)} ticks | COBIntegration: {len(self.cob_data)} updates")
        finally:
            # Stop data collection even when the loop is cancelled or
            # interrupted, so neither source is left running.
            await self._stop_sources()

        logger.info("Comparison complete:")
        logger.info(f" DataProvider: {len(self.dp_ticks)} ticks received")
        logger.info(f" COBIntegration: {len(self.cob_data)} updates received")

        # Analyze and plot the differences
        self.analyze_differences()
        self.create_comparison_plots()

    @staticmethod
    def _summarize_snapshots(snapshots):
        """Return ``(order_book_count, mid_prices)`` for an iterable of COB dicts.

        ``order_book_count`` counts dicts holding both ``'bids'`` and
        ``'asks'``; ``mid_prices`` lists every ``stats['mid_price']`` present.
        """
        order_book_count = 0
        mid_prices = []
        for snapshot in snapshots:
            if 'bids' in snapshot and 'asks' in snapshot:
                order_book_count += 1
            if 'stats' in snapshot and 'mid_price' in snapshot['stats']:
                mid_prices.append(snapshot['stats']['mid_price'])
        return order_book_count, mid_prices

    @staticmethod
    def _log_source_summary(name, total_updates, order_book_count, mid_prices):
        """Log the quality summary lines for one data source."""
        logger.info(f" {name}:")
        logger.info(f" Total updates: {total_updates}")
        logger.info(f" With order book data: {order_book_count}")
        logger.info(f" Mid prices collected: {len(mid_prices)}")
        if mid_prices:
            logger.info(f" Price range: ${min(mid_prices):.2f} - ${max(mid_prices):.2f}")

    def analyze_differences(self):
        """Analyze the differences between the two data sources.

        Logs, per source, the number of buffered updates, how many carried
        both bids and asks, how many carried a mid price, and the mid-price
        range when at least one was seen.
        """
        logger.info("Analyzing data quality differences...")

        # Analyze DataProvider data (ticks without raw data are skipped)
        dp_snapshots = [
            tick.raw_data for tick in self.dp_ticks
            if getattr(tick, 'raw_data', None)
        ]
        dp_order_book_count, dp_mid_prices = self._summarize_snapshots(dp_snapshots)

        # Analyze COBIntegration data
        cob_order_book_count, cob_mid_prices = self._summarize_snapshots(
            record['data'] for record in self.cob_data
        )

        logger.info("Data Quality Analysis:")
        self._log_source_summary(
            'DataProvider', len(self.dp_ticks), dp_order_book_count, dp_mid_prices
        )
        self._log_source_summary(
            'COBIntegration', len(self.cob_data), cob_order_book_count, cob_mid_prices
        )

    @staticmethod
    def _depth_series(timed_snapshots):
        """Return ``(times, bid_counts, ask_counts)`` for ``(time, dict)`` pairs.

        Only dicts containing both ``'bids'`` and ``'asks'`` contribute.
        """
        times = []
        bid_counts = []
        ask_counts = []
        for timestamp, snapshot in timed_snapshots:
            if 'bids' in snapshot and 'asks' in snapshot:
                times.append(timestamp)
                bid_counts.append(len(snapshot['bids']))
                ask_counts.append(len(snapshot['asks']))
        return times, bid_counts, ask_counts

    def create_comparison_plots(self):
        """Create comparison plots showing the difference.

        Draws the price series (top) and bid/ask level counts (bottom) for
        both sources, saves the figure as a timestamped PNG in the current
        working directory, then shows it.
        """
        logger.info("Creating comparison plots...")

        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12))

        # Plot 1: Price comparison
        dp_points = [(tick.timestamp, tick.price) for tick in self.dp_ticks if tick.price > 0]
        dp_times = [timestamp for timestamp, _ in dp_points]
        dp_prices = [price for _, price in dp_points]

        cob_times = []
        cob_prices = []
        for record in self.cob_data:
            data = record['data']
            if 'stats' in data and 'mid_price' in data['stats']:
                cob_times.append(record['timestamp'])
                cob_prices.append(data['stats']['mid_price'])

        if dp_times:
            ax1.plot(pd.to_datetime(dp_times), dp_prices, 'b-', alpha=0.7,
                     label='DataProvider COB', linewidth=1)
        if cob_times:
            ax1.plot(pd.to_datetime(cob_times), cob_prices, 'r-', alpha=0.7,
                     label='COBIntegration', linewidth=1)

        ax1.set_title('Price Comparison: DataProvider vs COBIntegration')
        ax1.set_ylabel('Price (USDT)')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Plot 2: Data quality comparison (order book depth)
        dp_ob_times, dp_bid_counts, dp_ask_counts = self._depth_series(
            (tick.timestamp, tick.raw_data)
            for tick in self.dp_ticks
            if getattr(tick, 'raw_data', None)
        )
        cob_ob_times, cob_bid_counts, cob_ask_counts = self._depth_series(
            (record['timestamp'], record['data']) for record in self.cob_data
        )

        if dp_ob_times:
            dp_axis = pd.to_datetime(dp_ob_times)
            ax2.plot(dp_axis, dp_bid_counts, 'b--', alpha=0.7, label='DP Bid Levels')
            ax2.plot(dp_axis, dp_ask_counts, 'b:', alpha=0.7, label='DP Ask Levels')
        if cob_ob_times:
            cob_axis = pd.to_datetime(cob_ob_times)
            ax2.plot(cob_axis, cob_bid_counts, 'r--', alpha=0.7, label='COB Bid Levels')
            ax2.plot(cob_axis, cob_ask_counts, 'r:', alpha=0.7, label='COB Ask Levels')

        ax2.set_title('Order Book Depth Comparison')
        ax2.set_ylabel('Number of Levels')
        ax2.set_xlabel('Time')
        ax2.legend()
        ax2.grid(True, alpha=0.3)

        plt.tight_layout()

        plot_filename = f"cob_comparison_{self.symbol.replace('/', '_')}_{datetime.now():%Y%m%d_%H%M%S}.png"
        plt.savefig(plot_filename, dpi=150)
        logger.info(f"Comparison plot saved to {plot_filename}")
        plt.show()


async def main():
    """Run one comparison with the default symbol and duration."""
    tester = COBComparisonTester()
    await tester.run_comparison_test()


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        logger.info("Test interrupted by user.")