cob data providers tests

2025-07-23 22:49:54 +03:00
parent c30267bf0b
commit 4765b1b1e1
5 changed files with 298 additions and 294 deletions
--- a/tests/cob/test_cob_comparison.py
+++ b/tests/cob/test_cob_comparison.py
@@ -0,0 +1,276 @@
+#!/usr/bin/env python3
+"""
+Compare COB data quality between DataProvider and COBIntegration
+
+This test compares:
+1. DataProvider COB collection (used in our test)
+2. COBIntegration direct access (used in cob_realtime_dashboard.py)
+
+The goal is to understand why cob_realtime_dashboard.py gets more stable data.
+"""
+
+import asyncio
+import logging
+import time
+from collections import deque
+from datetime import datetime, timedelta
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+from core.data_provider import DataProvider, MarketTick
+from core.config import get_config
+
+# Try to import COBIntegration like cob_realtime_dashboard does
+try:
+    from core.cob_integration import COBIntegration
+    COB_INTEGRATION_AVAILABLE = True
+except ImportError:
+    COB_INTEGRATION_AVAILABLE = False
+
+# Configure root logging (INFO level, timestamped format) and create this module's logger
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+
+class COBComparisonTester:
+    """Collect COB updates from DataProvider and COBIntegration side by side.
+
+    Both sources feed callbacks for ``duration_seconds``; afterwards the
+    collected updates are summarised in the log and plotted to a PNG file.
+    """
+
+    def __init__(self, symbol='ETH/USDT', duration_seconds=15):
+        """Create the data sources and the buffers their callbacks fill.
+
+        Args:
+            symbol: Symbol handed to COBIntegration and used in the plot
+                file name.
+            duration_seconds: How long run_comparison_test() collects data.
+        """
+        self.symbol = symbol
+        self.duration = timedelta(seconds=duration_seconds)
+
+        # Data storage for both methods
+        self.dp_ticks = deque()  # synthetic MarketTicks built from DataProvider COB updates
+        self.cob_data = deque()  # COBIntegration payloads tagged with their receive time
+
+        # Initialize DataProvider (method 1)
+        logger.info("Initializing DataProvider...")
+        self.data_provider = DataProvider()
+        self.dp_cob_received = 0
+
+        # Initialize COBIntegration (method 2)
+        self.cob_integration = None
+        self.cob_received = 0
+        if COB_INTEGRATION_AVAILABLE:
+            logger.info("Initializing COBIntegration...")
+            self.cob_integration = COBIntegration(symbols=[self.symbol])
+        else:
+            logger.warning("COBIntegration not available - will only test DataProvider")
+
+        self.start_time = None
+        self.subscriber_id = None
+
+    @staticmethod
+    def _mid_price_of(payload):
+        """Return payload['stats']['mid_price'], or None when it is missing."""
+        stats = payload.get('stats') if isinstance(payload, dict) else None
+        if not isinstance(stats, dict):
+            return None
+        return stats.get('mid_price')
+
+    @staticmethod
+    def _has_order_book(payload):
+        """Return True when the payload carries both 'bids' and 'asks'."""
+        return 'bids' in payload and 'asks' in payload
+
+    def _dp_payloads(self):
+        """Yield (timestamp, raw COB payload) for every tick that kept one."""
+        for tick in self.dp_ticks:
+            payload = getattr(tick, 'raw_data', None)
+            if payload:
+                yield tick.timestamp, payload
+
+    def _cob_payloads(self):
+        """Yield (receive time, payload) for every COBIntegration record."""
+        for record in self.cob_data:
+            yield record['timestamp'], record['data']
+
+    @classmethod
+    def _summarise(cls, payloads):
+        """Return (order book count, mid prices) for (time, payload) pairs."""
+        order_book_count = 0
+        mid_prices = []
+        for _, payload in payloads:
+            if cls._has_order_book(payload):
+                order_book_count += 1
+            mid_price = cls._mid_price_of(payload)
+            # A None mid price would break min()/max() in the report
+            if mid_price is not None:
+                mid_prices.append(mid_price)
+        return order_book_count, mid_prices
+
+    @classmethod
+    def _depth_series(cls, payloads):
+        """Return (times, bid level counts, ask level counts) for plotting."""
+        times, bid_counts, ask_counts = [], [], []
+        for timestamp, payload in payloads:
+            if cls._has_order_book(payload):
+                times.append(timestamp)
+                bid_counts.append(len(payload['bids']))
+                ask_counts.append(len(payload['asks']))
+        return times, bid_counts, ask_counts
+
+    def _dp_cob_callback(self, symbol: str, cob_data: dict):
+        """Callback for DataProvider COB data.
+
+        Counts every update and stores a synthetic MarketTick for each one
+        that carries a positive mid price.
+        """
+        self.dp_cob_received += 1
+
+        mid_price = self._mid_price_of(cob_data)
+        # Missing, None or non-positive mid prices cannot form a tick
+        if mid_price is None or mid_price <= 0:
+            return
+
+        synthetic_tick = MarketTick(
+            symbol=symbol,
+            timestamp=cob_data.get('timestamp', datetime.now()),
+            price=mid_price,
+            volume=cob_data['stats'].get('total_volume', 0),
+            quantity=0,
+            side='dp_cob',
+            trade_id=f"dp_{self.dp_cob_received}",
+            is_buyer_maker=False,
+            raw_data=cob_data
+        )
+        self.dp_ticks.append(synthetic_tick)
+
+        if self.dp_cob_received % 20 == 0:
+            logger.info(f"[DataProvider] Update #{self.dp_cob_received}: {symbol} @ ${mid_price:.2f}")
+
+    def _cob_integration_callback(self, symbol: str, data: dict):
+        """Callback for COBIntegration data.
+
+        Stores every payload together with the local receive time.
+        """
+        self.cob_received += 1
+
+        # Store COBIntegration data directly
+        self.cob_data.append({
+            'symbol': symbol,
+            'timestamp': datetime.now(),
+            'data': data,
+            'source': 'cob_integration'
+        })
+
+        if self.cob_received % 20 == 0:
+            # Fall back to 0 so a missing/None mid price cannot break the format
+            mid_price = self._mid_price_of(data) or 0
+            logger.info(f"[COBIntegration] Update #{self.cob_received}: {symbol} @ ${mid_price:.2f}")
+
+    async def _stop_sources(self):
+        """Stop both sources; a failure in one must not skip the other."""
+        try:
+            await self.data_provider.stop_real_time_streaming()
+        except Exception as e:
+            logger.error(f"Error stopping data collection: {e}")
+
+        if self.cob_integration:
+            try:
+                await self.cob_integration.stop()
+            except Exception as e:
+                logger.error(f"Error stopping data collection: {e}")
+
+    async def run_comparison_test(self):
+        """Run the comparison test.
+
+        Starts both sources (start-up failures are logged, not raised),
+        collects for ``self.duration``, stops the sources, then logs the
+        analysis and writes the comparison plot.
+        """
+        logger.info(f"Starting COB comparison test for {self.symbol} for {self.duration.total_seconds()} seconds...")
+
+        # Start DataProvider COB collection
+        try:
+            logger.info("Starting DataProvider COB collection...")
+            self.data_provider.start_cob_collection()
+            self.data_provider.subscribe_to_cob(self._dp_cob_callback)
+            await self.data_provider.start_real_time_streaming()
+            logger.info("DataProvider streaming started")
+        except Exception as e:
+            logger.error(f"Failed to start DataProvider: {e}")
+
+        # Start COBIntegration if available
+        if self.cob_integration:
+            try:
+                logger.info("Starting COBIntegration...")
+                self.cob_integration.add_dashboard_callback(self._cob_integration_callback)
+                await self.cob_integration.start()
+                logger.info("COBIntegration started")
+            except Exception as e:
+                logger.error(f"Failed to start COBIntegration: {e}")
+
+        # Collect data for specified duration; the finally clause makes sure
+        # the sources are stopped even when the loop is cancelled (Ctrl+C).
+        self.start_time = datetime.now()
+        try:
+            while datetime.now() - self.start_time < self.duration:
+                await asyncio.sleep(1)
+                logger.info(f"DataProvider: {len(self.dp_ticks)} ticks | COBIntegration: {len(self.cob_data)} updates")
+        finally:
+            await self._stop_sources()
+
+        logger.info("Comparison complete:")
+        logger.info(f"  DataProvider: {len(self.dp_ticks)} ticks received")
+        logger.info(f"  COBIntegration: {len(self.cob_data)} updates received")
+
+        # Analyze and plot the differences
+        self.analyze_differences()
+        self.create_comparison_plots()
+
+    def analyze_differences(self):
+        """Log update counts, order book coverage and price range per source."""
+        logger.info("Analyzing data quality differences...")
+
+        dp_order_book_count, dp_mid_prices = self._summarise(self._dp_payloads())
+        cob_order_book_count, cob_mid_prices = self._summarise(self._cob_payloads())
+
+        logger.info("Data Quality Analysis:")
+        logger.info("  DataProvider:")
+        logger.info(f"    Total updates: {len(self.dp_ticks)}")
+        logger.info(f"    With order book data: {dp_order_book_count}")
+        logger.info(f"    Mid prices collected: {len(dp_mid_prices)}")
+        if dp_mid_prices:
+            logger.info(f"    Price range: ${min(dp_mid_prices):.2f} - ${max(dp_mid_prices):.2f}")
+
+        logger.info("  COBIntegration:")
+        logger.info(f"    Total updates: {len(self.cob_data)}")
+        logger.info(f"    With order book data: {cob_order_book_count}")
+        logger.info(f"    Mid prices collected: {len(cob_mid_prices)}")
+        if cob_mid_prices:
+            logger.info(f"    Price range: ${min(cob_mid_prices):.2f} - ${max(cob_mid_prices):.2f}")
+
+    def create_comparison_plots(self):
+        """Plot price and order book depth of both sources and save a PNG."""
+        logger.info("Creating comparison plots...")
+
+        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12))
+
+        # Plot 1: Price comparison
+        dp_times = []
+        dp_prices = []
+        for tick in self.dp_ticks:
+            if tick.price > 0:
+                dp_times.append(tick.timestamp)
+                dp_prices.append(tick.price)
+
+        cob_times = []
+        cob_prices = []
+        for timestamp, payload in self._cob_payloads():
+            mid_price = self._mid_price_of(payload)
+            if mid_price is not None:
+                cob_times.append(timestamp)
+                cob_prices.append(mid_price)
+
+        if dp_times:
+            ax1.plot(pd.to_datetime(dp_times), dp_prices, 'b-', alpha=0.7, label='DataProvider COB', linewidth=1)
+        if cob_times:
+            ax1.plot(pd.to_datetime(cob_times), cob_prices, 'r-', alpha=0.7, label='COBIntegration', linewidth=1)
+
+        ax1.set_title('Price Comparison: DataProvider vs COBIntegration')
+        ax1.set_ylabel('Price (USDT)')
+        # legend() warns when nothing was plotted, so only add it with data
+        if ax1.get_legend_handles_labels()[0]:
+            ax1.legend()
+        ax1.grid(True, alpha=0.3)
+
+        # Plot 2: Data quality comparison (order book depth)
+        dp_ob_times, dp_bid_counts, dp_ask_counts = self._depth_series(self._dp_payloads())
+        cob_ob_times, cob_bid_counts, cob_ask_counts = self._depth_series(self._cob_payloads())
+
+        if dp_ob_times:
+            ax2.plot(pd.to_datetime(dp_ob_times), dp_bid_counts, 'b--', alpha=0.7, label='DP Bid Levels')
+            ax2.plot(pd.to_datetime(dp_ob_times), dp_ask_counts, 'b:', alpha=0.7, label='DP Ask Levels')
+        if cob_ob_times:
+            ax2.plot(pd.to_datetime(cob_ob_times), cob_bid_counts, 'r--', alpha=0.7, label='COB Bid Levels')
+            ax2.plot(pd.to_datetime(cob_ob_times), cob_ask_counts, 'r:', alpha=0.7, label='COB Ask Levels')
+
+        ax2.set_title('Order Book Depth Comparison')
+        ax2.set_ylabel('Number of Levels')
+        ax2.set_xlabel('Time')
+        if ax2.get_legend_handles_labels()[0]:
+            ax2.legend()
+        ax2.grid(True, alpha=0.3)
+
+        plt.tight_layout()
+
+        plot_filename = f"cob_comparison_{self.symbol.replace('/', '_')}_{datetime.now():%Y%m%d_%H%M%S}.png"
+        fig.savefig(plot_filename, dpi=150)
+        logger.info(f"Comparison plot saved to {plot_filename}")
+        plt.show()
+        # Release the figure so repeated runs do not accumulate open figures
+        plt.close(fig)
+
+
+async def main():
+    """Run the comparison test with the tester's default symbol and duration."""
+    await COBComparisonTester().run_comparison_test()
+
+
+if __name__ == "__main__":  # run the comparison only when executed as a script
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:  # Ctrl+C is an expected way to end the run early
+        logger.info("Test interrupted by user.")
--- a/tests/cob/test_cob_data_stability.py
+++ b/tests/cob/test_cob_data_stability.py
@@ -0,0 +1,274 @@
+import asyncio
+import logging
+import time
+from collections import deque
+from datetime import datetime, timedelta
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from matplotlib.colors import LogNorm
+
+from core.data_provider import DataProvider, MarketTick
+from core.config import get_config
+
+# Configure root logging (INFO level, timestamped format) and create this module's logger
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+
+class COBStabilityTester:
+    """Collect COB updates and ticks from DataProvider and chart them.
+
+    COB snapshots are drawn as an order book heatmap behind the mid price
+    line; if no COB data arrived, a plain price chart is drawn from ticks.
+    """
+
+    def __init__(self, symbol='ETHUSDT', duration_seconds=15):
+        """Create the DataProvider and the buffers filled by the callbacks.
+
+        Args:
+            symbol: Symbol used for the tick subscription, the price bucket
+                size and the plot file names.
+            duration_seconds: How long run_test() collects data.
+        """
+        self.symbol = symbol
+        self.duration = timedelta(seconds=duration_seconds)
+        self.ticks = deque()
+
+        # Set granularity (buckets) based on symbol
+        if 'ETH' in symbol.upper():
+            self.price_granularity = 1.0  # 1 USD for ETH
+        elif 'BTC' in symbol.upper():
+            self.price_granularity = 10.0  # 10 USD for BTC
+        else:
+            self.price_granularity = 1.0  # Default 1 USD
+
+        logger.info(f"Using price granularity: ${self.price_granularity} for {symbol}")
+
+        # Initialize DataProvider the same way as clean_dashboard
+        logger.info("Initializing DataProvider like in clean_dashboard...")
+        self.data_provider = DataProvider()  # Use default constructor like clean_dashboard
+
+        # Initialize COB data collection like clean_dashboard does
+        self.cob_data_received = 0
+        self.latest_cob_data = {}
+
+        # Store all COB snapshots for heatmap generation
+        self.cob_snapshots = deque()
+        self.price_data = []  # For price line chart
+
+        self.start_time = None
+        self.subscriber_id = None
+
+    def _tick_callback(self, tick: MarketTick):
+        """Callback function to receive ticks from the DataProvider."""
+        if self.start_time is None:
+            self.start_time = datetime.now()
+            logger.info(f"Started collecting ticks at {self.start_time}")
+
+        # Store all ticks
+        self.ticks.append(tick)
+
+    def _cob_data_callback(self, symbol: str, cob_data: dict):
+        """Callback function to receive COB data from the DataProvider.
+
+        Keeps the latest payload per symbol, stores a snapshot when both
+        book sides are present, and records a price point plus a synthetic
+        tick when the payload carries a positive mid price.
+        """
+        self.cob_data_received += 1
+        self.latest_cob_data[symbol] = cob_data
+
+        # One timestamp per update so snapshot, price point and tick agree
+        timestamp = cob_data.get('timestamp', datetime.now())
+        stats = cob_data.get('stats') or {}
+
+        # Store the complete COB snapshot for heatmap generation
+        if 'bids' in cob_data and 'asks' in cob_data:
+            self.cob_snapshots.append({
+                'timestamp': timestamp,
+                'bids': cob_data['bids'],
+                'asks': cob_data['asks'],
+                'stats': cob_data.get('stats', {})
+            })
+
+        # Convert COB data to tick-like format for analysis; a missing,
+        # None or non-positive mid price cannot form a price point
+        mid_price = stats.get('mid_price')
+        if mid_price is None or mid_price <= 0:
+            return
+
+        # Store price data for line chart
+        self.price_data.append({'timestamp': timestamp, 'price': mid_price})
+
+        # Create a synthetic tick from COB data
+        synthetic_tick = MarketTick(
+            symbol=symbol,
+            timestamp=timestamp,
+            price=mid_price,
+            volume=stats.get('total_volume', 0),
+            quantity=0,  # Not available in COB data
+            side='unknown',  # COB data doesn't have side info
+            trade_id=f"cob_{self.cob_data_received}",
+            is_buyer_maker=False,
+            raw_data=cob_data
+        )
+        self.ticks.append(synthetic_tick)
+
+        if self.cob_data_received % 10 == 0:  # Log every 10th update
+            logger.info(f"COB update #{self.cob_data_received}: {symbol} @ ${mid_price:.2f}")
+
+    async def _stop_collection(self):
+        """Stop streaming and drop the tick subscription, logging failures."""
+        try:
+            await self.data_provider.stop_real_time_streaming()
+        except Exception as e:
+            logger.error(f"Failed to stop real-time streaming: {e}")
+
+        # subscriber_id stays None when the tick subscription failed
+        if self.subscriber_id is not None:
+            try:
+                self.data_provider.unsubscribe_from_ticks(self.subscriber_id)
+            except Exception as e:
+                logger.warning(f"Failed to unsubscribe from ticks: {e}")
+
+    async def run_test(self):
+        """Run the data collection and plotting test.
+
+        Start-up failures are logged rather than raised; after
+        ``self.duration`` the provider is stopped and a chart is written
+        from whatever data arrived.
+        """
+        logger.info(f"Starting COB stability test for {self.symbol} for {self.duration.total_seconds()} seconds...")
+
+        # Initialize COB collection like clean_dashboard does
+        try:
+            logger.info("Starting COB collection in data provider...")
+            self.data_provider.start_cob_collection()
+            logger.info("Started COB collection in data provider")
+
+            # Subscribe to COB updates
+            logger.info("Subscribing to COB data updates...")
+            self.data_provider.subscribe_to_cob(self._cob_data_callback)
+            logger.info("Subscribed to COB data updates from data provider")
+        except Exception as e:
+            logger.error(f"Failed to start COB collection or subscribe: {e}")
+
+        # Subscribe to ticks as fallback
+        try:
+            self.subscriber_id = self.data_provider.subscribe_to_ticks(self._tick_callback, symbols=[self.symbol])
+            logger.info("Subscribed to tick data as fallback")
+        except Exception as e:
+            logger.warning(f"Failed to subscribe to ticks: {e}")
+
+        # Start the data provider's real-time streaming
+        try:
+            await self.data_provider.start_real_time_streaming()
+            logger.info("Started real-time streaming")
+        except Exception as e:
+            logger.error(f"Failed to start real-time streaming: {e}")
+
+        # Collect data for the specified duration; the finally clause stops
+        # the provider even when the loop is cancelled (Ctrl+C).
+        self.start_time = datetime.now()
+        try:
+            while datetime.now() - self.start_time < self.duration:
+                await asyncio.sleep(1)
+                logger.info(f"Collected {len(self.ticks)} ticks so far...")
+        finally:
+            await self._stop_collection()
+
+        logger.info(f"Finished collecting data. Total ticks: {len(self.ticks)}")
+
+        # Plot the results
+        if self.price_data and self.cob_snapshots:
+            self.create_price_heatmap_chart()
+        elif self.ticks:
+            self._create_simple_price_chart()
+        else:
+            logger.warning("No data was collected. Cannot generate plot.")
+
+    @staticmethod
+    def _price_bin_edges(low, high, granularity):
+        """Return histogram edges for the price buckets spanning low..high.
+
+        Buckets are centred on multiples of ``granularity`` (matching the
+        ``round(price / granularity) * granularity`` bucketing of the order
+        levels), and there is always at least one bucket even when
+        ``high - low`` is smaller than ``granularity``.
+        """
+        first = round(float(low) / granularity) * granularity
+        last = round(float(high) / granularity) * granularity
+        bucket_count = int(round((last - first) / granularity)) + 1
+        return first - granularity / 2 + granularity * np.arange(bucket_count + 1)
+
+    def create_price_heatmap_chart(self):
+        """Create a visualization with price chart and order book heatmap."""
+        if not self.price_data or not self.cob_snapshots:
+            logger.warning("Insufficient data to plot.")
+            return
+
+        logger.info("Creating price and order book heatmap chart...")
+
+        # Prepare data
+        price_df = pd.DataFrame(self.price_data)
+        price_df['timestamp'] = pd.to_datetime(price_df['timestamp'])
+
+        # Extract order book data for heatmap
+        # NOTE(review): assumes each level is a mapping with 'price' and
+        # 'size' keys - confirm against the DataProvider COB payload.
+        heatmap_data = []
+        for snapshot in self.cob_snapshots:
+            timestamp = snapshot['timestamp']
+            for side in ['bids', 'asks']:
+                for order in snapshot[side]:
+                    bucketed_price = round(order['price'] / self.price_granularity) * self.price_granularity
+                    heatmap_data.append({
+                        'time': timestamp,
+                        'price': bucketed_price,
+                        'size': order['size'],
+                        'side': side
+                    })
+
+        heatmap_df = pd.DataFrame(heatmap_data)
+
+        # Create plot
+        fig, ax1 = plt.subplots(figsize=(16, 8))
+
+        # Plot price line
+        ax1.plot(price_df['timestamp'], price_df['price'], 'cyan', linewidth=1, label='Price')
+
+        # Prepare heatmap
+        if heatmap_df.empty:
+            # Snapshots without any levels give a frame with no columns
+            logger.warning("Order book snapshots contain no levels; drawing price line only.")
+        else:
+            heatmap_df['second'] = heatmap_df['time'].astype(np.int64) // 10**9
+            price_edges = self._price_bin_edges(
+                price_df['price'].min(), price_df['price'].max(), self.price_granularity
+            )
+            for side, cmap in zip(['bids', 'asks'], ['Greens', 'Reds']):
+                side_df = heatmap_df[heatmap_df['side'] == side]
+                if side_df.empty:
+                    continue
+                seconds = np.unique(side_df['second'].to_numpy())
+                # N distinct seconds need N + 1 edges; without the closing
+                # edge the last second is merged into the previous bin and a
+                # single-second capture has no bin at all.
+                time_edges = np.append(seconds, seconds[-1] + 1)
+                hist, xedges, yedges = np.histogram2d(
+                    side_df['second'].to_numpy(),
+                    side_df['price'].to_numpy(),
+                    bins=[time_edges, price_edges],
+                    weights=side_df['size'].to_numpy()
+                )
+                ax1.pcolormesh(pd.to_datetime(xedges, unit='s'), yedges, hist.T, cmap=cmap, alpha=0.5)
+
+        # Enhance plot
+        ax1.set_title(f'Price Chart with Order Book Heatmap - {self.symbol}')
+        ax1.set_xlabel('Time')
+        ax1.set_ylabel('Price (USDT)')
+        ax1.legend(loc='upper left')
+        ax1.grid(True, alpha=0.3)
+
+        plt.tight_layout()
+        plot_filename = f"price_heatmap_chart_{self.symbol.replace('/', '_')}_{datetime.now():%Y%m%d_%H%M%S}.png"
+        fig.savefig(plot_filename, dpi=150)
+        logger.info(f"Price and heatmap chart saved to {plot_filename}")
+        plt.show()
+        # Release the figure so repeated runs do not accumulate open figures
+        plt.close(fig)
+
+    def _create_simple_price_chart(self):
+        """Create a simple price chart as fallback"""
+        logger.info("Creating simple price chart as fallback...")
+
+        prices = []
+        times = []
+
+        for tick in self.ticks:
+            if tick.price > 0:
+                prices.append(tick.price)
+                times.append(tick.timestamp)
+
+        if not prices:
+            logger.warning("No price data to plot")
+            return
+
+        fig, ax = plt.subplots(figsize=(15, 8))
+        ax.plot(pd.to_datetime(times), prices, 'cyan', linewidth=1)
+        ax.set_title(f'Price Chart - {self.symbol}')
+        ax.set_xlabel('Time')
+        ax.set_ylabel('Price (USDT)')
+        fig.autofmt_xdate()
+
+        plot_filename = f"cob_price_chart_{self.symbol.replace('/', '_')}_{datetime.now():%Y%m%d_%H%M%S}.png"
+        fig.savefig(plot_filename)
+        logger.info(f"Price chart saved to {plot_filename}")
+        plt.show()
+        # Release the figure so repeated runs do not accumulate open figures
+        plt.close(fig)
+
+
+async def main(symbol='ETHUSDT', duration_seconds=15):
+    """Build a COBStabilityTester for the given settings and run it.
+
+    Args:
+        symbol: Trading symbol (default: ETHUSDT)
+        duration_seconds: Test duration in seconds (default: 15)
+    """
+    logger.info(f"Starting COB test with symbol={symbol}, duration={duration_seconds}s")
+    await COBStabilityTester(symbol=symbol, duration_seconds=duration_seconds).run_test()
+
+
+if __name__ == "__main__":
+    import sys
+
+    # Parse command line arguments: [symbol] [duration_seconds]
+    symbol = sys.argv[1] if len(sys.argv) > 1 else 'ETHUSDT'  # Default
+    duration = 15  # Default
+
+    if len(sys.argv) > 2:
+        try:
+            requested_duration = int(sys.argv[2])
+        except ValueError:
+            requested_duration = None
+        # Zero or negative durations would end the collection loop at once,
+        # so they are rejected the same way as non-numeric input.
+        if requested_duration is not None and requested_duration > 0:
+            duration = requested_duration
+        else:
+            logger.warning(f"Invalid duration '{sys.argv[2]}', using default 15 seconds")
+
+    logger.info(f"Configuration: Symbol={symbol}, Duration={duration}s")
+    logger.info(f"Granularity: {'1 USD for ETH' if 'ETH' in symbol.upper() else '10 USD for BTC' if 'BTC' in symbol.upper() else '1 USD default'}")
+
+    try:
+        asyncio.run(main(symbol, duration))
+    except KeyboardInterrupt:  # Ctrl+C is an expected way to end the run early
+        logger.info("Test interrupted by user.")