gogo2/tests/cob/test_cob_comparison.py

#!/usr/bin/env python3
"""
Compare COB data quality between DataProvider and COBIntegration

This test compares:
1. DataProvider COB collection (used in our test)
2. COBIntegration direct access (used in cob_realtime_dashboard.py)

The goal is to understand why cob_realtime_dashboard.py gets more stable data.
"""

import asyncio
import logging
import time
from collections import deque
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from core.data_provider import DataProvider, MarketTick
from core.config import get_config

# Try to import COBIntegration like cob_realtime_dashboard does.
# The import is optional: on ImportError the flag is set to False and
# COBComparisonTester logs a warning and only tests DataProvider.
try:
    from core.cob_integration import COBIntegration
    COB_INTEGRATION_AVAILABLE = True
except ImportError:
    COB_INTEGRATION_AVAILABLE = False

# Configure logging: INFO level with timestamp and level name on every line.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger shared by everything in this file.
logger = logging.getLogger(__name__)


class COBComparisonTester:
    """Collect COB updates from DataProvider and COBIntegration side by side.

    Both sources are subscribed for a fixed duration. Afterwards the
    collected updates are summarised in the log and plotted to a PNG file.
    """

    def __init__(self, symbol='ETH/USDT', duration_seconds=15):
        """Create both data sources without starting them.

        Args:
            symbol: Trading pair passed to COBIntegration and used in the
                plot filename.
            duration_seconds: Length of the collection window used by
                run_comparison_test().
        """
        self.symbol = symbol
        self.duration = timedelta(seconds=duration_seconds)

        # Data storage for both methods
        self.dp_ticks = deque()  # DataProvider ticks
        self.cob_data = deque()  # COBIntegration data

        # Initialize DataProvider (method 1)
        logger.info("Initializing DataProvider...")
        self.data_provider = DataProvider()
        self.dp_cob_received = 0

        # Initialize COBIntegration (method 2)
        self.cob_integration = None
        self.cob_received = 0
        if COB_INTEGRATION_AVAILABLE:
            logger.info("Initializing COBIntegration...")
            self.cob_integration = COBIntegration(symbols=[self.symbol])
        else:
            logger.warning("COBIntegration not available - will only test DataProvider")

        self.start_time = None
        self.subscriber_id = None

    def _dp_cob_callback(self, symbol: str, cob_data: dict):
        """Callback for DataProvider COB data.

        Every call is counted. An update is stored as a synthetic MarketTick
        only when it carries a positive ``stats['mid_price']``; updates with
        missing, ``None`` or non-positive mid prices are skipped instead of
        raising inside the provider's callback.
        """
        self.dp_cob_received += 1

        # `or {}` also covers an explicit ``'stats': None`` entry.
        stats = cob_data.get('stats') or {}
        mid_price = stats.get('mid_price')
        # `not (x > 0)` rather than `x <= 0` so NaN is rejected as well.
        if mid_price is None or not (mid_price > 0):
            return

        synthetic_tick = MarketTick(
            symbol=symbol,
            timestamp=cob_data.get('timestamp', datetime.now()),
            price=mid_price,
            volume=stats.get('total_volume', 0),
            quantity=0,
            side='dp_cob',
            trade_id=f"dp_{self.dp_cob_received}",
            is_buyer_maker=False,
            raw_data=cob_data
        )
        self.dp_ticks.append(synthetic_tick)

        # Log only every 20th update to keep the output readable.
        if self.dp_cob_received % 20 == 0:
            logger.info(f"[DataProvider] Update #{self.dp_cob_received}: {symbol} @ ${mid_price:.2f}")

    def _cob_integration_callback(self, symbol: str, data: dict):
        """Callback for COBIntegration data.

        Stores every update unfiltered, stamped with the local receive time.
        """
        self.cob_received += 1

        # Store COBIntegration data directly
        cob_record = {
            'symbol': symbol,
            'timestamp': datetime.now(),
            'data': data,
            'source': 'cob_integration'
        }
        self.cob_data.append(cob_record)

        if self.cob_received % 20 == 0:
            stats = data.get('stats') or {}
            # Fall back to 0 so a missing/None mid price cannot break the format spec.
            mid_price = stats.get('mid_price') or 0
            logger.info(f"[COBIntegration] Update #{self.cob_received}: {symbol} @ ${mid_price:.2f}")

    async def _stop_sources(self):
        """Stop both data sources, each independently of the other.

        A failure while stopping one source is logged and does not prevent
        the other source from being stopped.
        """
        stoppers = [self.data_provider.stop_real_time_streaming]
        if self.cob_integration:
            stoppers.append(self.cob_integration.stop)

        for stop in stoppers:
            try:
                await stop()
            except Exception as e:
                logger.error(f"Error stopping data collection: {e}")

    async def run_comparison_test(self):
        """Run the comparison test.

        Starts both sources, collects for ``self.duration`` while logging a
        progress line every second, stops the sources, then analyses and
        plots the collected data. Start-up failures are logged and the test
        continues with whatever source is working.
        """
        logger.info(f"Starting COB comparison test for {self.symbol} for {self.duration.total_seconds()} seconds...")

        try:
            # Start DataProvider COB collection
            try:
                logger.info("Starting DataProvider COB collection...")
                self.data_provider.start_cob_collection()
                self.data_provider.subscribe_to_cob(self._dp_cob_callback)
                await self.data_provider.start_real_time_streaming()
                logger.info("DataProvider streaming started")
            except Exception as e:
                logger.error(f"Failed to start DataProvider: {e}")

            # Start COBIntegration if available
            if self.cob_integration:
                try:
                    logger.info("Starting COBIntegration...")
                    self.cob_integration.add_dashboard_callback(self._cob_integration_callback)
                    await self.cob_integration.start()
                    logger.info("COBIntegration started")
                except Exception as e:
                    logger.error(f"Failed to start COBIntegration: {e}")

            # Collect data for specified duration. start_time keeps the
            # wall-clock start; the loop itself uses a monotonic deadline so
            # system clock adjustments cannot stretch or shorten the window.
            self.start_time = datetime.now()
            deadline = time.monotonic() + self.duration.total_seconds()
            while time.monotonic() < deadline:
                await asyncio.sleep(1)
                logger.info(f"DataProvider: {len(self.dp_ticks)} ticks | COBIntegration: {len(self.cob_data)} updates")
        finally:
            # Stop data collection even when the test is cancelled or interrupted.
            await self._stop_sources()

        logger.info("Comparison complete:")
        logger.info(f"  DataProvider: {len(self.dp_ticks)} ticks received")
        logger.info(f"  COBIntegration: {len(self.cob_data)} updates received")

        # Analyze and plot the differences
        self.analyze_differences()
        self.create_comparison_plots()

    def _dp_payloads(self):
        """Return (timestamp, raw_data) pairs for DataProvider ticks that carry raw data."""
        return [
            (tick.timestamp, tick.raw_data)
            for tick in self.dp_ticks
            if getattr(tick, 'raw_data', None)
        ]

    def _cob_payloads(self):
        """Return (receive timestamp, data) pairs for all COBIntegration records."""
        return [(record['timestamp'], record['data']) for record in self.cob_data]

    @staticmethod
    def _mid_price_series(payloads):
        """Return (times, prices) for payloads whose stats hold a non-None mid price."""
        times, prices = [], []
        for stamp, payload in payloads:
            stats = payload.get('stats') or {}
            price = stats.get('mid_price')
            # None is excluded so min()/max() and ':.2f' formatting stay safe.
            if price is not None:
                times.append(stamp)
                prices.append(price)
        return times, prices

    @staticmethod
    def _depth_series(payloads):
        """Return (times, bid_counts, ask_counts) for payloads holding both book sides."""
        times, bid_counts, ask_counts = [], [], []
        for stamp, payload in payloads:
            if 'bids' in payload and 'asks' in payload:
                times.append(stamp)
                bid_counts.append(len(payload['bids']))
                ask_counts.append(len(payload['asks']))
        return times, bid_counts, ask_counts

    def analyze_differences(self):
        """Analyze the differences between the two data sources.

        Logs, per source: total updates, updates containing both 'bids' and
        'asks', the number of mid prices collected and their min/max range.
        """
        logger.info("Analyzing data quality differences...")

        dp_payloads = self._dp_payloads()
        cob_payloads = self._cob_payloads()

        summaries = (
            ("DataProvider", len(self.dp_ticks), dp_payloads),
            ("COBIntegration", len(self.cob_data), cob_payloads),
        )

        logger.info("Data Quality Analysis:")
        for label, total, payloads in summaries:
            order_book_count = len(self._depth_series(payloads)[0])
            _, mid_prices = self._mid_price_series(payloads)

            logger.info(f"  {label}:")
            logger.info(f"    Total updates: {total}")
            logger.info(f"    With order book data: {order_book_count}")
            logger.info(f"    Mid prices collected: {len(mid_prices)}")
            if mid_prices:
                logger.info(f"    Price range: ${min(mid_prices):.2f} - ${max(mid_prices):.2f}")

    def create_comparison_plots(self):
        """Create comparison plots showing the difference.

        Draws mid prices (top) and bid/ask level counts (bottom) for both
        sources, saves the figure as a timestamped PNG in the current
        working directory and then displays it.
        """
        logger.info("Creating comparison plots...")

        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12))
        try:
            # Plot 1: Price comparison
            dp_points = [(tick.timestamp, tick.price) for tick in self.dp_ticks if tick.price > 0]
            dp_times = [stamp for stamp, _ in dp_points]
            dp_prices = [price for _, price in dp_points]

            cob_payloads = self._cob_payloads()
            cob_times, cob_prices = self._mid_price_series(cob_payloads)

            if dp_times:
                ax1.plot(pd.to_datetime(dp_times), dp_prices, 'b-', alpha=0.7, label='DataProvider COB', linewidth=1)
            if cob_times:
                ax1.plot(pd.to_datetime(cob_times), cob_prices, 'r-', alpha=0.7, label='COBIntegration', linewidth=1)

            ax1.set_title('Price Comparison: DataProvider vs COBIntegration')
            ax1.set_ylabel('Price (USDT)')
            # Only draw a legend when something was plotted; an empty legend
            # just triggers a matplotlib warning.
            if ax1.get_legend_handles_labels()[0]:
                ax1.legend()
            ax1.grid(True, alpha=0.3)

            # Plot 2: Data quality comparison (order book depth)
            dp_ob_times, dp_bid_counts, dp_ask_counts = self._depth_series(self._dp_payloads())
            cob_ob_times, cob_bid_counts, cob_ask_counts = self._depth_series(cob_payloads)

            if dp_ob_times:
                dp_axis = pd.to_datetime(dp_ob_times)
                ax2.plot(dp_axis, dp_bid_counts, 'b--', alpha=0.7, label='DP Bid Levels')
                ax2.plot(dp_axis, dp_ask_counts, 'b:', alpha=0.7, label='DP Ask Levels')
            if cob_ob_times:
                cob_axis = pd.to_datetime(cob_ob_times)
                ax2.plot(cob_axis, cob_bid_counts, 'r--', alpha=0.7, label='COB Bid Levels')
                ax2.plot(cob_axis, cob_ask_counts, 'r:', alpha=0.7, label='COB Ask Levels')

            ax2.set_title('Order Book Depth Comparison')
            ax2.set_ylabel('Number of Levels')
            ax2.set_xlabel('Time')
            if ax2.get_legend_handles_labels()[0]:
                ax2.legend()
            ax2.grid(True, alpha=0.3)

            plt.tight_layout()

            plot_filename = f"cob_comparison_{self.symbol.replace('/', '_')}_{datetime.now():%Y%m%d_%H%M%S}.png"
            plt.savefig(plot_filename, dpi=150)
            logger.info(f"Comparison plot saved to {plot_filename}")
            plt.show()
        finally:
            # Release the figure so repeated runs do not accumulate open figures.
            plt.close(fig)


async def main():
    """Build a tester with the default symbol and duration and run it once."""
    await COBComparisonTester().run_comparison_test()


# Script entry point: run the comparison when executed directly.
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl-C is logged as an interruption rather than shown as a traceback.
        logger.info("Test interrupted by user.")