cob data providers tests

This commit is contained in:
Dobromir Popov
2025-07-23 22:49:54 +03:00
parent c30267bf0b
commit 4765b1b1e1
5 changed files with 298 additions and 294 deletions

View File

@ -0,0 +1,276 @@
#!/usr/bin/env python3
"""
Compare COB data quality between DataProvider and COBIntegration
This test compares:
1. DataProvider COB collection (used in our test)
2. COBIntegration direct access (used in cob_realtime_dashboard.py)
To understand why cob_realtime_dashboard.py gets more stable data.
"""
import asyncio
import logging
import time
from collections import deque
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from core.data_provider import DataProvider, MarketTick
from core.config import get_config
# COBIntegration is optional: attempt the same import cob_realtime_dashboard
# uses and record whether it succeeded so the tester can skip that source.
try:
    from core.cob_integration import COBIntegration
except ImportError:
    COB_INTEGRATION_AVAILABLE = False
else:
    COB_INTEGRATION_AVAILABLE = True

# Configure logging
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
class COBComparisonTester:
    """Collect COB updates from DataProvider and COBIntegration side by side.

    Both sources are started, their callbacks buffer every update in memory
    for ``duration_seconds``, and the buffers are then summarised in the log
    and plotted to a PNG file.
    """

    def __init__(self, symbol='ETH/USDT', duration_seconds=15):
        """Create both data sources without starting them.

        Args:
            symbol: Symbol passed to COBIntegration and used in the plot filename.
            duration_seconds: How long run_comparison_test() collects data.
        """
        self.symbol = symbol
        self.duration = timedelta(seconds=duration_seconds)

        # Data storage for both methods
        self.dp_ticks = deque()   # synthetic MarketTick objects built from DataProvider COB updates
        self.cob_data = deque()   # raw COBIntegration payloads wrapped with a receive timestamp

        # Initialize DataProvider (method 1)
        logger.info("Initializing DataProvider...")
        self.data_provider = DataProvider()
        self.dp_cob_received = 0

        # Initialize COBIntegration (method 2)
        self.cob_integration = None
        self.cob_received = 0
        if COB_INTEGRATION_AVAILABLE:
            logger.info("Initializing COBIntegration...")
            self.cob_integration = COBIntegration(symbols=[self.symbol])
        else:
            logger.warning("COBIntegration not available - will only test DataProvider")

        self.start_time = None
        self.subscriber_id = None

    # ------------------------------------------------------------------
    # Payload helpers (pure functions shared by callbacks, analysis, plots)
    # ------------------------------------------------------------------
    @staticmethod
    def _mid_price(payload):
        """Return ``payload['stats']['mid_price']``, or None when it is missing."""
        if not payload:
            return None
        stats = payload.get('stats') or {}
        return stats.get('mid_price')

    @staticmethod
    def _depth(payload):
        """Return ``(bid_levels, ask_levels)`` or None when either side is absent."""
        if not payload or 'bids' not in payload or 'asks' not in payload:
            return None
        return len(payload['bids'] or ()), len(payload['asks'] or ())

    @classmethod
    def _summarize(cls, payloads):
        """Return ``(order_book_count, mid_prices)`` for an iterable of payloads."""
        order_book_count = 0
        mid_prices = []
        for payload in payloads:
            if cls._depth(payload) is not None:
                order_book_count += 1
            price = cls._mid_price(payload)
            if price is not None:
                mid_prices.append(price)
        return order_book_count, mid_prices

    @classmethod
    def _depth_series(cls, stamped_payloads):
        """Split ``(timestamp, payload)`` pairs into times, bid counts and ask counts."""
        times, bid_counts, ask_counts = [], [], []
        for timestamp, payload in stamped_payloads:
            depth = cls._depth(payload)
            if depth is None:
                continue
            times.append(timestamp)
            bid_counts.append(depth[0])
            ask_counts.append(depth[1])
        return times, bid_counts, ask_counts

    # ------------------------------------------------------------------
    # Source callbacks
    # ------------------------------------------------------------------
    def _dp_cob_callback(self, symbol: str, cob_data: dict):
        """Callback for DataProvider COB data.

        Updates with a positive mid price are stored as synthetic MarketTick
        objects; every update is counted, valid or not.
        """
        self.dp_cob_received += 1

        mid_price = self._mid_price(cob_data)
        if mid_price is None or not mid_price > 0:
            return

        self.dp_ticks.append(MarketTick(
            symbol=symbol,
            timestamp=cob_data.get('timestamp', datetime.now()),
            price=mid_price,
            volume=cob_data['stats'].get('total_volume', 0),
            quantity=0,
            side='dp_cob',
            trade_id=f"dp_{self.dp_cob_received}",
            is_buyer_maker=False,
            raw_data=cob_data,
        ))

        # Progress log; placed after the validity check so mid_price is always bound.
        if self.dp_cob_received % 20 == 0:
            logger.info(f"[DataProvider] Update #{self.dp_cob_received}: {symbol} @ ${mid_price:.2f}")

    def _cob_integration_callback(self, symbol: str, data: dict):
        """Callback for COBIntegration data; stores the payload with its receive time."""
        self.cob_received += 1

        # Store COBIntegration data directly
        self.cob_data.append({
            'symbol': symbol,
            'timestamp': datetime.now(),
            'data': data,
            'source': 'cob_integration',
        })

        if self.cob_received % 20 == 0:
            # Fall back to 0 so a missing/None mid price cannot break the format spec.
            mid_price = self._mid_price(data) or 0
            logger.info(f"[COBIntegration] Update #{self.cob_received}: {symbol} @ ${mid_price:.2f}")

    # ------------------------------------------------------------------
    # Test driver
    # ------------------------------------------------------------------
    async def run_comparison_test(self):
        """Start both sources, collect for ``self.duration``, stop them, then report."""
        logger.info(f"Starting COB comparison test for {self.symbol} for {self.duration.total_seconds()} seconds...")

        # Start DataProvider COB collection
        try:
            logger.info("Starting DataProvider COB collection...")
            self.data_provider.start_cob_collection()
            self.data_provider.subscribe_to_cob(self._dp_cob_callback)
            await self.data_provider.start_real_time_streaming()
            logger.info("DataProvider streaming started")
        except Exception as e:
            logger.error(f"Failed to start DataProvider: {e}")

        # Start COBIntegration if available
        if self.cob_integration:
            try:
                logger.info("Starting COBIntegration...")
                self.cob_integration.add_dashboard_callback(self._cob_integration_callback)
                await self.cob_integration.start()
                logger.info("COBIntegration started")
            except Exception as e:
                logger.error(f"Failed to start COBIntegration: {e}")

        # Collect data for specified duration; always stop the sources
        # afterwards, including when the loop is cancelled (e.g. Ctrl+C).
        self.start_time = datetime.now()
        try:
            while datetime.now() - self.start_time < self.duration:
                await asyncio.sleep(1)
                logger.info(f"DataProvider: {len(self.dp_ticks)} ticks | COBIntegration: {len(self.cob_data)} updates")
        finally:
            await self._stop_sources()

        logger.info("Comparison complete:")
        logger.info(f" DataProvider: {len(self.dp_ticks)} ticks received")
        logger.info(f" COBIntegration: {len(self.cob_data)} updates received")

        # Analyze and plot the differences
        self.analyze_differences()
        self.create_comparison_plots()

    async def _stop_sources(self):
        """Stop each source independently so one failure cannot skip the other."""
        try:
            await self.data_provider.stop_real_time_streaming()
        except Exception as e:
            logger.error(f"Error stopping data collection: {e}")
        if self.cob_integration:
            try:
                await self.cob_integration.stop()
            except Exception as e:
                logger.error(f"Error stopping data collection: {e}")

    # ------------------------------------------------------------------
    # Reporting
    # ------------------------------------------------------------------
    @staticmethod
    def _log_summary(label, total_updates, order_book_count, mid_prices):
        """Log the per-source quality figures."""
        logger.info(f" {label}:")
        logger.info(f" Total updates: {total_updates}")
        logger.info(f" With order book data: {order_book_count}")
        logger.info(f" Mid prices collected: {len(mid_prices)}")
        if mid_prices:
            logger.info(f" Price range: ${min(mid_prices):.2f} - ${max(mid_prices):.2f}")

    def analyze_differences(self):
        """Log update counts, order-book coverage and price range for both sources."""
        logger.info("Analyzing data quality differences...")

        dp_order_book_count, dp_mid_prices = self._summarize(
            getattr(tick, 'raw_data', None) for tick in self.dp_ticks
        )
        cob_order_book_count, cob_mid_prices = self._summarize(
            record['data'] for record in self.cob_data
        )

        logger.info("Data Quality Analysis:")
        self._log_summary("DataProvider", len(self.dp_ticks), dp_order_book_count, dp_mid_prices)
        self._log_summary("COBIntegration", len(self.cob_data), cob_order_book_count, cob_mid_prices)

    def create_comparison_plots(self):
        """Plot price and order-book depth for both sources and save the figure as PNG."""
        logger.info("Creating comparison plots...")
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12))

        # Plot 1: Price comparison
        dp_times, dp_prices = [], []
        for tick in self.dp_ticks:
            if tick.price > 0:
                dp_times.append(tick.timestamp)
                dp_prices.append(tick.price)

        cob_times, cob_prices = [], []
        for record in self.cob_data:
            price = self._mid_price(record['data'])
            if price is not None:
                cob_times.append(record['timestamp'])
                cob_prices.append(price)

        if dp_times:
            ax1.plot(pd.to_datetime(dp_times), dp_prices, 'b-', alpha=0.7, label='DataProvider COB', linewidth=1)
        if cob_times:
            ax1.plot(pd.to_datetime(cob_times), cob_prices, 'r-', alpha=0.7, label='COBIntegration', linewidth=1)
        ax1.set_title('Price Comparison: DataProvider vs COBIntegration')
        ax1.set_ylabel('Price (USDT)')
        if dp_times or cob_times:
            # legend() on axes without labelled artists only emits a warning
            ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Plot 2: Data quality comparison (order book depth)
        dp_ob_times, dp_bid_counts, dp_ask_counts = self._depth_series(
            (tick.timestamp, getattr(tick, 'raw_data', None)) for tick in self.dp_ticks
        )
        cob_ob_times, cob_bid_counts, cob_ask_counts = self._depth_series(
            (record['timestamp'], record['data']) for record in self.cob_data
        )

        if dp_ob_times:
            dp_x = pd.to_datetime(dp_ob_times)
            ax2.plot(dp_x, dp_bid_counts, 'b--', alpha=0.7, label='DP Bid Levels')
            ax2.plot(dp_x, dp_ask_counts, 'b:', alpha=0.7, label='DP Ask Levels')
        if cob_ob_times:
            cob_x = pd.to_datetime(cob_ob_times)
            ax2.plot(cob_x, cob_bid_counts, 'r--', alpha=0.7, label='COB Bid Levels')
            ax2.plot(cob_x, cob_ask_counts, 'r:', alpha=0.7, label='COB Ask Levels')
        ax2.set_title('Order Book Depth Comparison')
        ax2.set_ylabel('Number of Levels')
        ax2.set_xlabel('Time')
        if dp_ob_times or cob_ob_times:
            ax2.legend()
        ax2.grid(True, alpha=0.3)

        plt.tight_layout()
        plot_filename = f"cob_comparison_{self.symbol.replace('/', '_')}_{datetime.now():%Y%m%d_%H%M%S}.png"
        plt.savefig(plot_filename, dpi=150)
        logger.info(f"Comparison plot saved to {plot_filename}")
        plt.show()
        # Release the figure so repeated runs in one process do not accumulate them.
        plt.close(fig)
async def main():
    """Run one comparison with the tester's default symbol and duration."""
    await COBComparisonTester().run_comparison_test()
# Script entry point: run the comparison on a fresh event loop.
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl+C is an expected way to end the test early; log instead of a traceback.
        logger.info("Test interrupted by user.")

View File

@ -0,0 +1,274 @@
import asyncio
import logging
import time
from collections import deque
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.colors import LogNorm
from core.data_provider import DataProvider, MarketTick
from core.config import get_config
# Configure logging: INFO level, each record prefixed with time and level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by the tester class and the script entry point.
logger = logging.getLogger(__name__)
class COBStabilityTester:
    """Collect COB updates from a DataProvider for a fixed time and chart them.

    COB updates are buffered as full snapshots (for the order-book heatmap),
    as mid-price points (for the price line) and as synthetic ticks; plain
    ticks are collected as a fallback for a simple price chart.
    """

    def __init__(self, symbol='ETHUSDT', duration_seconds=15):
        """Create the DataProvider and empty buffers without starting anything.

        Args:
            symbol: Symbol used for the tick subscription, bucket size and filenames.
            duration_seconds: How long run_test() collects data.
        """
        self.symbol = symbol
        self.duration = timedelta(seconds=duration_seconds)
        self.ticks = deque()

        # Set granularity (buckets) based on symbol
        self.price_granularity = self._granularity_for(symbol)
        logger.info(f"Using price granularity: ${self.price_granularity} for {symbol}")

        # Initialize DataProvider the same way as clean_dashboard
        logger.info("Initializing DataProvider like in clean_dashboard...")
        self.data_provider = DataProvider()  # Use default constructor like clean_dashboard

        # Initialize COB data collection like clean_dashboard does
        self.cob_data_received = 0
        self.latest_cob_data = {}

        # Store all COB snapshots for heatmap generation
        self.cob_snapshots = deque()
        self.price_data = []  # For price line chart

        self.start_time = None
        self.subscriber_id = None

    @staticmethod
    def _granularity_for(symbol):
        """Return the price bucket size in USD: 1 for ETH, 10 for BTC, 1 otherwise."""
        upper = symbol.upper()
        if 'ETH' in upper:
            return 1.0
        if 'BTC' in upper:
            return 10.0
        return 1.0

    @staticmethod
    def _bin_edges(values, step):
        """Return histogram edges with one ``step``-wide bin centred on each grid value.

        The first bin is centred on ``min(values)`` and the last one covers
        ``max(values)``, so there are always at least two edges (one bin),
        even when all values are equal.
        """
        arr = np.asarray(values, dtype=float)
        lowest = arr.min()
        # +0.5 rounds to the nearest grid index so float noise cannot drop the top bin
        n_bins = int(np.floor((arr.max() - lowest) / step + 0.5)) + 1
        return lowest - step / 2 + step * np.arange(n_bins + 1)

    def _tick_callback(self, tick: "MarketTick"):
        """Callback function to receive ticks from the DataProvider."""
        if self.start_time is None:
            self.start_time = datetime.now()
            logger.info(f"Started collecting ticks at {self.start_time}")

        # Store all ticks
        self.ticks.append(tick)

    def _cob_data_callback(self, symbol: str, cob_data: dict):
        """Callback function to receive COB data from the DataProvider."""
        self.cob_data_received += 1
        self.latest_cob_data[symbol] = cob_data

        # Resolve the timestamp once so the snapshot, price point and synthetic
        # tick created from this update all carry the same time.
        timestamp = cob_data.get('timestamp', datetime.now())

        # Store the complete COB snapshot for heatmap generation
        if 'bids' in cob_data and 'asks' in cob_data:
            self.cob_snapshots.append({
                'timestamp': timestamp,
                'bids': cob_data['bids'],
                'asks': cob_data['asks'],
                'stats': cob_data.get('stats', {}),
            })

        # Convert COB data to tick-like format for analysis
        stats = cob_data.get('stats') or {}
        mid_price = stats.get('mid_price')
        if mid_price is None or not mid_price > 0:
            return

        # Store price data for line chart
        self.price_data.append({'timestamp': timestamp, 'price': mid_price})

        # Create a synthetic tick from COB data
        self.ticks.append(MarketTick(
            symbol=symbol,
            timestamp=timestamp,
            price=mid_price,
            volume=stats.get('total_volume', 0),
            quantity=0,  # Not available in COB data
            side='unknown',  # COB data doesn't have side info
            trade_id=f"cob_{self.cob_data_received}",
            is_buyer_maker=False,
            raw_data=cob_data,
        ))

        if self.cob_data_received % 10 == 0:  # Log every 10th update
            logger.info(f"COB update #{self.cob_data_received}: {symbol} @ ${mid_price:.2f}")

    async def run_test(self):
        """Run the data collection and plotting test."""
        logger.info(f"Starting COB stability test for {self.symbol} for {self.duration.total_seconds()} seconds...")

        # Initialize COB collection like clean_dashboard does
        try:
            logger.info("Starting COB collection in data provider...")
            self.data_provider.start_cob_collection()
            logger.info("Started COB collection in data provider")

            # Subscribe to COB updates
            logger.info("Subscribing to COB data updates...")
            self.data_provider.subscribe_to_cob(self._cob_data_callback)
            logger.info("Subscribed to COB data updates from data provider")
        except Exception as e:
            logger.error(f"Failed to start COB collection or subscribe: {e}")

        # Subscribe to ticks as fallback
        try:
            self.subscriber_id = self.data_provider.subscribe_to_ticks(self._tick_callback, symbols=[self.symbol])
            logger.info("Subscribed to tick data as fallback")
        except Exception as e:
            logger.warning(f"Failed to subscribe to ticks: {e}")

        # Start the data provider's real-time streaming
        try:
            await self.data_provider.start_real_time_streaming()
            logger.info("Started real-time streaming")
        except Exception as e:
            logger.error(f"Failed to start real-time streaming: {e}")

        # Collect data for the specified duration; always shut down afterwards,
        # including when the loop is cancelled (e.g. Ctrl+C).
        self.start_time = datetime.now()
        try:
            while datetime.now() - self.start_time < self.duration:
                await asyncio.sleep(1)
                logger.info(f"Collected {len(self.ticks)} ticks so far...")
        finally:
            await self._stop_collection()

        logger.info(f"Finished collecting data. Total ticks: {len(self.ticks)}")

        # Plot the results
        if self.price_data and self.cob_snapshots:
            self.create_price_heatmap_chart()
        elif self.ticks:
            self._create_simple_price_chart()
        else:
            logger.warning("No data was collected. Cannot generate plot.")

    async def _stop_collection(self):
        """Stop streaming and drop the tick subscription; failures are logged, not raised."""
        try:
            await self.data_provider.stop_real_time_streaming()
        except Exception as e:
            logger.error(f"Failed to stop real-time streaming: {e}")

        # subscriber_id stays None when the tick subscription failed earlier
        if self.subscriber_id is not None:
            try:
                self.data_provider.unsubscribe_from_ticks(self.subscriber_id)
            except Exception as e:
                logger.warning(f"Failed to unsubscribe from ticks: {e}")
            self.subscriber_id = None

    def create_price_heatmap_chart(self):
        """Create a visualization with price chart and order book heatmap."""
        if not self.price_data or not self.cob_snapshots:
            logger.warning("Insufficient data to plot.")
            return

        logger.info("Creating price and order book heatmap chart...")

        # Prepare data
        price_df = pd.DataFrame(self.price_data)
        price_df['timestamp'] = pd.to_datetime(price_df['timestamp'])

        # Extract order book data for heatmap.
        # NOTE(review): assumes each level is a mapping with 'price' and 'size' keys.
        heatmap_rows = []
        for snapshot in self.cob_snapshots:
            for side in ('bids', 'asks'):
                for order in snapshot[side]:
                    heatmap_rows.append({
                        'time': snapshot['timestamp'],
                        'price': round(order['price'] / self.price_granularity) * self.price_granularity,
                        'size': order['size'],
                        'side': side,
                    })
        # Explicit columns keep the frame well-formed when no levels were collected
        heatmap_df = pd.DataFrame(heatmap_rows, columns=['time', 'price', 'size', 'side'])
        heatmap_df['time'] = pd.to_datetime(heatmap_df['time'])

        # Create plot
        fig, ax1 = plt.subplots(figsize=(16, 8))

        # Plot price line
        ax1.plot(price_df['timestamp'], price_df['price'], 'cyan', linewidth=1, label='Price')

        # Prepare heatmap
        for side, cmap in (('bids', 'Greens'), ('asks', 'Reds')):
            side_df = heatmap_df[heatmap_df['side'] == side]
            if side_df.empty:
                continue
            seconds = side_df['time'].astype(np.int64) // 10**9
            # Bin over the order-book levels themselves (not the mid-price range),
            # one bin per second and per price bucket, so every level is drawn
            # and a single snapshot or a flat price still yields a valid grid.
            hist, xedges, yedges = np.histogram2d(
                seconds,
                side_df['price'],
                bins=[self._bin_edges(seconds, 1), self._bin_edges(side_df['price'], self.price_granularity)],
                weights=side_df['size'],
            )
            # Mask empty cells so one side's blank area does not tint the other side
            ax1.pcolormesh(
                pd.to_datetime(xedges, unit='s'),
                yedges,
                np.ma.masked_equal(hist.T, 0),
                cmap=cmap,
                alpha=0.5,
            )

        # Enhance plot
        ax1.set_title(f'Price Chart with Order Book Heatmap - {self.symbol}')
        ax1.set_xlabel('Time')
        ax1.set_ylabel('Price (USDT)')
        ax1.legend(loc='upper left')
        ax1.grid(True, alpha=0.3)

        plt.tight_layout()
        plot_filename = f"price_heatmap_chart_{self.symbol.replace('/', '_')}_{datetime.now():%Y%m%d_%H%M%S}.png"
        plt.savefig(plot_filename, dpi=150)
        logger.info(f"Price and heatmap chart saved to {plot_filename}")
        plt.show()
        # Release the figure so repeated runs in one process do not accumulate them.
        plt.close(fig)

    def _create_simple_price_chart(self):
        """Create a simple price chart as fallback"""
        logger.info("Creating simple price chart as fallback...")

        prices = []
        times = []
        for tick in self.ticks:
            if tick.price > 0:
                prices.append(tick.price)
                times.append(tick.timestamp)

        if not prices:
            logger.warning("No price data to plot")
            return

        fig, ax = plt.subplots(figsize=(15, 8))
        ax.plot(pd.to_datetime(times), prices, 'cyan', linewidth=1)
        ax.set_title(f'Price Chart - {self.symbol}')
        ax.set_xlabel('Time')
        ax.set_ylabel('Price (USDT)')
        fig.autofmt_xdate()

        plot_filename = f"cob_price_chart_{self.symbol.replace('/', '_')}_{datetime.now():%Y%m%d_%H%M%S}.png"
        plt.savefig(plot_filename)
        logger.info(f"Price chart saved to {plot_filename}")
        plt.show()
        plt.close(fig)
async def main(symbol='ETHUSDT', duration_seconds=15):
    """Run one COB stability test.

    Args:
        symbol: Trading symbol (default: ETHUSDT)
        duration_seconds: Test duration in seconds (default: 15)
    """
    logger.info(f"Starting COB test with symbol={symbol}, duration={duration_seconds}s")
    await COBStabilityTester(symbol=symbol, duration_seconds=duration_seconds).run_test()
if __name__ == "__main__":
    import sys

    # Positional command line arguments: [symbol] [duration_seconds]
    cli_args = sys.argv[1:]
    symbol = cli_args[0] if cli_args else 'ETHUSDT'
    duration = 15
    if len(cli_args) >= 2:
        try:
            duration = int(cli_args[1])
        except ValueError:
            logger.warning(f"Invalid duration '{sys.argv[2]}', using default 15 seconds")

    logger.info(f"Configuration: Symbol={symbol}, Duration={duration}s")

    # Describe the bucket size the tester will pick for this symbol
    symbol_upper = symbol.upper()
    if 'ETH' in symbol_upper:
        granularity_note = '1 USD for ETH'
    elif 'BTC' in symbol_upper:
        granularity_note = '10 USD for BTC'
    else:
        granularity_note = '1 USD default'
    logger.info(f"Granularity: {granularity_note}")

    try:
        asyncio.run(main(symbol, duration))
    except KeyboardInterrupt:
        logger.info("Test interrupted by user.")