bucket aggregation
This commit is contained in:
489
COBY/connectors/binance_connector.py
Normal file
489
COBY/connectors/binance_connector.py
Normal file
@ -0,0 +1,489 @@
|
||||
"""
|
||||
Binance exchange connector implementation.
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Dict, List, Optional, Any
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from ..models.core import OrderBookSnapshot, TradeEvent, PriceLevel
|
||||
from ..utils.logging import get_logger, set_correlation_id
|
||||
from ..utils.exceptions import ValidationError
|
||||
from ..utils.validation import validate_symbol, validate_price, validate_volume
|
||||
from .base_connector import BaseExchangeConnector
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class BinanceConnector(BaseExchangeConnector):
    """
    Binance WebSocket connector implementation.

    Supports:
    - Order book depth streams
    - Trade streams
    - Symbol normalization
    - Real-time data processing
    """

    # Binance WebSocket URLs
    WEBSOCKET_URL = "wss://stream.binance.com:9443/ws"
    API_URL = "https://api.binance.com/api/v3"

    def __init__(self):
        """Initialize Binance connector"""
        super().__init__("binance", self.WEBSOCKET_URL)

        # Binance-specific message handlers — dispatch table keyed by the
        # message type returned from _get_message_type().
        self.message_handlers.update({
            'depthUpdate': self._handle_orderbook_update,
            'trade': self._handle_trade_update,
            'error': self._handle_error_message
        })

        # Stream management
        # active_streams: raw Binance stream names currently subscribed.
        # stream_id: monotonically increasing request id used for
        # SUBSCRIBE/UNSUBSCRIBE messages (incremented only on send success).
        self.active_streams: List[str] = []
        self.stream_id = 1

        logger.info("Binance connector initialized")

    def _get_message_type(self, data: Dict) -> str:
        """
        Determine message type from Binance message data.

        Args:
            data: Parsed message data

        Returns:
            str: Message type identifier ('depthUpdate', 'trade', 'error',
                 'subscription_response', or 'unknown')
        """
        # Binance uses 'e' field for event type
        if 'e' in data:
            return data['e']

        # Handle error messages
        if 'error' in data:
            return 'error'

        # Handle subscription confirmations (echo of the request 'id')
        if 'result' in data and 'id' in data:
            return 'subscription_response'

        return 'unknown'

    def normalize_symbol(self, symbol: str) -> str:
        """
        Normalize symbol to Binance format.

        Args:
            symbol: Standard symbol format (e.g., 'BTCUSDT')

        Returns:
            str: Binance symbol format (e.g., 'BTCUSDT')

        Raises:
            ValidationError: If the normalized symbol fails validation.
        """
        # Binance uses uppercase symbols without separators
        normalized = symbol.upper().replace('-', '').replace('/', '')

        # Validate symbol format
        if not validate_symbol(normalized):
            raise ValidationError(f"Invalid symbol format: {symbol}", "INVALID_SYMBOL")

        return normalized

    async def subscribe_orderbook(self, symbol: str) -> None:
        """
        Subscribe to order book depth updates for a symbol.

        Args:
            symbol: Trading symbol (e.g., 'BTCUSDT')

        Raises:
            Exception: Re-raises any error from normalization or sending.
        """
        try:
            set_correlation_id()
            normalized_symbol = self.normalize_symbol(symbol)
            # Diff-depth stream at 100ms cadence; stream names are lowercase.
            stream_name = f"{normalized_symbol.lower()}@depth@100ms"

            # Create subscription message
            subscription_msg = {
                "method": "SUBSCRIBE",
                "params": [stream_name],
                "id": self.stream_id
            }

            # Send subscription
            success = await self._send_message(subscription_msg)
            if success:
                # Track subscription under the caller-supplied (unnormalized) symbol
                if symbol not in self.subscriptions:
                    self.subscriptions[symbol] = []
                if 'orderbook' not in self.subscriptions[symbol]:
                    self.subscriptions[symbol].append('orderbook')

                self.active_streams.append(stream_name)
                self.stream_id += 1

                logger.info(f"Subscribed to order book for {symbol} on Binance")
            else:
                logger.error(f"Failed to subscribe to order book for {symbol} on Binance")

        except Exception as e:
            logger.error(f"Error subscribing to order book for {symbol}: {e}")
            raise

    async def subscribe_trades(self, symbol: str) -> None:
        """
        Subscribe to trade updates for a symbol.

        Args:
            symbol: Trading symbol (e.g., 'BTCUSDT')

        Raises:
            Exception: Re-raises any error from normalization or sending.
        """
        try:
            set_correlation_id()
            normalized_symbol = self.normalize_symbol(symbol)
            stream_name = f"{normalized_symbol.lower()}@trade"

            # Create subscription message
            subscription_msg = {
                "method": "SUBSCRIBE",
                "params": [stream_name],
                "id": self.stream_id
            }

            # Send subscription
            success = await self._send_message(subscription_msg)
            if success:
                # Track subscription
                if symbol not in self.subscriptions:
                    self.subscriptions[symbol] = []
                if 'trades' not in self.subscriptions[symbol]:
                    self.subscriptions[symbol].append('trades')

                self.active_streams.append(stream_name)
                self.stream_id += 1

                logger.info(f"Subscribed to trades for {symbol} on Binance")
            else:
                logger.error(f"Failed to subscribe to trades for {symbol} on Binance")

        except Exception as e:
            logger.error(f"Error subscribing to trades for {symbol}: {e}")
            raise

    async def unsubscribe_orderbook(self, symbol: str) -> None:
        """
        Unsubscribe from order book updates for a symbol.

        Args:
            symbol: Trading symbol (e.g., 'BTCUSDT')

        Raises:
            Exception: Re-raises any error from normalization or sending.
        """
        try:
            normalized_symbol = self.normalize_symbol(symbol)
            # Must match the stream name used in subscribe_orderbook().
            stream_name = f"{normalized_symbol.lower()}@depth@100ms"

            # Create unsubscription message
            unsubscription_msg = {
                "method": "UNSUBSCRIBE",
                "params": [stream_name],
                "id": self.stream_id
            }

            # Send unsubscription
            success = await self._send_message(unsubscription_msg)
            if success:
                # Remove from tracking; drop the symbol entry entirely once
                # it has no remaining subscription types.
                if symbol in self.subscriptions and 'orderbook' in self.subscriptions[symbol]:
                    self.subscriptions[symbol].remove('orderbook')
                    if not self.subscriptions[symbol]:
                        del self.subscriptions[symbol]

                if stream_name in self.active_streams:
                    self.active_streams.remove(stream_name)

                self.stream_id += 1
                logger.info(f"Unsubscribed from order book for {symbol} on Binance")
            else:
                logger.error(f"Failed to unsubscribe from order book for {symbol} on Binance")

        except Exception as e:
            logger.error(f"Error unsubscribing from order book for {symbol}: {e}")
            raise

    async def unsubscribe_trades(self, symbol: str) -> None:
        """
        Unsubscribe from trade updates for a symbol.

        Args:
            symbol: Trading symbol (e.g., 'BTCUSDT')

        Raises:
            Exception: Re-raises any error from normalization or sending.
        """
        try:
            normalized_symbol = self.normalize_symbol(symbol)
            stream_name = f"{normalized_symbol.lower()}@trade"

            # Create unsubscription message
            unsubscription_msg = {
                "method": "UNSUBSCRIBE",
                "params": [stream_name],
                "id": self.stream_id
            }

            # Send unsubscription
            success = await self._send_message(unsubscription_msg)
            if success:
                # Remove from tracking
                if symbol in self.subscriptions and 'trades' in self.subscriptions[symbol]:
                    self.subscriptions[symbol].remove('trades')
                    if not self.subscriptions[symbol]:
                        del self.subscriptions[symbol]

                if stream_name in self.active_streams:
                    self.active_streams.remove(stream_name)

                self.stream_id += 1
                logger.info(f"Unsubscribed from trades for {symbol} on Binance")
            else:
                logger.error(f"Failed to unsubscribe from trades for {symbol} on Binance")

        except Exception as e:
            logger.error(f"Error unsubscribing from trades for {symbol}: {e}")
            raise

    async def get_symbols(self) -> List[str]:
        """
        Get list of available trading symbols from Binance.

        Queries the REST /exchangeInfo endpoint and keeps only symbols
        whose status is 'TRADING'. Returns [] on any failure.

        Returns:
            List[str]: List of available symbols
        """
        try:
            # Imported lazily so the connector can be used without aiohttp
            # when REST helpers are not needed.
            import aiohttp

            async with aiohttp.ClientSession() as session:
                async with session.get(f"{self.API_URL}/exchangeInfo") as response:
                    if response.status == 200:
                        data = await response.json()
                        symbols = [
                            symbol_info['symbol']
                            for symbol_info in data.get('symbols', [])
                            if symbol_info.get('status') == 'TRADING'
                        ]
                        logger.info(f"Retrieved {len(symbols)} symbols from Binance")
                        return symbols
                    else:
                        logger.error(f"Failed to get symbols from Binance: HTTP {response.status}")
                        return []

        except Exception as e:
            logger.error(f"Error getting symbols from Binance: {e}")
            return []

    async def get_orderbook_snapshot(self, symbol: str, depth: int = 20) -> Optional[OrderBookSnapshot]:
        """
        Get current order book snapshot from Binance REST API.

        Args:
            symbol: Trading symbol
            depth: Number of price levels to retrieve (snapped to the nearest
                   depth Binance supports)

        Returns:
            OrderBookSnapshot: Current order book or None if unavailable
        """
        try:
            import aiohttp

            normalized_symbol = self.normalize_symbol(symbol)

            # Binance supports depths: 5, 10, 20, 50, 100, 500, 1000, 5000
            # — snap the requested depth to the closest supported value.
            valid_depths = [5, 10, 20, 50, 100, 500, 1000, 5000]
            api_depth = min(valid_depths, key=lambda x: abs(x - depth))

            url = f"{self.API_URL}/depth"
            params = {
                'symbol': normalized_symbol,
                'limit': api_depth
            }

            async with aiohttp.ClientSession() as session:
                async with session.get(url, params=params) as response:
                    if response.status == 200:
                        data = await response.json()
                        return self._parse_orderbook_snapshot(data, symbol)
                    else:
                        logger.error(f"Failed to get order book for {symbol}: HTTP {response.status}")
                        return None

        except Exception as e:
            logger.error(f"Error getting order book snapshot for {symbol}: {e}")
            return None

    def _parse_orderbook_snapshot(self, data: Dict, symbol: str) -> OrderBookSnapshot:
        """
        Parse Binance order book data into OrderBookSnapshot.

        Args:
            data: Raw Binance order book data ('bids'/'asks' as
                  [price, size] string pairs)
            symbol: Trading symbol

        Returns:
            OrderBookSnapshot: Parsed order book

        Raises:
            ValidationError: If the payload cannot be parsed.
        """
        try:
            # Parse bids and asks; levels failing validation are silently
            # skipped rather than failing the whole snapshot.
            bids = []
            for bid_data in data.get('bids', []):
                price = float(bid_data[0])
                size = float(bid_data[1])

                if validate_price(price) and validate_volume(size):
                    bids.append(PriceLevel(price=price, size=size))

            asks = []
            for ask_data in data.get('asks', []):
                price = float(ask_data[0])
                size = float(ask_data[1])

                if validate_price(price) and validate_volume(size):
                    asks.append(PriceLevel(price=price, size=size))

            # Create order book snapshot. The REST payload carries no event
            # time, so the snapshot is stamped with local UTC receive time.
            orderbook = OrderBookSnapshot(
                symbol=symbol,
                exchange=self.exchange_name,
                timestamp=datetime.now(timezone.utc),
                bids=bids,
                asks=asks,
                sequence_id=data.get('lastUpdateId')
            )

            return orderbook

        except Exception as e:
            logger.error(f"Error parsing order book snapshot: {e}")
            raise ValidationError(f"Invalid order book data: {e}", "PARSE_ERROR")

    async def _handle_orderbook_update(self, data: Dict) -> None:
        """
        Handle order book depth update from Binance.

        Args:
            data: Order book update data (depthUpdate event payload)
        """
        try:
            set_correlation_id()

            # Extract symbol from stream name
            stream = data.get('s', '').upper()
            if not stream:
                logger.warning("Order book update missing symbol")
                return

            # Parse bids and asks
            bids = []
            for bid_data in data.get('b', []):
                price = float(bid_data[0])
                size = float(bid_data[1])

                # NOTE(review): if validate_volume rejects 0, zero-size delta
                # entries (level removals) are dropped here — confirm intended.
                if validate_price(price) and validate_volume(size):
                    bids.append(PriceLevel(price=price, size=size))

            asks = []
            for ask_data in data.get('a', []):
                price = float(ask_data[0])
                size = float(ask_data[1])

                if validate_price(price) and validate_volume(size):
                    asks.append(PriceLevel(price=price, size=size))

            # Create order book snapshot.
            # NOTE(review): Binance depthUpdate 'b'/'a' are incremental deltas
            # per the Binance docs, not a full book; this emits them wrapped in
            # an OrderBookSnapshot — confirm downstream consumers expect deltas.
            orderbook = OrderBookSnapshot(
                symbol=stream,
                exchange=self.exchange_name,
                timestamp=datetime.fromtimestamp(data.get('E', 0) / 1000, tz=timezone.utc),
                bids=bids,
                asks=asks,
                sequence_id=data.get('u')  # Final update ID
            )

            # Notify callbacks
            self._notify_data_callbacks(orderbook)

            logger.debug(f"Processed order book update for {stream}")

        except Exception as e:
            logger.error(f"Error handling order book update: {e}")

    async def _handle_trade_update(self, data: Dict) -> None:
        """
        Handle trade update from Binance.

        Args:
            data: Trade update data (trade event payload)
        """
        try:
            set_correlation_id()

            # Extract trade data
            symbol = data.get('s', '').upper()
            if not symbol:
                logger.warning("Trade update missing symbol")
                return

            price = float(data.get('p', 0))
            size = float(data.get('q', 0))

            # Validate data; invalid trades are dropped, not raised.
            if not validate_price(price) or not validate_volume(size):
                logger.warning(f"Invalid trade data: price={price}, size={size}")
                return

            # Determine side (Binance uses 'm' field - true if buyer is market maker)
            # Buyer-is-maker means the aggressor was a seller, hence 'sell'.
            is_buyer_maker = data.get('m', False)
            side = 'sell' if is_buyer_maker else 'buy'

            # Create trade event ('T' is the trade time in ms epoch)
            trade = TradeEvent(
                symbol=symbol,
                exchange=self.exchange_name,
                timestamp=datetime.fromtimestamp(data.get('T', 0) / 1000, tz=timezone.utc),
                price=price,
                size=size,
                side=side,
                trade_id=str(data.get('t', ''))
            )

            # Notify callbacks
            self._notify_data_callbacks(trade)

            logger.debug(f"Processed trade for {symbol}: {side} {size} @ {price}")

        except Exception as e:
            logger.error(f"Error handling trade update: {e}")

    async def _handle_error_message(self, data: Dict) -> None:
        """
        Handle error message from Binance.

        Args:
            data: Error message data (expects 'code' and 'msg' fields)
        """
        error_code = data.get('code', 'unknown')
        error_msg = data.get('msg', 'Unknown error')

        logger.error(f"Binance error {error_code}: {error_msg}")

        # Handle specific error codes (logged only; no recovery taken here)
        if error_code == -1121:  # Invalid symbol
            logger.error("Invalid symbol error - check symbol format")
        elif error_code == -1130:  # Invalid listen key
            logger.error("Invalid listen key - may need to reconnect")

    def get_binance_stats(self) -> Dict[str, Any]:
        """Get Binance-specific statistics merged onto the base connector stats"""
        base_stats = self.get_stats()

        binance_stats = {
            'active_streams': len(self.active_streams),
            'stream_list': self.active_streams.copy(),
            'next_stream_id': self.stream_id
        }

        base_stats.update(binance_stats)
        return base_stats
|
168
COBY/examples/binance_example.py
Normal file
168
COBY/examples/binance_example.py
Normal file
@ -0,0 +1,168 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Example usage of Binance connector.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add COBY to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from connectors.binance_connector import BinanceConnector
|
||||
from utils.logging import setup_logging, get_logger
|
||||
from models.core import OrderBookSnapshot, TradeEvent
|
||||
|
||||
# Setup logging
|
||||
setup_logging(level='INFO', console_output=True)
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class BinanceExample:
    """Example Binance connector usage"""

    def __init__(self):
        # Connector under demonstration plus simple receive counters.
        self.connector = BinanceConnector()
        self.orderbook_count = 0
        self.trade_count = 0

        # Add data callbacks so received market data and status changes
        # are routed to the handlers below.
        self.connector.add_data_callback(self.on_data_received)
        self.connector.add_status_callback(self.on_status_changed)

    def on_data_received(self, data):
        """Handle received data (order book snapshots and trade events)"""
        if isinstance(data, OrderBookSnapshot):
            self.orderbook_count += 1
            logger.info(
                f"📊 Order Book {self.orderbook_count}: {data.symbol} - "
                f"Mid: ${data.mid_price:.2f}, Spread: ${data.spread:.2f}, "
                f"Bids: {len(data.bids)}, Asks: {len(data.asks)}"
            )

        elif isinstance(data, TradeEvent):
            self.trade_count += 1
            logger.info(
                f"💰 Trade {self.trade_count}: {data.symbol} - "
                f"{data.side.upper()} {data.size} @ ${data.price:.2f}"
            )

    def on_status_changed(self, exchange, status):
        """Handle status changes (log-only observer)"""
        logger.info(f"🔄 {exchange} status changed to: {status.value}")

    async def run_example(self):
        """Run the example: connect, subscribe, collect for 30s, then clean up"""
        try:
            logger.info("🚀 Starting Binance connector example")

            # Connect to Binance
            logger.info("🔌 Connecting to Binance...")
            connected = await self.connector.connect()

            if not connected:
                logger.error("❌ Failed to connect to Binance")
                return

            logger.info("✅ Connected to Binance successfully")

            # Get available symbols
            logger.info("📋 Getting available symbols...")
            symbols = await self.connector.get_symbols()
            logger.info(f"📋 Found {len(symbols)} trading symbols")

            # Show some popular symbols
            popular_symbols = ['BTCUSDT', 'ETHUSDT', 'ADAUSDT', 'BNBUSDT']
            available_popular = [s for s in popular_symbols if s in symbols]
            logger.info(f"📋 Popular symbols available: {available_popular}")

            # Get order book snapshot
            if 'BTCUSDT' in symbols:
                logger.info("📊 Getting BTC order book snapshot...")
                orderbook = await self.connector.get_orderbook_snapshot('BTCUSDT', depth=10)
                if orderbook:
                    logger.info(
                        f"📊 BTC Order Book: Mid=${orderbook.mid_price:.2f}, "
                        f"Spread=${orderbook.spread:.2f}"
                    )

            # Subscribe to real-time data
            logger.info("🔔 Subscribing to real-time data...")

            # Subscribe to BTC order book and trades
            if 'BTCUSDT' in symbols:
                await self.connector.subscribe_orderbook('BTCUSDT')
                await self.connector.subscribe_trades('BTCUSDT')
                logger.info("✅ Subscribed to BTCUSDT order book and trades")

            # Subscribe to ETH order book
            if 'ETHUSDT' in symbols:
                await self.connector.subscribe_orderbook('ETHUSDT')
                logger.info("✅ Subscribed to ETHUSDT order book")

            # Let it run for a while; callbacks fire in the background.
            logger.info("⏳ Collecting data for 30 seconds...")
            await asyncio.sleep(30)

            # Show statistics
            stats = self.connector.get_binance_stats()
            logger.info("📈 Final Statistics:")
            logger.info(f"   📊 Order books received: {self.orderbook_count}")
            logger.info(f"   💰 Trades received: {self.trade_count}")
            logger.info(f"   📡 Total messages: {stats['message_count']}")
            logger.info(f"   ❌ Errors: {stats['error_count']}")
            logger.info(f"   🔗 Active streams: {stats['active_streams']}")
            logger.info(f"   📋 Subscriptions: {list(stats['subscriptions'].keys())}")

            # Unsubscribe and disconnect
            logger.info("🔌 Cleaning up...")

            if 'BTCUSDT' in self.connector.subscriptions:
                await self.connector.unsubscribe_orderbook('BTCUSDT')
                await self.connector.unsubscribe_trades('BTCUSDT')

            if 'ETHUSDT' in self.connector.subscriptions:
                await self.connector.unsubscribe_orderbook('ETHUSDT')

            await self.connector.disconnect()
            logger.info("✅ Disconnected successfully")

        except KeyboardInterrupt:
            logger.info("⏹️ Interrupted by user")
        except Exception as e:
            logger.error(f"❌ Example failed: {e}")
        finally:
            # Ensure cleanup — best-effort disconnect; the bare except is
            # deliberate so shutdown errors never mask the original failure.
            try:
                await self.connector.disconnect()
            except:
                pass
|
||||
|
||||
|
||||
async def main():
    """Entry point: build the example object and run it to completion."""
    await BinanceExample().run_example()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Print a short description of the demo before starting it.
    print("Binance Connector Example")
    print("=" * 25)
    print("This example will:")
    print("1. Connect to Binance WebSocket")
    print("2. Get available trading symbols")
    print("3. Subscribe to real-time order book and trade data")
    print("4. Display received data for 30 seconds")
    print("5. Show statistics and disconnect")
    print()
    print("Press Ctrl+C to stop early")
    print("=" * 25)

    try:
        # Run the async entry point; exits nonzero on unexpected failure.
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n👋 Example stopped by user")
    except Exception as e:
        print(f"\n❌ Example failed: {e}")
        sys.exit(1)
|
15
COBY/processing/__init__.py
Normal file
15
COBY/processing/__init__.py
Normal file
@ -0,0 +1,15 @@
|
||||
"""
|
||||
Data processing and normalization components for the COBY system.
|
||||
"""
|
||||
|
||||
from .data_processor import StandardDataProcessor
|
||||
from .quality_checker import DataQualityChecker
|
||||
from .anomaly_detector import AnomalyDetector
|
||||
from .metrics_calculator import MetricsCalculator
|
||||
|
||||
__all__ = [
|
||||
'StandardDataProcessor',
|
||||
'DataQualityChecker',
|
||||
'AnomalyDetector',
|
||||
'MetricsCalculator'
|
||||
]
|
329
COBY/processing/anomaly_detector.py
Normal file
329
COBY/processing/anomaly_detector.py
Normal file
@ -0,0 +1,329 @@
|
||||
"""
|
||||
Anomaly detection for market data.
|
||||
"""
|
||||
|
||||
import statistics
|
||||
from typing import Dict, List, Union, Optional, Deque
|
||||
from collections import deque
|
||||
from datetime import datetime, timedelta
|
||||
from ..models.core import OrderBookSnapshot, TradeEvent
|
||||
from ..utils.logging import get_logger
|
||||
from ..utils.timing import get_current_timestamp
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class AnomalyDetector:
    """
    Detects anomalies in market data using statistical methods.

    Detects:
    - Price spikes and drops
    - Volume anomalies
    - Spread anomalies
    - Frequency anomalies

    Protocol: each detect_* call first compares the new observation against
    the existing rolling window (z-score test), and only afterwards appends
    it to the window — so an observation never dilutes its own baseline.
    """

    def __init__(self, window_size: int = 100, z_score_threshold: float = 3.0):
        """
        Initialize anomaly detector.

        Args:
            window_size: Size of rolling window for statistics
            z_score_threshold: Z-score threshold for anomaly detection
        """
        self.window_size = window_size
        self.z_score_threshold = z_score_threshold

        # Rolling windows for statistics, keyed by "<symbol>_<exchange>"
        # (plus a "_volume"/"_spread"/"_timestamp" suffix for the
        # corresponding metric windows).
        self.price_windows: Dict[str, Deque[float]] = {}
        self.volume_windows: Dict[str, Deque[float]] = {}
        self.spread_windows: Dict[str, Deque[float]] = {}
        self.timestamp_windows: Dict[str, Deque[datetime]] = {}

        logger.info(f"Anomaly detector initialized with window_size={window_size}, threshold={z_score_threshold}")

    def detect_orderbook_anomalies(self, orderbook: OrderBookSnapshot) -> List[str]:
        """
        Detect anomalies in order book data.

        Args:
            orderbook: Order book snapshot to analyze

        Returns:
            List[str]: List of detected anomalies (empty if none; detection
            errors are reported as entries rather than raised)
        """
        anomalies = []
        key = f"{orderbook.symbol}_{orderbook.exchange}"

        try:
            # Price anomalies (skipped when mid_price is falsy/unavailable)
            if orderbook.mid_price:
                price_anomalies = self._detect_price_anomalies(key, orderbook.mid_price)
                anomalies.extend(price_anomalies)

            # Volume anomalies (combined bid+ask volume)
            total_volume = orderbook.bid_volume + orderbook.ask_volume
            volume_anomalies = self._detect_volume_anomalies(key, total_volume)
            anomalies.extend(volume_anomalies)

            # Spread anomalies, as a percentage of mid price
            if orderbook.spread and orderbook.mid_price:
                spread_pct = (orderbook.spread / orderbook.mid_price) * 100
                spread_anomalies = self._detect_spread_anomalies(key, spread_pct)
                anomalies.extend(spread_anomalies)

            # Frequency anomalies (update-interval outliers)
            frequency_anomalies = self._detect_frequency_anomalies(key, orderbook.timestamp)
            anomalies.extend(frequency_anomalies)

            # Update windows AFTER detection so this snapshot does not
            # contribute to its own baseline.
            self._update_windows(key, orderbook)

        except Exception as e:
            logger.error(f"Error detecting order book anomalies: {e}")
            anomalies.append(f"Anomaly detection error: {e}")

        if anomalies:
            logger.warning(f"Anomalies detected in {orderbook.symbol}@{orderbook.exchange}: {anomalies}")

        return anomalies

    def detect_trade_anomalies(self, trade: TradeEvent) -> List[str]:
        """
        Detect anomalies in trade data.

        Args:
            trade: Trade event to analyze

        Returns:
            List[str]: List of detected anomalies (empty if none)
        """
        anomalies = []
        # Separate "_trade" key space so trade statistics never mix with
        # order book statistics for the same symbol/exchange.
        key = f"{trade.symbol}_{trade.exchange}_trade"

        try:
            # Price anomalies
            price_anomalies = self._detect_price_anomalies(key, trade.price)
            anomalies.extend(price_anomalies)

            # Volume anomalies
            volume_anomalies = self._detect_volume_anomalies(key, trade.size)
            anomalies.extend(volume_anomalies)

            # Update windows (after detection, as above)
            self._update_trade_windows(key, trade)

        except Exception as e:
            logger.error(f"Error detecting trade anomalies: {e}")
            anomalies.append(f"Anomaly detection error: {e}")

        if anomalies:
            logger.warning(f"Trade anomalies detected in {trade.symbol}@{trade.exchange}: {anomalies}")

        return anomalies

    def _detect_price_anomalies(self, key: str, price: float) -> List[str]:
        """Detect price anomalies using z-score"""
        anomalies = []

        # First sighting of this key: create the window, no baseline yet.
        if key not in self.price_windows:
            self.price_windows[key] = deque(maxlen=self.window_size)
            return anomalies

        window = self.price_windows[key]
        if len(window) < 10:  # Need minimum data points
            return anomalies

        try:
            mean_price = statistics.mean(window)
            std_price = statistics.stdev(window)

            if std_price > 0:
                z_score = abs(price - mean_price) / std_price

                if z_score > self.z_score_threshold:
                    direction = "spike" if price > mean_price else "drop"
                    anomalies.append(f"Price {direction}: {price:.6f} (z-score: {z_score:.2f})")

        except statistics.StatisticsError:
            pass  # Not enough data or all values are the same

        return anomalies

    def _detect_volume_anomalies(self, key: str, volume: float) -> List[str]:
        """Detect volume anomalies using z-score"""
        anomalies = []

        volume_key = f"{key}_volume"
        if volume_key not in self.volume_windows:
            self.volume_windows[volume_key] = deque(maxlen=self.window_size)
            return anomalies

        window = self.volume_windows[volume_key]
        if len(window) < 10:
            return anomalies

        try:
            mean_volume = statistics.mean(window)
            std_volume = statistics.stdev(window)

            if std_volume > 0:
                z_score = abs(volume - mean_volume) / std_volume

                if z_score > self.z_score_threshold:
                    direction = "spike" if volume > mean_volume else "drop"
                    anomalies.append(f"Volume {direction}: {volume:.6f} (z-score: {z_score:.2f})")

        except statistics.StatisticsError:
            pass

        return anomalies

    def _detect_spread_anomalies(self, key: str, spread_pct: float) -> List[str]:
        """Detect spread anomalies using z-score (spread given in percent of mid)"""
        anomalies = []

        spread_key = f"{key}_spread"
        if spread_key not in self.spread_windows:
            self.spread_windows[spread_key] = deque(maxlen=self.window_size)
            return anomalies

        window = self.spread_windows[spread_key]
        if len(window) < 10:
            return anomalies

        try:
            mean_spread = statistics.mean(window)
            std_spread = statistics.stdev(window)

            if std_spread > 0:
                z_score = abs(spread_pct - mean_spread) / std_spread

                if z_score > self.z_score_threshold:
                    direction = "widening" if spread_pct > mean_spread else "tightening"
                    anomalies.append(f"Spread {direction}: {spread_pct:.4f}% (z-score: {z_score:.2f})")

        except statistics.StatisticsError:
            pass

        return anomalies

    def _detect_frequency_anomalies(self, key: str, timestamp: datetime) -> List[str]:
        """Detect frequency anomalies in data updates (unusual inter-arrival times)"""
        anomalies = []

        timestamp_key = f"{key}_timestamp"
        if timestamp_key not in self.timestamp_windows:
            self.timestamp_windows[timestamp_key] = deque(maxlen=self.window_size)
            return anomalies

        window = self.timestamp_windows[timestamp_key]
        if len(window) < 5:
            return anomalies

        try:
            # Calculate intervals between historical updates
            intervals = []
            for i in range(1, len(window)):
                interval = (window[i] - window[i-1]).total_seconds()
                intervals.append(interval)

            if len(intervals) >= 5:
                mean_interval = statistics.mean(intervals)
                std_interval = statistics.stdev(intervals)

                # Check current interval (time since the last stored update)
                current_interval = (timestamp - window[-1]).total_seconds()

                if std_interval > 0:
                    z_score = abs(current_interval - mean_interval) / std_interval

                    if z_score > self.z_score_threshold:
                        if current_interval > mean_interval:
                            anomalies.append(f"Update delay: {current_interval:.1f}s (expected: {mean_interval:.1f}s)")
                        else:
                            anomalies.append(f"Update burst: {current_interval:.1f}s (expected: {mean_interval:.1f}s)")

        except (statistics.StatisticsError, IndexError):
            pass

        return anomalies

    def _update_windows(self, key: str, orderbook: OrderBookSnapshot) -> None:
        """Update rolling windows with new order book data"""
        # Update price window
        if orderbook.mid_price:
            if key not in self.price_windows:
                self.price_windows[key] = deque(maxlen=self.window_size)
            self.price_windows[key].append(orderbook.mid_price)

        # Update volume window
        total_volume = orderbook.bid_volume + orderbook.ask_volume
        volume_key = f"{key}_volume"
        if volume_key not in self.volume_windows:
            self.volume_windows[volume_key] = deque(maxlen=self.window_size)
        self.volume_windows[volume_key].append(total_volume)

        # Update spread window (same percent-of-mid units as detection)
        if orderbook.spread and orderbook.mid_price:
            spread_pct = (orderbook.spread / orderbook.mid_price) * 100
            spread_key = f"{key}_spread"
            if spread_key not in self.spread_windows:
                self.spread_windows[spread_key] = deque(maxlen=self.window_size)
            self.spread_windows[spread_key].append(spread_pct)

        # Update timestamp window
        timestamp_key = f"{key}_timestamp"
        if timestamp_key not in self.timestamp_windows:
            self.timestamp_windows[timestamp_key] = deque(maxlen=self.window_size)
        self.timestamp_windows[timestamp_key].append(orderbook.timestamp)

    def _update_trade_windows(self, key: str, trade: TradeEvent) -> None:
        """Update rolling windows with trade data (price and size only)"""
        # Update price window
        if key not in self.price_windows:
            self.price_windows[key] = deque(maxlen=self.window_size)
        self.price_windows[key].append(trade.price)

        # Update volume window
        volume_key = f"{key}_volume"
        if volume_key not in self.volume_windows:
            self.volume_windows[volume_key] = deque(maxlen=self.window_size)
        self.volume_windows[volume_key].append(trade.size)

    def get_statistics(self) -> Dict[str, Dict[str, float]]:
        """Get current price statistics for all tracked keys (price windows only)"""
        stats = {}

        for key, window in self.price_windows.items():
            if len(window) >= 2:
                try:
                    stats[key] = {
                        'price_mean': statistics.mean(window),
                        'price_std': statistics.stdev(window),
                        'price_min': min(window),
                        'price_max': max(window),
                        'data_points': len(window)
                    }
                except statistics.StatisticsError:
                    stats[key] = {'error': 'insufficient_data'}

        return stats

    def reset_windows(self, key: Optional[str] = None) -> None:
        """Reset rolling windows for a specific key or all keys"""
        if key:
            # Reset specific key (and its derived metric windows)
            self.price_windows.pop(key, None)
            self.volume_windows.pop(f"{key}_volume", None)
            self.spread_windows.pop(f"{key}_spread", None)
            self.timestamp_windows.pop(f"{key}_timestamp", None)
        else:
            # Reset all windows
            self.price_windows.clear()
            self.volume_windows.clear()
            self.spread_windows.clear()
            self.timestamp_windows.clear()

        logger.info(f"Reset anomaly detection windows for {key or 'all keys'}")
|
378
COBY/processing/data_processor.py
Normal file
378
COBY/processing/data_processor.py
Normal file
@ -0,0 +1,378 @@
|
||||
"""
|
||||
Main data processor implementation.
|
||||
"""
|
||||
|
||||
from typing import Dict, Union, List, Optional, Any
|
||||
from ..interfaces.data_processor import DataProcessor
|
||||
from ..models.core import OrderBookSnapshot, TradeEvent, OrderBookMetrics
|
||||
from ..utils.logging import get_logger, set_correlation_id
|
||||
from ..utils.exceptions import ValidationError, ProcessingError
|
||||
from ..utils.timing import get_current_timestamp
|
||||
from .quality_checker import DataQualityChecker
|
||||
from .anomaly_detector import AnomalyDetector
|
||||
from .metrics_calculator import MetricsCalculator
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class StandardDataProcessor(DataProcessor):
|
||||
"""
|
||||
Standard implementation of data processor interface.
|
||||
|
||||
Provides:
|
||||
- Data normalization and validation
|
||||
- Quality checking
|
||||
- Anomaly detection
|
||||
- Metrics calculation
|
||||
- Data enrichment
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize data processor with components"""
|
||||
self.quality_checker = DataQualityChecker()
|
||||
self.anomaly_detector = AnomalyDetector()
|
||||
self.metrics_calculator = MetricsCalculator()
|
||||
|
||||
# Processing statistics
|
||||
self.processed_orderbooks = 0
|
||||
self.processed_trades = 0
|
||||
self.quality_failures = 0
|
||||
self.anomalies_detected = 0
|
||||
|
||||
logger.info("Standard data processor initialized")
|
||||
|
||||
def normalize_orderbook(self, raw_data: Dict, exchange: str) -> OrderBookSnapshot:
|
||||
"""
|
||||
Normalize raw order book data to standard format.
|
||||
|
||||
Args:
|
||||
raw_data: Raw order book data from exchange
|
||||
exchange: Exchange name
|
||||
|
||||
Returns:
|
||||
OrderBookSnapshot: Normalized order book data
|
||||
"""
|
||||
try:
|
||||
set_correlation_id()
|
||||
|
||||
# This is a generic implementation - specific exchanges would override
|
||||
# For now, assume data is already in correct format
|
||||
if isinstance(raw_data, OrderBookSnapshot):
|
||||
return raw_data
|
||||
|
||||
# If raw_data is a dict, try to construct OrderBookSnapshot
|
||||
# This would be customized per exchange
|
||||
raise NotImplementedError(
|
||||
"normalize_orderbook should be implemented by exchange-specific processors"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error normalizing order book data: {e}")
|
||||
raise ProcessingError(f"Normalization failed: {e}", "NORMALIZE_ERROR")
|
||||
|
||||
def normalize_trade(self, raw_data: Dict, exchange: str) -> TradeEvent:
|
||||
"""
|
||||
Normalize raw trade data to standard format.
|
||||
|
||||
Args:
|
||||
raw_data: Raw trade data from exchange
|
||||
exchange: Exchange name
|
||||
|
||||
Returns:
|
||||
TradeEvent: Normalized trade data
|
||||
"""
|
||||
try:
|
||||
set_correlation_id()
|
||||
|
||||
# This is a generic implementation - specific exchanges would override
|
||||
if isinstance(raw_data, TradeEvent):
|
||||
return raw_data
|
||||
|
||||
# If raw_data is a dict, try to construct TradeEvent
|
||||
# This would be customized per exchange
|
||||
raise NotImplementedError(
|
||||
"normalize_trade should be implemented by exchange-specific processors"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error normalizing trade data: {e}")
|
||||
raise ProcessingError(f"Normalization failed: {e}", "NORMALIZE_ERROR")
|
||||
|
||||
def validate_data(self, data: Union[OrderBookSnapshot, TradeEvent]) -> bool:
|
||||
"""
|
||||
Validate normalized data for quality and consistency.
|
||||
|
||||
Args:
|
||||
data: Normalized data to validate
|
||||
|
||||
Returns:
|
||||
bool: True if data is valid, False otherwise
|
||||
"""
|
||||
try:
|
||||
set_correlation_id()
|
||||
|
||||
if isinstance(data, OrderBookSnapshot):
|
||||
quality_score, issues = self.quality_checker.check_orderbook_quality(data)
|
||||
self.processed_orderbooks += 1
|
||||
|
||||
if quality_score < 0.5: # Threshold for acceptable quality
|
||||
self.quality_failures += 1
|
||||
logger.warning(f"Low quality order book data: score={quality_score:.2f}, issues={issues}")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
elif isinstance(data, TradeEvent):
|
||||
quality_score, issues = self.quality_checker.check_trade_quality(data)
|
||||
self.processed_trades += 1
|
||||
|
||||
if quality_score < 0.5:
|
||||
self.quality_failures += 1
|
||||
logger.warning(f"Low quality trade data: score={quality_score:.2f}, issues={issues}")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
else:
|
||||
logger.error(f"Unknown data type for validation: {type(data)}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error validating data: {e}")
|
||||
return False
|
||||
|
||||
def calculate_metrics(self, orderbook: OrderBookSnapshot) -> OrderBookMetrics:
|
||||
"""
|
||||
Calculate metrics from order book data.
|
||||
|
||||
Args:
|
||||
orderbook: Order book snapshot
|
||||
|
||||
Returns:
|
||||
OrderBookMetrics: Calculated metrics
|
||||
"""
|
||||
try:
|
||||
set_correlation_id()
|
||||
return self.metrics_calculator.calculate_orderbook_metrics(orderbook)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating metrics: {e}")
|
||||
raise ProcessingError(f"Metrics calculation failed: {e}", "METRICS_ERROR")
|
||||
|
||||
def detect_anomalies(self, data: Union[OrderBookSnapshot, TradeEvent]) -> List[str]:
|
||||
"""
|
||||
Detect anomalies in the data.
|
||||
|
||||
Args:
|
||||
data: Data to analyze for anomalies
|
||||
|
||||
Returns:
|
||||
List[str]: List of detected anomaly descriptions
|
||||
"""
|
||||
try:
|
||||
set_correlation_id()
|
||||
|
||||
if isinstance(data, OrderBookSnapshot):
|
||||
anomalies = self.anomaly_detector.detect_orderbook_anomalies(data)
|
||||
elif isinstance(data, TradeEvent):
|
||||
anomalies = self.anomaly_detector.detect_trade_anomalies(data)
|
||||
else:
|
||||
logger.error(f"Unknown data type for anomaly detection: {type(data)}")
|
||||
return ["Unknown data type"]
|
||||
|
||||
if anomalies:
|
||||
self.anomalies_detected += len(anomalies)
|
||||
|
||||
return anomalies
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error detecting anomalies: {e}")
|
||||
return [f"Anomaly detection error: {e}"]
|
||||
|
||||
def filter_data(self, data: Union[OrderBookSnapshot, TradeEvent], criteria: Dict) -> bool:
|
||||
"""
|
||||
Filter data based on criteria.
|
||||
|
||||
Args:
|
||||
data: Data to filter
|
||||
criteria: Filtering criteria
|
||||
|
||||
Returns:
|
||||
bool: True if data passes filter, False otherwise
|
||||
"""
|
||||
try:
|
||||
set_correlation_id()
|
||||
|
||||
# Symbol filter
|
||||
if 'symbols' in criteria:
|
||||
allowed_symbols = criteria['symbols']
|
||||
if data.symbol not in allowed_symbols:
|
||||
return False
|
||||
|
||||
# Exchange filter
|
||||
if 'exchanges' in criteria:
|
||||
allowed_exchanges = criteria['exchanges']
|
||||
if data.exchange not in allowed_exchanges:
|
||||
return False
|
||||
|
||||
# Quality filter
|
||||
if 'min_quality' in criteria:
|
||||
min_quality = criteria['min_quality']
|
||||
if isinstance(data, OrderBookSnapshot):
|
||||
quality_score, _ = self.quality_checker.check_orderbook_quality(data)
|
||||
elif isinstance(data, TradeEvent):
|
||||
quality_score, _ = self.quality_checker.check_trade_quality(data)
|
||||
else:
|
||||
quality_score = 0.0
|
||||
|
||||
if quality_score < min_quality:
|
||||
return False
|
||||
|
||||
# Price range filter
|
||||
if 'price_range' in criteria:
|
||||
price_range = criteria['price_range']
|
||||
min_price, max_price = price_range
|
||||
|
||||
if isinstance(data, OrderBookSnapshot):
|
||||
price = data.mid_price
|
||||
elif isinstance(data, TradeEvent):
|
||||
price = data.price
|
||||
else:
|
||||
return False
|
||||
|
||||
if price and (price < min_price or price > max_price):
|
||||
return False
|
||||
|
||||
# Volume filter for trades
|
||||
if 'min_volume' in criteria and isinstance(data, TradeEvent):
|
||||
min_volume = criteria['min_volume']
|
||||
if data.size < min_volume:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error filtering data: {e}")
|
||||
return False
|
||||
|
||||
    def enrich_data(self, data: Union[OrderBookSnapshot, TradeEvent]) -> Dict:
        """
        Enrich data with additional metadata.

        Wraps the original object in a dict together with a processing
        timestamp, quality score/issues, type-specific derived values, and
        anomaly-detection results. Sub-step failures (metrics, liquidity)
        are recorded as *_error entries instead of aborting enrichment.

        Args:
            data: Data to enrich

        Returns:
            Dict: Enriched data with metadata (on total failure, only
            'original_data' and 'enrichment_error')
        """
        try:
            set_correlation_id()

            enriched = {
                'original_data': data,
                'processing_timestamp': get_current_timestamp(),
                'processor_version': '1.0.0'
            }

            # Add quality metrics
            if isinstance(data, OrderBookSnapshot):
                quality_score, quality_issues = self.quality_checker.check_orderbook_quality(data)
                enriched['quality_score'] = quality_score
                enriched['quality_issues'] = quality_issues

                # Add calculated metrics (best-effort: failure recorded, not raised)
                try:
                    metrics = self.calculate_metrics(data)
                    enriched['metrics'] = {
                        'mid_price': metrics.mid_price,
                        'spread': metrics.spread,
                        'spread_percentage': metrics.spread_percentage,
                        'volume_imbalance': metrics.volume_imbalance,
                        'depth_10': metrics.depth_10,
                        'depth_50': metrics.depth_50
                    }
                except Exception as e:
                    enriched['metrics_error'] = str(e)

                # Add liquidity score (best-effort as well)
                try:
                    liquidity_score = self.metrics_calculator.calculate_liquidity_score(data)
                    enriched['liquidity_score'] = liquidity_score
                except Exception as e:
                    enriched['liquidity_error'] = str(e)

            elif isinstance(data, TradeEvent):
                quality_score, quality_issues = self.quality_checker.check_trade_quality(data)
                enriched['quality_score'] = quality_score
                enriched['quality_issues'] = quality_issues

                # Add trade-specific enrichments
                # notional value of the trade
                enriched['trade_value'] = data.price * data.size
                # +1 for buys, -1 for anything else (sells)
                enriched['side_numeric'] = 1 if data.side == 'buy' else -1

            # Add anomaly detection results (also bumps self.anomalies_detected)
            anomalies = self.detect_anomalies(data)
            enriched['anomalies'] = anomalies
            enriched['anomaly_count'] = len(anomalies)

            return enriched

        except Exception as e:
            logger.error(f"Error enriching data: {e}")
            return {
                'original_data': data,
                'enrichment_error': str(e)
            }
|
||||
|
||||
def get_data_quality_score(self, data: Union[OrderBookSnapshot, TradeEvent]) -> float:
|
||||
"""
|
||||
Calculate data quality score.
|
||||
|
||||
Args:
|
||||
data: Data to score
|
||||
|
||||
Returns:
|
||||
float: Quality score between 0.0 and 1.0
|
||||
"""
|
||||
try:
|
||||
set_correlation_id()
|
||||
|
||||
if isinstance(data, OrderBookSnapshot):
|
||||
quality_score, _ = self.quality_checker.check_orderbook_quality(data)
|
||||
elif isinstance(data, TradeEvent):
|
||||
quality_score, _ = self.quality_checker.check_trade_quality(data)
|
||||
else:
|
||||
logger.error(f"Unknown data type for quality scoring: {type(data)}")
|
||||
return 0.0
|
||||
|
||||
return quality_score
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating quality score: {e}")
|
||||
return 0.0
|
||||
|
||||
def get_processing_stats(self) -> Dict[str, Any]:
|
||||
"""Get processing statistics"""
|
||||
return {
|
||||
'processed_orderbooks': self.processed_orderbooks,
|
||||
'processed_trades': self.processed_trades,
|
||||
'quality_failures': self.quality_failures,
|
||||
'anomalies_detected': self.anomalies_detected,
|
||||
'quality_failure_rate': (
|
||||
self.quality_failures / max(1, self.processed_orderbooks + self.processed_trades)
|
||||
),
|
||||
'anomaly_rate': (
|
||||
self.anomalies_detected / max(1, self.processed_orderbooks + self.processed_trades)
|
||||
),
|
||||
'quality_checker_summary': self.quality_checker.get_quality_summary(),
|
||||
'anomaly_detector_stats': self.anomaly_detector.get_statistics()
|
||||
}
|
||||
|
||||
def reset_stats(self) -> None:
|
||||
"""Reset processing statistics"""
|
||||
self.processed_orderbooks = 0
|
||||
self.processed_trades = 0
|
||||
self.quality_failures = 0
|
||||
self.anomalies_detected = 0
|
||||
|
||||
logger.info("Processing statistics reset")
|
275
COBY/processing/metrics_calculator.py
Normal file
275
COBY/processing/metrics_calculator.py
Normal file
@ -0,0 +1,275 @@
|
||||
"""
|
||||
Metrics calculation for order book analysis.
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Optional
|
||||
from ..models.core import OrderBookSnapshot, OrderBookMetrics, ImbalanceMetrics
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class MetricsCalculator:
|
||||
"""
|
||||
Calculates various metrics from order book data.
|
||||
|
||||
Metrics include:
|
||||
- Basic metrics (mid price, spread, volumes)
|
||||
- Imbalance metrics
|
||||
- Depth metrics
|
||||
- Liquidity metrics
|
||||
"""
|
||||
|
||||
    def __init__(self):
        """Initialize metrics calculator"""
        # The calculator holds no state; creation is logged for traceability only.
        logger.info("Metrics calculator initialized")
|
||||
|
||||
def calculate_orderbook_metrics(self, orderbook: OrderBookSnapshot) -> OrderBookMetrics:
|
||||
"""
|
||||
Calculate comprehensive order book metrics.
|
||||
|
||||
Args:
|
||||
orderbook: Order book snapshot
|
||||
|
||||
Returns:
|
||||
OrderBookMetrics: Calculated metrics
|
||||
"""
|
||||
try:
|
||||
# Basic calculations
|
||||
mid_price = self._calculate_mid_price(orderbook)
|
||||
spread = self._calculate_spread(orderbook)
|
||||
spread_percentage = (spread / mid_price * 100) if mid_price > 0 else 0.0
|
||||
|
||||
# Volume calculations
|
||||
bid_volume = sum(level.size for level in orderbook.bids)
|
||||
ask_volume = sum(level.size for level in orderbook.asks)
|
||||
|
||||
# Imbalance calculation
|
||||
total_volume = bid_volume + ask_volume
|
||||
volume_imbalance = ((bid_volume - ask_volume) / total_volume) if total_volume > 0 else 0.0
|
||||
|
||||
# Depth calculations
|
||||
depth_10 = self._calculate_depth(orderbook, 10)
|
||||
depth_50 = self._calculate_depth(orderbook, 50)
|
||||
|
||||
return OrderBookMetrics(
|
||||
symbol=orderbook.symbol,
|
||||
exchange=orderbook.exchange,
|
||||
timestamp=orderbook.timestamp,
|
||||
mid_price=mid_price,
|
||||
spread=spread,
|
||||
spread_percentage=spread_percentage,
|
||||
bid_volume=bid_volume,
|
||||
ask_volume=ask_volume,
|
||||
volume_imbalance=volume_imbalance,
|
||||
depth_10=depth_10,
|
||||
depth_50=depth_50
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating order book metrics: {e}")
|
||||
raise
|
||||
|
||||
def calculate_imbalance_metrics(self, orderbook: OrderBookSnapshot) -> ImbalanceMetrics:
|
||||
"""
|
||||
Calculate order book imbalance metrics.
|
||||
|
||||
Args:
|
||||
orderbook: Order book snapshot
|
||||
|
||||
Returns:
|
||||
ImbalanceMetrics: Calculated imbalance metrics
|
||||
"""
|
||||
try:
|
||||
# Volume imbalance
|
||||
bid_volume = sum(level.size for level in orderbook.bids)
|
||||
ask_volume = sum(level.size for level in orderbook.asks)
|
||||
total_volume = bid_volume + ask_volume
|
||||
volume_imbalance = ((bid_volume - ask_volume) / total_volume) if total_volume > 0 else 0.0
|
||||
|
||||
# Price imbalance (weighted by volume)
|
||||
price_imbalance = self._calculate_price_imbalance(orderbook)
|
||||
|
||||
# Depth imbalance
|
||||
depth_imbalance = self._calculate_depth_imbalance(orderbook)
|
||||
|
||||
# Momentum score (simplified - would need historical data for full implementation)
|
||||
momentum_score = volume_imbalance * 0.5 + price_imbalance * 0.3 + depth_imbalance * 0.2
|
||||
|
||||
return ImbalanceMetrics(
|
||||
symbol=orderbook.symbol,
|
||||
timestamp=orderbook.timestamp,
|
||||
volume_imbalance=volume_imbalance,
|
||||
price_imbalance=price_imbalance,
|
||||
depth_imbalance=depth_imbalance,
|
||||
momentum_score=momentum_score
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating imbalance metrics: {e}")
|
||||
raise
|
||||
|
||||
def _calculate_mid_price(self, orderbook: OrderBookSnapshot) -> float:
|
||||
"""Calculate mid price"""
|
||||
if not orderbook.bids or not orderbook.asks:
|
||||
return 0.0
|
||||
|
||||
best_bid = orderbook.bids[0].price
|
||||
best_ask = orderbook.asks[0].price
|
||||
|
||||
return (best_bid + best_ask) / 2.0
|
||||
|
||||
def _calculate_spread(self, orderbook: OrderBookSnapshot) -> float:
|
||||
"""Calculate bid-ask spread"""
|
||||
if not orderbook.bids or not orderbook.asks:
|
||||
return 0.0
|
||||
|
||||
best_bid = orderbook.bids[0].price
|
||||
best_ask = orderbook.asks[0].price
|
||||
|
||||
return best_ask - best_bid
|
||||
|
||||
def _calculate_depth(self, orderbook: OrderBookSnapshot, levels: int) -> float:
|
||||
"""Calculate market depth for specified number of levels"""
|
||||
bid_depth = sum(
|
||||
level.size for level in orderbook.bids[:levels]
|
||||
)
|
||||
ask_depth = sum(
|
||||
level.size for level in orderbook.asks[:levels]
|
||||
)
|
||||
|
||||
return bid_depth + ask_depth
|
||||
|
||||
def _calculate_price_imbalance(self, orderbook: OrderBookSnapshot) -> float:
|
||||
"""Calculate price-weighted imbalance"""
|
||||
if not orderbook.bids or not orderbook.asks:
|
||||
return 0.0
|
||||
|
||||
# Calculate volume-weighted average prices for top levels
|
||||
bid_vwap = self._calculate_vwap(orderbook.bids[:5])
|
||||
ask_vwap = self._calculate_vwap(orderbook.asks[:5])
|
||||
|
||||
if bid_vwap == 0 or ask_vwap == 0:
|
||||
return 0.0
|
||||
|
||||
mid_price = (bid_vwap + ask_vwap) / 2.0
|
||||
|
||||
# Normalize imbalance
|
||||
price_imbalance = (bid_vwap - ask_vwap) / mid_price if mid_price > 0 else 0.0
|
||||
|
||||
return max(-1.0, min(1.0, price_imbalance))
|
||||
|
||||
def _calculate_depth_imbalance(self, orderbook: OrderBookSnapshot) -> float:
|
||||
"""Calculate depth imbalance across multiple levels"""
|
||||
levels_to_check = [5, 10, 20]
|
||||
imbalances = []
|
||||
|
||||
for levels in levels_to_check:
|
||||
bid_depth = sum(level.size for level in orderbook.bids[:levels])
|
||||
ask_depth = sum(level.size for level in orderbook.asks[:levels])
|
||||
total_depth = bid_depth + ask_depth
|
||||
|
||||
if total_depth > 0:
|
||||
imbalance = (bid_depth - ask_depth) / total_depth
|
||||
imbalances.append(imbalance)
|
||||
|
||||
# Return weighted average of imbalances
|
||||
if imbalances:
|
||||
return sum(imbalances) / len(imbalances)
|
||||
|
||||
return 0.0
|
||||
|
||||
def _calculate_vwap(self, levels: List) -> float:
|
||||
"""Calculate volume-weighted average price for price levels"""
|
||||
if not levels:
|
||||
return 0.0
|
||||
|
||||
total_volume = sum(level.size for level in levels)
|
||||
if total_volume == 0:
|
||||
return 0.0
|
||||
|
||||
weighted_sum = sum(level.price * level.size for level in levels)
|
||||
|
||||
return weighted_sum / total_volume
|
||||
|
||||
def calculate_liquidity_score(self, orderbook: OrderBookSnapshot) -> float:
|
||||
"""
|
||||
Calculate liquidity score based on depth and spread.
|
||||
|
||||
Args:
|
||||
orderbook: Order book snapshot
|
||||
|
||||
Returns:
|
||||
float: Liquidity score (0.0 to 1.0)
|
||||
"""
|
||||
try:
|
||||
if not orderbook.bids or not orderbook.asks:
|
||||
return 0.0
|
||||
|
||||
# Spread component (lower spread = higher liquidity)
|
||||
spread = self._calculate_spread(orderbook)
|
||||
mid_price = self._calculate_mid_price(orderbook)
|
||||
|
||||
if mid_price == 0:
|
||||
return 0.0
|
||||
|
||||
spread_pct = (spread / mid_price) * 100
|
||||
spread_score = max(0.0, 1.0 - (spread_pct / 5.0)) # Normalize to 5% max spread
|
||||
|
||||
# Depth component (higher depth = higher liquidity)
|
||||
total_depth = self._calculate_depth(orderbook, 10)
|
||||
depth_score = min(1.0, total_depth / 100.0) # Normalize to 100 units max depth
|
||||
|
||||
# Volume balance component (more balanced = higher liquidity)
|
||||
bid_volume = sum(level.size for level in orderbook.bids[:10])
|
||||
ask_volume = sum(level.size for level in orderbook.asks[:10])
|
||||
total_volume = bid_volume + ask_volume
|
||||
|
||||
if total_volume > 0:
|
||||
imbalance = abs(bid_volume - ask_volume) / total_volume
|
||||
balance_score = 1.0 - imbalance
|
||||
else:
|
||||
balance_score = 0.0
|
||||
|
||||
# Weighted combination
|
||||
liquidity_score = (spread_score * 0.4 + depth_score * 0.4 + balance_score * 0.2)
|
||||
|
||||
return max(0.0, min(1.0, liquidity_score))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating liquidity score: {e}")
|
||||
return 0.0
|
||||
|
||||
def get_market_summary(self, orderbook: OrderBookSnapshot) -> Dict[str, float]:
|
||||
"""
|
||||
Get comprehensive market summary.
|
||||
|
||||
Args:
|
||||
orderbook: Order book snapshot
|
||||
|
||||
Returns:
|
||||
Dict[str, float]: Market summary metrics
|
||||
"""
|
||||
try:
|
||||
metrics = self.calculate_orderbook_metrics(orderbook)
|
||||
imbalance = self.calculate_imbalance_metrics(orderbook)
|
||||
liquidity = self.calculate_liquidity_score(orderbook)
|
||||
|
||||
return {
|
||||
'mid_price': metrics.mid_price,
|
||||
'spread': metrics.spread,
|
||||
'spread_percentage': metrics.spread_percentage,
|
||||
'bid_volume': metrics.bid_volume,
|
||||
'ask_volume': metrics.ask_volume,
|
||||
'volume_imbalance': metrics.volume_imbalance,
|
||||
'depth_10': metrics.depth_10,
|
||||
'depth_50': metrics.depth_50,
|
||||
'price_imbalance': imbalance.price_imbalance,
|
||||
'depth_imbalance': imbalance.depth_imbalance,
|
||||
'momentum_score': imbalance.momentum_score,
|
||||
'liquidity_score': liquidity
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating market summary: {e}")
|
||||
return {}
|
288
COBY/processing/quality_checker.py
Normal file
288
COBY/processing/quality_checker.py
Normal file
@ -0,0 +1,288 @@
|
||||
"""
|
||||
Data quality checking and validation for market data.
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Union, Optional, Tuple
|
||||
from datetime import datetime, timezone
|
||||
from ..models.core import OrderBookSnapshot, TradeEvent
|
||||
from ..utils.logging import get_logger
|
||||
from ..utils.validation import validate_price, validate_volume, validate_symbol
|
||||
from ..utils.timing import get_current_timestamp
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class DataQualityChecker:
|
||||
"""
|
||||
Comprehensive data quality checker for market data.
|
||||
|
||||
Validates:
|
||||
- Data structure integrity
|
||||
- Price and volume ranges
|
||||
- Timestamp consistency
|
||||
- Cross-validation between related data points
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize quality checker with default thresholds"""
|
||||
# Quality thresholds
|
||||
self.max_spread_percentage = 10.0 # Maximum spread as % of mid price
|
||||
self.max_price_change_percentage = 50.0 # Maximum price change between updates
|
||||
self.min_volume_threshold = 0.000001 # Minimum meaningful volume
|
||||
self.max_timestamp_drift = 300 # Maximum seconds drift from current time
|
||||
|
||||
# Price history for validation
|
||||
self.price_history: Dict[str, Dict[str, float]] = {} # symbol -> exchange -> last_price
|
||||
|
||||
logger.info("Data quality checker initialized")
|
||||
|
||||
def check_orderbook_quality(self, orderbook: OrderBookSnapshot) -> Tuple[float, List[str]]:
|
||||
"""
|
||||
Check order book data quality.
|
||||
|
||||
Args:
|
||||
orderbook: Order book snapshot to validate
|
||||
|
||||
Returns:
|
||||
Tuple[float, List[str]]: Quality score (0.0-1.0) and list of issues
|
||||
"""
|
||||
issues = []
|
||||
quality_score = 1.0
|
||||
|
||||
try:
|
||||
# Basic structure validation
|
||||
structure_issues = self._check_orderbook_structure(orderbook)
|
||||
issues.extend(structure_issues)
|
||||
quality_score -= len(structure_issues) * 0.1
|
||||
|
||||
# Price validation
|
||||
price_issues = self._check_orderbook_prices(orderbook)
|
||||
issues.extend(price_issues)
|
||||
quality_score -= len(price_issues) * 0.15
|
||||
|
||||
# Volume validation
|
||||
volume_issues = self._check_orderbook_volumes(orderbook)
|
||||
issues.extend(volume_issues)
|
||||
quality_score -= len(volume_issues) * 0.1
|
||||
|
||||
# Spread validation
|
||||
spread_issues = self._check_orderbook_spread(orderbook)
|
||||
issues.extend(spread_issues)
|
||||
quality_score -= len(spread_issues) * 0.2
|
||||
|
||||
# Timestamp validation
|
||||
timestamp_issues = self._check_timestamp(orderbook.timestamp)
|
||||
issues.extend(timestamp_issues)
|
||||
quality_score -= len(timestamp_issues) * 0.1
|
||||
|
||||
# Cross-validation with history
|
||||
history_issues = self._check_price_history(orderbook)
|
||||
issues.extend(history_issues)
|
||||
quality_score -= len(history_issues) * 0.15
|
||||
|
||||
# Update price history
|
||||
self._update_price_history(orderbook)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking order book quality: {e}")
|
||||
issues.append(f"Quality check error: {e}")
|
||||
quality_score = 0.0
|
||||
|
||||
# Ensure score is within bounds
|
||||
quality_score = max(0.0, min(1.0, quality_score))
|
||||
|
||||
if issues:
|
||||
logger.debug(f"Order book quality issues for {orderbook.symbol}@{orderbook.exchange}: {issues}")
|
||||
|
||||
return quality_score, issues de
|
||||
f check_trade_quality(self, trade: TradeEvent) -> Tuple[float, List[str]]:
|
||||
"""
|
||||
Check trade data quality.
|
||||
|
||||
Args:
|
||||
trade: Trade event to validate
|
||||
|
||||
Returns:
|
||||
Tuple[float, List[str]]: Quality score (0.0-1.0) and list of issues
|
||||
"""
|
||||
issues = []
|
||||
quality_score = 1.0
|
||||
|
||||
try:
|
||||
# Basic structure validation
|
||||
if not validate_symbol(trade.symbol):
|
||||
issues.append("Invalid symbol format")
|
||||
|
||||
if not trade.exchange:
|
||||
issues.append("Missing exchange")
|
||||
|
||||
if not trade.trade_id:
|
||||
issues.append("Missing trade ID")
|
||||
|
||||
# Price validation
|
||||
if not validate_price(trade.price):
|
||||
issues.append(f"Invalid price: {trade.price}")
|
||||
|
||||
# Volume validation
|
||||
if not validate_volume(trade.size):
|
||||
issues.append(f"Invalid size: {trade.size}")
|
||||
|
||||
if trade.size < self.min_volume_threshold:
|
||||
issues.append(f"Size below threshold: {trade.size}")
|
||||
|
||||
# Side validation
|
||||
if trade.side not in ['buy', 'sell']:
|
||||
issues.append(f"Invalid side: {trade.side}")
|
||||
|
||||
# Timestamp validation
|
||||
timestamp_issues = self._check_timestamp(trade.timestamp)
|
||||
issues.extend(timestamp_issues)
|
||||
|
||||
# Calculate quality score
|
||||
quality_score -= len(issues) * 0.2
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking trade quality: {e}")
|
||||
issues.append(f"Quality check error: {e}")
|
||||
quality_score = 0.0
|
||||
|
||||
# Ensure score is within bounds
|
||||
quality_score = max(0.0, min(1.0, quality_score))
|
||||
|
||||
if issues:
|
||||
logger.debug(f"Trade quality issues for {trade.symbol}@{trade.exchange}: {issues}")
|
||||
|
||||
return quality_score, issues
|
||||
|
||||
def _check_orderbook_structure(self, orderbook: OrderBookSnapshot) -> List[str]:
|
||||
"""Check basic order book structure"""
|
||||
issues = []
|
||||
|
||||
if not validate_symbol(orderbook.symbol):
|
||||
issues.append("Invalid symbol format")
|
||||
|
||||
if not orderbook.exchange:
|
||||
issues.append("Missing exchange")
|
||||
|
||||
if not orderbook.bids:
|
||||
issues.append("No bid levels")
|
||||
|
||||
if not orderbook.asks:
|
||||
issues.append("No ask levels")
|
||||
|
||||
return issues
|
||||
|
||||
def _check_orderbook_prices(self, orderbook: OrderBookSnapshot) -> List[str]:
|
||||
"""Check order book price validity"""
|
||||
issues = []
|
||||
|
||||
# Check bid prices (should be descending)
|
||||
for i, bid in enumerate(orderbook.bids):
|
||||
if not validate_price(bid.price):
|
||||
issues.append(f"Invalid bid price at level {i}: {bid.price}")
|
||||
|
||||
if i > 0 and bid.price >= orderbook.bids[i-1].price:
|
||||
issues.append(f"Bid prices not descending at level {i}")
|
||||
|
||||
# Check ask prices (should be ascending)
|
||||
for i, ask in enumerate(orderbook.asks):
|
||||
if not validate_price(ask.price):
|
||||
issues.append(f"Invalid ask price at level {i}: {ask.price}")
|
||||
|
||||
if i > 0 and ask.price <= orderbook.asks[i-1].price:
|
||||
issues.append(f"Ask prices not ascending at level {i}")
|
||||
|
||||
# Check bid-ask ordering
|
||||
if orderbook.bids and orderbook.asks:
|
||||
if orderbook.bids[0].price >= orderbook.asks[0].price:
|
||||
issues.append("Best bid >= best ask (crossed book)")
|
||||
|
||||
return issues def
|
||||
_check_orderbook_volumes(self, orderbook: OrderBookSnapshot) -> List[str]:
|
||||
"""Check order book volume validity"""
|
||||
issues = []
|
||||
|
||||
# Check bid volumes
|
||||
for i, bid in enumerate(orderbook.bids):
|
||||
if not validate_volume(bid.size):
|
||||
issues.append(f"Invalid bid volume at level {i}: {bid.size}")
|
||||
|
||||
if bid.size < self.min_volume_threshold:
|
||||
issues.append(f"Bid volume below threshold at level {i}: {bid.size}")
|
||||
|
||||
# Check ask volumes
|
||||
for i, ask in enumerate(orderbook.asks):
|
||||
if not validate_volume(ask.size):
|
||||
issues.append(f"Invalid ask volume at level {i}: {ask.size}")
|
||||
|
||||
if ask.size < self.min_volume_threshold:
|
||||
issues.append(f"Ask volume below threshold at level {i}: {ask.size}")
|
||||
|
||||
return issues
|
||||
|
||||
def _check_orderbook_spread(self, orderbook: OrderBookSnapshot) -> List[str]:
|
||||
"""Check order book spread validity"""
|
||||
issues = []
|
||||
|
||||
if orderbook.mid_price and orderbook.spread:
|
||||
spread_percentage = (orderbook.spread / orderbook.mid_price) * 100
|
||||
|
||||
if spread_percentage > self.max_spread_percentage:
|
||||
issues.append(f"Spread too wide: {spread_percentage:.2f}%")
|
||||
|
||||
if spread_percentage < 0:
|
||||
issues.append(f"Negative spread: {spread_percentage:.2f}%")
|
||||
|
||||
return issues
|
||||
|
||||
def _check_timestamp(self, timestamp: datetime) -> List[str]:
|
||||
"""Check timestamp validity"""
|
||||
issues = []
|
||||
|
||||
if not timestamp:
|
||||
issues.append("Missing timestamp")
|
||||
return issues
|
||||
|
||||
# Check if timestamp is timezone-aware
|
||||
if timestamp.tzinfo is None:
|
||||
issues.append("Timestamp missing timezone info")
|
||||
|
||||
# Check timestamp drift
|
||||
current_time = get_current_timestamp()
|
||||
time_diff = abs((timestamp - current_time).total_seconds())
|
||||
|
||||
if time_diff > self.max_timestamp_drift:
|
||||
issues.append(f"Timestamp drift too large: {time_diff:.1f}s")
|
||||
|
||||
return issues
|
||||
|
||||
def _check_price_history(self, orderbook: OrderBookSnapshot) -> List[str]:
|
||||
"""Check price consistency with history"""
|
||||
issues = []
|
||||
|
||||
key = f"{orderbook.symbol}_{orderbook.exchange}"
|
||||
|
||||
if key in self.price_history and orderbook.mid_price:
|
||||
last_price = self.price_history[key]
|
||||
price_change = abs(orderbook.mid_price - last_price) / last_price * 100
|
||||
|
||||
if price_change > self.max_price_change_percentage:
|
||||
issues.append(f"Large price change: {price_change:.2f}%")
|
||||
|
||||
return issues
|
||||
|
||||
def _update_price_history(self, orderbook: OrderBookSnapshot) -> None:
|
||||
"""Update price history for future validation"""
|
||||
if orderbook.mid_price:
|
||||
key = f"{orderbook.symbol}_{orderbook.exchange}"
|
||||
self.price_history[key] = orderbook.mid_price
|
||||
|
||||
def get_quality_summary(self) -> Dict[str, int]:
    """
    Summarise the checker's configuration and tracking state.

    Returns:
        Dict[str, int]: Number of symbols with recorded price history
        plus the configured validation thresholds.
    """
    summary = {'symbols_tracked': len(self.price_history)}
    summary['max_spread_percentage'] = self.max_spread_percentage
    summary['max_price_change_percentage'] = self.max_price_change_percentage
    summary['min_volume_threshold'] = self.min_volume_threshold
    summary['max_timestamp_drift'] = self.max_timestamp_drift
    return summary
|
341
COBY/tests/test_binance_connector.py
Normal file
341
COBY/tests/test_binance_connector.py
Normal file
@ -0,0 +1,341 @@
|
||||
"""
|
||||
Tests for Binance exchange connector.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import asyncio
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from ..connectors.binance_connector import BinanceConnector
|
||||
from ..models.core import OrderBookSnapshot, TradeEvent, PriceLevel
|
||||
|
||||
|
||||
@pytest.fixture
def binance_connector():
    """Provide a fresh BinanceConnector instance for each test."""
    connector = BinanceConnector()
    return connector
|
||||
|
||||
|
||||
@pytest.fixture
def sample_binance_orderbook_data():
    """Raw depth payload shaped like Binance's REST /depth response."""
    # Price/quantity pairs are strings, exactly as Binance serialises them.
    return {
        "lastUpdateId": 1027024,
        "bids": [
            ["4.00000000", "431.00000000"],
            ["3.99000000", "9.00000000"],
        ],
        "asks": [
            ["4.00000200", "12.00000000"],
            ["4.01000000", "18.00000000"],
        ],
    }
|
||||
|
||||
|
||||
@pytest.fixture
def sample_binance_depth_update():
    """Incremental depthUpdate message as pushed by the Binance stream."""
    # Field names follow Binance's wire format: e=event type,
    # E=event time, s=symbol, U/u=first/final update IDs,
    # b/a=bid/ask deltas as [price, quantity] string pairs.
    return {
        "e": "depthUpdate",
        "E": 1672515782136,
        "s": "BTCUSDT",
        "U": 157,
        "u": 160,
        "b": [
            ["50000.00", "0.25"],
            ["49999.00", "0.50"],
        ],
        "a": [
            ["50001.00", "0.30"],
            ["50002.00", "0.40"],
        ],
    }
|
||||
|
||||
|
||||
@pytest.fixture
def sample_binance_trade_update():
    """Trade stream message as pushed by the Binance WebSocket."""
    # t=trade ID, p=price, q=quantity, b/a=buyer/seller order IDs,
    # T=trade time, m=buyer-is-maker flag, M=ignore (legacy field).
    return {
        "e": "trade",
        "E": 1672515782136,
        "s": "BTCUSDT",
        "t": 12345,
        "p": "50000.50",
        "q": "0.10",
        "b": 88,
        "a": 50,
        "T": 1672515782134,
        "m": False,
        "M": True,
    }
|
||||
|
||||
|
||||
class TestBinanceConnector:
    """Unit tests covering the BinanceConnector implementation."""

    def test_initialization(self, binance_connector):
        """Connector starts with the expected defaults and handlers."""
        assert binance_connector.exchange_name == "binance"
        assert binance_connector.websocket_url == BinanceConnector.WEBSOCKET_URL
        assert len(binance_connector.message_handlers) >= 3
        assert binance_connector.stream_id == 1
        assert binance_connector.active_streams == []

    def test_normalize_symbol(self, binance_connector):
        """Symbols are normalised regardless of case or separators."""
        # Already-normalised input passes through unchanged.
        assert binance_connector.normalize_symbol("BTCUSDT") == "BTCUSDT"

        # Dash and slash separators are stripped.
        assert binance_connector.normalize_symbol("BTC-USDT") == "BTCUSDT"
        assert binance_connector.normalize_symbol("BTC/USDT") == "BTCUSDT"

        # Lowercase input is upper-cased.
        assert binance_connector.normalize_symbol("btcusdt") == "BTCUSDT"

        # Empty input is rejected.
        with pytest.raises(Exception):
            binance_connector.normalize_symbol("")

    def test_get_message_type(self, binance_connector):
        """The 'e' field (or an error payload) determines the type."""
        depth_message = {"e": "depthUpdate", "s": "BTCUSDT"}
        assert binance_connector._get_message_type(depth_message) == "depthUpdate"

        trade_message = {"e": "trade", "s": "BTCUSDT"}
        assert binance_connector._get_message_type(trade_message) == "trade"

        error_message = {"error": {"code": -1121, "msg": "Invalid symbol"}}
        assert binance_connector._get_message_type(error_message) == "error"

        # Anything without an event type or error payload is unknown.
        assert binance_connector._get_message_type({"data": "something"}) == "unknown"

    def test_parse_orderbook_snapshot(self, binance_connector, sample_binance_orderbook_data):
        """REST depth payloads are parsed into OrderBookSnapshot objects."""
        parsed = binance_connector._parse_orderbook_snapshot(
            sample_binance_orderbook_data,
            "BTCUSDT"
        )

        assert isinstance(parsed, OrderBookSnapshot)
        assert parsed.symbol == "BTCUSDT"
        assert parsed.exchange == "binance"
        assert len(parsed.bids) == 2
        assert len(parsed.asks) == 2
        assert parsed.sequence_id == 1027024

        # Top-of-book bid comes straight from the payload strings.
        assert parsed.bids[0].price == 4.0
        assert parsed.bids[0].size == 431.0

        # Top-of-book ask likewise.
        assert parsed.asks[0].price == 4.000002
        assert parsed.asks[0].size == 12.0

    @pytest.mark.asyncio
    async def test_handle_orderbook_update(self, binance_connector, sample_binance_depth_update):
        """Depth updates are parsed and forwarded to data callbacks."""
        captured = []
        binance_connector.add_data_callback(captured.append)

        await binance_connector._handle_orderbook_update(sample_binance_depth_update)

        # Exactly one snapshot reached the callback.
        assert len(captured) == 1
        snapshot = captured[0]
        assert isinstance(snapshot, OrderBookSnapshot)
        assert snapshot.symbol == "BTCUSDT"
        assert snapshot.exchange == "binance"
        assert len(snapshot.bids) == 2
        assert len(snapshot.asks) == 2

    @pytest.mark.asyncio
    async def test_handle_trade_update(self, binance_connector, sample_binance_trade_update):
        """Trade updates are parsed and forwarded to data callbacks."""
        captured = []
        binance_connector.add_data_callback(captured.append)

        await binance_connector._handle_trade_update(sample_binance_trade_update)

        # Exactly one trade reached the callback.
        assert len(captured) == 1
        trade = captured[0]
        assert isinstance(trade, TradeEvent)
        assert trade.symbol == "BTCUSDT"
        assert trade.exchange == "binance"
        assert trade.price == 50000.50
        assert trade.size == 0.10
        assert trade.side == "buy"  # m=False means buyer is not maker
        assert trade.trade_id == "12345"

    @pytest.mark.asyncio
    async def test_subscribe_orderbook(self, binance_connector):
        """Order book subscription sends SUBSCRIBE and tracks state."""
        binance_connector._send_message = AsyncMock(return_value=True)

        await binance_connector.subscribe_orderbook("BTCUSDT")

        # A single SUBSCRIBE request went out with the depth stream name.
        binance_connector._send_message.assert_called_once()
        request = binance_connector._send_message.call_args[0][0]
        assert request["method"] == "SUBSCRIBE"
        assert "btcusdt@depth@100ms" in request["params"]
        assert request["id"] == 1

        # Internal bookkeeping reflects the new subscription.
        assert "BTCUSDT" in binance_connector.subscriptions
        assert "orderbook" in binance_connector.subscriptions["BTCUSDT"]
        assert "btcusdt@depth@100ms" in binance_connector.active_streams
        assert binance_connector.stream_id == 2

    @pytest.mark.asyncio
    async def test_subscribe_trades(self, binance_connector):
        """Trade subscription sends SUBSCRIBE and tracks state."""
        binance_connector._send_message = AsyncMock(return_value=True)

        await binance_connector.subscribe_trades("ETHUSDT")

        # A single SUBSCRIBE request went out with the trade stream name.
        binance_connector._send_message.assert_called_once()
        request = binance_connector._send_message.call_args[0][0]
        assert request["method"] == "SUBSCRIBE"
        assert "ethusdt@trade" in request["params"]
        assert request["id"] == 1

        # Internal bookkeeping reflects the new subscription.
        assert "ETHUSDT" in binance_connector.subscriptions
        assert "trades" in binance_connector.subscriptions["ETHUSDT"]
        assert "ethusdt@trade" in binance_connector.active_streams

    @pytest.mark.asyncio
    async def test_unsubscribe_orderbook(self, binance_connector):
        """Unsubscribing sends UNSUBSCRIBE and clears tracking state."""
        # Seed an existing subscription to remove.
        binance_connector.subscriptions["BTCUSDT"] = ["orderbook"]
        binance_connector.active_streams.append("btcusdt@depth@100ms")

        binance_connector._send_message = AsyncMock(return_value=True)

        await binance_connector.unsubscribe_orderbook("BTCUSDT")

        # A single UNSUBSCRIBE request went out for the stream.
        binance_connector._send_message.assert_called_once()
        request = binance_connector._send_message.call_args[0][0]
        assert request["method"] == "UNSUBSCRIBE"
        assert "btcusdt@depth@100ms" in request["params"]

        # Tracking state was removed.
        assert "BTCUSDT" not in binance_connector.subscriptions
        assert "btcusdt@depth@100ms" not in binance_connector.active_streams

    @pytest.mark.asyncio
    @patch('aiohttp.ClientSession.get')
    async def test_get_symbols(self, mock_get, binance_connector):
        """Only symbols with TRADING status are returned."""
        response = AsyncMock()
        response.status = 200
        response.json = AsyncMock(return_value={
            "symbols": [
                {"symbol": "BTCUSDT", "status": "TRADING"},
                {"symbol": "ETHUSDT", "status": "TRADING"},
                {"symbol": "ADAUSDT", "status": "BREAK"},  # non-trading
            ]
        })
        mock_get.return_value.__aenter__.return_value = response

        symbols = await binance_connector.get_symbols()

        assert len(symbols) == 2
        assert "BTCUSDT" in symbols
        assert "ETHUSDT" in symbols
        assert "ADAUSDT" not in symbols  # filtered out by status

    @pytest.mark.asyncio
    @patch('aiohttp.ClientSession.get')
    async def test_get_orderbook_snapshot(self, mock_get, binance_connector, sample_binance_orderbook_data):
        """REST snapshot fetch returns a parsed OrderBookSnapshot."""
        response = AsyncMock()
        response.status = 200
        response.json = AsyncMock(return_value=sample_binance_orderbook_data)
        mock_get.return_value.__aenter__.return_value = response

        snapshot = await binance_connector.get_orderbook_snapshot("BTCUSDT", depth=20)

        assert isinstance(snapshot, OrderBookSnapshot)
        assert snapshot.symbol == "BTCUSDT"
        assert snapshot.exchange == "binance"
        assert len(snapshot.bids) == 2
        assert len(snapshot.asks) == 2

    def test_get_binance_stats(self, binance_connector):
        """Binance-specific stats extend the base connector stats."""
        binance_connector.active_streams = ["btcusdt@depth@100ms", "ethusdt@trade"]
        binance_connector.stream_id = 5

        stats = binance_connector.get_binance_stats()

        # Binance-specific fields.
        assert stats['active_streams'] == 2
        assert len(stats['stream_list']) == 2
        assert stats['next_stream_id'] == 5

        # Base connector fields are merged in.
        assert 'exchange' in stats
        assert 'connection_status' in stats
        assert 'message_count' in stats
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Minimal smoke test that runs without pytest.
    async def simple_test():
        connector = BinanceConnector()

        # Symbol normalisation round-trip.
        normalized = connector.normalize_symbol("BTC-USDT")
        print(f"Symbol normalization: BTC-USDT -> {normalized}")

        # Message type routing.
        msg_type = connector._get_message_type({"e": "depthUpdate"})
        print(f"Message type detection: {msg_type}")

        print("Simple Binance connector test completed")

    asyncio.run(simple_test())
|
304
COBY/tests/test_data_processor.py
Normal file
304
COBY/tests/test_data_processor.py
Normal file
@ -0,0 +1,304 @@
|
||||
"""
|
||||
Tests for data processing components.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from datetime import datetime, timezone
|
||||
from ..processing.data_processor import StandardDataProcessor
|
||||
from ..processing.quality_checker import DataQualityChecker
|
||||
from ..processing.anomaly_detector import AnomalyDetector
|
||||
from ..processing.metrics_calculator import MetricsCalculator
|
||||
from ..models.core import OrderBookSnapshot, TradeEvent, PriceLevel
|
||||
|
||||
|
||||
@pytest.fixture
def data_processor():
    """Provide a StandardDataProcessor instance."""
    return StandardDataProcessor()


@pytest.fixture
def quality_checker():
    """Provide a DataQualityChecker instance."""
    return DataQualityChecker()


@pytest.fixture
def anomaly_detector():
    """Provide an AnomalyDetector instance."""
    return AnomalyDetector()


@pytest.fixture
def metrics_calculator():
    """Provide a MetricsCalculator instance."""
    return MetricsCalculator()
|
||||
|
||||
|
||||
@pytest.fixture
def sample_orderbook():
    """Three-level order book around 50000 with equal total volumes."""
    bid_levels = [
        PriceLevel(price=50000.0, size=1.5),
        PriceLevel(price=49999.0, size=2.0),
        PriceLevel(price=49998.0, size=1.0),
    ]
    ask_levels = [
        PriceLevel(price=50001.0, size=1.0),
        PriceLevel(price=50002.0, size=1.5),
        PriceLevel(price=50003.0, size=2.0),
    ]
    return OrderBookSnapshot(
        symbol="BTCUSDT",
        exchange="binance",
        timestamp=datetime.now(timezone.utc),
        bids=bid_levels,
        asks=ask_levels,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def sample_trade():
    """Single buy-side trade used across the processor tests."""
    return TradeEvent(
        symbol="BTCUSDT",
        exchange="binance",
        timestamp=datetime.now(timezone.utc),
        price=50000.5,
        size=0.1,
        side="buy",
        trade_id="test_trade_123",
    )
|
||||
|
||||
|
||||
class TestDataQualityChecker:
    """Unit tests for DataQualityChecker."""

    def test_orderbook_quality_check(self, quality_checker, sample_orderbook):
        """A well-formed order book scores highly."""
        score, issues = quality_checker.check_orderbook_quality(sample_orderbook)

        assert 0.0 <= score <= 1.0
        assert isinstance(issues, list)
        assert score > 0.8

    def test_trade_quality_check(self, quality_checker, sample_trade):
        """A well-formed trade scores highly."""
        score, issues = quality_checker.check_trade_quality(sample_trade)

        assert 0.0 <= score <= 1.0
        assert isinstance(issues, list)
        assert score > 0.8

    def test_invalid_orderbook_detection(self, quality_checker):
        """A crossed book (bid above ask) is penalised and reported."""
        crossed_book = OrderBookSnapshot(
            symbol="BTCUSDT",
            exchange="binance",
            timestamp=datetime.now(timezone.utc),
            bids=[PriceLevel(price=50002.0, size=1.0)],  # bid above the ask
            asks=[PriceLevel(price=50001.0, size=1.0)],  # ask below the bid
        )

        score, issues = quality_checker.check_orderbook_quality(crossed_book)

        assert score < 0.8
        assert any("crossed book" in issue.lower() for issue in issues)
|
||||
|
||||
|
||||
class TestAnomalyDetector:
    """Unit tests for AnomalyDetector."""

    def test_orderbook_anomaly_detection(self, anomaly_detector, sample_orderbook):
        """Repeated identical books return list results without errors."""
        for _ in range(5):
            result = anomaly_detector.detect_orderbook_anomalies(sample_orderbook)
            assert isinstance(result, list)

    def test_trade_anomaly_detection(self, anomaly_detector, sample_trade):
        """Repeated identical trades return list results without errors."""
        for _ in range(5):
            result = anomaly_detector.detect_trade_anomalies(sample_trade)
            assert isinstance(result, list)

    def test_price_spike_detection(self, anomaly_detector):
        """A ~20% jump after a stable history is flagged as a spike."""
        # Build up a slowly drifting, unremarkable price history.
        for i in range(20):
            baseline_book = OrderBookSnapshot(
                symbol="BTCUSDT",
                exchange="binance",
                timestamp=datetime.now(timezone.utc),
                bids=[PriceLevel(price=50000.0 + i, size=1.0)],
                asks=[PriceLevel(price=50001.0 + i, size=1.0)],
            )
            anomaly_detector.detect_orderbook_anomalies(baseline_book)

        # Jump ~20% above the established range.
        spike_book = OrderBookSnapshot(
            symbol="BTCUSDT",
            exchange="binance",
            timestamp=datetime.now(timezone.utc),
            bids=[PriceLevel(price=60000.0, size=1.0)],
            asks=[PriceLevel(price=60001.0, size=1.0)],
        )

        anomalies = anomaly_detector.detect_orderbook_anomalies(spike_book)
        assert len(anomalies) > 0
        assert any("spike" in anomaly.lower() for anomaly in anomalies)
|
||||
|
||||
|
||||
class TestMetricsCalculator:
    """Unit tests for MetricsCalculator."""

    def test_orderbook_metrics_calculation(self, metrics_calculator, sample_orderbook):
        """Basic book metrics are derived from the fixture's levels."""
        metrics = metrics_calculator.calculate_orderbook_metrics(sample_orderbook)

        assert metrics.symbol == "BTCUSDT"
        assert metrics.exchange == "binance"
        assert metrics.mid_price == 50000.5     # (50000 + 50001) / 2
        assert metrics.spread == 1.0            # 50001 - 50000
        assert metrics.spread_percentage > 0
        assert metrics.bid_volume == 4.5        # 1.5 + 2.0 + 1.0
        assert metrics.ask_volume == 4.5        # 1.0 + 1.5 + 2.0
        assert metrics.volume_imbalance == 0.0  # equal volumes

    def test_imbalance_metrics_calculation(self, metrics_calculator, sample_orderbook):
        """All imbalance components stay inside [-1, 1]."""
        imbalance = metrics_calculator.calculate_imbalance_metrics(sample_orderbook)

        assert imbalance.symbol == "BTCUSDT"
        for component in (imbalance.volume_imbalance,
                          imbalance.price_imbalance,
                          imbalance.depth_imbalance,
                          imbalance.momentum_score):
            assert -1.0 <= component <= 1.0

    def test_liquidity_score_calculation(self, metrics_calculator, sample_orderbook):
        """Liquidity score is in [0, 1]; a healthy book scores > 0.5."""
        score = metrics_calculator.calculate_liquidity_score(sample_orderbook)

        assert 0.0 <= score <= 1.0
        assert score > 0.5
|
||||
|
||||
|
||||
class TestStandardDataProcessor:
    """Unit tests for StandardDataProcessor."""

    def test_data_validation(self, data_processor, sample_orderbook, sample_trade):
        """Well-formed order books and trades pass validation."""
        assert data_processor.validate_data(sample_orderbook) is True
        assert data_processor.validate_data(sample_trade) is True

    def test_metrics_calculation(self, data_processor, sample_orderbook):
        """Metrics are computed through the processor facade."""
        metrics = data_processor.calculate_metrics(sample_orderbook)

        assert metrics.symbol == "BTCUSDT"
        assert metrics.mid_price > 0
        assert metrics.spread > 0

    def test_anomaly_detection(self, data_processor, sample_orderbook, sample_trade):
        """Anomaly detection returns lists for both data types."""
        assert isinstance(data_processor.detect_anomalies(sample_orderbook), list)
        assert isinstance(data_processor.detect_anomalies(sample_trade), list)

    def test_data_filtering(self, data_processor, sample_orderbook, sample_trade):
        """Symbol and price-range filters accept/reject both data types."""
        # Matching symbol filter accepts both.
        accept_symbol = {'symbols': ['BTCUSDT']}
        assert data_processor.filter_data(sample_orderbook, accept_symbol) is True
        assert data_processor.filter_data(sample_trade, accept_symbol) is True

        # Non-matching symbol filter rejects both.
        reject_symbol = {'symbols': ['ETHUSDT']}
        assert data_processor.filter_data(sample_orderbook, reject_symbol) is False
        assert data_processor.filter_data(sample_trade, reject_symbol) is False

        # Price range containing ~50000 accepts both.
        in_range = {'price_range': (40000, 60000)}
        assert data_processor.filter_data(sample_orderbook, in_range) is True
        assert data_processor.filter_data(sample_trade, in_range) is True

        # Price range above the market rejects both.
        out_of_range = {'price_range': (60000, 70000)}
        assert data_processor.filter_data(sample_orderbook, out_of_range) is False
        assert data_processor.filter_data(sample_trade, out_of_range) is False

    def test_data_enrichment(self, data_processor, sample_orderbook, sample_trade):
        """Enrichment wraps data with quality/anomaly metadata."""
        enriched_book = data_processor.enrich_data(sample_orderbook)
        enriched_trade = data_processor.enrich_data(sample_trade)

        for key in ('original_data', 'quality_score',
                    'anomalies', 'processing_timestamp'):
            assert key in enriched_book

        for key in ('original_data', 'quality_score',
                    'anomalies', 'trade_value'):
            assert key in enriched_trade

    def test_quality_score_calculation(self, data_processor, sample_orderbook, sample_trade):
        """Quality scores are in [0, 1] and high for clean data."""
        book_score = data_processor.get_data_quality_score(sample_orderbook)
        trade_score = data_processor.get_data_quality_score(sample_trade)

        assert 0.0 <= book_score <= 1.0
        assert 0.0 <= trade_score <= 1.0
        assert book_score > 0.8
        assert trade_score > 0.8

    def test_processing_stats(self, data_processor, sample_orderbook, sample_trade):
        """Counters increase once data has been processed."""
        data_processor.validate_data(sample_orderbook)
        data_processor.validate_data(sample_trade)

        stats = data_processor.get_processing_stats()

        for key in ('processed_orderbooks', 'processed_trades',
                    'quality_failures', 'anomalies_detected'):
            assert key in stats
        assert stats['processed_orderbooks'] >= 1
        assert stats['processed_trades'] >= 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Minimal smoke test that runs without pytest.
    processor = StandardDataProcessor()

    orderbook = OrderBookSnapshot(
        symbol="BTCUSDT",
        exchange="test",
        timestamp=datetime.now(timezone.utc),
        bids=[PriceLevel(price=50000.0, size=1.0)],
        asks=[PriceLevel(price=50001.0, size=1.0)],
    )

    # Validation path.
    is_valid = processor.validate_data(orderbook)
    print(f"Order book validation: {'PASSED' if is_valid else 'FAILED'}")

    # Metrics path.
    metrics = processor.calculate_metrics(orderbook)
    print(f"Metrics calculation: mid_price={metrics.mid_price}, spread={metrics.spread}")

    # Quality scoring path.
    quality_score = processor.get_data_quality_score(orderbook)
    print(f"Quality score: {quality_score:.2f}")

    print("Simple data processor test completed")
|
Reference in New Issue
Block a user