bucket aggregation

This commit is contained in:
Dobromir Popov
2025-08-04 17:28:55 +03:00
parent 504736c0f7
commit de77b0afa8
10 changed files with 2592 additions and 0 deletions

View File

@ -23,10 +23,15 @@
- _Requirements: 3.1, 3.2, 3.3, 3.4_ - _Requirements: 3.1, 3.2, 3.3, 3.4_
- [ ] 3. Create base exchange connector framework - [ ] 3. Create base exchange connector framework
- Implement abstract base class for exchange WebSocket connectors - Implement abstract base class for exchange WebSocket connectors
- Create connection management with exponential backoff and circuit breaker patterns - Create connection management with exponential backoff and circuit breaker patterns
- Implement WebSocket message handling with proper error recovery - Implement WebSocket message handling with proper error recovery
- Add connection status monitoring and health checks - Add connection status monitoring and health checks
- _Requirements: 1.1, 1.3, 1.4, 8.5_ - _Requirements: 1.1, 1.3, 1.4, 8.5_

View File

@ -0,0 +1,489 @@
"""
Binance exchange connector implementation.
"""
import json
from typing import Dict, List, Optional, Any
from datetime import datetime, timezone
from ..models.core import OrderBookSnapshot, TradeEvent, PriceLevel
from ..utils.logging import get_logger, set_correlation_id
from ..utils.exceptions import ValidationError
from ..utils.validation import validate_symbol, validate_price, validate_volume
from .base_connector import BaseExchangeConnector
logger = get_logger(__name__)
class BinanceConnector(BaseExchangeConnector):
"""
Binance WebSocket connector implementation.

Builds on BaseExchangeConnector: registers handlers for Binance
'depthUpdate', 'trade' and 'error' messages and keeps a list of the
stream names it has subscribed to.

Supports:
- Order book depth streams
- Trade streams
- Symbol normalization
- Real-time data processing
"""
# Binance WebSocket URLs
# WEBSOCKET_URL is handed to the base connector in __init__.
WEBSOCKET_URL = "wss://stream.binance.com:9443/ws"
# API_URL is the prefix for the REST calls made by get_symbols() and
# get_orderbook_snapshot().
API_URL = "https://api.binance.com/api/v3"
def __init__(self):
    """Set up base-connector state, Binance message handlers and stream tracking."""
    super().__init__("binance", self.WEBSOCKET_URL)

    # Route Binance message types to the coroutines that process them
    binance_handlers = {
        'depthUpdate': self._handle_orderbook_update,
        'trade': self._handle_trade_update,
        'error': self._handle_error_message,
    }
    self.message_handlers.update(binance_handlers)

    # Stream names currently subscribed, and the id used for the next request
    self.active_streams: List[str] = []
    self.stream_id = 1

    logger.info("Binance connector initialized")
def _get_message_type(self, data: Dict) -> str:
    """
    Classify a parsed Binance message.

    Args:
        data: Parsed message data

    Returns:
        str: The value of the 'e' field when present; otherwise 'error' for
        messages with an 'error' key, 'subscription_response' for messages
        carrying both 'result' and 'id', and 'unknown' for anything else.
    """
    if 'e' not in data:
        # No event type: the message is either an error, a reply to a
        # (un)subscribe request, or something we do not recognise.
        if 'error' in data:
            return 'error'
        is_subscription_reply = 'result' in data and 'id' in data
        return 'subscription_response' if is_subscription_reply else 'unknown'

    # Binance puts the event type in the 'e' field
    return data['e']
def normalize_symbol(self, symbol: str) -> str:
    """
    Convert a symbol to the form used by Binance.

    Args:
        symbol: Symbol, optionally containing '-' or '/' separators
            (e.g., 'btc-usdt', 'BTC/USDT', 'BTCUSDT')

    Returns:
        str: Upper-cased symbol with the separators removed (e.g., 'BTCUSDT')

    Raises:
        ValidationError: If the cleaned symbol is rejected by validate_symbol
    """
    # Binance symbols are upper-case and contain no '-' or '/' separators
    compact = symbol.upper()
    for separator in ('-', '/'):
        compact = compact.replace(separator, '')

    if validate_symbol(compact):
        return compact
    raise ValidationError(f"Invalid symbol format: {symbol}", "INVALID_SYMBOL")
async def subscribe_orderbook(self, symbol: str) -> None:
    """
    Subscribe to order book depth updates for a symbol.

    Sends a SUBSCRIBE request for the ``<symbol>@depth@100ms`` stream. When
    the send succeeds, the subscription is recorded under ``symbol`` in
    ``self.subscriptions``, the stream name is tracked in
    ``self.active_streams`` and the request id is advanced.

    Args:
        symbol: Trading symbol (e.g., 'BTCUSDT')

    Raises:
        Exception: Any error raised while normalizing the symbol or sending
            the request is logged and re-raised.
    """
    try:
        set_correlation_id()
        normalized_symbol = self.normalize_symbol(symbol)
        stream_name = f"{normalized_symbol.lower()}@depth@100ms"

        # Create subscription message
        subscription_msg = {
            "method": "SUBSCRIBE",
            "params": [stream_name],
            "id": self.stream_id
        }

        # Send subscription
        success = await self._send_message(subscription_msg)
        if not success:
            logger.error(f"Failed to subscribe to order book for {symbol} on Binance")
            return

        # Track subscription
        channels = self.subscriptions.setdefault(symbol, [])
        if 'orderbook' not in channels:
            channels.append('orderbook')
        # Subscribing twice must not list the stream twice: unsubscribe
        # removes a single entry, so a duplicate would be left behind.
        if stream_name not in self.active_streams:
            self.active_streams.append(stream_name)
        self.stream_id += 1
        logger.info(f"Subscribed to order book for {symbol} on Binance")
    except Exception as e:
        logger.error(f"Error subscribing to order book for {symbol}: {e}")
        raise
async def subscribe_trades(self, symbol: str) -> None:
    """
    Subscribe to trade updates for a symbol.

    Sends a SUBSCRIBE request for the ``<symbol>@trade`` stream. When the
    send succeeds, the subscription is recorded under ``symbol`` in
    ``self.subscriptions``, the stream name is tracked in
    ``self.active_streams`` and the request id is advanced.

    Args:
        symbol: Trading symbol (e.g., 'BTCUSDT')

    Raises:
        Exception: Any error raised while normalizing the symbol or sending
            the request is logged and re-raised.
    """
    try:
        set_correlation_id()
        normalized_symbol = self.normalize_symbol(symbol)
        stream_name = f"{normalized_symbol.lower()}@trade"

        # Create subscription message
        subscription_msg = {
            "method": "SUBSCRIBE",
            "params": [stream_name],
            "id": self.stream_id
        }

        # Send subscription
        success = await self._send_message(subscription_msg)
        if not success:
            logger.error(f"Failed to subscribe to trades for {symbol} on Binance")
            return

        # Track subscription
        channels = self.subscriptions.setdefault(symbol, [])
        if 'trades' not in channels:
            channels.append('trades')
        # Subscribing twice must not list the stream twice: unsubscribe
        # removes a single entry, so a duplicate would be left behind.
        if stream_name not in self.active_streams:
            self.active_streams.append(stream_name)
        self.stream_id += 1
        logger.info(f"Subscribed to trades for {symbol} on Binance")
    except Exception as e:
        logger.error(f"Error subscribing to trades for {symbol}: {e}")
        raise
async def unsubscribe_orderbook(self, symbol: str) -> None:
    """
    Stop receiving order book updates for a symbol.

    Sends an UNSUBSCRIBE request for the ``<symbol>@depth@100ms`` stream and,
    if the send succeeds, drops the matching tracking entries and advances
    the request id.

    Args:
        symbol: Trading symbol (e.g., 'BTCUSDT')

    Raises:
        Exception: Errors are logged and re-raised.
    """
    try:
        stream_name = f"{self.normalize_symbol(symbol).lower()}@depth@100ms"
        request = {
            "method": "UNSUBSCRIBE",
            "params": [stream_name],
            "id": self.stream_id,
        }

        sent = await self._send_message(request)
        if not sent:
            logger.error(f"Failed to unsubscribe from order book for {symbol} on Binance")
            return

        # Forget the 'orderbook' channel; drop the symbol once nothing is left
        if symbol in self.subscriptions:
            channels = self.subscriptions[symbol]
            if 'orderbook' in channels:
                channels.remove('orderbook')
                if not self.subscriptions[symbol]:
                    del self.subscriptions[symbol]

        if stream_name in self.active_streams:
            self.active_streams.remove(stream_name)

        self.stream_id += 1
        logger.info(f"Unsubscribed from order book for {symbol} on Binance")
    except Exception as e:
        logger.error(f"Error unsubscribing from order book for {symbol}: {e}")
        raise
async def unsubscribe_trades(self, symbol: str) -> None:
    """
    Stop receiving trade updates for a symbol.

    Sends an UNSUBSCRIBE request for the ``<symbol>@trade`` stream and, if
    the send succeeds, drops the matching tracking entries and advances the
    request id.

    Args:
        symbol: Trading symbol (e.g., 'BTCUSDT')

    Raises:
        Exception: Errors are logged and re-raised.
    """
    try:
        stream_name = f"{self.normalize_symbol(symbol).lower()}@trade"
        request = {
            "method": "UNSUBSCRIBE",
            "params": [stream_name],
            "id": self.stream_id,
        }

        sent = await self._send_message(request)
        if not sent:
            logger.error(f"Failed to unsubscribe from trades for {symbol} on Binance")
            return

        # Forget the 'trades' channel; drop the symbol once nothing is left
        if symbol in self.subscriptions:
            channels = self.subscriptions[symbol]
            if 'trades' in channels:
                channels.remove('trades')
                if not self.subscriptions[symbol]:
                    del self.subscriptions[symbol]

        if stream_name in self.active_streams:
            self.active_streams.remove(stream_name)

        self.stream_id += 1
        logger.info(f"Unsubscribed from trades for {symbol} on Binance")
    except Exception as e:
        logger.error(f"Error unsubscribing from trades for {symbol}: {e}")
        raise
async def get_symbols(self) -> List[str]:
    """
    Fetch the symbols currently trading on Binance.

    Queries ``{API_URL}/exchangeInfo`` and keeps the entries whose status is
    'TRADING'.

    Returns:
        List[str]: Tradable symbols; an empty list on a non-200 response or
        on any exception (both are logged).
    """
    try:
        import aiohttp

        async with aiohttp.ClientSession() as session:
            async with session.get(f"{self.API_URL}/exchangeInfo") as response:
                if response.status != 200:
                    logger.error(f"Failed to get symbols from Binance: HTTP {response.status}")
                    return []

                payload = await response.json()
                tradable = []
                for entry in payload.get('symbols', []):
                    if entry.get('status') == 'TRADING':
                        tradable.append(entry['symbol'])

                logger.info(f"Retrieved {len(tradable)} symbols from Binance")
                return tradable
    except Exception as e:
        logger.error(f"Error getting symbols from Binance: {e}")
        return []
async def get_orderbook_snapshot(self, symbol: str, depth: int = 20) -> Optional[OrderBookSnapshot]:
    """
    Fetch the current order book from the Binance REST API.

    Args:
        symbol: Trading symbol
        depth: Desired number of price levels; the request uses the closest
            value from the supported list, which may be smaller than asked

    Returns:
        OrderBookSnapshot: Parsed order book, or None on a non-200 response
        or on any exception (both are logged)
    """
    try:
        import aiohttp

        # Binance supports depths: 5, 10, 20, 50, 100, 500, 1000, 5000
        supported_depths = [5, 10, 20, 50, 100, 500, 1000, 5000]
        query = {
            'symbol': self.normalize_symbol(symbol),
            'limit': min(supported_depths, key=lambda candidate: abs(candidate - depth)),
        }
        endpoint = f"{self.API_URL}/depth"

        async with aiohttp.ClientSession() as session:
            async with session.get(endpoint, params=query) as response:
                if response.status != 200:
                    logger.error(f"Failed to get order book for {symbol}: HTTP {response.status}")
                    return None
                payload = await response.json()
                return self._parse_orderbook_snapshot(payload, symbol)
    except Exception as e:
        logger.error(f"Error getting order book snapshot for {symbol}: {e}")
        return None
def _parse_orderbook_snapshot(self, data: Dict, symbol: str) -> OrderBookSnapshot:
    """
    Build an OrderBookSnapshot from a Binance REST depth response.

    Args:
        data: Raw Binance order book data ('bids', 'asks', 'lastUpdateId')
        symbol: Trading symbol stored on the snapshot

    Returns:
        OrderBookSnapshot: Snapshot stamped with the current UTC time

    Raises:
        ValidationError: If any part of the payload cannot be parsed
    """
    def to_levels(rows):
        # Each row is indexed as [price, size]; rows failing validation are skipped
        levels = []
        for row in rows:
            price, size = float(row[0]), float(row[1])
            if validate_price(price) and validate_volume(size):
                levels.append(PriceLevel(price=price, size=size))
        return levels

    try:
        bid_levels = to_levels(data.get('bids', []))
        ask_levels = to_levels(data.get('asks', []))
        return OrderBookSnapshot(
            symbol=symbol,
            exchange=self.exchange_name,
            timestamp=datetime.now(timezone.utc),
            bids=bid_levels,
            asks=ask_levels,
            sequence_id=data.get('lastUpdateId'),
        )
    except Exception as e:
        logger.error(f"Error parsing order book snapshot: {e}")
        raise ValidationError(f"Invalid order book data: {e}", "PARSE_ERROR")
async def _handle_orderbook_update(self, data: Dict) -> None:
    """
    Process a 'depthUpdate' message and forward it to the data callbacks.

    The levels found in the message's 'b' and 'a' lists are wrapped in an
    OrderBookSnapshot timestamped from the 'E' field (milliseconds) and
    tagged with the 'u' update id. Any exception is logged, not raised.

    Args:
        data: Order book update data
    """
    try:
        set_correlation_id()

        # The 's' field carries the symbol
        symbol = data.get('s', '').upper()
        if not symbol:
            logger.warning("Order book update missing symbol")
            return

        # Keep only the levels whose price and size pass validation
        bid_levels = [
            PriceLevel(price=price, size=size)
            for price, size in ((float(row[0]), float(row[1])) for row in data.get('b', []))
            if validate_price(price) and validate_volume(size)
        ]
        ask_levels = [
            PriceLevel(price=price, size=size)
            for price, size in ((float(row[0]), float(row[1])) for row in data.get('a', []))
            if validate_price(price) and validate_volume(size)
        ]

        snapshot = OrderBookSnapshot(
            symbol=symbol,
            exchange=self.exchange_name,
            timestamp=datetime.fromtimestamp(data.get('E', 0) / 1000, tz=timezone.utc),
            bids=bid_levels,
            asks=ask_levels,
            sequence_id=data.get('u'),  # Final update ID
        )

        self._notify_data_callbacks(snapshot)
        logger.debug(f"Processed order book update for {symbol}")
    except Exception as e:
        logger.error(f"Error handling order book update: {e}")
async def _handle_trade_update(self, data: Dict) -> None:
    """
    Process a 'trade' message and forward it to the data callbacks.

    Reads symbol ('s'), price ('p'), quantity ('q'), trade time in
    milliseconds ('T'), trade id ('t') and the buyer-is-maker flag ('m').
    Messages without a symbol or with an invalid price/size are dropped
    with a warning. Any exception is logged, not raised.

    Args:
        data: Trade update data
    """
    try:
        set_correlation_id()

        symbol = data.get('s', '').upper()
        if not symbol:
            logger.warning("Trade update missing symbol")
            return

        price = float(data.get('p', 0))
        size = float(data.get('q', 0))
        if not (validate_price(price) and validate_volume(size)):
            logger.warning(f"Invalid trade data: price={price}, size={size}")
            return

        # 'm' is true when the buyer is the market maker, i.e. the taker sold
        side = 'buy'
        if data.get('m', False):
            side = 'sell'

        event = TradeEvent(
            symbol=symbol,
            exchange=self.exchange_name,
            timestamp=datetime.fromtimestamp(data.get('T', 0) / 1000, tz=timezone.utc),
            price=price,
            size=size,
            side=side,
            trade_id=str(data.get('t', '')),
        )

        self._notify_data_callbacks(event)
        logger.debug(f"Processed trade for {symbol}: {side} {size} @ {price}")
    except Exception as e:
        logger.error(f"Error handling trade update: {e}")
async def _handle_error_message(self, data: Dict) -> None:
    """
    Log an error message received from Binance.

    `_get_message_type` classifies a message as 'error' when it has an
    'error' key, so the code and text are looked up inside that value first
    (when it is a dict) and then at the top level of the message.

    Args:
        data: Error message data
    """
    # NOTE(review): assumes the payload is {"error": {"code": ..., "msg": ...}};
    # confirm against the Binance WebSocket documentation. Top-level
    # 'code'/'msg' are still honoured as a fallback.
    details = data.get('error')
    if not isinstance(details, dict):
        details = {}

    error_code = details.get('code', data.get('code', 'unknown'))
    error_msg = details.get('msg', data.get('msg', 'Unknown error'))
    logger.error(f"Binance error {error_code}: {error_msg}")

    # Handle specific error codes
    if error_code == -1121:  # Invalid symbol
        logger.error("Invalid symbol error - check symbol format")
    elif error_code == -1130:  # Invalid listen key
        logger.error("Invalid listen key - may need to reconnect")
def get_binance_stats(self) -> Dict[str, Any]:
    """
    Return the base connector statistics extended with stream information.

    Returns:
        Dict[str, Any]: The dict from get_stats() with 'active_streams'
        (count), 'stream_list' (copy of the names) and 'next_stream_id' added.
    """
    stats = self.get_stats()
    stats.update(
        active_streams=len(self.active_streams),
        stream_list=self.active_streams.copy(),
        next_stream_id=self.stream_id,
    )
    return stats

View File

@ -0,0 +1,168 @@
#!/usr/bin/env python3
"""
Example usage of Binance connector.
"""
import asyncio
import sys
from pathlib import Path
# Add COBY to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from connectors.binance_connector import BinanceConnector
from utils.logging import setup_logging, get_logger
from models.core import OrderBookSnapshot, TradeEvent
# Setup logging
setup_logging(level='INFO', console_output=True)
logger = get_logger(__name__)
class BinanceExample:
"""Example Binance connector usage: counts and logs the order books and trades it receives."""
def __init__(self):
    """Create the connector, zero the counters and register both callbacks."""
    self.connector = BinanceConnector()

    # Running totals shown in the log output and the final statistics
    self.trade_count = 0
    self.orderbook_count = 0

    # Receive market data and connection status changes
    connector = self.connector
    connector.add_data_callback(self.on_data_received)
    connector.add_status_callback(self.on_status_changed)
def on_data_received(self, data):
    """
    Count and log a piece of market data delivered by the connector.

    Args:
        data: An OrderBookSnapshot or TradeEvent; other types are ignored
    """
    if isinstance(data, OrderBookSnapshot):
        self.orderbook_count += 1
        # NOTE(review): other code in this commit guards with
        # `if orderbook.mid_price:`, so these values are presumably None for
        # a one-sided book - confirm against OrderBookSnapshot. Formatting
        # None with ':.2f' would raise TypeError inside the callback.
        mid_text = "n/a" if data.mid_price is None else f"${data.mid_price:.2f}"
        spread_text = "n/a" if data.spread is None else f"${data.spread:.2f}"
        logger.info(
            f"📊 Order Book {self.orderbook_count}: {data.symbol} - "
            f"Mid: {mid_text}, Spread: {spread_text}, "
            f"Bids: {len(data.bids)}, Asks: {len(data.asks)}"
        )
    elif isinstance(data, TradeEvent):
        self.trade_count += 1
        logger.info(
            f"💰 Trade {self.trade_count}: {data.symbol} - "
            f"{data.side.upper()} {data.size} @ ${data.price:.2f}"
        )
def on_status_changed(self, exchange, status):
    """
    Log a connection status change reported by the connector.

    Args:
        exchange: Exchange name included in the log line
        status: Status object; its ``value`` attribute is logged
    """
    status_label = status.value
    logger.info(f"🔄 {exchange} status changed to: {status_label}")
async def run_example(self):
    """
    Run the demo end to end.

    Connects, lists symbols, fetches one BTCUSDT snapshot, subscribes to
    BTCUSDT (order book + trades) and ETHUSDT (order book), collects data for
    30 seconds, logs statistics, then unsubscribes and disconnects.
    Exceptions are logged rather than raised; the connector is disconnected
    on every exit path.
    """
    disconnected = False
    try:
        logger.info("🚀 Starting Binance connector example")

        # Connect to Binance
        logger.info("🔌 Connecting to Binance...")
        connected = await self.connector.connect()
        if not connected:
            logger.error("❌ Failed to connect to Binance")
            return
        logger.info("✅ Connected to Binance successfully")

        # Get available symbols
        logger.info("📋 Getting available symbols...")
        symbols = await self.connector.get_symbols()
        logger.info(f"📋 Found {len(symbols)} trading symbols")

        # Show some popular symbols
        popular_symbols = ['BTCUSDT', 'ETHUSDT', 'ADAUSDT', 'BNBUSDT']
        available_popular = [s for s in popular_symbols if s in symbols]
        logger.info(f"📋 Popular symbols available: {available_popular}")

        # Get order book snapshot
        if 'BTCUSDT' in symbols:
            logger.info("📊 Getting BTC order book snapshot...")
            orderbook = await self.connector.get_orderbook_snapshot('BTCUSDT', depth=10)
            if orderbook:
                logger.info(
                    f"📊 BTC Order Book: Mid=${orderbook.mid_price:.2f}, "
                    f"Spread=${orderbook.spread:.2f}"
                )

        # Subscribe to real-time data
        logger.info("🔔 Subscribing to real-time data...")

        # Subscribe to BTC order book and trades
        if 'BTCUSDT' in symbols:
            await self.connector.subscribe_orderbook('BTCUSDT')
            await self.connector.subscribe_trades('BTCUSDT')
            logger.info("✅ Subscribed to BTCUSDT order book and trades")

        # Subscribe to ETH order book
        if 'ETHUSDT' in symbols:
            await self.connector.subscribe_orderbook('ETHUSDT')
            logger.info("✅ Subscribed to ETHUSDT order book")

        # Let it run for a while
        logger.info("⏳ Collecting data for 30 seconds...")
        await asyncio.sleep(30)

        # Show statistics
        stats = self.connector.get_binance_stats()
        logger.info("📈 Final Statistics:")
        logger.info(f" 📊 Order books received: {self.orderbook_count}")
        logger.info(f" 💰 Trades received: {self.trade_count}")
        logger.info(f" 📡 Total messages: {stats['message_count']}")
        logger.info(f" ❌ Errors: {stats['error_count']}")
        logger.info(f" 🔗 Active streams: {stats['active_streams']}")
        logger.info(f" 📋 Subscriptions: {list(stats['subscriptions'].keys())}")

        # Unsubscribe and disconnect
        logger.info("🔌 Cleaning up...")
        if 'BTCUSDT' in self.connector.subscriptions:
            await self.connector.unsubscribe_orderbook('BTCUSDT')
            await self.connector.unsubscribe_trades('BTCUSDT')
        if 'ETHUSDT' in self.connector.subscriptions:
            await self.connector.unsubscribe_orderbook('ETHUSDT')
        await self.connector.disconnect()
        disconnected = True
        logger.info("✅ Disconnected successfully")
    except KeyboardInterrupt:
        logger.info("⏹️ Interrupted by user")
    except Exception as e:
        logger.error(f"❌ Example failed: {e}")
    finally:
        # Best-effort cleanup for the paths that did not disconnect cleanly.
        # Only Exception is suppressed, so KeyboardInterrupt and task
        # cancellation are no longer swallowed here.
        if not disconnected:
            try:
                await self.connector.disconnect()
            except Exception as cleanup_error:
                logger.debug(f"Ignoring error during cleanup disconnect: {cleanup_error}")
async def main():
    """Build the example object and run it to completion."""
    await BinanceExample().run_example()
if __name__ == "__main__":
    # Banner describing what the script is about to do
    separator = "=" * 25
    banner_lines = (
        "Binance Connector Example",
        separator,
        "This example will:",
        "1. Connect to Binance WebSocket",
        "2. Get available trading symbols",
        "3. Subscribe to real-time order book and trade data",
        "4. Display received data for 30 seconds",
        "5. Show statistics and disconnect",
        "",
        "Press Ctrl+C to stop early",
        separator,
    )
    for banner_line in banner_lines:
        print(banner_line)

    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n👋 Example stopped by user")
    except Exception as e:
        # Any other failure ends the script with a non-zero exit status
        print(f"\n❌ Example failed: {e}")
        sys.exit(1)

View File

@ -0,0 +1,15 @@
"""
Data processing and normalization components for the COBY system.
"""
from .data_processor import StandardDataProcessor
from .quality_checker import DataQualityChecker
from .anomaly_detector import AnomalyDetector
from .metrics_calculator import MetricsCalculator
__all__ = [
'StandardDataProcessor',
'DataQualityChecker',
'AnomalyDetector',
'MetricsCalculator'
]

View File

@ -0,0 +1,329 @@
"""
Anomaly detection for market data.
"""
import statistics
from typing import Dict, List, Union, Optional, Deque
from collections import deque
from datetime import datetime, timedelta
from ..models.core import OrderBookSnapshot, TradeEvent
from ..utils.logging import get_logger
from ..utils.timing import get_current_timestamp
logger = get_logger(__name__)
class AnomalyDetector:
"""
Detects anomalies in market data using statistical methods.

Keeps per-key rolling windows (deques bounded by `window_size`) and flags
a new value when its z-score against the window exceeds
`z_score_threshold`.

Detects:
- Price spikes and drops
- Volume anomalies
- Spread anomalies
- Frequency anomalies
"""
def __init__(self, window_size: int = 100, z_score_threshold: float = 3.0):
    """
    Create a detector with empty rolling windows.

    Args:
        window_size: Maximum number of samples kept per rolling window
        z_score_threshold: Z-score above which a value is reported
    """
    self.z_score_threshold = z_score_threshold
    self.window_size = window_size

    # One dict of bounded deques per metric, filled lazily per key
    self.timestamp_windows: Dict[str, Deque[datetime]] = {}
    self.spread_windows: Dict[str, Deque[float]] = {}
    self.volume_windows: Dict[str, Deque[float]] = {}
    self.price_windows: Dict[str, Deque[float]] = {}

    logger.info(f"Anomaly detector initialized with window_size={window_size}, threshold={z_score_threshold}")
def detect_orderbook_anomalies(self, orderbook: OrderBookSnapshot) -> List[str]:
    """
    Run the price, volume, spread and frequency checks on an order book.

    The snapshot is compared against the rolling windows for its
    symbol/exchange key and then added to them. An exception raised along the
    way is logged and reported as an entry in the returned list.

    Args:
        orderbook: Order book snapshot to analyze

    Returns:
        List[str]: List of detected anomalies
    """
    findings = []
    key = f"{orderbook.symbol}_{orderbook.exchange}"

    try:
        # Mid price (skipped when falsy)
        if orderbook.mid_price:
            findings += self._detect_price_anomalies(key, orderbook.mid_price)

        # Combined volume of both sides
        findings += self._detect_volume_anomalies(
            key, orderbook.bid_volume + orderbook.ask_volume
        )

        # Spread as a percentage of the mid price
        if orderbook.spread and orderbook.mid_price:
            spread_pct = (orderbook.spread / orderbook.mid_price) * 100
            findings += self._detect_spread_anomalies(key, spread_pct)

        # Timing of this update relative to earlier ones
        findings += self._detect_frequency_anomalies(key, orderbook.timestamp)

        # Only now does the snapshot become part of the history
        self._update_windows(key, orderbook)
    except Exception as e:
        logger.error(f"Error detecting order book anomalies: {e}")
        findings.append(f"Anomaly detection error: {e}")

    if findings:
        logger.warning(f"Anomalies detected in {orderbook.symbol}@{orderbook.exchange}: {findings}")
    return findings
def detect_trade_anomalies(self, trade: TradeEvent) -> List[str]:
    """
    Run the price and volume checks on a trade.

    Trades use their own windows, keyed by symbol, exchange and a '_trade'
    suffix. An exception raised along the way is logged and reported as an
    entry in the returned list.

    Args:
        trade: Trade event to analyze

    Returns:
        List[str]: List of detected anomalies
    """
    findings = []
    key = f"{trade.symbol}_{trade.exchange}_trade"

    try:
        findings += self._detect_price_anomalies(key, trade.price)
        findings += self._detect_volume_anomalies(key, trade.size)

        # The trade joins the history only after it has been checked
        self._update_trade_windows(key, trade)
    except Exception as e:
        logger.error(f"Error detecting trade anomalies: {e}")
        findings.append(f"Anomaly detection error: {e}")

    if findings:
        logger.warning(f"Trade anomalies detected in {trade.symbol}@{trade.exchange}: {findings}")
    return findings
def _detect_price_anomalies(self, key: str, price: float) -> List[str]:
    """
    Compare a price with the rolling window for `key` using a z-score.

    Args:
        key: Window key
        price: New price to test (it is not added to the window here)

    Returns:
        List[str]: One "Price spike"/"Price drop" message when the z-score
        exceeds the threshold, otherwise an empty list. Nothing is reported
        for an unseen key, fewer than 10 samples or zero deviation.
    """
    history = self.price_windows.get(key)
    if history is None:
        # First sighting of this key: create its window, nothing to compare yet
        self.price_windows[key] = deque(maxlen=self.window_size)
        return []

    if len(history) < 10:  # Need minimum data points
        return []

    try:
        center = statistics.mean(history)
        deviation = statistics.stdev(history)
    except statistics.StatisticsError:
        return []

    findings = []
    if deviation > 0:
        z_score = abs(price - center) / deviation
        if z_score > self.z_score_threshold:
            direction = "drop"
            if price > center:
                direction = "spike"
            findings.append(f"Price {direction}: {price:.6f} (z-score: {z_score:.2f})")
    return findings
def _detect_volume_anomalies(self, key: str, volume: float) -> List[str]:
    """
    Compare a volume with the rolling window for `key` using a z-score.

    Args:
        key: Base key; the window is stored under ``f"{key}_volume"``
        volume: New volume to test (it is not added to the window here)

    Returns:
        List[str]: One "Volume spike"/"Volume drop" message when the z-score
        exceeds the threshold, otherwise an empty list. Nothing is reported
        for an unseen key, fewer than 10 samples or zero deviation.
    """
    volume_key = f"{key}_volume"
    history = self.volume_windows.get(volume_key)
    if history is None:
        # First sighting of this key: create its window, nothing to compare yet
        self.volume_windows[volume_key] = deque(maxlen=self.window_size)
        return []

    if len(history) < 10:
        return []

    try:
        center = statistics.mean(history)
        deviation = statistics.stdev(history)
    except statistics.StatisticsError:
        return []

    findings = []
    if deviation > 0:
        z_score = abs(volume - center) / deviation
        if z_score > self.z_score_threshold:
            direction = "drop"
            if volume > center:
                direction = "spike"
            findings.append(f"Volume {direction}: {volume:.6f} (z-score: {z_score:.2f})")
    return findings
def _detect_spread_anomalies(self, key: str, spread_pct: float) -> List[str]:
    """
    Compare a spread percentage with the rolling window for `key`.

    Args:
        key: Base key; the window is stored under ``f"{key}_spread"``
        spread_pct: New spread percentage to test (not added to the window)

    Returns:
        List[str]: One "Spread widening"/"Spread tightening" message when the
        z-score exceeds the threshold, otherwise an empty list. Nothing is
        reported for an unseen key, fewer than 10 samples or zero deviation.
    """
    spread_key = f"{key}_spread"
    history = self.spread_windows.get(spread_key)
    if history is None:
        # First sighting of this key: create its window, nothing to compare yet
        self.spread_windows[spread_key] = deque(maxlen=self.window_size)
        return []

    if len(history) < 10:
        return []

    try:
        center = statistics.mean(history)
        deviation = statistics.stdev(history)
    except statistics.StatisticsError:
        return []

    findings = []
    if deviation > 0:
        z_score = abs(spread_pct - center) / deviation
        if z_score > self.z_score_threshold:
            direction = "tightening"
            if spread_pct > center:
                direction = "widening"
            findings.append(f"Spread {direction}: {spread_pct:.4f}% (z-score: {z_score:.2f})")
    return findings
def _detect_frequency_anomalies(self, key: str, timestamp: datetime) -> List[str]:
    """
    Compare the gap since the last update with the gaps seen so far.

    Args:
        key: Base key; the window is stored under ``f"{key}_timestamp"``
        timestamp: Time of the new update (not added to the window here)

    Returns:
        List[str]: One "Update delay"/"Update burst" message when the z-score
        of the current gap exceeds the threshold, otherwise an empty list.
        At least five earlier gaps are needed before anything is reported.
    """
    timestamp_key = f"{key}_timestamp"
    if timestamp_key not in self.timestamp_windows:
        # First sighting of this key: create its window, nothing to compare yet
        self.timestamp_windows[timestamp_key] = deque(maxlen=self.window_size)
        return []

    seen = self.timestamp_windows[timestamp_key]
    if len(seen) < 5:
        return []

    findings = []
    try:
        # Gaps, in seconds, between consecutive stored timestamps
        ordered = list(seen)
        gaps = [
            (later - earlier).total_seconds()
            for earlier, later in zip(ordered, ordered[1:])
        ]

        if len(gaps) >= 5:
            typical_gap = statistics.mean(gaps)
            gap_deviation = statistics.stdev(gaps)

            # Gap between the newest stored timestamp and this update
            current_gap = (timestamp - seen[-1]).total_seconds()

            if gap_deviation > 0:
                z_score = abs(current_gap - typical_gap) / gap_deviation
                if z_score > self.z_score_threshold:
                    label = "Update delay" if current_gap > typical_gap else "Update burst"
                    findings.append(f"{label}: {current_gap:.1f}s (expected: {typical_gap:.1f}s)")
    except (statistics.StatisticsError, IndexError):
        pass
    return findings
def _update_windows(self, key: str, orderbook: OrderBookSnapshot) -> None:
    """
    Append an order book's values to the rolling windows for `key`.

    Price and spread are recorded only when the corresponding values are
    truthy; volume and timestamp are always recorded. Missing windows are
    created on demand.

    Args:
        key: Base window key
        orderbook: Snapshot supplying the values
    """
    def new_window():
        return deque(maxlen=self.window_size)

    # Mid price
    if orderbook.mid_price:
        self.price_windows.setdefault(key, new_window()).append(orderbook.mid_price)

    # Combined volume of both sides
    total_volume = orderbook.bid_volume + orderbook.ask_volume
    self.volume_windows.setdefault(f"{key}_volume", new_window()).append(total_volume)

    # Spread as a percentage of the mid price
    if orderbook.spread and orderbook.mid_price:
        spread_pct = (orderbook.spread / orderbook.mid_price) * 100
        self.spread_windows.setdefault(f"{key}_spread", new_window()).append(spread_pct)

    # Update time, used by the frequency check
    self.timestamp_windows.setdefault(f"{key}_timestamp", new_window()).append(orderbook.timestamp)
def _update_trade_windows(self, key: str, trade: TradeEvent) -> None:
    """
    Append a trade's price and size to the rolling windows for `key`.

    Args:
        key: Base window key; the size goes under ``f"{key}_volume"``
        trade: Trade supplying the values
    """
    targets = (
        (self.price_windows, key, trade.price),
        (self.volume_windows, f"{key}_volume", trade.size),
    )
    for windows, window_key, value in targets:
        # Create the bounded window the first time a key is seen
        if window_key not in windows:
            windows[window_key] = deque(maxlen=self.window_size)
        windows[window_key].append(value)
def get_statistics(self) -> Dict[str, Dict[str, float]]:
    """
    Summarise every price window that holds at least two samples.

    Returns:
        Dict[str, Dict[str, float]]: Per key, the mean, standard deviation,
        minimum, maximum and sample count of the price window, or
        ``{'error': 'insufficient_data'}`` if the statistics cannot be computed.
    """
    summary = {}
    for key, window in self.price_windows.items():
        if len(window) < 2:
            # stdev needs two samples; such keys are left out entirely
            continue
        try:
            summary[key] = dict(
                price_mean=statistics.mean(window),
                price_std=statistics.stdev(window),
                price_min=min(window),
                price_max=max(window),
                data_points=len(window),
            )
        except statistics.StatisticsError:
            summary[key] = {'error': 'insufficient_data'}
    return summary
def reset_windows(self, key: Optional[str] = None) -> None:
    """
    Discard rolling windows.

    Args:
        key: Base key whose price, volume, spread and timestamp windows are
            removed; a falsy value (the default) clears every window
    """
    if key:
        # Each metric stores its window under its own suffix of the base key
        keyed_stores = (
            (self.price_windows, key),
            (self.volume_windows, f"{key}_volume"),
            (self.spread_windows, f"{key}_spread"),
            (self.timestamp_windows, f"{key}_timestamp"),
        )
        for store, stored_key in keyed_stores:
            store.pop(stored_key, None)
    else:
        for store in (
            self.price_windows,
            self.volume_windows,
            self.spread_windows,
            self.timestamp_windows,
        ):
            store.clear()

    logger.info(f"Reset anomaly detection windows for {key or 'all keys'}")

View File

@ -0,0 +1,378 @@
"""
Main data processor implementation.
"""
from typing import Dict, Union, List, Optional, Any
from ..interfaces.data_processor import DataProcessor
from ..models.core import OrderBookSnapshot, TradeEvent, OrderBookMetrics
from ..utils.logging import get_logger, set_correlation_id
from ..utils.exceptions import ValidationError, ProcessingError
from ..utils.timing import get_current_timestamp
from .quality_checker import DataQualityChecker
from .anomaly_detector import AnomalyDetector
from .metrics_calculator import MetricsCalculator
logger = get_logger(__name__)
class StandardDataProcessor(DataProcessor):
"""
Standard implementation of data processor interface.

Delegates to a DataQualityChecker, an AnomalyDetector and a
MetricsCalculator, and keeps counters of what it has processed.

Provides:
- Data normalization and validation
- Quality checking
- Anomaly detection
- Metrics calculation
- Data enrichment
"""
def __init__(self):
    """Create the helper components and zero the processing counters."""
    self.quality_checker = DataQualityChecker()
    self.anomaly_detector = AnomalyDetector()
    self.metrics_calculator = MetricsCalculator()

    # Counters updated by validate_data() and detect_anomalies()
    self.processed_orderbooks = 0
    self.processed_trades = 0
    self.quality_failures = 0
    self.anomalies_detected = 0

    logger.info("Standard data processor initialized")
def normalize_orderbook(self, raw_data: Dict, exchange: str) -> OrderBookSnapshot:
    """
    Return order book data in the standard format.

    This generic implementation only passes through data that already is an
    OrderBookSnapshot; converting raw dicts is left to exchange-specific
    processors.

    Args:
        raw_data: Raw order book data from exchange
        exchange: Exchange name

    Returns:
        OrderBookSnapshot: Normalized order book data

    Raises:
        ProcessingError: For any failure, including input that is not
            already an OrderBookSnapshot
    """
    try:
        set_correlation_id()

        if not isinstance(raw_data, OrderBookSnapshot):
            # Raw payloads need exchange-specific handling not provided here
            raise NotImplementedError(
                "normalize_orderbook should be implemented by exchange-specific processors"
            )
        return raw_data
    except Exception as e:
        logger.error(f"Error normalizing order book data: {e}")
        raise ProcessingError(f"Normalization failed: {e}", "NORMALIZE_ERROR")
def normalize_trade(self, raw_data: Dict, exchange: str) -> TradeEvent:
    """
    Return trade data in the standard format.

    This generic implementation only passes through data that already is a
    TradeEvent; converting raw dicts is left to exchange-specific processors.

    Args:
        raw_data: Raw trade data from exchange
        exchange: Exchange name

    Returns:
        TradeEvent: Normalized trade data

    Raises:
        ProcessingError: For any failure, including input that is not
            already a TradeEvent
    """
    try:
        set_correlation_id()

        if not isinstance(raw_data, TradeEvent):
            # Raw payloads need exchange-specific handling not provided here
            raise NotImplementedError(
                "normalize_trade should be implemented by exchange-specific processors"
            )
        return raw_data
    except Exception as e:
        logger.error(f"Error normalizing trade data: {e}")
        raise ProcessingError(f"Normalization failed: {e}", "NORMALIZE_ERROR")
def validate_data(self, data: Union[OrderBookSnapshot, TradeEvent]) -> bool:
    """
    Check normalized data against the quality checker.

    Increments the processed counter for the data type and, when the quality
    score is below 0.5, the failure counter.

    Args:
        data: Normalized data to validate

    Returns:
        bool: True if the quality score is acceptable; False for a low
        score, an unknown data type or any exception (all are logged)
    """
    try:
        set_correlation_id()

        if isinstance(data, OrderBookSnapshot):
            score, problems = self.quality_checker.check_orderbook_quality(data)
            self.processed_orderbooks += 1

            acceptable = not score < 0.5  # Threshold for acceptable quality
            if not acceptable:
                self.quality_failures += 1
                logger.warning(f"Low quality order book data: score={score:.2f}, issues={problems}")
            return acceptable

        if isinstance(data, TradeEvent):
            score, problems = self.quality_checker.check_trade_quality(data)
            self.processed_trades += 1

            acceptable = not score < 0.5
            if not acceptable:
                self.quality_failures += 1
                logger.warning(f"Low quality trade data: score={score:.2f}, issues={problems}")
            return acceptable

        logger.error(f"Unknown data type for validation: {type(data)}")
        return False
    except Exception as e:
        logger.error(f"Error validating data: {e}")
        return False
def calculate_metrics(self, orderbook: OrderBookSnapshot) -> OrderBookMetrics:
    """
    Compute order book metrics via the metrics calculator.

    Args:
        orderbook: Order book snapshot

    Returns:
        OrderBookMetrics: Calculated metrics

    Raises:
        ProcessingError: If the calculation fails for any reason
    """
    try:
        set_correlation_id()
        metrics = self.metrics_calculator.calculate_orderbook_metrics(orderbook)
    except Exception as e:
        logger.error(f"Error calculating metrics: {e}")
        raise ProcessingError(f"Metrics calculation failed: {e}", "METRICS_ERROR")
    return metrics
def detect_anomalies(self, data: Union[OrderBookSnapshot, TradeEvent]) -> List[str]:
    """
    Run the anomaly detector that matches the type of ``data``.

    Every reported anomaly is added to the ``anomalies_detected`` counter.

    Args:
        data: Order book snapshot or trade event to analyze

    Returns:
        List[str]: Anomaly descriptions from the detector. An unrecognised
            type or an error during detection is reported as a single
            descriptive entry instead of being raised.
    """
    try:
        set_correlation_id()

        # Pick the detector routine first, then run it once.
        if isinstance(data, OrderBookSnapshot):
            detect = self.anomaly_detector.detect_orderbook_anomalies
        elif isinstance(data, TradeEvent):
            detect = self.anomaly_detector.detect_trade_anomalies
        else:
            logger.error(f"Unknown data type for anomaly detection: {type(data)}")
            return ["Unknown data type"]

        anomalies = detect(data)
        if not anomalies:
            return anomalies

        self.anomalies_detected += len(anomalies)
        return anomalies
    except Exception as e:
        logger.error(f"Error detecting anomalies: {e}")
        return [f"Anomaly detection error: {e}"]
def filter_data(self, data: Union[OrderBookSnapshot, TradeEvent], criteria: Dict) -> bool:
    """
    Decide whether ``data`` passes every filter present in ``criteria``.

    Recognised keys (each optional; an absent key applies no filter):
        - 'symbols': container of allowed symbols
        - 'exchanges': container of allowed exchanges
        - 'min_quality': minimum score from the quality checker
        - 'price_range': ``(min_price, max_price)`` pair, both bounds inclusive
        - 'min_volume': minimum trade size (applied to TradeEvent only)

    Args:
        data: Data to filter
        criteria: Filtering criteria

    Returns:
        bool: True if data passes filter, False otherwise. Any error raised
            while filtering is logged and treated as a rejection.
    """
    try:
        set_correlation_id()

        # Symbol filter
        if 'symbols' in criteria and data.symbol not in criteria['symbols']:
            return False

        # Exchange filter
        if 'exchanges' in criteria and data.exchange not in criteria['exchanges']:
            return False

        # Quality filter
        if 'min_quality' in criteria:
            if isinstance(data, OrderBookSnapshot):
                quality_score, _ = self.quality_checker.check_orderbook_quality(data)
            elif isinstance(data, TradeEvent):
                quality_score, _ = self.quality_checker.check_trade_quality(data)
            else:
                quality_score = 0.0
            if quality_score < criteria['min_quality']:
                return False

        # Price range filter
        if 'price_range' in criteria:
            min_price, max_price = criteria['price_range']
            if isinstance(data, OrderBookSnapshot):
                price = data.mid_price
            elif isinstance(data, TradeEvent):
                price = data.price
            else:
                return False
            # Only a missing price skips the range check. A truthiness test
            # here would also wave through a price of 0, which must still be
            # compared against the bounds.
            # NOTE(review): presumes mid_price is None when the book has no
            # usable best bid/ask - confirm against OrderBookSnapshot.
            if price is not None and (price < min_price or price > max_price):
                return False

        # Volume filter for trades
        if 'min_volume' in criteria and isinstance(data, TradeEvent):
            if data.size < criteria['min_volume']:
                return False

        return True
    except Exception as e:
        logger.error(f"Error filtering data: {e}")
        return False
def enrich_data(self, data: Union[OrderBookSnapshot, TradeEvent]) -> Dict:
    """
    Wrap ``data`` in a dictionary together with derived metadata.

    The result always carries the original item, a processing timestamp and
    the processor version. Order books additionally get quality, metrics and
    liquidity entries; trades get quality, trade value and a numeric side.
    Anomaly results are attached for every item.

    Args:
        data: Order book snapshot or trade event to enrich

    Returns:
        Dict: Enriched data with metadata. If enrichment itself fails, only
            'original_data' and 'enrichment_error' are returned.
    """
    try:
        set_correlation_id()

        enriched = {
            'original_data': data,
            'processing_timestamp': get_current_timestamp(),
            'processor_version': '1.0.0'
        }

        if isinstance(data, OrderBookSnapshot):
            # Quality metrics
            enriched['quality_score'], enriched['quality_issues'] = (
                self.quality_checker.check_orderbook_quality(data)
            )

            # Calculated metrics; a failure is recorded rather than raised
            try:
                metrics = self.calculate_metrics(data)
                enriched['metrics'] = {
                    field: getattr(metrics, field)
                    for field in (
                        'mid_price',
                        'spread',
                        'spread_percentage',
                        'volume_imbalance',
                        'depth_10',
                        'depth_50',
                    )
                }
            except Exception as e:
                enriched['metrics_error'] = str(e)

            # Liquidity score; same record-and-continue policy
            try:
                enriched['liquidity_score'] = self.metrics_calculator.calculate_liquidity_score(data)
            except Exception as e:
                enriched['liquidity_error'] = str(e)
        elif isinstance(data, TradeEvent):
            # Quality metrics
            enriched['quality_score'], enriched['quality_issues'] = (
                self.quality_checker.check_trade_quality(data)
            )

            # Trade-specific enrichments
            enriched['trade_value'] = data.price * data.size
            if data.side == 'buy':
                enriched['side_numeric'] = 1
            else:
                enriched['side_numeric'] = -1

        # Anomaly detection results
        detected = self.detect_anomalies(data)
        enriched['anomalies'] = detected
        enriched['anomaly_count'] = len(detected)
        return enriched
    except Exception as e:
        logger.error(f"Error enriching data: {e}")
        return {
            'original_data': data,
            'enrichment_error': str(e)
        }
def get_data_quality_score(self, data: Union[OrderBookSnapshot, TradeEvent]) -> float:
    """
    Return only the quality score the quality checker assigns to ``data``.

    Args:
        data: Order book snapshot or trade event to score

    Returns:
        float: Score reported by the quality checker; 0.0 for an
            unrecognised type or when scoring raises
    """
    try:
        set_correlation_id()

        # Choose the scoring routine by type, then discard the issue list.
        if isinstance(data, OrderBookSnapshot):
            score_item = self.quality_checker.check_orderbook_quality
        elif isinstance(data, TradeEvent):
            score_item = self.quality_checker.check_trade_quality
        else:
            logger.error(f"Unknown data type for quality scoring: {type(data)}")
            return 0.0

        quality_score, _ = score_item(data)
        return quality_score
    except Exception as e:
        logger.error(f"Error calculating quality score: {e}")
        return 0.0
def get_processing_stats(self) -> Dict[str, Any]:
    """
    Report the processing counters, derived rates and component summaries.

    Returns:
        Dict[str, Any]: Raw counters, failure/anomaly rates per processed
            item, the quality checker summary and the anomaly detector
            statistics
    """
    # Floor the divisor at 1 so the rates never divide by zero before
    # anything has been processed.
    divisor = max(1, self.processed_orderbooks + self.processed_trades)

    stats = {
        'processed_orderbooks': self.processed_orderbooks,
        'processed_trades': self.processed_trades,
        'quality_failures': self.quality_failures,
        'anomalies_detected': self.anomalies_detected,
    }
    stats['quality_failure_rate'] = self.quality_failures / divisor
    stats['anomaly_rate'] = self.anomalies_detected / divisor
    stats['quality_checker_summary'] = self.quality_checker.get_quality_summary()
    stats['anomaly_detector_stats'] = self.anomaly_detector.get_statistics()
    return stats
def reset_stats(self) -> None:
    """Zero the four processing counters and log that a reset happened."""
    for counter_name in (
        'processed_orderbooks',
        'processed_trades',
        'quality_failures',
        'anomalies_detected',
    ):
        setattr(self, counter_name, 0)
    logger.info("Processing statistics reset")

View File

@ -0,0 +1,275 @@
"""
Metrics calculation for order book analysis.
"""
from typing import Dict, List, Optional
from ..models.core import OrderBookSnapshot, OrderBookMetrics, ImbalanceMetrics
from ..utils.logging import get_logger
# Logger for this module, obtained from the project's logging helper.
logger = get_logger(__name__)
class MetricsCalculator:
    """
    Calculates various metrics from order book data.

    Metrics include:
    - Basic metrics (mid price, spread, volumes)
    - Imbalance metrics
    - Depth metrics
    - Liquidity metrics

    Every calculation reads the first element of ``bids`` / ``asks`` as the
    best price on that side. This presumes the levels arrive sorted
    best-first; that ordering is not verified here.
    """

    # Spread, as a percentage of mid price, at which the spread component of
    # the liquidity score reaches zero.
    _MAX_SPREAD_PCT = 5.0
    # Combined top-10 size at which the depth component of the liquidity
    # score saturates at 1.0.
    _DEPTH_SATURATION = 100.0

    def __init__(self):
        """Initialize metrics calculator"""
        logger.info("Metrics calculator initialized")

    @staticmethod
    def _total_size(levels) -> float:
        """Sum the ``size`` of every level in ``levels``."""
        return sum(level.size for level in levels)

    @staticmethod
    def _relative_imbalance(bid_total: float, ask_total: float) -> float:
        """Return (bid - ask) / (bid + ask), or 0.0 when the combined total is not positive."""
        combined = bid_total + ask_total
        return ((bid_total - ask_total) / combined) if combined > 0 else 0.0

    def calculate_orderbook_metrics(self, orderbook: OrderBookSnapshot) -> OrderBookMetrics:
        """
        Calculate comprehensive order book metrics.

        Args:
            orderbook: Order book snapshot

        Returns:
            OrderBookMetrics: Calculated metrics

        Raises:
            Exception: Any error raised during calculation is logged and
                re-raised unchanged.
        """
        try:
            # Basic calculations
            mid_price = self._calculate_mid_price(orderbook)
            spread = self._calculate_spread(orderbook)
            spread_percentage = (spread / mid_price * 100) if mid_price > 0 else 0.0

            # Volume and imbalance calculations over the full book
            bid_volume = self._total_size(orderbook.bids)
            ask_volume = self._total_size(orderbook.asks)
            volume_imbalance = self._relative_imbalance(bid_volume, ask_volume)

            # Depth calculations
            depth_10 = self._calculate_depth(orderbook, 10)
            depth_50 = self._calculate_depth(orderbook, 50)

            return OrderBookMetrics(
                symbol=orderbook.symbol,
                exchange=orderbook.exchange,
                timestamp=orderbook.timestamp,
                mid_price=mid_price,
                spread=spread,
                spread_percentage=spread_percentage,
                bid_volume=bid_volume,
                ask_volume=ask_volume,
                volume_imbalance=volume_imbalance,
                depth_10=depth_10,
                depth_50=depth_50
            )
        except Exception as e:
            logger.error(f"Error calculating order book metrics: {e}")
            raise

    def calculate_imbalance_metrics(self, orderbook: OrderBookSnapshot) -> ImbalanceMetrics:
        """
        Calculate order book imbalance metrics.

        Args:
            orderbook: Order book snapshot

        Returns:
            ImbalanceMetrics: Calculated imbalance metrics

        Raises:
            Exception: Any error raised during calculation is logged and
                re-raised unchanged.
        """
        try:
            # Volume imbalance over the full book
            volume_imbalance = self._relative_imbalance(
                self._total_size(orderbook.bids),
                self._total_size(orderbook.asks),
            )

            # Price imbalance (weighted by volume)
            price_imbalance = self._calculate_price_imbalance(orderbook)

            # Depth imbalance
            depth_imbalance = self._calculate_depth_imbalance(orderbook)

            # Momentum score (simplified - would need historical data for full implementation)
            momentum_score = volume_imbalance * 0.5 + price_imbalance * 0.3 + depth_imbalance * 0.2

            return ImbalanceMetrics(
                symbol=orderbook.symbol,
                timestamp=orderbook.timestamp,
                volume_imbalance=volume_imbalance,
                price_imbalance=price_imbalance,
                depth_imbalance=depth_imbalance,
                momentum_score=momentum_score
            )
        except Exception as e:
            logger.error(f"Error calculating imbalance metrics: {e}")
            raise

    def _calculate_mid_price(self, orderbook: OrderBookSnapshot) -> float:
        """Return the average of the first bid and first ask price, or 0.0 if either side is empty."""
        if not orderbook.bids or not orderbook.asks:
            return 0.0
        return (orderbook.bids[0].price + orderbook.asks[0].price) / 2.0

    def _calculate_spread(self, orderbook: OrderBookSnapshot) -> float:
        """Return first ask price minus first bid price, or 0.0 if either side is empty."""
        if not orderbook.bids or not orderbook.asks:
            return 0.0
        return orderbook.asks[0].price - orderbook.bids[0].price

    def _calculate_depth(self, orderbook: OrderBookSnapshot, levels: int) -> float:
        """Return the combined bid and ask size over the first ``levels`` levels of each side."""
        return self._total_size(orderbook.bids[:levels]) + self._total_size(orderbook.asks[:levels])

    def _calculate_price_imbalance(self, orderbook: OrderBookSnapshot) -> float:
        """
        Calculate price-weighted imbalance.

        Compares the volume-weighted average price of the first five bid
        levels with that of the first five ask levels, relative to their
        midpoint, clamped to the range -1.0 to 1.0.
        """
        if not orderbook.bids or not orderbook.asks:
            return 0.0

        # Volume-weighted average prices for the top levels
        bid_vwap = self._calculate_vwap(orderbook.bids[:5])
        ask_vwap = self._calculate_vwap(orderbook.asks[:5])
        if bid_vwap == 0 or ask_vwap == 0:
            return 0.0

        mid_price = (bid_vwap + ask_vwap) / 2.0
        # Normalize imbalance
        price_imbalance = (bid_vwap - ask_vwap) / mid_price if mid_price > 0 else 0.0
        return max(-1.0, min(1.0, price_imbalance))

    def _calculate_depth_imbalance(self, orderbook: OrderBookSnapshot) -> float:
        """
        Calculate depth imbalance across multiple levels.

        Computes (bid - ask) / (bid + ask) over the first 5, 10 and 20 levels
        and returns the plain mean of the windows that contain any size.
        """
        imbalances = []
        for levels in (5, 10, 20):
            bid_depth = self._total_size(orderbook.bids[:levels])
            ask_depth = self._total_size(orderbook.asks[:levels])
            total_depth = bid_depth + ask_depth
            # Windows with no size at all are left out of the average.
            if total_depth > 0:
                imbalances.append((bid_depth - ask_depth) / total_depth)

        # Unweighted mean: every contributing window counts equally.
        if imbalances:
            return sum(imbalances) / len(imbalances)
        return 0.0

    def _calculate_vwap(self, levels: List) -> float:
        """Calculate volume-weighted average price for price levels; 0.0 when there is no size."""
        if not levels:
            return 0.0
        total_volume = self._total_size(levels)
        if total_volume == 0:
            return 0.0
        weighted_sum = sum(level.price * level.size for level in levels)
        return weighted_sum / total_volume

    def calculate_liquidity_score(self, orderbook: OrderBookSnapshot) -> float:
        """
        Calculate liquidity score based on depth and spread.

        The score is a weighted sum of three components, each in the range
        0.0 to 1.0: spread (weight 0.4), top-10 depth (0.4) and top-10
        bid/ask balance (0.2).

        Args:
            orderbook: Order book snapshot

        Returns:
            float: Liquidity score (0.0 to 1.0); 0.0 when either side is
                empty, the mid price is zero, or the calculation raises
        """
        try:
            if not orderbook.bids or not orderbook.asks:
                return 0.0

            # Spread component (lower spread = higher liquidity)
            spread = self._calculate_spread(orderbook)
            mid_price = self._calculate_mid_price(orderbook)
            if mid_price == 0:
                return 0.0
            spread_pct = (spread / mid_price) * 100
            # Clamp on both sides: a crossed book has a negative spread, which
            # would otherwise push this component above 1.0 and reward an
            # invalid book with a higher score than a zero-spread one.
            spread_score = min(1.0, max(0.0, 1.0 - (spread_pct / self._MAX_SPREAD_PCT)))

            # Depth component (higher depth = higher liquidity)
            total_depth = self._calculate_depth(orderbook, 10)
            depth_score = min(1.0, total_depth / self._DEPTH_SATURATION)

            # Volume balance component (more balanced = higher liquidity)
            bid_volume = self._total_size(orderbook.bids[:10])
            ask_volume = self._total_size(orderbook.asks[:10])
            total_volume = bid_volume + ask_volume
            if total_volume > 0:
                balance_score = 1.0 - abs(bid_volume - ask_volume) / total_volume
            else:
                balance_score = 0.0

            # Weighted combination
            liquidity_score = (spread_score * 0.4 + depth_score * 0.4 + balance_score * 0.2)
            return max(0.0, min(1.0, liquidity_score))
        except Exception as e:
            logger.error(f"Error calculating liquidity score: {e}")
            return 0.0

    def get_market_summary(self, orderbook: OrderBookSnapshot) -> Dict[str, float]:
        """
        Get comprehensive market summary.

        Args:
            orderbook: Order book snapshot

        Returns:
            Dict[str, float]: Market summary metrics; an empty dict when any
                of the underlying calculations raises
        """
        try:
            metrics = self.calculate_orderbook_metrics(orderbook)
            imbalance = self.calculate_imbalance_metrics(orderbook)
            liquidity = self.calculate_liquidity_score(orderbook)
            return {
                'mid_price': metrics.mid_price,
                'spread': metrics.spread,
                'spread_percentage': metrics.spread_percentage,
                'bid_volume': metrics.bid_volume,
                'ask_volume': metrics.ask_volume,
                'volume_imbalance': metrics.volume_imbalance,
                'depth_10': metrics.depth_10,
                'depth_50': metrics.depth_50,
                'price_imbalance': imbalance.price_imbalance,
                'depth_imbalance': imbalance.depth_imbalance,
                'momentum_score': imbalance.momentum_score,
                'liquidity_score': liquidity
            }
        except Exception as e:
            logger.error(f"Error generating market summary: {e}")
            return {}

View File

@ -0,0 +1,288 @@
"""
Data quality checking and validation for market data.
"""
from typing import Dict, List, Union, Optional, Tuple
from datetime import datetime, timezone
from ..models.core import OrderBookSnapshot, TradeEvent
from ..utils.logging import get_logger
from ..utils.validation import validate_price, validate_volume, validate_symbol
from ..utils.timing import get_current_timestamp
# Logger for this module, obtained from the project's logging helper.
logger = get_logger(__name__)
class DataQualityChecker:
    """
    Comprehensive data quality checker for market data.

    Validates:
    - Data structure integrity
    - Price and volume ranges
    - Timestamp consistency
    - Cross-validation between related data points

    Scores start at 1.0 and lose a fixed penalty per detected issue; the
    result is clamped to the range 0.0-1.0.
    """

    def __init__(self):
        """Initialize quality checker with default thresholds"""
        # Quality thresholds
        self.max_spread_percentage = 10.0  # Maximum spread as % of mid price
        self.max_price_change_percentage = 50.0  # Maximum price change between updates
        self.min_volume_threshold = 0.000001  # Minimum meaningful volume
        self.max_timestamp_drift = 300  # Maximum seconds drift from current time
        # Last seen mid price per market, keyed by "<symbol>_<exchange>"
        self.price_history: Dict[str, float] = {}
        logger.info("Data quality checker initialized")

    @staticmethod
    def _price_key(orderbook: OrderBookSnapshot) -> str:
        """Build the ``price_history`` key for the market this order book belongs to."""
        return f"{orderbook.symbol}_{orderbook.exchange}"

    def check_orderbook_quality(self, orderbook: OrderBookSnapshot) -> Tuple[float, List[str]]:
        """
        Check order book data quality.

        Runs the structure, price, volume, spread, timestamp and price-history
        checks in that order, then records the book's mid price as the new
        history entry for its market.

        Args:
            orderbook: Order book snapshot to validate

        Returns:
            Tuple[float, List[str]]: Quality score (0.0-1.0) and list of issues.
                If a check raises, the score is 0.0 and the error is added to
                the issues.
        """
        issues = []
        quality_score = 1.0

        # Each check paired with the penalty applied per issue it reports.
        checks = (
            (self._check_orderbook_structure, 0.1),
            (self._check_orderbook_prices, 0.15),
            (self._check_orderbook_volumes, 0.1),
            (self._check_orderbook_spread, 0.2),
            (lambda book: self._check_timestamp(book.timestamp), 0.1),
            (self._check_price_history, 0.15),
        )
        try:
            for run_check, penalty in checks:
                found = run_check(orderbook)
                issues.extend(found)
                quality_score -= len(found) * penalty

            # Update price history
            self._update_price_history(orderbook)
        except Exception as e:
            logger.error(f"Error checking order book quality: {e}")
            issues.append(f"Quality check error: {e}")
            quality_score = 0.0

        # Ensure score is within bounds
        quality_score = max(0.0, min(1.0, quality_score))
        if issues:
            logger.debug(f"Order book quality issues for {orderbook.symbol}@{orderbook.exchange}: {issues}")
        return quality_score, issues

    def check_trade_quality(self, trade: TradeEvent) -> Tuple[float, List[str]]:
        """
        Check trade data quality.

        Each detected issue lowers the score by 0.2.

        Args:
            trade: Trade event to validate

        Returns:
            Tuple[float, List[str]]: Quality score (0.0-1.0) and list of issues.
                If a check raises, the score is 0.0 and the error is added to
                the issues.
        """
        issues = []
        quality_score = 1.0
        try:
            # Basic structure validation
            if not validate_symbol(trade.symbol):
                issues.append("Invalid symbol format")
            if not trade.exchange:
                issues.append("Missing exchange")
            if not trade.trade_id:
                issues.append("Missing trade ID")

            # Price validation
            if not validate_price(trade.price):
                issues.append(f"Invalid price: {trade.price}")

            # Volume validation
            if not validate_volume(trade.size):
                issues.append(f"Invalid size: {trade.size}")
            if trade.size < self.min_volume_threshold:
                issues.append(f"Size below threshold: {trade.size}")

            # Side validation
            if trade.side not in ['buy', 'sell']:
                issues.append(f"Invalid side: {trade.side}")

            # Timestamp validation
            issues.extend(self._check_timestamp(trade.timestamp))

            # Calculate quality score
            quality_score -= len(issues) * 0.2
        except Exception as e:
            logger.error(f"Error checking trade quality: {e}")
            issues.append(f"Quality check error: {e}")
            quality_score = 0.0

        # Ensure score is within bounds
        quality_score = max(0.0, min(1.0, quality_score))
        if issues:
            logger.debug(f"Trade quality issues for {trade.symbol}@{trade.exchange}: {issues}")
        return quality_score, issues

    def _check_orderbook_structure(self, orderbook: OrderBookSnapshot) -> List[str]:
        """Check basic order book structure: symbol format, exchange, and a non-empty book on both sides."""
        issues = []
        if not validate_symbol(orderbook.symbol):
            issues.append("Invalid symbol format")
        if not orderbook.exchange:
            issues.append("Missing exchange")
        if not orderbook.bids:
            issues.append("No bid levels")
        if not orderbook.asks:
            issues.append("No ask levels")
        return issues

    def _check_orderbook_prices(self, orderbook: OrderBookSnapshot) -> List[str]:
        """Check order book price validity: per-level prices, strict ordering on each side, and no crossed book."""
        issues = []

        # Check bid prices (should be descending)
        for i, bid in enumerate(orderbook.bids):
            if not validate_price(bid.price):
                issues.append(f"Invalid bid price at level {i}: {bid.price}")
            if i > 0 and bid.price >= orderbook.bids[i-1].price:
                issues.append(f"Bid prices not descending at level {i}")

        # Check ask prices (should be ascending)
        for i, ask in enumerate(orderbook.asks):
            if not validate_price(ask.price):
                issues.append(f"Invalid ask price at level {i}: {ask.price}")
            if i > 0 and ask.price <= orderbook.asks[i-1].price:
                issues.append(f"Ask prices not ascending at level {i}")

        # Check bid-ask ordering
        if orderbook.bids and orderbook.asks:
            if orderbook.bids[0].price >= orderbook.asks[0].price:
                issues.append("Best bid >= best ask (crossed book)")
        return issues

    def _check_orderbook_volumes(self, orderbook: OrderBookSnapshot) -> List[str]:
        """Check order book volume validity: per-level sizes must validate and meet the minimum threshold."""
        issues = []

        # Check bid volumes
        for i, bid in enumerate(orderbook.bids):
            if not validate_volume(bid.size):
                issues.append(f"Invalid bid volume at level {i}: {bid.size}")
            if bid.size < self.min_volume_threshold:
                issues.append(f"Bid volume below threshold at level {i}: {bid.size}")

        # Check ask volumes
        for i, ask in enumerate(orderbook.asks):
            if not validate_volume(ask.size):
                issues.append(f"Invalid ask volume at level {i}: {ask.size}")
            if ask.size < self.min_volume_threshold:
                issues.append(f"Ask volume below threshold at level {i}: {ask.size}")
        return issues

    def _check_orderbook_spread(self, orderbook: OrderBookSnapshot) -> List[str]:
        """Check order book spread validity; skipped when mid price or spread is missing or zero."""
        issues = []
        if orderbook.mid_price and orderbook.spread:
            spread_percentage = (orderbook.spread / orderbook.mid_price) * 100
            if spread_percentage > self.max_spread_percentage:
                issues.append(f"Spread too wide: {spread_percentage:.2f}%")
            if spread_percentage < 0:
                issues.append(f"Negative spread: {spread_percentage:.2f}%")
        return issues

    def _check_timestamp(self, timestamp: datetime) -> List[str]:
        """
        Check timestamp validity.

        Reports a missing timestamp, a timestamp without timezone info, and a
        timestamp further than ``max_timestamp_drift`` seconds from the
        current time.
        """
        issues = []
        if not timestamp:
            issues.append("Missing timestamp")
            return issues

        current_time = get_current_timestamp()

        # Check if timestamp is timezone-aware
        if timestamp.tzinfo is None:
            issues.append("Timestamp missing timezone info")
            # Subtracting a naive datetime from an aware one raises TypeError,
            # which would abort the whole quality check with a zero score.
            # Interpret the naive value in the reference clock's timezone so
            # the drift is still measured and only this one issue is counted.
            timestamp = timestamp.replace(tzinfo=current_time.tzinfo)

        # Check timestamp drift
        time_diff = abs((timestamp - current_time).total_seconds())
        if time_diff > self.max_timestamp_drift:
            issues.append(f"Timestamp drift too large: {time_diff:.1f}s")
        return issues

    def _check_price_history(self, orderbook: OrderBookSnapshot) -> List[str]:
        """Check price consistency with the last mid price recorded for the same market."""
        issues = []
        key = self._price_key(orderbook)
        if key in self.price_history and orderbook.mid_price:
            # Stored prices are never zero: only truthy mid prices are recorded.
            last_price = self.price_history[key]
            price_change = abs(orderbook.mid_price - last_price) / last_price * 100
            if price_change > self.max_price_change_percentage:
                issues.append(f"Large price change: {price_change:.2f}%")
        return issues

    def _update_price_history(self, orderbook: OrderBookSnapshot) -> None:
        """Update price history for future validation; books without a mid price are ignored."""
        if orderbook.mid_price:
            self.price_history[self._price_key(orderbook)] = orderbook.mid_price

    def get_quality_summary(self) -> Dict[str, Union[int, float]]:
        """Return the number of tracked markets together with the configured thresholds."""
        return {
            'symbols_tracked': len(self.price_history),
            'max_spread_percentage': self.max_spread_percentage,
            'max_price_change_percentage': self.max_price_change_percentage,
            'min_volume_threshold': self.min_volume_threshold,
            'max_timestamp_drift': self.max_timestamp_drift
        }

View File

@ -0,0 +1,341 @@
"""
Tests for Binance exchange connector.
"""
import pytest
import asyncio
from unittest.mock import AsyncMock, MagicMock, patch
from datetime import datetime, timezone
from ..connectors.binance_connector import BinanceConnector
from ..models.core import OrderBookSnapshot, TradeEvent, PriceLevel
@pytest.fixture
def binance_connector():
    """Provide a freshly constructed BinanceConnector to each test."""
    connector = BinanceConnector()
    return connector
@pytest.fixture
def sample_binance_orderbook_data():
    """Provide a two-level order book payload with string price/quantity pairs."""
    bid_levels = [
        ["4.00000000", "431.00000000"],
        ["3.99000000", "9.00000000"],
    ]
    ask_levels = [
        ["4.00000200", "12.00000000"],
        ["4.01000000", "18.00000000"],
    ]
    return {"lastUpdateId": 1027024, "bids": bid_levels, "asks": ask_levels}
@pytest.fixture
def sample_binance_depth_update():
    """Provide a ``depthUpdate`` message for BTCUSDT with two levels per side."""
    bid_changes = [["50000.00", "0.25"], ["49999.00", "0.50"]]
    ask_changes = [["50001.00", "0.30"], ["50002.00", "0.40"]]
    return {
        "e": "depthUpdate",
        "E": 1672515782136,
        "s": "BTCUSDT",
        "U": 157,
        "u": 160,
        "b": bid_changes,
        "a": ask_changes,
    }
@pytest.fixture
def sample_binance_trade_update():
    """Provide a ``trade`` message for BTCUSDT."""
    # Event envelope first, then the trade details, merged in that key order.
    envelope = {"e": "trade", "E": 1672515782136, "s": "BTCUSDT"}
    trade_fields = {
        "t": 12345,
        "p": "50000.50",
        "q": "0.10",
        "b": 88,
        "a": 50,
        "T": 1672515782134,
        "m": False,
        "M": True,
    }
    return {**envelope, **trade_fields}
class TestBinanceConnector:
    """Behavioural checks for BinanceConnector: parsing, handlers, subscriptions and REST helpers."""

    def test_initialization(self, binance_connector):
        """A freshly built connector carries the Binance defaults."""
        connector = binance_connector
        assert connector.exchange_name == "binance"
        assert connector.websocket_url == BinanceConnector.WEBSOCKET_URL
        assert len(connector.message_handlers) >= 3
        assert connector.stream_id == 1
        assert connector.active_streams == []

    def test_normalize_symbol(self, binance_connector):
        """Separators and case are normalised away; an empty symbol is rejected."""
        # Standard format, dash and slash separators, then lowercase input
        for raw_symbol in ("BTCUSDT", "BTC-USDT", "BTC/USDT", "btcusdt"):
            assert binance_connector.normalize_symbol(raw_symbol) == "BTCUSDT"

        # Invalid symbol
        with pytest.raises(Exception):
            binance_connector.normalize_symbol("")

    def test_get_message_type(self, binance_connector):
        """Depth, trade, error and unrecognised messages are classified accordingly."""
        cases = [
            ({"e": "depthUpdate", "s": "BTCUSDT"}, "depthUpdate"),
            ({"e": "trade", "s": "BTCUSDT"}, "trade"),
            ({"error": {"code": -1121, "msg": "Invalid symbol"}}, "error"),
            ({"data": "something"}, "unknown"),
        ]
        for message, expected_type in cases:
            assert binance_connector._get_message_type(message) == expected_type

    def test_parse_orderbook_snapshot(self, binance_connector, sample_binance_orderbook_data):
        """A REST snapshot payload is parsed into an OrderBookSnapshot with numeric levels."""
        snapshot = binance_connector._parse_orderbook_snapshot(
            sample_binance_orderbook_data,
            "BTCUSDT"
        )

        assert isinstance(snapshot, OrderBookSnapshot)
        assert snapshot.symbol == "BTCUSDT"
        assert snapshot.exchange == "binance"
        assert len(snapshot.bids) == 2
        assert len(snapshot.asks) == 2
        assert snapshot.sequence_id == 1027024

        # Top bid level
        top_bid = snapshot.bids[0]
        assert top_bid.price == 4.0
        assert top_bid.size == 431.0

        # Top ask level
        top_ask = snapshot.asks[0]
        assert top_ask.price == 4.000002
        assert top_ask.size == 12.0

    @pytest.mark.asyncio
    async def test_handle_orderbook_update(self, binance_connector, sample_binance_depth_update):
        """A depth update reaches registered callbacks as an OrderBookSnapshot."""
        captured = []

        def record(payload):
            captured.append(payload)

        binance_connector.add_data_callback(record)

        await binance_connector._handle_orderbook_update(sample_binance_depth_update)

        # The callback must have fired; inspect the most recent payload
        assert captured
        latest = captured[-1]
        assert isinstance(latest, OrderBookSnapshot)
        assert latest.symbol == "BTCUSDT"
        assert latest.exchange == "binance"
        assert len(latest.bids) == 2
        assert len(latest.asks) == 2

    @pytest.mark.asyncio
    async def test_handle_trade_update(self, binance_connector, sample_binance_trade_update):
        """A trade message reaches registered callbacks as a TradeEvent."""
        captured = []

        def record(payload):
            captured.append(payload)

        binance_connector.add_data_callback(record)

        await binance_connector._handle_trade_update(sample_binance_trade_update)

        # The callback must have fired; inspect the most recent payload
        assert captured
        latest = captured[-1]
        assert isinstance(latest, TradeEvent)
        assert latest.symbol == "BTCUSDT"
        assert latest.exchange == "binance"
        assert latest.price == 50000.50
        assert latest.size == 0.10
        assert latest.side == "buy"  # m=False means buyer is not maker
        assert latest.trade_id == "12345"

    @pytest.mark.asyncio
    async def test_subscribe_orderbook(self, binance_connector):
        """Subscribing to an order book sends a SUBSCRIBE request and tracks the stream."""
        # Stand in for the WebSocket send
        sender = AsyncMock(return_value=True)
        binance_connector._send_message = sender

        await binance_connector.subscribe_orderbook("BTCUSDT")

        # Exactly one request went out, carrying the depth stream
        sender.assert_called_once()
        request = sender.call_args[0][0]
        assert request["method"] == "SUBSCRIBE"
        assert "btcusdt@depth@100ms" in request["params"]
        assert request["id"] == 1

        # Bookkeeping was updated
        assert "BTCUSDT" in binance_connector.subscriptions
        assert "orderbook" in binance_connector.subscriptions["BTCUSDT"]
        assert "btcusdt@depth@100ms" in binance_connector.active_streams
        assert binance_connector.stream_id == 2

    @pytest.mark.asyncio
    async def test_subscribe_trades(self, binance_connector):
        """Subscribing to trades sends a SUBSCRIBE request and tracks the stream."""
        # Stand in for the WebSocket send
        sender = AsyncMock(return_value=True)
        binance_connector._send_message = sender

        await binance_connector.subscribe_trades("ETHUSDT")

        # Exactly one request went out, carrying the trade stream
        sender.assert_called_once()
        request = sender.call_args[0][0]
        assert request["method"] == "SUBSCRIBE"
        assert "ethusdt@trade" in request["params"]
        assert request["id"] == 1

        # Bookkeeping was updated
        assert "ETHUSDT" in binance_connector.subscriptions
        assert "trades" in binance_connector.subscriptions["ETHUSDT"]
        assert "ethusdt@trade" in binance_connector.active_streams

    @pytest.mark.asyncio
    async def test_unsubscribe_orderbook(self, binance_connector):
        """Unsubscribing sends an UNSUBSCRIBE request and clears the tracking state."""
        # Pretend the subscription already exists
        binance_connector.subscriptions["BTCUSDT"] = ["orderbook"]
        binance_connector.active_streams.append("btcusdt@depth@100ms")

        # Stand in for the WebSocket send
        sender = AsyncMock(return_value=True)
        binance_connector._send_message = sender

        await binance_connector.unsubscribe_orderbook("BTCUSDT")

        # Exactly one request went out, naming the depth stream
        sender.assert_called_once()
        request = sender.call_args[0][0]
        assert request["method"] == "UNSUBSCRIBE"
        assert "btcusdt@depth@100ms" in request["params"]

        # Bookkeeping was cleared
        assert "BTCUSDT" not in binance_connector.subscriptions
        assert "btcusdt@depth@100ms" not in binance_connector.active_streams

    @pytest.mark.asyncio
    @patch('aiohttp.ClientSession.get')
    async def test_get_symbols(self, mock_get, binance_connector):
        """Only symbols whose status is TRADING are returned."""
        # Canned exchange-info response
        listing = {
            "symbols": [
                {"symbol": "BTCUSDT", "status": "TRADING"},
                {"symbol": "ETHUSDT", "status": "TRADING"},
                {"symbol": "ADAUSDT", "status": "BREAK"}  # Should be filtered out
            ]
        }
        fake_response = AsyncMock()
        fake_response.status = 200
        fake_response.json = AsyncMock(return_value=listing)
        mock_get.return_value.__aenter__.return_value = fake_response

        symbols = await binance_connector.get_symbols()

        assert len(symbols) == 2
        assert "BTCUSDT" in symbols
        assert "ETHUSDT" in symbols
        assert "ADAUSDT" not in symbols  # Filtered out due to status

    @pytest.mark.asyncio
    @patch('aiohttp.ClientSession.get')
    async def test_get_orderbook_snapshot(self, mock_get, binance_connector, sample_binance_orderbook_data):
        """A REST order book response is returned as an OrderBookSnapshot."""
        # Canned depth response
        fake_response = AsyncMock()
        fake_response.status = 200
        fake_response.json = AsyncMock(return_value=sample_binance_orderbook_data)
        mock_get.return_value.__aenter__.return_value = fake_response

        snapshot = await binance_connector.get_orderbook_snapshot("BTCUSDT", depth=20)

        assert isinstance(snapshot, OrderBookSnapshot)
        assert snapshot.symbol == "BTCUSDT"
        assert snapshot.exchange == "binance"
        assert len(snapshot.bids) == 2
        assert len(snapshot.asks) == 2

    def test_get_binance_stats(self, binance_connector):
        """Statistics include the Binance stream details alongside the base connector fields."""
        # Seed some stream state
        binance_connector.active_streams = ["btcusdt@depth@100ms", "ethusdt@trade"]
        binance_connector.stream_id = 5

        stats = binance_connector.get_binance_stats()

        # Binance-specific entries
        assert stats['active_streams'] == 2
        assert len(stats['stream_list']) == 2
        assert stats['next_stream_id'] == 5

        # Entries inherited from the base statistics
        for base_key in ('exchange', 'connection_status', 'message_count'):
            assert base_key in stats
if __name__ == "__main__":
    # Manual smoke run for executing this file directly instead of via pytest.
    async def simple_test():
        binance = BinanceConnector()

        # Symbol normalization
        normalized = binance.normalize_symbol("BTC-USDT")
        print(f"Symbol normalization: BTC-USDT -> {normalized}")

        # Message type detection
        msg_type = binance._get_message_type({"e": "depthUpdate"})
        print(f"Message type detection: {msg_type}")

        print("Simple Binance connector test completed")

    smoke_run = simple_test()
    asyncio.run(smoke_run)

View File

@ -0,0 +1,304 @@
"""
Tests for data processing components.
"""
import pytest
from datetime import datetime, timezone
from ..processing.data_processor import StandardDataProcessor
from ..processing.quality_checker import DataQualityChecker
from ..processing.anomaly_detector import AnomalyDetector
from ..processing.metrics_calculator import MetricsCalculator
from ..models.core import OrderBookSnapshot, TradeEvent, PriceLevel
@pytest.fixture
def data_processor():
    """Provide a freshly constructed StandardDataProcessor to each test."""
    processor = StandardDataProcessor()
    return processor
@pytest.fixture
def quality_checker():
    """Provide a freshly constructed DataQualityChecker to each test."""
    checker = DataQualityChecker()
    return checker
@pytest.fixture
def anomaly_detector():
    """Provide a freshly constructed AnomalyDetector to each test."""
    detector = AnomalyDetector()
    return detector
@pytest.fixture
def metrics_calculator():
    """Provide a freshly constructed MetricsCalculator to each test."""
    calculator = MetricsCalculator()
    return calculator
@pytest.fixture
def sample_orderbook():
    """Provide a three-level BTCUSDT order book stamped with the current UTC time."""
    captured_at = datetime.now(timezone.utc)
    bid_levels = [
        PriceLevel(price=price, size=size)
        for price, size in ((50000.0, 1.5), (49999.0, 2.0), (49998.0, 1.0))
    ]
    ask_levels = [
        PriceLevel(price=price, size=size)
        for price, size in ((50001.0, 1.0), (50002.0, 1.5), (50003.0, 2.0))
    ]
    return OrderBookSnapshot(
        symbol="BTCUSDT",
        exchange="binance",
        timestamp=captured_at,
        bids=bid_levels,
        asks=ask_levels,
    )
@pytest.fixture
def sample_trade():
    """Build a single BTCUSDT buy trade on "binance" stamped with the current UTC time."""
    trade_fields = dict(
        symbol="BTCUSDT",
        exchange="binance",
        timestamp=datetime.now(timezone.utc),
        price=50000.5,
        size=0.1,
        side="buy",
        trade_id="test_trade_123",
    )
    return TradeEvent(**trade_fields)
class TestDataQualityChecker:
    """Scoring and issue-reporting checks for DataQualityChecker."""

    def test_orderbook_quality_check(self, quality_checker, sample_orderbook):
        """A well-formed order book scores within [0, 1] and above 0.8."""
        result = quality_checker.check_orderbook_quality(sample_orderbook)
        score, problems = result

        assert 0.0 <= score <= 1.0
        assert isinstance(problems, list)

        # The sample book is healthy, so the score is expected to be high
        assert score > 0.8

    def test_trade_quality_check(self, quality_checker, sample_trade):
        """A well-formed trade scores within [0, 1] and above 0.8."""
        result = quality_checker.check_trade_quality(sample_trade)
        score, problems = result

        assert 0.0 <= score <= 1.0
        assert isinstance(problems, list)

        # The sample trade is healthy, so the score is expected to be high
        assert score > 0.8

    def test_invalid_orderbook_detection(self, quality_checker):
        """A crossed book (best bid above best ask) is scored low and reported."""
        snapshot_time = datetime.now(timezone.utc)
        crossed_bid = PriceLevel(price=50002.0, size=1.0)  # above the ask
        crossed_ask = PriceLevel(price=50001.0, size=1.0)  # below the bid
        crossed_book = OrderBookSnapshot(
            symbol="BTCUSDT",
            exchange="binance",
            timestamp=snapshot_time,
            bids=[crossed_bid],
            asks=[crossed_ask],
        )

        score, problems = quality_checker.check_orderbook_quality(crossed_book)

        assert score < 0.8
        # At least one reported issue must mention the crossed book
        assert any("crossed book" in problem.lower() for problem in problems)
class TestAnomalyDetector:
    """Behavioural checks for AnomalyDetector."""

    @staticmethod
    def _one_level_book(bid_price, ask_price):
        """Return a BTCUSDT/binance snapshot with one bid and one ask of size 1.0."""
        return OrderBookSnapshot(
            symbol="BTCUSDT",
            exchange="binance",
            timestamp=datetime.now(timezone.utc),
            bids=[PriceLevel(price=bid_price, size=1.0)],
            asks=[PriceLevel(price=ask_price, size=1.0)],
        )

    def test_orderbook_anomaly_detection(self, anomaly_detector, sample_orderbook):
        """Feeding the same order book five times always returns a list."""
        # Only the return type is verified for these first few snapshots
        for _ in range(5):
            found = anomaly_detector.detect_orderbook_anomalies(sample_orderbook)
            assert isinstance(found, list)

    def test_trade_anomaly_detection(self, anomaly_detector, sample_trade):
        """Feeding the same trade five times always returns a list."""
        # Only the return type is verified for these first few trades
        for _ in range(5):
            found = anomaly_detector.detect_trade_anomalies(sample_trade)
            assert isinstance(found, list)

    def test_price_spike_detection(self, anomaly_detector):
        """A jump to ~60000 after twenty books near 50000 is reported as a spike."""
        # Establish a baseline: prices drift upward by 1 per snapshot
        for offset in range(20):
            baseline_book = self._one_level_book(50000.0 + offset, 50001.0 + offset)
            anomaly_detector.detect_orderbook_anomalies(baseline_book)

        # Roughly a 20% jump relative to the baseline
        spike_book = self._one_level_book(60000.0, 60001.0)
        found = anomaly_detector.detect_orderbook_anomalies(spike_book)

        assert len(found) > 0
        assert any("spike" in entry.lower() for entry in found)
class TestMetricsCalculator:
    """Test cases for MetricsCalculator.

    Computed floating-point metrics are compared with ``pytest.approx`` so
    the assertions do not depend on the exact order of arithmetic inside
    the calculator.
    """

    def test_orderbook_metrics_calculation(self, metrics_calculator, sample_orderbook):
        """Check the headline metrics derived from the sample order book.

        The sample book has a best bid of 50000.0, a best ask of 50001.0 and
        4.5 units of size on each side.
        """
        metrics = metrics_calculator.calculate_orderbook_metrics(sample_orderbook)

        assert metrics.symbol == "BTCUSDT"
        assert metrics.exchange == "binance"
        assert metrics.mid_price == pytest.approx(50000.5)  # (50000 + 50001) / 2
        assert metrics.spread == pytest.approx(1.0)  # 50001 - 50000
        assert metrics.spread_percentage > 0
        assert metrics.bid_volume == pytest.approx(4.5)  # 1.5 + 2.0 + 1.0
        assert metrics.ask_volume == pytest.approx(4.5)  # 1.0 + 1.5 + 2.0
        # approx(0.0) falls back to an absolute tolerance, so tiny rounding
        # residue around zero is accepted while real imbalance still fails.
        assert metrics.volume_imbalance == pytest.approx(0.0)  # Equal volumes

    def test_imbalance_metrics_calculation(self, metrics_calculator, sample_orderbook):
        """Check that every imbalance figure stays within [-1, 1]."""
        imbalance = metrics_calculator.calculate_imbalance_metrics(sample_orderbook)

        assert imbalance.symbol == "BTCUSDT"
        assert -1.0 <= imbalance.volume_imbalance <= 1.0
        assert -1.0 <= imbalance.price_imbalance <= 1.0
        assert -1.0 <= imbalance.depth_imbalance <= 1.0
        assert -1.0 <= imbalance.momentum_score <= 1.0

    def test_liquidity_score_calculation(self, metrics_calculator, sample_orderbook):
        """Check that the liquidity score is within [0, 1] and above 0.5."""
        liquidity_score = metrics_calculator.calculate_liquidity_score(sample_orderbook)

        assert 0.0 <= liquidity_score <= 1.0
        assert liquidity_score > 0.5  # Good order book should have decent liquidity
class TestStandardDataProcessor:
    """End-to-end checks for the StandardDataProcessor facade."""

    def test_data_validation(self, data_processor, sample_orderbook, sample_trade):
        """Both sample objects must validate to exactly ``True``."""
        for sample in (sample_orderbook, sample_trade):
            assert data_processor.validate_data(sample) is True

    def test_metrics_calculation(self, data_processor, sample_orderbook):
        """Metrics obtained through the processor are populated and positive."""
        computed = data_processor.calculate_metrics(sample_orderbook)

        assert computed.symbol == "BTCUSDT"
        assert computed.mid_price > 0
        assert computed.spread > 0

    def test_anomaly_detection(self, data_processor, sample_orderbook, sample_trade):
        """Anomaly detection returns a list for order books and trades alike."""
        detections = [
            data_processor.detect_anomalies(sample)
            for sample in (sample_orderbook, sample_trade)
        ]

        for detected in detections:
            assert isinstance(detected, list)

    def test_data_filtering(self, data_processor, sample_orderbook, sample_trade):
        """Symbol and price-range criteria accept or reject both sample objects."""
        # (criteria, expected outcome) pairs: symbol filters first, then price ranges
        scenarios = [
            ({'symbols': ['BTCUSDT']}, True),
            ({'symbols': ['ETHUSDT']}, False),
            ({'price_range': (40000, 60000)}, True),
            ({'price_range': (60000, 70000)}, False),
        ]

        for criteria, expected in scenarios:
            assert data_processor.filter_data(sample_orderbook, criteria) is expected
            assert data_processor.filter_data(sample_trade, criteria) is expected

    def test_data_enrichment(self, data_processor, sample_orderbook, sample_trade):
        """Enriched output exposes the expected keys for each data type."""
        orderbook_enriched = data_processor.enrich_data(sample_orderbook)
        trade_enriched = data_processor.enrich_data(sample_trade)

        # Keys expected on an enriched order book
        orderbook_keys = (
            'original_data',
            'quality_score',
            'anomalies',
            'processing_timestamp',
        )
        for key in orderbook_keys:
            assert key in orderbook_enriched

        # Keys expected on an enriched trade
        trade_keys = ('original_data', 'quality_score', 'anomalies', 'trade_value')
        for key in trade_keys:
            assert key in trade_enriched

    def test_quality_score_calculation(self, data_processor, sample_orderbook, sample_trade):
        """Quality scores fall within [0, 1] and exceed 0.8 for good data."""
        orderbook_score = data_processor.get_data_quality_score(sample_orderbook)
        trade_score = data_processor.get_data_quality_score(sample_trade)
        scores = (orderbook_score, trade_score)

        for score in scores:
            assert 0.0 <= score <= 1.0

        # Well-formed samples are expected to score high
        for score in scores:
            assert score > 0.8

    def test_processing_stats(self, data_processor, sample_orderbook, sample_trade):
        """Validating data is reflected in the processing statistics."""
        # Push one order book and one trade through validation
        data_processor.validate_data(sample_orderbook)
        data_processor.validate_data(sample_trade)

        stats = data_processor.get_processing_stats()

        expected_keys = (
            'processed_orderbooks',
            'processed_trades',
            'quality_failures',
            'anomalies_detected',
        )
        for key in expected_keys:
            assert key in stats

        for counter in ('processed_orderbooks', 'processed_trades'):
            assert stats[counter] >= 1
if __name__ == "__main__":
    # Manual smoke run that exercises the processor without pytest.
    # NOTE(review): this module uses relative imports, so it presumably has
    # to be launched as a package module (python -m ...) - confirm.
    def _run_simple_checks():
        """Validate, measure and score a one-level order book, printing results."""
        processor = StandardDataProcessor()

        # Minimal one-level order book
        orderbook = OrderBookSnapshot(
            symbol="BTCUSDT",
            exchange="test",
            timestamp=datetime.now(timezone.utc),
            bids=[PriceLevel(price=50000.0, size=1.0)],
            asks=[PriceLevel(price=50001.0, size=1.0)],
        )

        # Validation
        is_valid = processor.validate_data(orderbook)
        print(f"Order book validation: {'PASSED' if is_valid else 'FAILED'}")

        # Metrics
        metrics = processor.calculate_metrics(orderbook)
        print(f"Metrics calculation: mid_price={metrics.mid_price}, spread={metrics.spread}")

        # Quality score
        quality_score = processor.get_data_quality_score(orderbook)
        print(f"Quality score: {quality_score:.2f}")

        print("Simple data processor test completed")

    _run_simple_checks()