bucket aggregation
15
COBY/processing/__init__.py
Normal file
@@ -0,0 +1,15 @@
"""
Data processing and normalization components for the COBY system.
"""

from .data_processor import StandardDataProcessor
from .quality_checker import DataQualityChecker
from .anomaly_detector import AnomalyDetector
from .metrics_calculator import MetricsCalculator

__all__ = [
    'StandardDataProcessor',
    'DataQualityChecker',
    'AnomalyDetector',
    'MetricsCalculator'
]
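For orientation, the package is meant to be consumed through these re-exports; a minimal usage sketch, assuming the COBY package root is importable (e.g. on PYTHONPATH):

# Minimal usage sketch of the package surface defined above.
from COBY.processing import (
    StandardDataProcessor,
    DataQualityChecker,
    AnomalyDetector,
    MetricsCalculator,
)

processor = StandardDataProcessor()          # bundles checker, detector, calculator
detector = AnomalyDetector(window_size=200)  # or tune a component standalone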
329
COBY/processing/anomaly_detector.py
Normal file
@@ -0,0 +1,329 @@
"""
Anomaly detection for market data.
"""

import statistics
from typing import Dict, List, Union, Optional, Deque
from collections import deque
from datetime import datetime, timedelta
from ..models.core import OrderBookSnapshot, TradeEvent
from ..utils.logging import get_logger
from ..utils.timing import get_current_timestamp

logger = get_logger(__name__)


class AnomalyDetector:
    """
    Detects anomalies in market data using statistical methods.

    Detects:
    - Price spikes and drops
    - Volume anomalies
    - Spread anomalies
    - Frequency anomalies
    """

    def __init__(self, window_size: int = 100, z_score_threshold: float = 3.0):
        """
        Initialize anomaly detector.

        Args:
            window_size: Size of rolling window for statistics
            z_score_threshold: Z-score threshold for anomaly detection
        """
        self.window_size = window_size
        self.z_score_threshold = z_score_threshold

        # Rolling windows for statistics
        self.price_windows: Dict[str, Deque[float]] = {}
        self.volume_windows: Dict[str, Deque[float]] = {}
        self.spread_windows: Dict[str, Deque[float]] = {}
        self.timestamp_windows: Dict[str, Deque[datetime]] = {}

        logger.info(f"Anomaly detector initialized with window_size={window_size}, threshold={z_score_threshold}")

    def detect_orderbook_anomalies(self, orderbook: OrderBookSnapshot) -> List[str]:
        """
        Detect anomalies in order book data.

        Args:
            orderbook: Order book snapshot to analyze

        Returns:
            List[str]: List of detected anomalies
        """
        anomalies = []
        key = f"{orderbook.symbol}_{orderbook.exchange}"

        try:
            # Price anomalies
            if orderbook.mid_price:
                price_anomalies = self._detect_price_anomalies(key, orderbook.mid_price)
                anomalies.extend(price_anomalies)

            # Volume anomalies
            total_volume = orderbook.bid_volume + orderbook.ask_volume
            volume_anomalies = self._detect_volume_anomalies(key, total_volume)
            anomalies.extend(volume_anomalies)

            # Spread anomalies
            if orderbook.spread and orderbook.mid_price:
                spread_pct = (orderbook.spread / orderbook.mid_price) * 100
                spread_anomalies = self._detect_spread_anomalies(key, spread_pct)
                anomalies.extend(spread_anomalies)

            # Frequency anomalies
            frequency_anomalies = self._detect_frequency_anomalies(key, orderbook.timestamp)
            anomalies.extend(frequency_anomalies)

            # Update windows
            self._update_windows(key, orderbook)

        except Exception as e:
            logger.error(f"Error detecting order book anomalies: {e}")
            anomalies.append(f"Anomaly detection error: {e}")

        if anomalies:
            logger.warning(f"Anomalies detected in {orderbook.symbol}@{orderbook.exchange}: {anomalies}")

        return anomalies

    def detect_trade_anomalies(self, trade: TradeEvent) -> List[str]:
        """
        Detect anomalies in trade data.

        Args:
            trade: Trade event to analyze

        Returns:
            List[str]: List of detected anomalies
        """
        anomalies = []
        key = f"{trade.symbol}_{trade.exchange}_trade"

        try:
            # Price anomalies
            price_anomalies = self._detect_price_anomalies(key, trade.price)
            anomalies.extend(price_anomalies)

            # Volume anomalies
            volume_anomalies = self._detect_volume_anomalies(key, trade.size)
            anomalies.extend(volume_anomalies)

            # Update windows
            self._update_trade_windows(key, trade)

        except Exception as e:
            logger.error(f"Error detecting trade anomalies: {e}")
            anomalies.append(f"Anomaly detection error: {e}")

        if anomalies:
            logger.warning(f"Trade anomalies detected in {trade.symbol}@{trade.exchange}: {anomalies}")

        return anomalies

    def _detect_price_anomalies(self, key: str, price: float) -> List[str]:
        """Detect price anomalies using z-score"""
        anomalies = []

        if key not in self.price_windows:
            self.price_windows[key] = deque(maxlen=self.window_size)
            return anomalies

        window = self.price_windows[key]
        if len(window) < 10:  # Need minimum data points
            return anomalies

        try:
            mean_price = statistics.mean(window)
            std_price = statistics.stdev(window)

            if std_price > 0:
                z_score = abs(price - mean_price) / std_price

                if z_score > self.z_score_threshold:
                    direction = "spike" if price > mean_price else "drop"
                    anomalies.append(f"Price {direction}: {price:.6f} (z-score: {z_score:.2f})")

        except statistics.StatisticsError:
            pass  # Not enough data or all values are the same

        return anomalies

    def _detect_volume_anomalies(self, key: str, volume: float) -> List[str]:
        """Detect volume anomalies using z-score"""
        anomalies = []

        volume_key = f"{key}_volume"
        if volume_key not in self.volume_windows:
            self.volume_windows[volume_key] = deque(maxlen=self.window_size)
            return anomalies

        window = self.volume_windows[volume_key]
        if len(window) < 10:
            return anomalies

        try:
            mean_volume = statistics.mean(window)
            std_volume = statistics.stdev(window)

            if std_volume > 0:
                z_score = abs(volume - mean_volume) / std_volume

                if z_score > self.z_score_threshold:
                    direction = "spike" if volume > mean_volume else "drop"
                    anomalies.append(f"Volume {direction}: {volume:.6f} (z-score: {z_score:.2f})")

        except statistics.StatisticsError:
            pass

        return anomalies

    def _detect_spread_anomalies(self, key: str, spread_pct: float) -> List[str]:
        """Detect spread anomalies using z-score"""
        anomalies = []

        spread_key = f"{key}_spread"
        if spread_key not in self.spread_windows:
            self.spread_windows[spread_key] = deque(maxlen=self.window_size)
            return anomalies

        window = self.spread_windows[spread_key]
        if len(window) < 10:
            return anomalies

        try:
            mean_spread = statistics.mean(window)
            std_spread = statistics.stdev(window)

            if std_spread > 0:
                z_score = abs(spread_pct - mean_spread) / std_spread

                if z_score > self.z_score_threshold:
                    direction = "widening" if spread_pct > mean_spread else "tightening"
                    anomalies.append(f"Spread {direction}: {spread_pct:.4f}% (z-score: {z_score:.2f})")

        except statistics.StatisticsError:
            pass

        return anomalies

    def _detect_frequency_anomalies(self, key: str, timestamp: datetime) -> List[str]:
        """Detect frequency anomalies in data updates"""
        anomalies = []

        timestamp_key = f"{key}_timestamp"
        if timestamp_key not in self.timestamp_windows:
            self.timestamp_windows[timestamp_key] = deque(maxlen=self.window_size)
            return anomalies

        window = self.timestamp_windows[timestamp_key]
        if len(window) < 5:
            return anomalies

        try:
            # Calculate intervals between updates
            intervals = []
            for i in range(1, len(window)):
                interval = (window[i] - window[i-1]).total_seconds()
                intervals.append(interval)

            if len(intervals) >= 5:
                mean_interval = statistics.mean(intervals)
                std_interval = statistics.stdev(intervals)

                # Check current interval
                current_interval = (timestamp - window[-1]).total_seconds()

                if std_interval > 0:
                    z_score = abs(current_interval - mean_interval) / std_interval

                    if z_score > self.z_score_threshold:
                        if current_interval > mean_interval:
                            anomalies.append(f"Update delay: {current_interval:.1f}s (expected: {mean_interval:.1f}s)")
                        else:
                            anomalies.append(f"Update burst: {current_interval:.1f}s (expected: {mean_interval:.1f}s)")

        except (statistics.StatisticsError, IndexError):
            pass

        return anomalies

    def _update_windows(self, key: str, orderbook: OrderBookSnapshot) -> None:
        """Update rolling windows with new data"""
        # Update price window
        if orderbook.mid_price:
            if key not in self.price_windows:
                self.price_windows[key] = deque(maxlen=self.window_size)
            self.price_windows[key].append(orderbook.mid_price)

        # Update volume window
        total_volume = orderbook.bid_volume + orderbook.ask_volume
        volume_key = f"{key}_volume"
        if volume_key not in self.volume_windows:
            self.volume_windows[volume_key] = deque(maxlen=self.window_size)
        self.volume_windows[volume_key].append(total_volume)

        # Update spread window
        if orderbook.spread and orderbook.mid_price:
            spread_pct = (orderbook.spread / orderbook.mid_price) * 100
            spread_key = f"{key}_spread"
            if spread_key not in self.spread_windows:
                self.spread_windows[spread_key] = deque(maxlen=self.window_size)
            self.spread_windows[spread_key].append(spread_pct)

        # Update timestamp window
        timestamp_key = f"{key}_timestamp"
        if timestamp_key not in self.timestamp_windows:
            self.timestamp_windows[timestamp_key] = deque(maxlen=self.window_size)
        self.timestamp_windows[timestamp_key].append(orderbook.timestamp)

    def _update_trade_windows(self, key: str, trade: TradeEvent) -> None:
        """Update rolling windows with trade data"""
        # Update price window
        if key not in self.price_windows:
            self.price_windows[key] = deque(maxlen=self.window_size)
        self.price_windows[key].append(trade.price)

        # Update volume window
        volume_key = f"{key}_volume"
        if volume_key not in self.volume_windows:
            self.volume_windows[volume_key] = deque(maxlen=self.window_size)
        self.volume_windows[volume_key].append(trade.size)

    def get_statistics(self) -> Dict[str, Dict[str, float]]:
        """Get current statistics for all tracked symbols"""
        stats = {}

        for key, window in self.price_windows.items():
            if len(window) >= 2:
                try:
                    stats[key] = {
                        'price_mean': statistics.mean(window),
                        'price_std': statistics.stdev(window),
                        'price_min': min(window),
                        'price_max': max(window),
                        'data_points': len(window)
                    }
                except statistics.StatisticsError:
                    stats[key] = {'error': 'insufficient_data'}

        return stats

    def reset_windows(self, key: Optional[str] = None) -> None:
        """Reset rolling windows for a specific key or all keys"""
        if key:
            # Reset specific key
            self.price_windows.pop(key, None)
            self.volume_windows.pop(f"{key}_volume", None)
            self.spread_windows.pop(f"{key}_spread", None)
            self.timestamp_windows.pop(f"{key}_timestamp", None)
        else:
            # Reset all windows
            self.price_windows.clear()
            self.volume_windows.clear()
            self.spread_windows.clear()
            self.timestamp_windows.clear()

        logger.info(f"Reset anomaly detection windows for {key or 'all keys'}")
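The rolling-window z-score test that all four detectors share can be exercised in isolation with only the standard library; a small sketch with synthetic prices (not COBY models):

# Standalone sketch of the z-score logic used by _detect_price_anomalies().
# Synthetic data only; real callers feed OrderBookSnapshot/TradeEvent objects.
import statistics
from collections import deque

window = deque((100.0 + 0.1 * i for i in range(50)), maxlen=100)  # calm series
candidate = 140.0                                                 # sudden spike

mean = statistics.mean(window)
std = statistics.stdev(window)
z = abs(candidate - mean) / std if std > 0 else 0.0

if z > 3.0:  # the default z_score_threshold
    print(f"Price spike: {candidate:.6f} (z-score: {z:.2f})")  # z is roughly 26 here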
378
COBY/processing/data_processor.py
Normal file
@@ -0,0 +1,378 @@
"""
Main data processor implementation.
"""

from typing import Dict, Union, List, Optional, Any
from ..interfaces.data_processor import DataProcessor
from ..models.core import OrderBookSnapshot, TradeEvent, OrderBookMetrics
from ..utils.logging import get_logger, set_correlation_id
from ..utils.exceptions import ValidationError, ProcessingError
from ..utils.timing import get_current_timestamp
from .quality_checker import DataQualityChecker
from .anomaly_detector import AnomalyDetector
from .metrics_calculator import MetricsCalculator

logger = get_logger(__name__)


class StandardDataProcessor(DataProcessor):
    """
    Standard implementation of data processor interface.

    Provides:
    - Data normalization and validation
    - Quality checking
    - Anomaly detection
    - Metrics calculation
    - Data enrichment
    """

    def __init__(self):
        """Initialize data processor with components"""
        self.quality_checker = DataQualityChecker()
        self.anomaly_detector = AnomalyDetector()
        self.metrics_calculator = MetricsCalculator()

        # Processing statistics
        self.processed_orderbooks = 0
        self.processed_trades = 0
        self.quality_failures = 0
        self.anomalies_detected = 0

        logger.info("Standard data processor initialized")

    def normalize_orderbook(self, raw_data: Dict, exchange: str) -> OrderBookSnapshot:
        """
        Normalize raw order book data to standard format.

        Args:
            raw_data: Raw order book data from exchange
            exchange: Exchange name

        Returns:
            OrderBookSnapshot: Normalized order book data
        """
        try:
            set_correlation_id()

            # This is a generic implementation - specific exchanges would override
            # For now, assume data is already in correct format
            if isinstance(raw_data, OrderBookSnapshot):
                return raw_data

            # If raw_data is a dict, try to construct OrderBookSnapshot
            # This would be customized per exchange
            raise NotImplementedError(
                "normalize_orderbook should be implemented by exchange-specific processors"
            )

        except Exception as e:
            logger.error(f"Error normalizing order book data: {e}")
            raise ProcessingError(f"Normalization failed: {e}", "NORMALIZE_ERROR")

    def normalize_trade(self, raw_data: Dict, exchange: str) -> TradeEvent:
        """
        Normalize raw trade data to standard format.

        Args:
            raw_data: Raw trade data from exchange
            exchange: Exchange name

        Returns:
            TradeEvent: Normalized trade data
        """
        try:
            set_correlation_id()

            # This is a generic implementation - specific exchanges would override
            if isinstance(raw_data, TradeEvent):
                return raw_data

            # If raw_data is a dict, try to construct TradeEvent
            # This would be customized per exchange
            raise NotImplementedError(
                "normalize_trade should be implemented by exchange-specific processors"
            )

        except Exception as e:
            logger.error(f"Error normalizing trade data: {e}")
            raise ProcessingError(f"Normalization failed: {e}", "NORMALIZE_ERROR")

    def validate_data(self, data: Union[OrderBookSnapshot, TradeEvent]) -> bool:
        """
        Validate normalized data for quality and consistency.

        Args:
            data: Normalized data to validate

        Returns:
            bool: True if data is valid, False otherwise
        """
        try:
            set_correlation_id()

            if isinstance(data, OrderBookSnapshot):
                quality_score, issues = self.quality_checker.check_orderbook_quality(data)
                self.processed_orderbooks += 1

                if quality_score < 0.5:  # Threshold for acceptable quality
                    self.quality_failures += 1
                    logger.warning(f"Low quality order book data: score={quality_score:.2f}, issues={issues}")
                    return False

                return True

            elif isinstance(data, TradeEvent):
                quality_score, issues = self.quality_checker.check_trade_quality(data)
                self.processed_trades += 1

                if quality_score < 0.5:
                    self.quality_failures += 1
                    logger.warning(f"Low quality trade data: score={quality_score:.2f}, issues={issues}")
                    return False

                return True

            else:
                logger.error(f"Unknown data type for validation: {type(data)}")
                return False

        except Exception as e:
            logger.error(f"Error validating data: {e}")
            return False

    def calculate_metrics(self, orderbook: OrderBookSnapshot) -> OrderBookMetrics:
        """
        Calculate metrics from order book data.

        Args:
            orderbook: Order book snapshot

        Returns:
            OrderBookMetrics: Calculated metrics
        """
        try:
            set_correlation_id()
            return self.metrics_calculator.calculate_orderbook_metrics(orderbook)

        except Exception as e:
            logger.error(f"Error calculating metrics: {e}")
            raise ProcessingError(f"Metrics calculation failed: {e}", "METRICS_ERROR")

    def detect_anomalies(self, data: Union[OrderBookSnapshot, TradeEvent]) -> List[str]:
        """
        Detect anomalies in the data.

        Args:
            data: Data to analyze for anomalies

        Returns:
            List[str]: List of detected anomaly descriptions
        """
        try:
            set_correlation_id()

            if isinstance(data, OrderBookSnapshot):
                anomalies = self.anomaly_detector.detect_orderbook_anomalies(data)
            elif isinstance(data, TradeEvent):
                anomalies = self.anomaly_detector.detect_trade_anomalies(data)
            else:
                logger.error(f"Unknown data type for anomaly detection: {type(data)}")
                return ["Unknown data type"]

            if anomalies:
                self.anomalies_detected += len(anomalies)

            return anomalies

        except Exception as e:
            logger.error(f"Error detecting anomalies: {e}")
            return [f"Anomaly detection error: {e}"]

    def filter_data(self, data: Union[OrderBookSnapshot, TradeEvent], criteria: Dict) -> bool:
        """
        Filter data based on criteria.

        Args:
            data: Data to filter
            criteria: Filtering criteria

        Returns:
            bool: True if data passes filter, False otherwise
        """
        try:
            set_correlation_id()

            # Symbol filter
            if 'symbols' in criteria:
                allowed_symbols = criteria['symbols']
                if data.symbol not in allowed_symbols:
                    return False

            # Exchange filter
            if 'exchanges' in criteria:
                allowed_exchanges = criteria['exchanges']
                if data.exchange not in allowed_exchanges:
                    return False

            # Quality filter
            if 'min_quality' in criteria:
                min_quality = criteria['min_quality']
                if isinstance(data, OrderBookSnapshot):
                    quality_score, _ = self.quality_checker.check_orderbook_quality(data)
                elif isinstance(data, TradeEvent):
                    quality_score, _ = self.quality_checker.check_trade_quality(data)
                else:
                    quality_score = 0.0

                if quality_score < min_quality:
                    return False

            # Price range filter
            if 'price_range' in criteria:
                price_range = criteria['price_range']
                min_price, max_price = price_range

                if isinstance(data, OrderBookSnapshot):
                    price = data.mid_price
                elif isinstance(data, TradeEvent):
                    price = data.price
                else:
                    return False

                if price and (price < min_price or price > max_price):
                    return False

            # Volume filter for trades
            if 'min_volume' in criteria and isinstance(data, TradeEvent):
                min_volume = criteria['min_volume']
                if data.size < min_volume:
                    return False

            return True

        except Exception as e:
            logger.error(f"Error filtering data: {e}")
            return False

    def enrich_data(self, data: Union[OrderBookSnapshot, TradeEvent]) -> Dict:
        """
        Enrich data with additional metadata.

        Args:
            data: Data to enrich

        Returns:
            Dict: Enriched data with metadata
        """
        try:
            set_correlation_id()

            enriched = {
                'original_data': data,
                'processing_timestamp': get_current_timestamp(),
                'processor_version': '1.0.0'
            }

            # Add quality metrics
            if isinstance(data, OrderBookSnapshot):
                quality_score, quality_issues = self.quality_checker.check_orderbook_quality(data)
                enriched['quality_score'] = quality_score
                enriched['quality_issues'] = quality_issues

                # Add calculated metrics
                try:
                    metrics = self.calculate_metrics(data)
                    enriched['metrics'] = {
                        'mid_price': metrics.mid_price,
                        'spread': metrics.spread,
                        'spread_percentage': metrics.spread_percentage,
                        'volume_imbalance': metrics.volume_imbalance,
                        'depth_10': metrics.depth_10,
                        'depth_50': metrics.depth_50
                    }
                except Exception as e:
                    enriched['metrics_error'] = str(e)

                # Add liquidity score
                try:
                    liquidity_score = self.metrics_calculator.calculate_liquidity_score(data)
                    enriched['liquidity_score'] = liquidity_score
                except Exception as e:
                    enriched['liquidity_error'] = str(e)

            elif isinstance(data, TradeEvent):
                quality_score, quality_issues = self.quality_checker.check_trade_quality(data)
                enriched['quality_score'] = quality_score
                enriched['quality_issues'] = quality_issues

                # Add trade-specific enrichments
                enriched['trade_value'] = data.price * data.size
                enriched['side_numeric'] = 1 if data.side == 'buy' else -1

            # Add anomaly detection results
            anomalies = self.detect_anomalies(data)
            enriched['anomalies'] = anomalies
            enriched['anomaly_count'] = len(anomalies)

            return enriched

        except Exception as e:
            logger.error(f"Error enriching data: {e}")
            return {
                'original_data': data,
                'enrichment_error': str(e)
            }

    def get_data_quality_score(self, data: Union[OrderBookSnapshot, TradeEvent]) -> float:
        """
        Calculate data quality score.

        Args:
            data: Data to score

        Returns:
            float: Quality score between 0.0 and 1.0
        """
        try:
            set_correlation_id()

            if isinstance(data, OrderBookSnapshot):
                quality_score, _ = self.quality_checker.check_orderbook_quality(data)
            elif isinstance(data, TradeEvent):
                quality_score, _ = self.quality_checker.check_trade_quality(data)
            else:
                logger.error(f"Unknown data type for quality scoring: {type(data)}")
                return 0.0

            return quality_score

        except Exception as e:
            logger.error(f"Error calculating quality score: {e}")
            return 0.0

    def get_processing_stats(self) -> Dict[str, Any]:
        """Get processing statistics"""
        return {
            'processed_orderbooks': self.processed_orderbooks,
            'processed_trades': self.processed_trades,
            'quality_failures': self.quality_failures,
            'anomalies_detected': self.anomalies_detected,
            'quality_failure_rate': (
                self.quality_failures / max(1, self.processed_orderbooks + self.processed_trades)
            ),
            'anomaly_rate': (
                self.anomalies_detected / max(1, self.processed_orderbooks + self.processed_trades)
            ),
            'quality_checker_summary': self.quality_checker.get_quality_summary(),
            'anomaly_detector_stats': self.anomaly_detector.get_statistics()
        }

    def reset_stats(self) -> None:
        """Reset processing statistics"""
        self.processed_orderbooks = 0
        self.processed_trades = 0
        self.quality_failures = 0
        self.anomalies_detected = 0

        logger.info("Processing statistics reset")
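Since normalize_orderbook and normalize_trade deliberately raise NotImplementedError for dict payloads, each exchange is expected to subclass StandardDataProcessor. A hedged sketch of such an override follows; the payload keys ('s', 'b', 'a') and the OrderBookSnapshot/PriceLevel constructor signatures are illustrative assumptions, not the actual COBY models:

# Hypothetical exchange-specific processor. The payload field names and the
# PriceLevel/OrderBookSnapshot keyword arguments below are assumptions for
# illustration; adapt them to the real models in ..models.core.
from datetime import datetime, timezone

class ExampleExchangeProcessor(StandardDataProcessor):
    def normalize_orderbook(self, raw_data: Dict, exchange: str) -> OrderBookSnapshot:
        if isinstance(raw_data, OrderBookSnapshot):
            return raw_data
        # Assumed payload shape: {'s': symbol, 'b': [[price, size], ...], 'a': [...]}
        return OrderBookSnapshot(
            symbol=raw_data['s'],
            exchange=exchange,
            timestamp=datetime.now(timezone.utc),
            bids=[PriceLevel(price=float(p), size=float(q)) for p, q in raw_data['b']],
            asks=[PriceLevel(price=float(p), size=float(q)) for p, q in raw_data['a']],
        )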
275
COBY/processing/metrics_calculator.py
Normal file
@@ -0,0 +1,275 @@
"""
Metrics calculation for order book analysis.
"""

from typing import Dict, List, Optional
from ..models.core import OrderBookSnapshot, OrderBookMetrics, ImbalanceMetrics
from ..utils.logging import get_logger

logger = get_logger(__name__)


class MetricsCalculator:
    """
    Calculates various metrics from order book data.

    Metrics include:
    - Basic metrics (mid price, spread, volumes)
    - Imbalance metrics
    - Depth metrics
    - Liquidity metrics
    """

    def __init__(self):
        """Initialize metrics calculator"""
        logger.info("Metrics calculator initialized")

    def calculate_orderbook_metrics(self, orderbook: OrderBookSnapshot) -> OrderBookMetrics:
        """
        Calculate comprehensive order book metrics.

        Args:
            orderbook: Order book snapshot

        Returns:
            OrderBookMetrics: Calculated metrics
        """
        try:
            # Basic calculations
            mid_price = self._calculate_mid_price(orderbook)
            spread = self._calculate_spread(orderbook)
            spread_percentage = (spread / mid_price * 100) if mid_price > 0 else 0.0

            # Volume calculations
            bid_volume = sum(level.size for level in orderbook.bids)
            ask_volume = sum(level.size for level in orderbook.asks)

            # Imbalance calculation
            total_volume = bid_volume + ask_volume
            volume_imbalance = ((bid_volume - ask_volume) / total_volume) if total_volume > 0 else 0.0

            # Depth calculations
            depth_10 = self._calculate_depth(orderbook, 10)
            depth_50 = self._calculate_depth(orderbook, 50)

            return OrderBookMetrics(
                symbol=orderbook.symbol,
                exchange=orderbook.exchange,
                timestamp=orderbook.timestamp,
                mid_price=mid_price,
                spread=spread,
                spread_percentage=spread_percentage,
                bid_volume=bid_volume,
                ask_volume=ask_volume,
                volume_imbalance=volume_imbalance,
                depth_10=depth_10,
                depth_50=depth_50
            )

        except Exception as e:
            logger.error(f"Error calculating order book metrics: {e}")
            raise

    def calculate_imbalance_metrics(self, orderbook: OrderBookSnapshot) -> ImbalanceMetrics:
        """
        Calculate order book imbalance metrics.

        Args:
            orderbook: Order book snapshot

        Returns:
            ImbalanceMetrics: Calculated imbalance metrics
        """
        try:
            # Volume imbalance
            bid_volume = sum(level.size for level in orderbook.bids)
            ask_volume = sum(level.size for level in orderbook.asks)
            total_volume = bid_volume + ask_volume
            volume_imbalance = ((bid_volume - ask_volume) / total_volume) if total_volume > 0 else 0.0

            # Price imbalance (weighted by volume)
            price_imbalance = self._calculate_price_imbalance(orderbook)

            # Depth imbalance
            depth_imbalance = self._calculate_depth_imbalance(orderbook)

            # Momentum score (simplified - would need historical data for full implementation)
            momentum_score = volume_imbalance * 0.5 + price_imbalance * 0.3 + depth_imbalance * 0.2

            return ImbalanceMetrics(
                symbol=orderbook.symbol,
                timestamp=orderbook.timestamp,
                volume_imbalance=volume_imbalance,
                price_imbalance=price_imbalance,
                depth_imbalance=depth_imbalance,
                momentum_score=momentum_score
            )

        except Exception as e:
            logger.error(f"Error calculating imbalance metrics: {e}")
            raise

    def _calculate_mid_price(self, orderbook: OrderBookSnapshot) -> float:
        """Calculate mid price"""
        if not orderbook.bids or not orderbook.asks:
            return 0.0

        best_bid = orderbook.bids[0].price
        best_ask = orderbook.asks[0].price

        return (best_bid + best_ask) / 2.0

    def _calculate_spread(self, orderbook: OrderBookSnapshot) -> float:
        """Calculate bid-ask spread"""
        if not orderbook.bids or not orderbook.asks:
            return 0.0

        best_bid = orderbook.bids[0].price
        best_ask = orderbook.asks[0].price

        return best_ask - best_bid

    def _calculate_depth(self, orderbook: OrderBookSnapshot, levels: int) -> float:
        """Calculate market depth for specified number of levels"""
        bid_depth = sum(
            level.size for level in orderbook.bids[:levels]
        )
        ask_depth = sum(
            level.size for level in orderbook.asks[:levels]
        )

        return bid_depth + ask_depth

    def _calculate_price_imbalance(self, orderbook: OrderBookSnapshot) -> float:
        """Calculate price-weighted imbalance"""
        if not orderbook.bids or not orderbook.asks:
            return 0.0

        # Calculate volume-weighted average prices for top levels
        bid_vwap = self._calculate_vwap(orderbook.bids[:5])
        ask_vwap = self._calculate_vwap(orderbook.asks[:5])

        if bid_vwap == 0 or ask_vwap == 0:
            return 0.0

        mid_price = (bid_vwap + ask_vwap) / 2.0

        # Normalize imbalance
        price_imbalance = (bid_vwap - ask_vwap) / mid_price if mid_price > 0 else 0.0

        return max(-1.0, min(1.0, price_imbalance))

    def _calculate_depth_imbalance(self, orderbook: OrderBookSnapshot) -> float:
        """Calculate depth imbalance across multiple levels"""
        levels_to_check = [5, 10, 20]
        imbalances = []

        for levels in levels_to_check:
            bid_depth = sum(level.size for level in orderbook.bids[:levels])
            ask_depth = sum(level.size for level in orderbook.asks[:levels])
            total_depth = bid_depth + ask_depth

            if total_depth > 0:
                imbalance = (bid_depth - ask_depth) / total_depth
                imbalances.append(imbalance)

        # Return the average imbalance across the checked levels
        if imbalances:
            return sum(imbalances) / len(imbalances)

        return 0.0

    def _calculate_vwap(self, levels: List) -> float:
        """Calculate volume-weighted average price for price levels"""
        if not levels:
            return 0.0

        total_volume = sum(level.size for level in levels)
        if total_volume == 0:
            return 0.0

        weighted_sum = sum(level.price * level.size for level in levels)

        return weighted_sum / total_volume

    def calculate_liquidity_score(self, orderbook: OrderBookSnapshot) -> float:
        """
        Calculate liquidity score based on depth and spread.

        Args:
            orderbook: Order book snapshot

        Returns:
            float: Liquidity score (0.0 to 1.0)
        """
        try:
            if not orderbook.bids or not orderbook.asks:
                return 0.0

            # Spread component (lower spread = higher liquidity)
            spread = self._calculate_spread(orderbook)
            mid_price = self._calculate_mid_price(orderbook)

            if mid_price == 0:
                return 0.0

            spread_pct = (spread / mid_price) * 100
            spread_score = max(0.0, 1.0 - (spread_pct / 5.0))  # Normalize to 5% max spread

            # Depth component (higher depth = higher liquidity)
            total_depth = self._calculate_depth(orderbook, 10)
            depth_score = min(1.0, total_depth / 100.0)  # Normalize to 100 units max depth

            # Volume balance component (more balanced = higher liquidity)
            bid_volume = sum(level.size for level in orderbook.bids[:10])
            ask_volume = sum(level.size for level in orderbook.asks[:10])
            total_volume = bid_volume + ask_volume

            if total_volume > 0:
                imbalance = abs(bid_volume - ask_volume) / total_volume
                balance_score = 1.0 - imbalance
            else:
                balance_score = 0.0

            # Weighted combination
            liquidity_score = (spread_score * 0.4 + depth_score * 0.4 + balance_score * 0.2)

            return max(0.0, min(1.0, liquidity_score))

        except Exception as e:
            logger.error(f"Error calculating liquidity score: {e}")
            return 0.0

    def get_market_summary(self, orderbook: OrderBookSnapshot) -> Dict[str, float]:
        """
        Get comprehensive market summary.

        Args:
            orderbook: Order book snapshot

        Returns:
            Dict[str, float]: Market summary metrics
        """
        try:
            metrics = self.calculate_orderbook_metrics(orderbook)
            imbalance = self.calculate_imbalance_metrics(orderbook)
            liquidity = self.calculate_liquidity_score(orderbook)

            return {
                'mid_price': metrics.mid_price,
                'spread': metrics.spread,
                'spread_percentage': metrics.spread_percentage,
                'bid_volume': metrics.bid_volume,
                'ask_volume': metrics.ask_volume,
                'volume_imbalance': metrics.volume_imbalance,
                'depth_10': metrics.depth_10,
                'depth_50': metrics.depth_50,
                'price_imbalance': imbalance.price_imbalance,
                'depth_imbalance': imbalance.depth_imbalance,
                'momentum_score': imbalance.momentum_score,
                'liquidity_score': liquidity
            }

        except Exception as e:
            logger.error(f"Error generating market summary: {e}")
            return {}
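The imbalance and VWAP arithmetic above is easy to verify by hand. A short sketch with a stand-in level type (the real price-level model lives in ..models.core; its exact shape is assumed here):

# Worked example of the volume-imbalance and VWAP formulas above.
# 'Level' is a stand-in for the real price-level model (assumed shape).
from collections import namedtuple

Level = namedtuple('Level', ['price', 'size'])
bids = [Level(100.0, 3.0), Level(99.5, 1.0)]
asks = [Level(100.5, 1.0), Level(101.0, 1.0)]

bid_volume = sum(l.size for l in bids)  # 4.0
ask_volume = sum(l.size for l in asks)  # 2.0
volume_imbalance = (bid_volume - ask_volume) / (bid_volume + ask_volume)  # +0.333, bid-heavy

bid_vwap = sum(l.price * l.size for l in bids) / bid_volume  # (300.0 + 99.5) / 4 = 99.875
print(f"imbalance={volume_imbalance:.3f}, bid_vwap={bid_vwap:.3f}")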
288
COBY/processing/quality_checker.py
Normal file
@@ -0,0 +1,288 @@
"""
Data quality checking and validation for market data.
"""

from typing import Dict, List, Union, Optional, Tuple
from datetime import datetime, timezone
from ..models.core import OrderBookSnapshot, TradeEvent
from ..utils.logging import get_logger
from ..utils.validation import validate_price, validate_volume, validate_symbol
from ..utils.timing import get_current_timestamp

logger = get_logger(__name__)


class DataQualityChecker:
    """
    Comprehensive data quality checker for market data.

    Validates:
    - Data structure integrity
    - Price and volume ranges
    - Timestamp consistency
    - Cross-validation between related data points
    """

    def __init__(self):
        """Initialize quality checker with default thresholds"""
        # Quality thresholds
        self.max_spread_percentage = 10.0  # Maximum spread as % of mid price
        self.max_price_change_percentage = 50.0  # Maximum price change between updates
        self.min_volume_threshold = 0.000001  # Minimum meaningful volume
        self.max_timestamp_drift = 300  # Maximum seconds drift from current time

        # Price history for validation
        self.price_history: Dict[str, float] = {}  # "{symbol}_{exchange}" -> last price

        logger.info("Data quality checker initialized")

    def check_orderbook_quality(self, orderbook: OrderBookSnapshot) -> Tuple[float, List[str]]:
        """
        Check order book data quality.

        Args:
            orderbook: Order book snapshot to validate

        Returns:
            Tuple[float, List[str]]: Quality score (0.0-1.0) and list of issues
        """
        issues = []
        quality_score = 1.0

        try:
            # Basic structure validation
            structure_issues = self._check_orderbook_structure(orderbook)
            issues.extend(structure_issues)
            quality_score -= len(structure_issues) * 0.1

            # Price validation
            price_issues = self._check_orderbook_prices(orderbook)
            issues.extend(price_issues)
            quality_score -= len(price_issues) * 0.15

            # Volume validation
            volume_issues = self._check_orderbook_volumes(orderbook)
            issues.extend(volume_issues)
            quality_score -= len(volume_issues) * 0.1

            # Spread validation
            spread_issues = self._check_orderbook_spread(orderbook)
            issues.extend(spread_issues)
            quality_score -= len(spread_issues) * 0.2

            # Timestamp validation
            timestamp_issues = self._check_timestamp(orderbook.timestamp)
            issues.extend(timestamp_issues)
            quality_score -= len(timestamp_issues) * 0.1

            # Cross-validation with history
            history_issues = self._check_price_history(orderbook)
            issues.extend(history_issues)
            quality_score -= len(history_issues) * 0.15

            # Update price history
            self._update_price_history(orderbook)

        except Exception as e:
            logger.error(f"Error checking order book quality: {e}")
            issues.append(f"Quality check error: {e}")
            quality_score = 0.0

        # Ensure score is within bounds
        quality_score = max(0.0, min(1.0, quality_score))

        if issues:
            logger.debug(f"Order book quality issues for {orderbook.symbol}@{orderbook.exchange}: {issues}")

        return quality_score, issues

    def check_trade_quality(self, trade: TradeEvent) -> Tuple[float, List[str]]:
        """
        Check trade data quality.

        Args:
            trade: Trade event to validate

        Returns:
            Tuple[float, List[str]]: Quality score (0.0-1.0) and list of issues
        """
        issues = []
        quality_score = 1.0

        try:
            # Basic structure validation
            if not validate_symbol(trade.symbol):
                issues.append("Invalid symbol format")

            if not trade.exchange:
                issues.append("Missing exchange")

            if not trade.trade_id:
                issues.append("Missing trade ID")

            # Price validation
            if not validate_price(trade.price):
                issues.append(f"Invalid price: {trade.price}")

            # Volume validation
            if not validate_volume(trade.size):
                issues.append(f"Invalid size: {trade.size}")

            if trade.size < self.min_volume_threshold:
                issues.append(f"Size below threshold: {trade.size}")

            # Side validation
            if trade.side not in ['buy', 'sell']:
                issues.append(f"Invalid side: {trade.side}")

            # Timestamp validation
            timestamp_issues = self._check_timestamp(trade.timestamp)
            issues.extend(timestamp_issues)

            # Calculate quality score
            quality_score -= len(issues) * 0.2

        except Exception as e:
            logger.error(f"Error checking trade quality: {e}")
            issues.append(f"Quality check error: {e}")
            quality_score = 0.0

        # Ensure score is within bounds
        quality_score = max(0.0, min(1.0, quality_score))

        if issues:
            logger.debug(f"Trade quality issues for {trade.symbol}@{trade.exchange}: {issues}")

        return quality_score, issues

    def _check_orderbook_structure(self, orderbook: OrderBookSnapshot) -> List[str]:
        """Check basic order book structure"""
        issues = []

        if not validate_symbol(orderbook.symbol):
            issues.append("Invalid symbol format")

        if not orderbook.exchange:
            issues.append("Missing exchange")

        if not orderbook.bids:
            issues.append("No bid levels")

        if not orderbook.asks:
            issues.append("No ask levels")

        return issues

    def _check_orderbook_prices(self, orderbook: OrderBookSnapshot) -> List[str]:
        """Check order book price validity"""
        issues = []

        # Check bid prices (should be descending)
        for i, bid in enumerate(orderbook.bids):
            if not validate_price(bid.price):
                issues.append(f"Invalid bid price at level {i}: {bid.price}")

            if i > 0 and bid.price >= orderbook.bids[i-1].price:
                issues.append(f"Bid prices not descending at level {i}")

        # Check ask prices (should be ascending)
        for i, ask in enumerate(orderbook.asks):
            if not validate_price(ask.price):
                issues.append(f"Invalid ask price at level {i}: {ask.price}")

            if i > 0 and ask.price <= orderbook.asks[i-1].price:
                issues.append(f"Ask prices not ascending at level {i}")

        # Check bid-ask ordering
        if orderbook.bids and orderbook.asks:
            if orderbook.bids[0].price >= orderbook.asks[0].price:
                issues.append("Best bid >= best ask (crossed book)")

        return issues

    def _check_orderbook_volumes(self, orderbook: OrderBookSnapshot) -> List[str]:
        """Check order book volume validity"""
        issues = []

        # Check bid volumes
        for i, bid in enumerate(orderbook.bids):
            if not validate_volume(bid.size):
                issues.append(f"Invalid bid volume at level {i}: {bid.size}")

            if bid.size < self.min_volume_threshold:
                issues.append(f"Bid volume below threshold at level {i}: {bid.size}")

        # Check ask volumes
        for i, ask in enumerate(orderbook.asks):
            if not validate_volume(ask.size):
                issues.append(f"Invalid ask volume at level {i}: {ask.size}")

            if ask.size < self.min_volume_threshold:
                issues.append(f"Ask volume below threshold at level {i}: {ask.size}")

        return issues

    def _check_orderbook_spread(self, orderbook: OrderBookSnapshot) -> List[str]:
        """Check order book spread validity"""
        issues = []

        if orderbook.mid_price and orderbook.spread:
            spread_percentage = (orderbook.spread / orderbook.mid_price) * 100

            if spread_percentage > self.max_spread_percentage:
                issues.append(f"Spread too wide: {spread_percentage:.2f}%")

            if spread_percentage < 0:
                issues.append(f"Negative spread: {spread_percentage:.2f}%")

        return issues

    def _check_timestamp(self, timestamp: datetime) -> List[str]:
        """Check timestamp validity"""
        issues = []

        if not timestamp:
            issues.append("Missing timestamp")
            return issues

        # Check if timestamp is timezone-aware
        if timestamp.tzinfo is None:
            issues.append("Timestamp missing timezone info")

        # Check timestamp drift
        current_time = get_current_timestamp()
        time_diff = abs((timestamp - current_time).total_seconds())

        if time_diff > self.max_timestamp_drift:
            issues.append(f"Timestamp drift too large: {time_diff:.1f}s")

        return issues

    def _check_price_history(self, orderbook: OrderBookSnapshot) -> List[str]:
        """Check price consistency with history"""
        issues = []

        key = f"{orderbook.symbol}_{orderbook.exchange}"

        if key in self.price_history and orderbook.mid_price:
            last_price = self.price_history[key]
            price_change = abs(orderbook.mid_price - last_price) / last_price * 100

            if price_change > self.max_price_change_percentage:
                issues.append(f"Large price change: {price_change:.2f}%")

        return issues

    def _update_price_history(self, orderbook: OrderBookSnapshot) -> None:
        """Update price history for future validation"""
        if orderbook.mid_price:
            key = f"{orderbook.symbol}_{orderbook.exchange}"
            self.price_history[key] = orderbook.mid_price

    def get_quality_summary(self) -> Dict[str, float]:
        """Get summary of quality checks performed"""
        return {
            'symbols_tracked': len(self.price_history),
            'max_spread_percentage': self.max_spread_percentage,
            'max_price_change_percentage': self.max_price_change_percentage,
            'min_volume_threshold': self.min_volume_threshold,
            'max_timestamp_drift': self.max_timestamp_drift
        }
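The scoring model is a straight deduction from 1.0 with a per-category weight (0.1 for structure, volume, and timestamp issues; 0.15 for price and history issues; 0.2 for spread issues), clamped to [0.0, 1.0]. For example, two structure issues plus one spread issue leave 1.0 - 2*0.1 - 0.2 = 0.6, just above the 0.5 acceptance threshold used by StandardDataProcessor.validate_data. A minimal sketch of the same arithmetic:

# Sketch of the deduction-based scoring in check_orderbook_quality().
# The weights mirror the ones hard-coded above.
WEIGHTS = {'structure': 0.1, 'price': 0.15, 'volume': 0.1,
           'spread': 0.2, 'timestamp': 0.1, 'history': 0.15}

issues = [('structure', 'Missing exchange'),
          ('structure', 'No ask levels'),
          ('spread', 'Spread too wide: 12.00%')]

score = 1.0 - sum(WEIGHTS[category] for category, _ in issues)
score = max(0.0, min(1.0, score))  # clamp, as the checker does
print(score)  # 0.6 -> would still pass the 0.5 threshold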