"""
Anomaly detection for market data.
"""

import statistics
from typing import Dict, List, Union, Optional, Deque, Tuple
from collections import deque
from datetime import datetime, timedelta

from ..models.core import OrderBookSnapshot, TradeEvent
from ..utils.logging import get_logger
from ..utils.timing import get_current_timestamp

logger = get_logger(__name__)


class AnomalyDetector:
    """
    Detects anomalies in market data using statistical methods.

    Every new observation is compared against a rolling window of the
    previous observations for the same key; it is reported when its z-score
    (distance from the window mean in standard deviations) exceeds
    ``z_score_threshold``. The observation is appended to the window only
    after it has been checked, so it never influences its own score.

    Detects:
    - Price spikes and drops
    - Volume anomalies
    - Spread anomalies
    - Frequency anomalies
    """

    # Minimum number of stored observations before a value is scored.
    MIN_VALUE_SAMPLES = 10
    # Minimum number of inter-update intervals before update timing is scored.
    MIN_INTERVAL_SAMPLES = 5

    def __init__(self, window_size: int = 100, z_score_threshold: float = 3.0):
        """
        Initialize anomaly detector.

        Args:
            window_size: Size of rolling window for statistics
            z_score_threshold: Z-score threshold for anomaly detection
        """
        self.window_size = window_size
        self.z_score_threshold = z_score_threshold

        # Rolling windows for statistics. Price windows are stored under the
        # bare key; the others use the "<key>_volume", "<key>_spread" and
        # "<key>_timestamp" suffixes.
        self.price_windows: Dict[str, Deque[float]] = {}
        self.volume_windows: Dict[str, Deque[float]] = {}
        self.spread_windows: Dict[str, Deque[float]] = {}
        self.timestamp_windows: Dict[str, Deque[datetime]] = {}

        logger.info(f"Anomaly detector initialized with window_size={window_size}, threshold={z_score_threshold}")

    def detect_orderbook_anomalies(self, orderbook: OrderBookSnapshot) -> List[str]:
        """
        Detect anomalies in order book data.

        Price and spread checks are skipped when ``mid_price`` (or ``spread``)
        is falsy, i.e. ``None`` or zero. If any check raises, the error is
        logged and reported as an entry in the result, and the rolling
        windows are not updated for this snapshot.

        Args:
            orderbook: Order book snapshot to analyze

        Returns:
            List[str]: List of detected anomalies
        """
        anomalies: List[str] = []
        key = f"{orderbook.symbol}_{orderbook.exchange}"

        try:
            # Price anomalies
            if orderbook.mid_price:
                anomalies.extend(self._detect_price_anomalies(key, orderbook.mid_price))

            # Volume anomalies
            total_volume = orderbook.bid_volume + orderbook.ask_volume
            anomalies.extend(self._detect_volume_anomalies(key, total_volume))

            # Spread anomalies (spread expressed as a percentage of mid price)
            if orderbook.spread and orderbook.mid_price:
                spread_pct = (orderbook.spread / orderbook.mid_price) * 100
                anomalies.extend(self._detect_spread_anomalies(key, spread_pct))

            # Frequency anomalies
            anomalies.extend(self._detect_frequency_anomalies(key, orderbook.timestamp))

            # Update windows only after scoring, so the snapshot is judged
            # against history that does not yet contain it.
            self._update_windows(key, orderbook)

        except Exception as e:
            logger.error(f"Error detecting order book anomalies: {e}")
            anomalies.append(f"Anomaly detection error: {e}")

        if anomalies:
            logger.warning(f"Anomalies detected in {orderbook.symbol}@{orderbook.exchange}: {anomalies}")

        return anomalies

    def detect_trade_anomalies(self, trade: TradeEvent) -> List[str]:
        """
        Detect anomalies in trade data.

        Trades are tracked separately from order books: their windows are
        keyed with a ``_trade`` suffix. If any check raises, the error is
        logged and reported as an entry in the result, and the rolling
        windows are not updated for this trade.

        Args:
            trade: Trade event to analyze

        Returns:
            List[str]: List of detected anomalies
        """
        anomalies: List[str] = []
        key = f"{trade.symbol}_{trade.exchange}_trade"

        try:
            # Price anomalies
            anomalies.extend(self._detect_price_anomalies(key, trade.price))

            # Volume anomalies
            anomalies.extend(self._detect_volume_anomalies(key, trade.size))

            # Update windows
            self._update_trade_windows(key, trade)

        except Exception as e:
            logger.error(f"Error detecting trade anomalies: {e}")
            anomalies.append(f"Anomaly detection error: {e}")

        if anomalies:
            logger.warning(f"Trade anomalies detected in {trade.symbol}@{trade.exchange}: {anomalies}")

        return anomalies

    def _get_window(self, windows: Dict[str, Deque], key: str) -> Deque:
        """Return the rolling window stored under ``key``, creating it if absent."""
        window = windows.get(key)
        if window is None:
            window = deque(maxlen=self.window_size)
            windows[key] = window
        return window

    def _outlier_score(self, samples, value: float, min_samples: int) -> Optional[Tuple[float, float]]:
        """
        Score ``value`` against ``samples`` and report it if it is an outlier.

        Args:
            samples: Previously observed values (any sized iterable of numbers)
            value: New observation to score
            min_samples: Minimum number of samples required before scoring

        Returns:
            ``(mean, z_score)`` when the z-score exceeds ``z_score_threshold``;
            ``None`` when there are too few samples, the samples have no
            spread (standard deviation not greater than zero), or the value
            is within the threshold.
        """
        if len(samples) < min_samples:
            return None

        try:
            mean = statistics.mean(samples)
            std = statistics.stdev(samples)
        except statistics.StatisticsError:
            return None  # Not enough data

        # A z-score is undefined when all samples are the same (std == 0).
        # Written as a positive test so a NaN deviation is also rejected.
        if std > 0:
            z_score = abs(value - mean) / std
            if z_score > self.z_score_threshold:
                return mean, z_score
        return None

    def _detect_price_anomalies(self, key: str, price: float) -> List[str]:
        """Detect price anomalies using z-score"""
        window = self._get_window(self.price_windows, key)
        outlier = self._outlier_score(window, price, self.MIN_VALUE_SAMPLES)
        if outlier is None:
            return []

        mean_price, z_score = outlier
        direction = "spike" if price > mean_price else "drop"
        return [f"Price {direction}: {price:.6f} (z-score: {z_score:.2f})"]

    def _detect_volume_anomalies(self, key: str, volume: float) -> List[str]:
        """Detect volume anomalies using z-score"""
        window = self._get_window(self.volume_windows, f"{key}_volume")
        outlier = self._outlier_score(window, volume, self.MIN_VALUE_SAMPLES)
        if outlier is None:
            return []

        mean_volume, z_score = outlier
        direction = "spike" if volume > mean_volume else "drop"
        return [f"Volume {direction}: {volume:.6f} (z-score: {z_score:.2f})"]

    def _detect_spread_anomalies(self, key: str, spread_pct: float) -> List[str]:
        """Detect spread anomalies using z-score"""
        window = self._get_window(self.spread_windows, f"{key}_spread")
        outlier = self._outlier_score(window, spread_pct, self.MIN_VALUE_SAMPLES)
        if outlier is None:
            return []

        mean_spread, z_score = outlier
        direction = "widening" if spread_pct > mean_spread else "tightening"
        return [f"Spread {direction}: {spread_pct:.4f}% (z-score: {z_score:.2f})"]

    def _detect_frequency_anomalies(self, key: str, timestamp: datetime) -> List[str]:
        """
        Detect frequency anomalies in data updates.

        The gap between ``timestamp`` and the most recent stored timestamp is
        scored against the gaps between consecutive stored timestamps.
        """
        window = self._get_window(self.timestamp_windows, f"{key}_timestamp")

        # N timestamps yield N - 1 intervals, so one extra timestamp is needed
        # to reach the minimum number of intervals.
        if len(window) < self.MIN_INTERVAL_SAMPLES + 1:
            return []

        # Snapshot to a list: positional access into the middle of a deque is
        # O(n), which would make the pairwise walk quadratic.
        stamps = list(window)
        intervals = [
            (later - earlier).total_seconds()
            for earlier, later in zip(stamps, stamps[1:])
        ]
        current_interval = (timestamp - stamps[-1]).total_seconds()

        outlier = self._outlier_score(intervals, current_interval, self.MIN_INTERVAL_SAMPLES)
        if outlier is None:
            return []

        mean_interval, _ = outlier
        if current_interval > mean_interval:
            return [f"Update delay: {current_interval:.1f}s (expected: {mean_interval:.1f}s)"]
        return [f"Update burst: {current_interval:.1f}s (expected: {mean_interval:.1f}s)"]

    def _update_windows(self, key: str, orderbook: OrderBookSnapshot) -> None:
        """Update rolling windows with new data"""
        # Update price window
        if orderbook.mid_price:
            self._get_window(self.price_windows, key).append(orderbook.mid_price)

        # Update volume window
        total_volume = orderbook.bid_volume + orderbook.ask_volume
        self._get_window(self.volume_windows, f"{key}_volume").append(total_volume)

        # Update spread window
        if orderbook.spread and orderbook.mid_price:
            spread_pct = (orderbook.spread / orderbook.mid_price) * 100
            self._get_window(self.spread_windows, f"{key}_spread").append(spread_pct)

        # Update timestamp window
        self._get_window(self.timestamp_windows, f"{key}_timestamp").append(orderbook.timestamp)

    def _update_trade_windows(self, key: str, trade: TradeEvent) -> None:
        """Update rolling windows with trade data"""
        # Update price window
        self._get_window(self.price_windows, key).append(trade.price)

        # Update volume window
        self._get_window(self.volume_windows, f"{key}_volume").append(trade.size)

    def get_statistics(self) -> Dict[str, Dict[str, Union[float, int, str]]]:
        """
        Get current price statistics for all tracked keys.

        Only price windows holding at least two values are reported.

        Returns:
            Mapping of window key to a dict with ``price_mean``, ``price_std``,
            ``price_min``, ``price_max`` and ``data_points``; or to
            ``{'error': 'insufficient_data'}`` if the statistics could not be
            computed.
        """
        stats: Dict[str, Dict[str, Union[float, int, str]]] = {}

        for key, window in self.price_windows.items():
            if len(window) < 2:
                continue
            try:
                stats[key] = {
                    'price_mean': statistics.mean(window),
                    'price_std': statistics.stdev(window),
                    'price_min': min(window),
                    'price_max': max(window),
                    'data_points': len(window)
                }
            except statistics.StatisticsError:
                stats[key] = {'error': 'insufficient_data'}

        return stats

    def reset_windows(self, key: Optional[str] = None) -> None:
        """
        Reset rolling windows for a specific key or all keys.

        Args:
            key: Window key to reset (the same key used for the price window).
                When omitted or empty, every window is cleared.
        """
        if key:
            # Reset specific key
            self.price_windows.pop(key, None)
            self.volume_windows.pop(f"{key}_volume", None)
            self.spread_windows.pop(f"{key}_spread", None)
            self.timestamp_windows.pop(f"{key}_timestamp", None)
        else:
            # Reset all windows
            self.price_windows.clear()
            self.volume_windows.clear()
            self.spread_windows.clear()
            self.timestamp_windows.clear()

        logger.info(f"Reset anomaly detection windows for {key or 'all keys'}")