bucket aggregation

2025-08-04 17:28:55 +03:00
parent 504736c0f7
commit de77b0afa8
10 changed files with 2592 additions and 0 deletions
--- a/.kiro/specs/multi-exchange-data-aggregation/tasks.md
+++ b/.kiro/specs/multi-exchange-data-aggregation/tasks.md
@@ -23,10 +23,15 @@
  - _Requirements: 3.1, 3.2, 3.3, 3.4_
 - [ ] 3. Create base exchange connector framework
  - Implement abstract base class for exchange WebSocket connectors
  - Create connection management with exponential backoff and circuit breaker patterns
  - Implement WebSocket message handling with proper error recovery
  - Add connection status monitoring and health checks
  - _Requirements: 1.1, 1.3, 1.4, 8.5_
--- a/COBY/connectors/binance_connector.py
+++ b/COBY/connectors/binance_connector.py
@@ -0,0 +1,489 @@
 """
 Binance exchange connector implementation.
 """
 import json
 from typing import Dict, List, Optional, Any
 from datetime import datetime, timezone
 from ..models.core import OrderBookSnapshot, TradeEvent, PriceLevel
 from ..utils.logging import get_logger, set_correlation_id
 from ..utils.exceptions import ValidationError
 from ..utils.validation import validate_symbol, validate_price, validate_volume
 from .base_connector import BaseExchangeConnector
 logger = get_logger(__name__)
 class BinanceConnector(BaseExchangeConnector):
    """
    Binance WebSocket connector implementation.
    Supports:
    - Order book depth streams
    - Trade streams
    - Symbol normalization
    - Real-time data processing
    """
    # Binance WebSocket URLs
    WEBSOCKET_URL = "wss://stream.binance.com:9443/ws"
    API_URL = "https://api.binance.com/api/v3"
    def __init__(self):
        """Initialize Binance connector"""
        super().__init__("binance", self.WEBSOCKET_URL)
        # Binance-specific message handlers
        self.message_handlers.update({
            'depthUpdate': self._handle_orderbook_update,
            'trade': self._handle_trade_update,
            'error': self._handle_error_message
        })
        # Stream management
        self.active_streams: List[str] = []
        self.stream_id = 1
        logger.info("Binance connector initialized")
    def _get_message_type(self, data: Dict) -> str:
        """
        Determine message type from Binance message data.
        Args:
            data: Parsed message data
        Returns:
            str: Message type identifier
        """
        # Binance uses 'e' field for event type
        if 'e' in data:
            return data['e']
        # Handle error messages
        if 'error' in data:
            return 'error'
        # Handle subscription confirmations
        if 'result' in data and 'id' in data:
            return 'subscription_response'
        return 'unknown'
    def normalize_symbol(self, symbol: str) -> str:
        """
        Normalize symbol to Binance format.
        Args:
            symbol: Standard symbol format (e.g., 'BTCUSDT')
        Returns:
            str: Binance symbol format (e.g., 'BTCUSDT')
        """
        # Binance uses uppercase symbols without separators
        normalized = symbol.upper().replace('-', '').replace('/', '')
        # Validate symbol format
        if not validate_symbol(normalized):
            raise ValidationError(f"Invalid symbol format: {symbol}", "INVALID_SYMBOL")
        return normalized
    async def subscribe_orderbook(self, symbol: str) -> None:
        """
        Subscribe to order book depth updates for a symbol.
        Args:
            symbol: Trading symbol (e.g., 'BTCUSDT')
        """
        try:
            set_correlation_id()
            normalized_symbol = self.normalize_symbol(symbol)
            stream_name = f"{normalized_symbol.lower()}@depth@100ms"
            # Create subscription message
            subscription_msg = {
                "method": "SUBSCRIBE",
                "params": [stream_name],
                "id": self.stream_id
            }
            # Send subscription
            success = await self._send_message(subscription_msg)
            if success:
                # Track subscription
                if symbol not in self.subscriptions:
                    self.subscriptions[symbol] = []
                if 'orderbook' not in self.subscriptions[symbol]:
                    self.subscriptions[symbol].append('orderbook')
                self.active_streams.append(stream_name)
                self.stream_id += 1
                logger.info(f"Subscribed to order book for {symbol} on Binance")
            else:
                logger.error(f"Failed to subscribe to order book for {symbol} on Binance")
        except Exception as e:
            logger.error(f"Error subscribing to order book for {symbol}: {e}")
            raise
    async def subscribe_trades(self, symbol: str) -> None:
        """
        Subscribe to trade updates for a symbol.
        Args:
            symbol: Trading symbol (e.g., 'BTCUSDT')
        """
        try:
            set_correlation_id()
            normalized_symbol = self.normalize_symbol(symbol)
            stream_name = f"{normalized_symbol.lower()}@trade"
            # Create subscription message
            subscription_msg = {
                "method": "SUBSCRIBE",
                "params": [stream_name],
                "id": self.stream_id
            }
            # Send subscription
            success = await self._send_message(subscription_msg)
            if success:
                # Track subscription
                if symbol not in self.subscriptions:
                    self.subscriptions[symbol] = []
                if 'trades' not in self.subscriptions[symbol]:
                    self.subscriptions[symbol].append('trades')
                self.active_streams.append(stream_name)
                self.stream_id += 1
                logger.info(f"Subscribed to trades for {symbol} on Binance")
            else:
                logger.error(f"Failed to subscribe to trades for {symbol} on Binance")
        except Exception as e:
            logger.error(f"Error subscribing to trades for {symbol}: {e}")
            raise
    async def unsubscribe_orderbook(self, symbol: str) -> None:
        """
        Unsubscribe from order book updates for a symbol.
        Args:
            symbol: Trading symbol (e.g., 'BTCUSDT')
        """
        try:
            normalized_symbol = self.normalize_symbol(symbol)
            stream_name = f"{normalized_symbol.lower()}@depth@100ms"
            # Create unsubscription message
            unsubscription_msg = {
                "method": "UNSUBSCRIBE",
                "params": [stream_name],
                "id": self.stream_id
            }
            # Send unsubscription
            success = await self._send_message(unsubscription_msg)
            if success:
                # Remove from tracking
                if symbol in self.subscriptions and 'orderbook' in self.subscriptions[symbol]:
                    self.subscriptions[symbol].remove('orderbook')
                    if not self.subscriptions[symbol]:
                        del self.subscriptions[symbol]
                if stream_name in self.active_streams:
                    self.active_streams.remove(stream_name)
                self.stream_id += 1
                logger.info(f"Unsubscribed from order book for {symbol} on Binance")
            else:
                logger.error(f"Failed to unsubscribe from order book for {symbol} on Binance")
        except Exception as e:
            logger.error(f"Error unsubscribing from order book for {symbol}: {e}")
            raise
    async def unsubscribe_trades(self, symbol: str) -> None:
        """
        Unsubscribe from trade updates for a symbol.
        Args:
            symbol: Trading symbol (e.g., 'BTCUSDT')
        """
        try:
            normalized_symbol = self.normalize_symbol(symbol)
            stream_name = f"{normalized_symbol.lower()}@trade"
            # Create unsubscription message
            unsubscription_msg = {
                "method": "UNSUBSCRIBE",
                "params": [stream_name],
                "id": self.stream_id
            }
            # Send unsubscription
            success = await self._send_message(unsubscription_msg)
            if success:
                # Remove from tracking
                if symbol in self.subscriptions and 'trades' in self.subscriptions[symbol]:
                    self.subscriptions[symbol].remove('trades')
                    if not self.subscriptions[symbol]:
                        del self.subscriptions[symbol]
                if stream_name in self.active_streams:
                    self.active_streams.remove(stream_name)
                self.stream_id += 1
                logger.info(f"Unsubscribed from trades for {symbol} on Binance")
            else:
                logger.error(f"Failed to unsubscribe from trades for {symbol} on Binance")
        except Exception as e:
            logger.error(f"Error unsubscribing from trades for {symbol}: {e}")
            raise
    async def get_symbols(self) -> List[str]:
        """
        Get list of available trading symbols from Binance.
        Returns:
            List[str]: List of available symbols
        """
        try:
            import aiohttp
            async with aiohttp.ClientSession() as session:
                async with session.get(f"{self.API_URL}/exchangeInfo") as response:
                    if response.status == 200:
                        data = await response.json()
                        symbols = [
                            symbol_info['symbol'] 
                            for symbol_info in data.get('symbols', [])
                            if symbol_info.get('status') == 'TRADING'
                        ]
                        logger.info(f"Retrieved {len(symbols)} symbols from Binance")
                        return symbols
                    else:
                        logger.error(f"Failed to get symbols from Binance: HTTP {response.status}")
                        return []
        except Exception as e:
            logger.error(f"Error getting symbols from Binance: {e}")
            return []
    async def get_orderbook_snapshot(self, symbol: str, depth: int = 20) -> Optional[OrderBookSnapshot]:
        """
        Get current order book snapshot from Binance REST API.
        Args:
            symbol: Trading symbol
            depth: Number of price levels to retrieve
        Returns:
            OrderBookSnapshot: Current order book or None if unavailable
        """
        try:
            import aiohttp
            normalized_symbol = self.normalize_symbol(symbol)
            # Binance supports depths: 5, 10, 20, 50, 100, 500, 1000, 5000
            valid_depths = [5, 10, 20, 50, 100, 500, 1000, 5000]
            api_depth = min(valid_depths, key=lambda x: abs(x - depth))
            url = f"{self.API_URL}/depth"
            params = {
                'symbol': normalized_symbol,
                'limit': api_depth
            }
            async with aiohttp.ClientSession() as session:
                async with session.get(url, params=params) as response:
                    if response.status == 200:
                        data = await response.json()
                        return self._parse_orderbook_snapshot(data, symbol)
                    else:
                        logger.error(f"Failed to get order book for {symbol}: HTTP {response.status}")
                        return None
        except Exception as e:
            logger.error(f"Error getting order book snapshot for {symbol}: {e}")
            return None
    def _parse_orderbook_snapshot(self, data: Dict, symbol: str) -> OrderBookSnapshot:
        """
        Parse Binance order book data into OrderBookSnapshot.
        Args:
            data: Raw Binance order book data
            symbol: Trading symbol
        Returns:
            OrderBookSnapshot: Parsed order book
        """
        try:
            # Parse bids and asks
            bids = []
            for bid_data in data.get('bids', []):
                price = float(bid_data[0])
                size = float(bid_data[1])
                if validate_price(price) and validate_volume(size):
                    bids.append(PriceLevel(price=price, size=size))
            asks = []
            for ask_data in data.get('asks', []):
                price = float(ask_data[0])
                size = float(ask_data[1])
                if validate_price(price) and validate_volume(size):
                    asks.append(PriceLevel(price=price, size=size))
            # Create order book snapshot
            orderbook = OrderBookSnapshot(
                symbol=symbol,
                exchange=self.exchange_name,
                timestamp=datetime.now(timezone.utc),
                bids=bids,
                asks=asks,
                sequence_id=data.get('lastUpdateId')
            )
            return orderbook
        except Exception as e:
            logger.error(f"Error parsing order book snapshot: {e}")
            raise ValidationError(f"Invalid order book data: {e}", "PARSE_ERROR")
    async def _handle_orderbook_update(self, data: Dict) -> None:
        """
        Handle order book depth update from Binance.
        Args:
            data: Order book update data
        """
        try:
            set_correlation_id()
            # Extract symbol from stream name
            stream = data.get('s', '').upper()
            if not stream:
                logger.warning("Order book update missing symbol")
                return
            # Parse bids and asks
            bids = []
            for bid_data in data.get('b', []):
                price = float(bid_data[0])
                size = float(bid_data[1])
                if validate_price(price) and validate_volume(size):
                    bids.append(PriceLevel(price=price, size=size))
            asks = []
            for ask_data in data.get('a', []):
                price = float(ask_data[0])
                size = float(ask_data[1])
                if validate_price(price) and validate_volume(size):
                    asks.append(PriceLevel(price=price, size=size))
            # Create order book snapshot
            orderbook = OrderBookSnapshot(
                symbol=stream,
                exchange=self.exchange_name,
                timestamp=datetime.fromtimestamp(data.get('E', 0) / 1000, tz=timezone.utc),
                bids=bids,
                asks=asks,
                sequence_id=data.get('u')  # Final update ID
            )
            # Notify callbacks
            self._notify_data_callbacks(orderbook)
            logger.debug(f"Processed order book update for {stream}")
        except Exception as e:
            logger.error(f"Error handling order book update: {e}")
    async def _handle_trade_update(self, data: Dict) -> None:
        """
        Handle trade update from Binance.
        Args:
            data: Trade update data
        """
        try:
            set_correlation_id()
            # Extract trade data
            symbol = data.get('s', '').upper()
            if not symbol:
                logger.warning("Trade update missing symbol")
                return
            price = float(data.get('p', 0))
            size = float(data.get('q', 0))
            # Validate data
            if not validate_price(price) or not validate_volume(size):
                logger.warning(f"Invalid trade data: price={price}, size={size}")
                return
            # Determine side (Binance uses 'm' field - true if buyer is market maker)
            is_buyer_maker = data.get('m', False)
            side = 'sell' if is_buyer_maker else 'buy'
            # Create trade event
            trade = TradeEvent(
                symbol=symbol,
                exchange=self.exchange_name,
                timestamp=datetime.fromtimestamp(data.get('T', 0) / 1000, tz=timezone.utc),
                price=price,
                size=size,
                side=side,
                trade_id=str(data.get('t', ''))
            )
            # Notify callbacks
            self._notify_data_callbacks(trade)
            logger.debug(f"Processed trade for {symbol}: {side} {size} @ {price}")
        except Exception as e:
            logger.error(f"Error handling trade update: {e}")
    async def _handle_error_message(self, data: Dict) -> None:
        """
        Handle error message from Binance.
        Args:
            data: Error message data
        """
        error_code = data.get('code', 'unknown')
        error_msg = data.get('msg', 'Unknown error')
        logger.error(f"Binance error {error_code}: {error_msg}")
        # Handle specific error codes
        if error_code == -1121:  # Invalid symbol
            logger.error("Invalid symbol error - check symbol format")
        elif error_code == -1130:  # Invalid listen key
            logger.error("Invalid listen key - may need to reconnect")
    def get_binance_stats(self) -> Dict[str, Any]:
        """Get Binance-specific statistics"""
        base_stats = self.get_stats()
        binance_stats = {
            'active_streams': len(self.active_streams),
            'stream_list': self.active_streams.copy(),
            'next_stream_id': self.stream_id
        }
        base_stats.update(binance_stats)
        return base_stats
--- a/COBY/examples/binance_example.py
+++ b/COBY/examples/binance_example.py
@@ -0,0 +1,168 @@
 #!/usr/bin/env python3
 """
 Example usage of Binance connector.
 """
 import asyncio
 import sys
 from pathlib import Path
 # Add COBY to path
 sys.path.insert(0, str(Path(__file__).parent.parent))
 from connectors.binance_connector import BinanceConnector
 from utils.logging import setup_logging, get_logger
 from models.core import OrderBookSnapshot, TradeEvent
 # Setup logging
 setup_logging(level='INFO', console_output=True)
 logger = get_logger(__name__)
 class BinanceExample:
    """Example Binance connector usage"""
    def __init__(self):
        self.connector = BinanceConnector()
        self.orderbook_count = 0
        self.trade_count = 0
        # Add data callbacks
        self.connector.add_data_callback(self.on_data_received)
        self.connector.add_status_callback(self.on_status_changed)
    def on_data_received(self, data):
        """Handle received data"""
        if isinstance(data, OrderBookSnapshot):
            self.orderbook_count += 1
            logger.info(
                f"📊 Order Book {self.orderbook_count}: {data.symbol} - "
                f"Mid: ${data.mid_price:.2f}, Spread: ${data.spread:.2f}, "
                f"Bids: {len(data.bids)}, Asks: {len(data.asks)}"
            )
        elif isinstance(data, TradeEvent):
            self.trade_count += 1
            logger.info(
                f"💰 Trade {self.trade_count}: {data.symbol} - "
                f"{data.side.upper()} {data.size} @ ${data.price:.2f}"
            )
    def on_status_changed(self, exchange, status):
        """Handle status changes"""
        logger.info(f"🔄 {exchange} status changed to: {status.value}")
    async def run_example(self):
        """Run the example"""
        try:
            logger.info("🚀 Starting Binance connector example")
            # Connect to Binance
            logger.info("🔌 Connecting to Binance...")
            connected = await self.connector.connect()
            if not connected:
                logger.error("❌ Failed to connect to Binance")
                return
            logger.info("✅ Connected to Binance successfully")
            # Get available symbols
            logger.info("📋 Getting available symbols...")
            symbols = await self.connector.get_symbols()
            logger.info(f"📋 Found {len(symbols)} trading symbols")
            # Show some popular symbols
            popular_symbols = ['BTCUSDT', 'ETHUSDT', 'ADAUSDT', 'BNBUSDT']
            available_popular = [s for s in popular_symbols if s in symbols]
            logger.info(f"📋 Popular symbols available: {available_popular}")
            # Get order book snapshot
            if 'BTCUSDT' in symbols:
                logger.info("📊 Getting BTC order book snapshot...")
                orderbook = await self.connector.get_orderbook_snapshot('BTCUSDT', depth=10)
                if orderbook:
                    logger.info(
                        f"📊 BTC Order Book: Mid=${orderbook.mid_price:.2f}, "
                        f"Spread=${orderbook.spread:.2f}"
                    )
            # Subscribe to real-time data
            logger.info("🔔 Subscribing to real-time data...")
            # Subscribe to BTC order book and trades
            if 'BTCUSDT' in symbols:
                await self.connector.subscribe_orderbook('BTCUSDT')
                await self.connector.subscribe_trades('BTCUSDT')
                logger.info("✅ Subscribed to BTCUSDT order book and trades")
            # Subscribe to ETH order book
            if 'ETHUSDT' in symbols:
                await self.connector.subscribe_orderbook('ETHUSDT')
                logger.info("✅ Subscribed to ETHUSDT order book")
            # Let it run for a while
            logger.info("⏳ Collecting data for 30 seconds...")
            await asyncio.sleep(30)
            # Show statistics
            stats = self.connector.get_binance_stats()
            logger.info("📈 Final Statistics:")
            logger.info(f"   📊 Order books received: {self.orderbook_count}")
            logger.info(f"   💰 Trades received: {self.trade_count}")
            logger.info(f"   📡 Total messages: {stats['message_count']}")
            logger.info(f"   ❌ Errors: {stats['error_count']}")
            logger.info(f"   🔗 Active streams: {stats['active_streams']}")
            logger.info(f"   📋 Subscriptions: {list(stats['subscriptions'].keys())}")
            # Unsubscribe and disconnect
            logger.info("🔌 Cleaning up...")
            if 'BTCUSDT' in self.connector.subscriptions:
                await self.connector.unsubscribe_orderbook('BTCUSDT')
                await self.connector.unsubscribe_trades('BTCUSDT')
            if 'ETHUSDT' in self.connector.subscriptions:
                await self.connector.unsubscribe_orderbook('ETHUSDT')
            await self.connector.disconnect()
            logger.info("✅ Disconnected successfully")
        except KeyboardInterrupt:
            logger.info("⏹️ Interrupted by user")
        except Exception as e:
            logger.error(f"❌ Example failed: {e}")
        finally:
            # Ensure cleanup
            try:
                await self.connector.disconnect()
            except:
                pass
 async def main():
    """Main function"""
    example = BinanceExample()
    await example.run_example()
 if __name__ == "__main__":
    print("Binance Connector Example")
    print("=" * 25)
    print("This example will:")
    print("1. Connect to Binance WebSocket")
    print("2. Get available trading symbols")
    print("3. Subscribe to real-time order book and trade data")
    print("4. Display received data for 30 seconds")
    print("5. Show statistics and disconnect")
    print()
    print("Press Ctrl+C to stop early")
    print("=" * 25)
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n👋 Example stopped by user")
    except Exception as e:
        print(f"\n❌ Example failed: {e}")
        sys.exit(1)
--- a/COBY/processing/init.py
+++ b/COBY/processing/init.py
@@ -0,0 +1,15 @@
 """
 Data processing and normalization components for the COBY system.
 """
 from .data_processor import StandardDataProcessor
 from .quality_checker import DataQualityChecker
 from .anomaly_detector import AnomalyDetector
 from .metrics_calculator import MetricsCalculator
 __all__ = [
    'StandardDataProcessor',
    'DataQualityChecker',
    'AnomalyDetector',
    'MetricsCalculator'
 ]
--- a/COBY/processing/anomaly_detector.py
+++ b/COBY/processing/anomaly_detector.py
@@ -0,0 +1,329 @@
 """
 Anomaly detection for market data.
 """
 import statistics
 from typing import Dict, List, Union, Optional, Deque
 from collections import deque
 from datetime import datetime, timedelta
 from ..models.core import OrderBookSnapshot, TradeEvent
 from ..utils.logging import get_logger
 from ..utils.timing import get_current_timestamp
 logger = get_logger(__name__)
 class AnomalyDetector:
    """
    Detects anomalies in market data using statistical methods.
    Detects:
    - Price spikes and drops
    - Volume anomalies
    - Spread anomalies
    - Frequency anomalies
    """
    def __init__(self, window_size: int = 100, z_score_threshold: float = 3.0):
        """
        Initialize anomaly detector.
        Args:
            window_size: Size of rolling window for statistics
            z_score_threshold: Z-score threshold for anomaly detection
        """
        self.window_size = window_size
        self.z_score_threshold = z_score_threshold
        # Rolling windows for statistics
        self.price_windows: Dict[str, Deque[float]] = {}
        self.volume_windows: Dict[str, Deque[float]] = {}
        self.spread_windows: Dict[str, Deque[float]] = {}
        self.timestamp_windows: Dict[str, Deque[datetime]] = {}
        logger.info(f"Anomaly detector initialized with window_size={window_size}, threshold={z_score_threshold}")
    def detect_orderbook_anomalies(self, orderbook: OrderBookSnapshot) -> List[str]:
        """
        Detect anomalies in order book data.
        Args:
            orderbook: Order book snapshot to analyze
        Returns:
            List[str]: List of detected anomalies
        """
        anomalies = []
        key = f"{orderbook.symbol}_{orderbook.exchange}"
        try:
            # Price anomalies
            if orderbook.mid_price:
                price_anomalies = self._detect_price_anomalies(key, orderbook.mid_price)
                anomalies.extend(price_anomalies)
            # Volume anomalies
            total_volume = orderbook.bid_volume + orderbook.ask_volume
            volume_anomalies = self._detect_volume_anomalies(key, total_volume)
            anomalies.extend(volume_anomalies)
            # Spread anomalies
            if orderbook.spread and orderbook.mid_price:
                spread_pct = (orderbook.spread / orderbook.mid_price) * 100
                spread_anomalies = self._detect_spread_anomalies(key, spread_pct)
                anomalies.extend(spread_anomalies)
            # Frequency anomalies
            frequency_anomalies = self._detect_frequency_anomalies(key, orderbook.timestamp)
            anomalies.extend(frequency_anomalies)
            # Update windows
            self._update_windows(key, orderbook)
        except Exception as e:
            logger.error(f"Error detecting order book anomalies: {e}")
            anomalies.append(f"Anomaly detection error: {e}")
        if anomalies:
            logger.warning(f"Anomalies detected in {orderbook.symbol}@{orderbook.exchange}: {anomalies}")
        return anomalies    
    def detect_trade_anomalies(self, trade: TradeEvent) -> List[str]:
        """
        Detect anomalies in trade data.
        Args:
            trade: Trade event to analyze
        Returns:
            List[str]: List of detected anomalies
        """
        anomalies = []
        key = f"{trade.symbol}_{trade.exchange}_trade"
        try:
            # Price anomalies
            price_anomalies = self._detect_price_anomalies(key, trade.price)
            anomalies.extend(price_anomalies)
            # Volume anomalies
            volume_anomalies = self._detect_volume_anomalies(key, trade.size)
            anomalies.extend(volume_anomalies)
            # Update windows
            self._update_trade_windows(key, trade)
        except Exception as e:
            logger.error(f"Error detecting trade anomalies: {e}")
            anomalies.append(f"Anomaly detection error: {e}")
        if anomalies:
            logger.warning(f"Trade anomalies detected in {trade.symbol}@{trade.exchange}: {anomalies}")
        return anomalies
    def _detect_price_anomalies(self, key: str, price: float) -> List[str]:
        """Detect price anomalies using z-score"""
        anomalies = []
        if key not in self.price_windows:
            self.price_windows[key] = deque(maxlen=self.window_size)
            return anomalies
        window = self.price_windows[key]
        if len(window) < 10:  # Need minimum data points
            return anomalies
        try:
            mean_price = statistics.mean(window)
            std_price = statistics.stdev(window)
            if std_price > 0:
                z_score = abs(price - mean_price) / std_price
                if z_score > self.z_score_threshold:
                    direction = "spike" if price > mean_price else "drop"
                    anomalies.append(f"Price {direction}: {price:.6f} (z-score: {z_score:.2f})")
        except statistics.StatisticsError:
            pass  # Not enough data or all values are the same
        return anomalies
    def _detect_volume_anomalies(self, key: str, volume: float) -> List[str]:
        """Detect volume anomalies using z-score"""
        anomalies = []
        volume_key = f"{key}_volume"
        if volume_key not in self.volume_windows:
            self.volume_windows[volume_key] = deque(maxlen=self.window_size)
            return anomalies
        window = self.volume_windows[volume_key]
        if len(window) < 10:
            return anomalies
        try:
            mean_volume = statistics.mean(window)
            std_volume = statistics.stdev(window)
            if std_volume > 0:
                z_score = abs(volume - mean_volume) / std_volume
                if z_score > self.z_score_threshold:
                    direction = "spike" if volume > mean_volume else "drop"
                    anomalies.append(f"Volume {direction}: {volume:.6f} (z-score: {z_score:.2f})")
        except statistics.StatisticsError:
            pass
        return anomalies
    def _detect_spread_anomalies(self, key: str, spread_pct: float) -> List[str]:
        """Detect spread anomalies using z-score"""
        anomalies = []
        spread_key = f"{key}_spread"
        if spread_key not in self.spread_windows:
            self.spread_windows[spread_key] = deque(maxlen=self.window_size)
            return anomalies
        window = self.spread_windows[spread_key]
        if len(window) < 10:
            return anomalies
        try:
            mean_spread = statistics.mean(window)
            std_spread = statistics.stdev(window)
            if std_spread > 0:
                z_score = abs(spread_pct - mean_spread) / std_spread
                if z_score > self.z_score_threshold:
                    direction = "widening" if spread_pct > mean_spread else "tightening"
                    anomalies.append(f"Spread {direction}: {spread_pct:.4f}% (z-score: {z_score:.2f})")
        except statistics.StatisticsError:
            pass
        return anomalies    
    def _detect_frequency_anomalies(self, key: str, timestamp: datetime) -> List[str]:
        """Detect frequency anomalies in data updates"""
        anomalies = []
        timestamp_key = f"{key}_timestamp"
        if timestamp_key not in self.timestamp_windows:
            self.timestamp_windows[timestamp_key] = deque(maxlen=self.window_size)
            return anomalies
        window = self.timestamp_windows[timestamp_key]
        if len(window) < 5:
            return anomalies
        try:
            # Calculate intervals between updates
            intervals = []
            for i in range(1, len(window)):
                interval = (window[i] - window[i-1]).total_seconds()
                intervals.append(interval)
            if len(intervals) >= 5:
                mean_interval = statistics.mean(intervals)
                std_interval = statistics.stdev(intervals)
                # Check current interval
                current_interval = (timestamp - window[-1]).total_seconds()
                if std_interval > 0:
                    z_score = abs(current_interval - mean_interval) / std_interval
                    if z_score > self.z_score_threshold:
                        if current_interval > mean_interval:
                            anomalies.append(f"Update delay: {current_interval:.1f}s (expected: {mean_interval:.1f}s)")
                        else:
                            anomalies.append(f"Update burst: {current_interval:.1f}s (expected: {mean_interval:.1f}s)")
        except (statistics.StatisticsError, IndexError):
            pass
        return anomalies
    def _update_windows(self, key: str, orderbook: OrderBookSnapshot) -> None:
        """Update rolling windows with new data"""
        # Update price window
        if orderbook.mid_price:
            if key not in self.price_windows:
                self.price_windows[key] = deque(maxlen=self.window_size)
            self.price_windows[key].append(orderbook.mid_price)
        # Update volume window
        total_volume = orderbook.bid_volume + orderbook.ask_volume
        volume_key = f"{key}_volume"
        if volume_key not in self.volume_windows:
            self.volume_windows[volume_key] = deque(maxlen=self.window_size)
        self.volume_windows[volume_key].append(total_volume)
        # Update spread window
        if orderbook.spread and orderbook.mid_price:
            spread_pct = (orderbook.spread / orderbook.mid_price) * 100
            spread_key = f"{key}_spread"
            if spread_key not in self.spread_windows:
                self.spread_windows[spread_key] = deque(maxlen=self.window_size)
            self.spread_windows[spread_key].append(spread_pct)
        # Update timestamp window
        timestamp_key = f"{key}_timestamp"
        if timestamp_key not in self.timestamp_windows:
            self.timestamp_windows[timestamp_key] = deque(maxlen=self.window_size)
        self.timestamp_windows[timestamp_key].append(orderbook.timestamp)
    def _update_trade_windows(self, key: str, trade: TradeEvent) -> None:
        """Update rolling windows with trade data"""
        # Update price window
        if key not in self.price_windows:
            self.price_windows[key] = deque(maxlen=self.window_size)
        self.price_windows[key].append(trade.price)
        # Update volume window
        volume_key = f"{key}_volume"
        if volume_key not in self.volume_windows:
            self.volume_windows[volume_key] = deque(maxlen=self.window_size)
        self.volume_windows[volume_key].append(trade.size)
    def get_statistics(self) -> Dict[str, Dict[str, float]]:
        """Get current statistics for all tracked symbols"""
        stats = {}
        for key, window in self.price_windows.items():
            if len(window) >= 2:
                try:
                    stats[key] = {
                        'price_mean': statistics.mean(window),
                        'price_std': statistics.stdev(window),
                        'price_min': min(window),
                        'price_max': max(window),
                        'data_points': len(window)
                    }
                except statistics.StatisticsError:
                    stats[key] = {'error': 'insufficient_data'}
        return stats
    def reset_windows(self, key: Optional[str] = None) -> None:
        """Reset rolling windows for a specific key or all keys"""
        if key:
            # Reset specific key
            self.price_windows.pop(key, None)
            self.volume_windows.pop(f"{key}_volume", None)
            self.spread_windows.pop(f"{key}_spread", None)
            self.timestamp_windows.pop(f"{key}_timestamp", None)
        else:
            # Reset all windows
            self.price_windows.clear()
            self.volume_windows.clear()
            self.spread_windows.clear()
            self.timestamp_windows.clear()
        logger.info(f"Reset anomaly detection windows for {key or 'all keys'}")
--- a/COBY/processing/data_processor.py
+++ b/COBY/processing/data_processor.py
@@ -0,0 +1,378 @@
 """
 Main data processor implementation.
 """
 from typing import Dict, Union, List, Optional, Any
 from ..interfaces.data_processor import DataProcessor
 from ..models.core import OrderBookSnapshot, TradeEvent, OrderBookMetrics
 from ..utils.logging import get_logger, set_correlation_id
 from ..utils.exceptions import ValidationError, ProcessingError
 from ..utils.timing import get_current_timestamp
 from .quality_checker import DataQualityChecker
 from .anomaly_detector import AnomalyDetector
 from .metrics_calculator import MetricsCalculator
 logger = get_logger(__name__)
 class StandardDataProcessor(DataProcessor):
    """
    Standard implementation of data processor interface.
    Provides:
    - Data normalization and validation
    - Quality checking
    - Anomaly detection
    - Metrics calculation
    - Data enrichment
    """
    def __init__(self):
        """Initialize data processor with components"""
        self.quality_checker = DataQualityChecker()
        self.anomaly_detector = AnomalyDetector()
        self.metrics_calculator = MetricsCalculator()
        # Processing statistics
        self.processed_orderbooks = 0
        self.processed_trades = 0
        self.quality_failures = 0
        self.anomalies_detected = 0
        logger.info("Standard data processor initialized")
    def normalize_orderbook(self, raw_data: Dict, exchange: str) -> OrderBookSnapshot:
        """
        Normalize raw order book data to standard format.
        Args:
            raw_data: Raw order book data from exchange
            exchange: Exchange name
        Returns:
            OrderBookSnapshot: Normalized order book data
        """
        try:
            set_correlation_id()
            # This is a generic implementation - specific exchanges would override
            # For now, assume data is already in correct format
            if isinstance(raw_data, OrderBookSnapshot):
                return raw_data
            # If raw_data is a dict, try to construct OrderBookSnapshot
            # This would be customized per exchange
            raise NotImplementedError(
                "normalize_orderbook should be implemented by exchange-specific processors"
            )
        except Exception as e:
            logger.error(f"Error normalizing order book data: {e}")
            raise ProcessingError(f"Normalization failed: {e}", "NORMALIZE_ERROR")
    def normalize_trade(self, raw_data: Dict, exchange: str) -> TradeEvent:
        """
        Normalize raw trade data to standard format.
        Args:
            raw_data: Raw trade data from exchange
            exchange: Exchange name
        Returns:
            TradeEvent: Normalized trade data
        """
        try:
            set_correlation_id()
            # This is a generic implementation - specific exchanges would override
            if isinstance(raw_data, TradeEvent):
                return raw_data
            # If raw_data is a dict, try to construct TradeEvent
            # This would be customized per exchange
            raise NotImplementedError(
                "normalize_trade should be implemented by exchange-specific processors"
            )
        except Exception as e:
            logger.error(f"Error normalizing trade data: {e}")
            raise ProcessingError(f"Normalization failed: {e}", "NORMALIZE_ERROR")    
    def validate_data(self, data: Union[OrderBookSnapshot, TradeEvent]) -> bool:
        """
        Validate normalized data for quality and consistency.
        Args:
            data: Normalized data to validate
        Returns:
            bool: True if data is valid, False otherwise
        """
        try:
            set_correlation_id()
            if isinstance(data, OrderBookSnapshot):
                quality_score, issues = self.quality_checker.check_orderbook_quality(data)
                self.processed_orderbooks += 1
                if quality_score < 0.5:  # Threshold for acceptable quality
                    self.quality_failures += 1
                    logger.warning(f"Low quality order book data: score={quality_score:.2f}, issues={issues}")
                    return False
                return True
            elif isinstance(data, TradeEvent):
                quality_score, issues = self.quality_checker.check_trade_quality(data)
                self.processed_trades += 1
                if quality_score < 0.5:
                    self.quality_failures += 1
                    logger.warning(f"Low quality trade data: score={quality_score:.2f}, issues={issues}")
                    return False
                return True
            else:
                logger.error(f"Unknown data type for validation: {type(data)}")
                return False
        except Exception as e:
            logger.error(f"Error validating data: {e}")
            return False
    def calculate_metrics(self, orderbook: OrderBookSnapshot) -> OrderBookMetrics:
        """
        Calculate metrics from order book data.
        Args:
            orderbook: Order book snapshot
        Returns:
            OrderBookMetrics: Calculated metrics
        """
        try:
            set_correlation_id()
            return self.metrics_calculator.calculate_orderbook_metrics(orderbook)
        except Exception as e:
            logger.error(f"Error calculating metrics: {e}")
            raise ProcessingError(f"Metrics calculation failed: {e}", "METRICS_ERROR")
    def detect_anomalies(self, data: Union[OrderBookSnapshot, TradeEvent]) -> List[str]:
        """
        Detect anomalies in the data.
        Args:
            data: Data to analyze for anomalies
        Returns:
            List[str]: List of detected anomaly descriptions
        """
        try:
            set_correlation_id()
            if isinstance(data, OrderBookSnapshot):
                anomalies = self.anomaly_detector.detect_orderbook_anomalies(data)
            elif isinstance(data, TradeEvent):
                anomalies = self.anomaly_detector.detect_trade_anomalies(data)
            else:
                logger.error(f"Unknown data type for anomaly detection: {type(data)}")
                return ["Unknown data type"]
            if anomalies:
                self.anomalies_detected += len(anomalies)
            return anomalies
        except Exception as e:
            logger.error(f"Error detecting anomalies: {e}")
            return [f"Anomaly detection error: {e}"]
    def filter_data(self, data: Union[OrderBookSnapshot, TradeEvent], criteria: Dict) -> bool:
        """
        Filter data based on criteria.
        Args:
            data: Data to filter
            criteria: Filtering criteria
        Returns:
            bool: True if data passes filter, False otherwise
        """
        try:
            set_correlation_id()
            # Symbol filter
            if 'symbols' in criteria:
                allowed_symbols = criteria['symbols']
                if data.symbol not in allowed_symbols:
                    return False
            # Exchange filter
            if 'exchanges' in criteria:
                allowed_exchanges = criteria['exchanges']
                if data.exchange not in allowed_exchanges:
                    return False
            # Quality filter
            if 'min_quality' in criteria:
                min_quality = criteria['min_quality']
                if isinstance(data, OrderBookSnapshot):
                    quality_score, _ = self.quality_checker.check_orderbook_quality(data)
                elif isinstance(data, TradeEvent):
                    quality_score, _ = self.quality_checker.check_trade_quality(data)
                else:
                    quality_score = 0.0
                if quality_score < min_quality:
                    return False
            # Price range filter
            if 'price_range' in criteria:
                price_range = criteria['price_range']
                min_price, max_price = price_range
                if isinstance(data, OrderBookSnapshot):
                    price = data.mid_price
                elif isinstance(data, TradeEvent):
                    price = data.price
                else:
                    return False
                if price and (price < min_price or price > max_price):
                    return False
            # Volume filter for trades
            if 'min_volume' in criteria and isinstance(data, TradeEvent):
                min_volume = criteria['min_volume']
                if data.size < min_volume:
                    return False
            return True
        except Exception as e:
            logger.error(f"Error filtering data: {e}")
            return False 
    def enrich_data(self, data: Union[OrderBookSnapshot, TradeEvent]) -> Dict:
        """
        Enrich data with additional metadata.
        Args:
            data: Data to enrich
        Returns:
            Dict: Enriched data with metadata
        """
        try:
            set_correlation_id()
            enriched = {
                'original_data': data,
                'processing_timestamp': get_current_timestamp(),
                'processor_version': '1.0.0'
            }
            # Add quality metrics
            if isinstance(data, OrderBookSnapshot):
                quality_score, quality_issues = self.quality_checker.check_orderbook_quality(data)
                enriched['quality_score'] = quality_score
                enriched['quality_issues'] = quality_issues
                # Add calculated metrics
                try:
                    metrics = self.calculate_metrics(data)
                    enriched['metrics'] = {
                        'mid_price': metrics.mid_price,
                        'spread': metrics.spread,
                        'spread_percentage': metrics.spread_percentage,
                        'volume_imbalance': metrics.volume_imbalance,
                        'depth_10': metrics.depth_10,
                        'depth_50': metrics.depth_50
                    }
                except Exception as e:
                    enriched['metrics_error'] = str(e)
                # Add liquidity score
                try:
                    liquidity_score = self.metrics_calculator.calculate_liquidity_score(data)
                    enriched['liquidity_score'] = liquidity_score
                except Exception as e:
                    enriched['liquidity_error'] = str(e)
            elif isinstance(data, TradeEvent):
                quality_score, quality_issues = self.quality_checker.check_trade_quality(data)
                enriched['quality_score'] = quality_score
                enriched['quality_issues'] = quality_issues
                # Add trade-specific enrichments
                enriched['trade_value'] = data.price * data.size
                enriched['side_numeric'] = 1 if data.side == 'buy' else -1
            # Add anomaly detection results
            anomalies = self.detect_anomalies(data)
            enriched['anomalies'] = anomalies
            enriched['anomaly_count'] = len(anomalies)
            return enriched
        except Exception as e:
            logger.error(f"Error enriching data: {e}")
            return {
                'original_data': data,
                'enrichment_error': str(e)
            }
    def get_data_quality_score(self, data: Union[OrderBookSnapshot, TradeEvent]) -> float:
        """
        Calculate data quality score.
        Args:
            data: Data to score
        Returns:
            float: Quality score between 0.0 and 1.0
        """
        try:
            set_correlation_id()
            if isinstance(data, OrderBookSnapshot):
                quality_score, _ = self.quality_checker.check_orderbook_quality(data)
            elif isinstance(data, TradeEvent):
                quality_score, _ = self.quality_checker.check_trade_quality(data)
            else:
                logger.error(f"Unknown data type for quality scoring: {type(data)}")
                return 0.0
            return quality_score
        except Exception as e:
            logger.error(f"Error calculating quality score: {e}")
            return 0.0
    def get_processing_stats(self) -> Dict[str, Any]:
        """Get processing statistics"""
        return {
            'processed_orderbooks': self.processed_orderbooks,
            'processed_trades': self.processed_trades,
            'quality_failures': self.quality_failures,
            'anomalies_detected': self.anomalies_detected,
            'quality_failure_rate': (
                self.quality_failures / max(1, self.processed_orderbooks + self.processed_trades)
            ),
            'anomaly_rate': (
                self.anomalies_detected / max(1, self.processed_orderbooks + self.processed_trades)
            ),
            'quality_checker_summary': self.quality_checker.get_quality_summary(),
            'anomaly_detector_stats': self.anomaly_detector.get_statistics()
        }
    def reset_stats(self) -> None:
        """Reset processing statistics"""
        self.processed_orderbooks = 0
        self.processed_trades = 0
        self.quality_failures = 0
        self.anomalies_detected = 0
        logger.info("Processing statistics reset")
--- a/COBY/processing/metrics_calculator.py
+++ b/COBY/processing/metrics_calculator.py
@@ -0,0 +1,275 @@
 """
 Metrics calculation for order book analysis.
 """
 from typing import Dict, List, Optional
 from ..models.core import OrderBookSnapshot, OrderBookMetrics, ImbalanceMetrics
 from ..utils.logging import get_logger
 logger = get_logger(__name__)
 class MetricsCalculator:
    """
    Calculates various metrics from order book data.
    Metrics include:
    - Basic metrics (mid price, spread, volumes)
    - Imbalance metrics
    - Depth metrics
    - Liquidity metrics
    """
    def __init__(self):
        """Initialize metrics calculator"""
        logger.info("Metrics calculator initialized")
    def calculate_orderbook_metrics(self, orderbook: OrderBookSnapshot) -> OrderBookMetrics:
        """
        Calculate comprehensive order book metrics.
        Args:
            orderbook: Order book snapshot
        Returns:
            OrderBookMetrics: Calculated metrics
        """
        try:
            # Basic calculations
            mid_price = self._calculate_mid_price(orderbook)
            spread = self._calculate_spread(orderbook)
            spread_percentage = (spread / mid_price * 100) if mid_price > 0 else 0.0
            # Volume calculations
            bid_volume = sum(level.size for level in orderbook.bids)
            ask_volume = sum(level.size for level in orderbook.asks)
            # Imbalance calculation
            total_volume = bid_volume + ask_volume
            volume_imbalance = ((bid_volume - ask_volume) / total_volume) if total_volume > 0 else 0.0
            # Depth calculations
            depth_10 = self._calculate_depth(orderbook, 10)
            depth_50 = self._calculate_depth(orderbook, 50)
            return OrderBookMetrics(
                symbol=orderbook.symbol,
                exchange=orderbook.exchange,
                timestamp=orderbook.timestamp,
                mid_price=mid_price,
                spread=spread,
                spread_percentage=spread_percentage,
                bid_volume=bid_volume,
                ask_volume=ask_volume,
                volume_imbalance=volume_imbalance,
                depth_10=depth_10,
                depth_50=depth_50
            )
        except Exception as e:
            logger.error(f"Error calculating order book metrics: {e}")
            raise
    def calculate_imbalance_metrics(self, orderbook: OrderBookSnapshot) -> ImbalanceMetrics:
        """
        Calculate order book imbalance metrics.
        Args:
            orderbook: Order book snapshot
        Returns:
            ImbalanceMetrics: Calculated imbalance metrics
        """
        try:
            # Volume imbalance
            bid_volume = sum(level.size for level in orderbook.bids)
            ask_volume = sum(level.size for level in orderbook.asks)
            total_volume = bid_volume + ask_volume
            volume_imbalance = ((bid_volume - ask_volume) / total_volume) if total_volume > 0 else 0.0
            # Price imbalance (weighted by volume)
            price_imbalance = self._calculate_price_imbalance(orderbook)
            # Depth imbalance
            depth_imbalance = self._calculate_depth_imbalance(orderbook)
            # Momentum score (simplified - would need historical data for full implementation)
            momentum_score = volume_imbalance * 0.5 + price_imbalance * 0.3 + depth_imbalance * 0.2
            return ImbalanceMetrics(
                symbol=orderbook.symbol,
                timestamp=orderbook.timestamp,
                volume_imbalance=volume_imbalance,
                price_imbalance=price_imbalance,
                depth_imbalance=depth_imbalance,
                momentum_score=momentum_score
            )
        except Exception as e:
            logger.error(f"Error calculating imbalance metrics: {e}")
            raise 
    def _calculate_mid_price(self, orderbook: OrderBookSnapshot) -> float:
        """Calculate mid price"""
        if not orderbook.bids or not orderbook.asks:
            return 0.0
        best_bid = orderbook.bids[0].price
        best_ask = orderbook.asks[0].price
        return (best_bid + best_ask) / 2.0
    def _calculate_spread(self, orderbook: OrderBookSnapshot) -> float:
        """Calculate bid-ask spread"""
        if not orderbook.bids or not orderbook.asks:
            return 0.0
        best_bid = orderbook.bids[0].price
        best_ask = orderbook.asks[0].price
        return best_ask - best_bid
    def _calculate_depth(self, orderbook: OrderBookSnapshot, levels: int) -> float:
        """Calculate market depth for specified number of levels"""
        bid_depth = sum(
            level.size for level in orderbook.bids[:levels]
        )
        ask_depth = sum(
            level.size for level in orderbook.asks[:levels]
        )
        return bid_depth + ask_depth
    def _calculate_price_imbalance(self, orderbook: OrderBookSnapshot) -> float:
        """Calculate price-weighted imbalance"""
        if not orderbook.bids or not orderbook.asks:
            return 0.0
        # Calculate volume-weighted average prices for top levels
        bid_vwap = self._calculate_vwap(orderbook.bids[:5])
        ask_vwap = self._calculate_vwap(orderbook.asks[:5])
        if bid_vwap == 0 or ask_vwap == 0:
            return 0.0
        mid_price = (bid_vwap + ask_vwap) / 2.0
        # Normalize imbalance
        price_imbalance = (bid_vwap - ask_vwap) / mid_price if mid_price > 0 else 0.0
        return max(-1.0, min(1.0, price_imbalance))
    def _calculate_depth_imbalance(self, orderbook: OrderBookSnapshot) -> float:
        """Calculate depth imbalance across multiple levels"""
        levels_to_check = [5, 10, 20]
        imbalances = []
        for levels in levels_to_check:
            bid_depth = sum(level.size for level in orderbook.bids[:levels])
            ask_depth = sum(level.size for level in orderbook.asks[:levels])
            total_depth = bid_depth + ask_depth
            if total_depth > 0:
                imbalance = (bid_depth - ask_depth) / total_depth
                imbalances.append(imbalance)
        # Return weighted average of imbalances
        if imbalances:
            return sum(imbalances) / len(imbalances)
        return 0.0
    def _calculate_vwap(self, levels: List) -> float:
        """Calculate volume-weighted average price for price levels"""
        if not levels:
            return 0.0
        total_volume = sum(level.size for level in levels)
        if total_volume == 0:
            return 0.0
        weighted_sum = sum(level.price * level.size for level in levels)
        return weighted_sum / total_volume
    def calculate_liquidity_score(self, orderbook: OrderBookSnapshot) -> float:
        """
        Calculate liquidity score based on depth and spread.
        Args:
            orderbook: Order book snapshot
        Returns:
            float: Liquidity score (0.0 to 1.0)
        """
        try:
            if not orderbook.bids or not orderbook.asks:
                return 0.0
            # Spread component (lower spread = higher liquidity)
            spread = self._calculate_spread(orderbook)
            mid_price = self._calculate_mid_price(orderbook)
            if mid_price == 0:
                return 0.0
            spread_pct = (spread / mid_price) * 100
            spread_score = max(0.0, 1.0 - (spread_pct / 5.0))  # Normalize to 5% max spread
            # Depth component (higher depth = higher liquidity)
            total_depth = self._calculate_depth(orderbook, 10)
            depth_score = min(1.0, total_depth / 100.0)  # Normalize to 100 units max depth
            # Volume balance component (more balanced = higher liquidity)
            bid_volume = sum(level.size for level in orderbook.bids[:10])
            ask_volume = sum(level.size for level in orderbook.asks[:10])
            total_volume = bid_volume + ask_volume
            if total_volume > 0:
                imbalance = abs(bid_volume - ask_volume) / total_volume
                balance_score = 1.0 - imbalance
            else:
                balance_score = 0.0
            # Weighted combination
            liquidity_score = (spread_score * 0.4 + depth_score * 0.4 + balance_score * 0.2)
            return max(0.0, min(1.0, liquidity_score))
        except Exception as e:
            logger.error(f"Error calculating liquidity score: {e}")
            return 0.0
    def get_market_summary(self, orderbook: OrderBookSnapshot) -> Dict[str, float]:
        """
        Get comprehensive market summary.
        Args:
            orderbook: Order book snapshot
        Returns:
            Dict[str, float]: Market summary metrics
        """
        try:
            metrics = self.calculate_orderbook_metrics(orderbook)
            imbalance = self.calculate_imbalance_metrics(orderbook)
            liquidity = self.calculate_liquidity_score(orderbook)
            return {
                'mid_price': metrics.mid_price,
                'spread': metrics.spread,
                'spread_percentage': metrics.spread_percentage,
                'bid_volume': metrics.bid_volume,
                'ask_volume': metrics.ask_volume,
                'volume_imbalance': metrics.volume_imbalance,
                'depth_10': metrics.depth_10,
                'depth_50': metrics.depth_50,
                'price_imbalance': imbalance.price_imbalance,
                'depth_imbalance': imbalance.depth_imbalance,
                'momentum_score': imbalance.momentum_score,
                'liquidity_score': liquidity
            }
        except Exception as e:
            logger.error(f"Error generating market summary: {e}")
            return {}
--- a/COBY/processing/quality_checker.py
+++ b/COBY/processing/quality_checker.py
@@ -0,0 +1,288 @@
 """
 Data quality checking and validation for market data.
 """
 from typing import Dict, List, Union, Optional, Tuple
 from datetime import datetime, timezone
 from ..models.core import OrderBookSnapshot, TradeEvent
 from ..utils.logging import get_logger
 from ..utils.validation import validate_price, validate_volume, validate_symbol
 from ..utils.timing import get_current_timestamp
 logger = get_logger(__name__)
 class DataQualityChecker:
    """
    Comprehensive data quality checker for market data.
    Validates:
    - Data structure integrity
    - Price and volume ranges
    - Timestamp consistency
    - Cross-validation between related data points
    """
    def __init__(self):
        """Initialize quality checker with default thresholds"""
        # Quality thresholds
        self.max_spread_percentage = 10.0  # Maximum spread as % of mid price
        self.max_price_change_percentage = 50.0  # Maximum price change between updates
        self.min_volume_threshold = 0.000001  # Minimum meaningful volume
        self.max_timestamp_drift = 300  # Maximum seconds drift from current time
        # Price history for validation
        self.price_history: Dict[str, Dict[str, float]] = {}  # symbol -> exchange -> last_price
        logger.info("Data quality checker initialized")    
    def check_orderbook_quality(self, orderbook: OrderBookSnapshot) -> Tuple[float, List[str]]:
        """
        Check order book data quality.
        Args:
            orderbook: Order book snapshot to validate
        Returns:
            Tuple[float, List[str]]: Quality score (0.0-1.0) and list of issues
        """
        issues = []
        quality_score = 1.0
        try:
            # Basic structure validation
            structure_issues = self._check_orderbook_structure(orderbook)
            issues.extend(structure_issues)
            quality_score -= len(structure_issues) * 0.1
            # Price validation
            price_issues = self._check_orderbook_prices(orderbook)
            issues.extend(price_issues)
            quality_score -= len(price_issues) * 0.15
            # Volume validation
            volume_issues = self._check_orderbook_volumes(orderbook)
            issues.extend(volume_issues)
            quality_score -= len(volume_issues) * 0.1
            # Spread validation
            spread_issues = self._check_orderbook_spread(orderbook)
            issues.extend(spread_issues)
            quality_score -= len(spread_issues) * 0.2
            # Timestamp validation
            timestamp_issues = self._check_timestamp(orderbook.timestamp)
            issues.extend(timestamp_issues)
            quality_score -= len(timestamp_issues) * 0.1
            # Cross-validation with history
            history_issues = self._check_price_history(orderbook)
            issues.extend(history_issues)
            quality_score -= len(history_issues) * 0.15
            # Update price history
            self._update_price_history(orderbook)
        except Exception as e:
            logger.error(f"Error checking order book quality: {e}")
            issues.append(f"Quality check error: {e}")
            quality_score = 0.0
        # Ensure score is within bounds
        quality_score = max(0.0, min(1.0, quality_score))
        if issues:
            logger.debug(f"Order book quality issues for {orderbook.symbol}@{orderbook.exchange}: {issues}")
        return quality_score, issues    de
 f check_trade_quality(self, trade: TradeEvent) -> Tuple[float, List[str]]:
        """
        Check trade data quality.
        Args:
            trade: Trade event to validate
        Returns:
            Tuple[float, List[str]]: Quality score (0.0-1.0) and list of issues
        """
        issues = []
        quality_score = 1.0
        try:
            # Basic structure validation
            if not validate_symbol(trade.symbol):
                issues.append("Invalid symbol format")
            if not trade.exchange:
                issues.append("Missing exchange")
            if not trade.trade_id:
                issues.append("Missing trade ID")
            # Price validation
            if not validate_price(trade.price):
                issues.append(f"Invalid price: {trade.price}")
            # Volume validation
            if not validate_volume(trade.size):
                issues.append(f"Invalid size: {trade.size}")
            if trade.size < self.min_volume_threshold:
                issues.append(f"Size below threshold: {trade.size}")
            # Side validation
            if trade.side not in ['buy', 'sell']:
                issues.append(f"Invalid side: {trade.side}")
            # Timestamp validation
            timestamp_issues = self._check_timestamp(trade.timestamp)
            issues.extend(timestamp_issues)
            # Calculate quality score
            quality_score -= len(issues) * 0.2
        except Exception as e:
            logger.error(f"Error checking trade quality: {e}")
            issues.append(f"Quality check error: {e}")
            quality_score = 0.0
        # Ensure score is within bounds
        quality_score = max(0.0, min(1.0, quality_score))
        if issues:
            logger.debug(f"Trade quality issues for {trade.symbol}@{trade.exchange}: {issues}")
        return quality_score, issues
    def _check_orderbook_structure(self, orderbook: OrderBookSnapshot) -> List[str]:
        """Check basic order book structure"""
        issues = []
        if not validate_symbol(orderbook.symbol):
            issues.append("Invalid symbol format")
        if not orderbook.exchange:
            issues.append("Missing exchange")
        if not orderbook.bids:
            issues.append("No bid levels")
        if not orderbook.asks:
            issues.append("No ask levels")
        return issues
    def _check_orderbook_prices(self, orderbook: OrderBookSnapshot) -> List[str]:
        """Check order book price validity"""
        issues = []
        # Check bid prices (should be descending)
        for i, bid in enumerate(orderbook.bids):
            if not validate_price(bid.price):
                issues.append(f"Invalid bid price at level {i}: {bid.price}")
            if i > 0 and bid.price >= orderbook.bids[i-1].price:
                issues.append(f"Bid prices not descending at level {i}")
        # Check ask prices (should be ascending)
        for i, ask in enumerate(orderbook.asks):
            if not validate_price(ask.price):
                issues.append(f"Invalid ask price at level {i}: {ask.price}")
            if i > 0 and ask.price <= orderbook.asks[i-1].price:
                issues.append(f"Ask prices not ascending at level {i}")
        # Check bid-ask ordering
        if orderbook.bids and orderbook.asks:
            if orderbook.bids[0].price >= orderbook.asks[0].price:
                issues.append("Best bid >= best ask (crossed book)")
        return issues    def
 _check_orderbook_volumes(self, orderbook: OrderBookSnapshot) -> List[str]:
        """Check order book volume validity"""
        issues = []
        # Check bid volumes
        for i, bid in enumerate(orderbook.bids):
            if not validate_volume(bid.size):
                issues.append(f"Invalid bid volume at level {i}: {bid.size}")
            if bid.size < self.min_volume_threshold:
                issues.append(f"Bid volume below threshold at level {i}: {bid.size}")
        # Check ask volumes
        for i, ask in enumerate(orderbook.asks):
            if not validate_volume(ask.size):
                issues.append(f"Invalid ask volume at level {i}: {ask.size}")
            if ask.size < self.min_volume_threshold:
                issues.append(f"Ask volume below threshold at level {i}: {ask.size}")
        return issues
    def _check_orderbook_spread(self, orderbook: OrderBookSnapshot) -> List[str]:
        """Check order book spread validity"""
        issues = []
        if orderbook.mid_price and orderbook.spread:
            spread_percentage = (orderbook.spread / orderbook.mid_price) * 100
            if spread_percentage > self.max_spread_percentage:
                issues.append(f"Spread too wide: {spread_percentage:.2f}%")
            if spread_percentage < 0:
                issues.append(f"Negative spread: {spread_percentage:.2f}%")
        return issues
    def _check_timestamp(self, timestamp: datetime) -> List[str]:
        """Check timestamp validity"""
        issues = []
        if not timestamp:
            issues.append("Missing timestamp")
            return issues
        # Check if timestamp is timezone-aware
        if timestamp.tzinfo is None:
            issues.append("Timestamp missing timezone info")
        # Check timestamp drift
        current_time = get_current_timestamp()
        time_diff = abs((timestamp - current_time).total_seconds())
        if time_diff > self.max_timestamp_drift:
            issues.append(f"Timestamp drift too large: {time_diff:.1f}s")
        return issues
    def _check_price_history(self, orderbook: OrderBookSnapshot) -> List[str]:
        """Check price consistency with history"""
        issues = []
        key = f"{orderbook.symbol}_{orderbook.exchange}"
        if key in self.price_history and orderbook.mid_price:
            last_price = self.price_history[key]
            price_change = abs(orderbook.mid_price - last_price) / last_price * 100
            if price_change > self.max_price_change_percentage:
                issues.append(f"Large price change: {price_change:.2f}%")
        return issues
    def _update_price_history(self, orderbook: OrderBookSnapshot) -> None:
        """Update price history for future validation"""
        if orderbook.mid_price:
            key = f"{orderbook.symbol}_{orderbook.exchange}"
            self.price_history[key] = orderbook.mid_price
    def get_quality_summary(self) -> Dict[str, int]:
        """Get summary of quality checks performed"""
        return {
            'symbols_tracked': len(self.price_history),
            'max_spread_percentage': self.max_spread_percentage,
            'max_price_change_percentage': self.max_price_change_percentage,
            'min_volume_threshold': self.min_volume_threshold,
            'max_timestamp_drift': self.max_timestamp_drift
        }
--- a/COBY/tests/test_binance_connector.py
+++ b/COBY/tests/test_binance_connector.py
@@ -0,0 +1,341 @@
 """
 Tests for Binance exchange connector.
 """
 import pytest
 import asyncio
 from unittest.mock import AsyncMock, MagicMock, patch
 from datetime import datetime, timezone
 from ..connectors.binance_connector import BinanceConnector
 from ..models.core import OrderBookSnapshot, TradeEvent, PriceLevel
@pytest.fixture
 def binance_connector():
    """Create Binance connector for testing"""
    return BinanceConnector()
@pytest.fixture
 def sample_binance_orderbook_data():
    """Sample Binance order book data"""
    return {
        "lastUpdateId": 1027024,
        "bids": [
            ["4.00000000", "431.00000000"],
            ["3.99000000", "9.00000000"]
        ],
        "asks": [
            ["4.00000200", "12.00000000"],
            ["4.01000000", "18.00000000"]
        ]
    }
@pytest.fixture
 def sample_binance_depth_update():
    """Sample Binance depth update message"""
    return {
        "e": "depthUpdate",
        "E": 1672515782136,
        "s": "BTCUSDT",
        "U": 157,
        "u": 160,
        "b": [
            ["50000.00", "0.25"],
            ["49999.00", "0.50"]
        ],
        "a": [
            ["50001.00", "0.30"],
            ["50002.00", "0.40"]
        ]
    }
@pytest.fixture
 def sample_binance_trade_update():
    """Sample Binance trade update message"""
    return {
        "e": "trade",
        "E": 1672515782136,
        "s": "BTCUSDT",
        "t": 12345,
        "p": "50000.50",
        "q": "0.10",
        "b": 88,
        "a": 50,
        "T": 1672515782134,
        "m": False,
        "M": True
    }
 class TestBinanceConnector:
    """Test cases for BinanceConnector"""
    def test_initialization(self, binance_connector):
        """Test connector initialization"""
        assert binance_connector.exchange_name == "binance"
        assert binance_connector.websocket_url == BinanceConnector.WEBSOCKET_URL
        assert len(binance_connector.message_handlers) >= 3
        assert binance_connector.stream_id == 1
        assert binance_connector.active_streams == []
    def test_normalize_symbol(self, binance_connector):
        """Test symbol normalization"""
        # Test standard format
        assert binance_connector.normalize_symbol("BTCUSDT") == "BTCUSDT"
        # Test with separators
        assert binance_connector.normalize_symbol("BTC-USDT") == "BTCUSDT"
        assert binance_connector.normalize_symbol("BTC/USDT") == "BTCUSDT"
        # Test lowercase
        assert binance_connector.normalize_symbol("btcusdt") == "BTCUSDT"
        # Test invalid symbol
        with pytest.raises(Exception):
            binance_connector.normalize_symbol("")
    def test_get_message_type(self, binance_connector):
        """Test message type detection"""
        # Test depth update
        depth_msg = {"e": "depthUpdate", "s": "BTCUSDT"}
        assert binance_connector._get_message_type(depth_msg) == "depthUpdate"
        # Test trade update
        trade_msg = {"e": "trade", "s": "BTCUSDT"}
        assert binance_connector._get_message_type(trade_msg) == "trade"
        # Test error message
        error_msg = {"error": {"code": -1121, "msg": "Invalid symbol"}}
        assert binance_connector._get_message_type(error_msg) == "error"
        # Test unknown message
        unknown_msg = {"data": "something"}
        assert binance_connector._get_message_type(unknown_msg) == "unknown"
    def test_parse_orderbook_snapshot(self, binance_connector, sample_binance_orderbook_data):
        """Test order book snapshot parsing"""
        orderbook = binance_connector._parse_orderbook_snapshot(
            sample_binance_orderbook_data, 
            "BTCUSDT"
        )
        assert isinstance(orderbook, OrderBookSnapshot)
        assert orderbook.symbol == "BTCUSDT"
        assert orderbook.exchange == "binance"
        assert len(orderbook.bids) == 2
        assert len(orderbook.asks) == 2
        assert orderbook.sequence_id == 1027024
        # Check bid data
        assert orderbook.bids[0].price == 4.0
        assert orderbook.bids[0].size == 431.0
        # Check ask data
        assert orderbook.asks[0].price == 4.000002
        assert orderbook.asks[0].size == 12.0
    @pytest.mark.asyncio
    async def test_handle_orderbook_update(self, binance_connector, sample_binance_depth_update):
        """Test order book update handling"""
        # Mock callback
        callback_called = False
        received_data = None
        def mock_callback(data):
            nonlocal callback_called, received_data
            callback_called = True
            received_data = data
        binance_connector.add_data_callback(mock_callback)
        # Handle update
        await binance_connector._handle_orderbook_update(sample_binance_depth_update)
        # Verify callback was called
        assert callback_called
        assert isinstance(received_data, OrderBookSnapshot)
        assert received_data.symbol == "BTCUSDT"
        assert received_data.exchange == "binance"
        assert len(received_data.bids) == 2
        assert len(received_data.asks) == 2
    @pytest.mark.asyncio
    async def test_handle_trade_update(self, binance_connector, sample_binance_trade_update):
        """Test trade update handling"""
        # Mock callback
        callback_called = False
        received_data = None
        def mock_callback(data):
            nonlocal callback_called, received_data
            callback_called = True
            received_data = data
        binance_connector.add_data_callback(mock_callback)
        # Handle update
        await binance_connector._handle_trade_update(sample_binance_trade_update)
        # Verify callback was called
        assert callback_called
        assert isinstance(received_data, TradeEvent)
        assert received_data.symbol == "BTCUSDT"
        assert received_data.exchange == "binance"
        assert received_data.price == 50000.50
        assert received_data.size == 0.10
        assert received_data.side == "buy"  # m=False means buyer is not maker
        assert received_data.trade_id == "12345"
    @pytest.mark.asyncio
    async def test_subscribe_orderbook(self, binance_connector):
        """Test order book subscription"""
        # Mock WebSocket send
        binance_connector._send_message = AsyncMock(return_value=True)
        # Subscribe
        await binance_connector.subscribe_orderbook("BTCUSDT")
        # Verify subscription was sent
        binance_connector._send_message.assert_called_once()
        call_args = binance_connector._send_message.call_args[0][0]
        assert call_args["method"] == "SUBSCRIBE"
        assert "btcusdt@depth@100ms" in call_args["params"]
        assert call_args["id"] == 1
        # Verify tracking
        assert "BTCUSDT" in binance_connector.subscriptions
        assert "orderbook" in binance_connector.subscriptions["BTCUSDT"]
        assert "btcusdt@depth@100ms" in binance_connector.active_streams
        assert binance_connector.stream_id == 2
    @pytest.mark.asyncio
    async def test_subscribe_trades(self, binance_connector):
        """Test trade subscription"""
        # Mock WebSocket send
        binance_connector._send_message = AsyncMock(return_value=True)
        # Subscribe
        await binance_connector.subscribe_trades("ETHUSDT")
        # Verify subscription was sent
        binance_connector._send_message.assert_called_once()
        call_args = binance_connector._send_message.call_args[0][0]
        assert call_args["method"] == "SUBSCRIBE"
        assert "ethusdt@trade" in call_args["params"]
        assert call_args["id"] == 1
        # Verify tracking
        assert "ETHUSDT" in binance_connector.subscriptions
        assert "trades" in binance_connector.subscriptions["ETHUSDT"]
        assert "ethusdt@trade" in binance_connector.active_streams
    @pytest.mark.asyncio
    async def test_unsubscribe_orderbook(self, binance_connector):
        """Test order book unsubscription"""
        # Setup initial subscription
        binance_connector.subscriptions["BTCUSDT"] = ["orderbook"]
        binance_connector.active_streams.append("btcusdt@depth@100ms")
        # Mock WebSocket send
        binance_connector._send_message = AsyncMock(return_value=True)
        # Unsubscribe
        await binance_connector.unsubscribe_orderbook("BTCUSDT")
        # Verify unsubscription was sent
        binance_connector._send_message.assert_called_once()
        call_args = binance_connector._send_message.call_args[0][0]
        assert call_args["method"] == "UNSUBSCRIBE"
        assert "btcusdt@depth@100ms" in call_args["params"]
        # Verify tracking removal
        assert "BTCUSDT" not in binance_connector.subscriptions
        assert "btcusdt@depth@100ms" not in binance_connector.active_streams
    @pytest.mark.asyncio
    @patch('aiohttp.ClientSession.get')
    async def test_get_symbols(self, mock_get, binance_connector):
        """Test getting available symbols"""
        # Mock API response
        mock_response = AsyncMock()
        mock_response.status = 200
        mock_response.json = AsyncMock(return_value={
            "symbols": [
                {"symbol": "BTCUSDT", "status": "TRADING"},
                {"symbol": "ETHUSDT", "status": "TRADING"},
                {"symbol": "ADAUSDT", "status": "BREAK"}  # Should be filtered out
            ]
        })
        mock_get.return_value.__aenter__.return_value = mock_response
        # Get symbols
        symbols = await binance_connector.get_symbols()
        # Verify results
        assert len(symbols) == 2
        assert "BTCUSDT" in symbols
        assert "ETHUSDT" in symbols
        assert "ADAUSDT" not in symbols  # Filtered out due to status
    @pytest.mark.asyncio
    @patch('aiohttp.ClientSession.get')
    async def test_get_orderbook_snapshot(self, mock_get, binance_connector, sample_binance_orderbook_data):
        """Test getting order book snapshot"""
        # Mock API response
        mock_response = AsyncMock()
        mock_response.status = 200
        mock_response.json = AsyncMock(return_value=sample_binance_orderbook_data)
        mock_get.return_value.__aenter__.return_value = mock_response
        # Get order book snapshot
        orderbook = await binance_connector.get_orderbook_snapshot("BTCUSDT", depth=20)
        # Verify results
        assert isinstance(orderbook, OrderBookSnapshot)
        assert orderbook.symbol == "BTCUSDT"
        assert orderbook.exchange == "binance"
        assert len(orderbook.bids) == 2
        assert len(orderbook.asks) == 2
    def test_get_binance_stats(self, binance_connector):
        """Test getting Binance-specific statistics"""
        # Add some test data
        binance_connector.active_streams = ["btcusdt@depth@100ms", "ethusdt@trade"]
        binance_connector.stream_id = 5
        stats = binance_connector.get_binance_stats()
        # Verify Binance-specific stats
        assert stats['active_streams'] == 2
        assert len(stats['stream_list']) == 2
        assert stats['next_stream_id'] == 5
        # Verify base stats are included
        assert 'exchange' in stats
        assert 'connection_status' in stats
        assert 'message_count' in stats
 if __name__ == "__main__":
    # Run a simple test
    async def simple_test():
        connector = BinanceConnector()
        # Test symbol normalization
        normalized = connector.normalize_symbol("BTC-USDT")
        print(f"Symbol normalization: BTC-USDT -> {normalized}")
        # Test message type detection
        msg_type = connector._get_message_type({"e": "depthUpdate"})
        print(f"Message type detection: {msg_type}")
        print("Simple Binance connector test completed")
    asyncio.run(simple_test())
--- a/COBY/tests/test_data_processor.py
+++ b/COBY/tests/test_data_processor.py
@@ -0,0 +1,304 @@
 """
 Tests for data processing components.
 """
 import pytest
 from datetime import datetime, timezone
 from ..processing.data_processor import StandardDataProcessor
 from ..processing.quality_checker import DataQualityChecker
 from ..processing.anomaly_detector import AnomalyDetector
 from ..processing.metrics_calculator import MetricsCalculator
 from ..models.core import OrderBookSnapshot, TradeEvent, PriceLevel
@pytest.fixture
 def data_processor():
    """Create data processor for testing"""
    return StandardDataProcessor()
@pytest.fixture
 def quality_checker():
    """Create quality checker for testing"""
    return DataQualityChecker()
@pytest.fixture
 def anomaly_detector():
    """Create anomaly detector for testing"""
    return AnomalyDetector()
@pytest.fixture
 def metrics_calculator():
    """Create metrics calculator for testing"""
    return MetricsCalculator()
@pytest.fixture
 def sample_orderbook():
    """Create sample order book for testing"""
    return OrderBookSnapshot(
        symbol="BTCUSDT",
        exchange="binance",
        timestamp=datetime.now(timezone.utc),
        bids=[
            PriceLevel(price=50000.0, size=1.5),
            PriceLevel(price=49999.0, size=2.0),
            PriceLevel(price=49998.0, size=1.0)
        ],
        asks=[
            PriceLevel(price=50001.0, size=1.0),
            PriceLevel(price=50002.0, size=1.5),
            PriceLevel(price=50003.0, size=2.0)
        ]
    )
@pytest.fixture
 def sample_trade():
    """Create sample trade for testing"""
    return TradeEvent(
        symbol="BTCUSDT",
        exchange="binance",
        timestamp=datetime.now(timezone.utc),
        price=50000.5,
        size=0.1,
        side="buy",
        trade_id="test_trade_123"
    )
 class TestDataQualityChecker:
    """Test cases for DataQualityChecker"""
    def test_orderbook_quality_check(self, quality_checker, sample_orderbook):
        """Test order book quality checking"""
        quality_score, issues = quality_checker.check_orderbook_quality(sample_orderbook)
        assert 0.0 <= quality_score <= 1.0
        assert isinstance(issues, list)
        # Good order book should have high quality score
        assert quality_score > 0.8
    def test_trade_quality_check(self, quality_checker, sample_trade):
        """Test trade quality checking"""
        quality_score, issues = quality_checker.check_trade_quality(sample_trade)
        assert 0.0 <= quality_score <= 1.0
        assert isinstance(issues, list)
        # Good trade should have high quality score
        assert quality_score > 0.8
    def test_invalid_orderbook_detection(self, quality_checker):
        """Test detection of invalid order book"""
        # Create invalid order book with crossed spread
        invalid_orderbook = OrderBookSnapshot(
            symbol="BTCUSDT",
            exchange="binance",
            timestamp=datetime.now(timezone.utc),
            bids=[PriceLevel(price=50002.0, size=1.0)],  # Bid higher than ask
            asks=[PriceLevel(price=50001.0, size=1.0)]   # Ask lower than bid
        )
        quality_score, issues = quality_checker.check_orderbook_quality(invalid_orderbook)
        assert quality_score < 0.8
        assert any("crossed book" in issue.lower() for issue in issues)
 class TestAnomalyDetector:
    """Test cases for AnomalyDetector"""
    def test_orderbook_anomaly_detection(self, anomaly_detector, sample_orderbook):
        """Test order book anomaly detection"""
        # First few order books should not trigger anomalies
        for _ in range(5):
            anomalies = anomaly_detector.detect_orderbook_anomalies(sample_orderbook)
            assert isinstance(anomalies, list)
    def test_trade_anomaly_detection(self, anomaly_detector, sample_trade):
        """Test trade anomaly detection"""
        # First few trades should not trigger anomalies
        for _ in range(5):
            anomalies = anomaly_detector.detect_trade_anomalies(sample_trade)
            assert isinstance(anomalies, list)
    def test_price_spike_detection(self, anomaly_detector):
        """Test price spike detection"""
        # Create normal order books
        for i in range(20):
            normal_orderbook = OrderBookSnapshot(
                symbol="BTCUSDT",
                exchange="binance",
                timestamp=datetime.now(timezone.utc),
                bids=[PriceLevel(price=50000.0 + i, size=1.0)],
                asks=[PriceLevel(price=50001.0 + i, size=1.0)]
            )
            anomaly_detector.detect_orderbook_anomalies(normal_orderbook)
        # Create order book with price spike
        spike_orderbook = OrderBookSnapshot(
            symbol="BTCUSDT",
            exchange="binance",
            timestamp=datetime.now(timezone.utc),
            bids=[PriceLevel(price=60000.0, size=1.0)],  # 20% spike
            asks=[PriceLevel(price=60001.0, size=1.0)]
        )
        anomalies = anomaly_detector.detect_orderbook_anomalies(spike_orderbook)
        assert len(anomalies) > 0
        assert any("spike" in anomaly.lower() for anomaly in anomalies)
 class TestMetricsCalculator:
    """Test cases for MetricsCalculator"""
    def test_orderbook_metrics_calculation(self, metrics_calculator, sample_orderbook):
        """Test order book metrics calculation"""
        metrics = metrics_calculator.calculate_orderbook_metrics(sample_orderbook)
        assert metrics.symbol == "BTCUSDT"
        assert metrics.exchange == "binance"
        assert metrics.mid_price == 50000.5  # (50000 + 50001) / 2
        assert metrics.spread == 1.0  # 50001 - 50000
        assert metrics.spread_percentage > 0
        assert metrics.bid_volume == 4.5  # 1.5 + 2.0 + 1.0
        assert metrics.ask_volume == 4.5  # 1.0 + 1.5 + 2.0
        assert metrics.volume_imbalance == 0.0  # Equal volumes
    def test_imbalance_metrics_calculation(self, metrics_calculator, sample_orderbook):
        """Test imbalance metrics calculation"""
        imbalance = metrics_calculator.calculate_imbalance_metrics(sample_orderbook)
        assert imbalance.symbol == "BTCUSDT"
        assert -1.0 <= imbalance.volume_imbalance <= 1.0
        assert -1.0 <= imbalance.price_imbalance <= 1.0
        assert -1.0 <= imbalance.depth_imbalance <= 1.0
        assert -1.0 <= imbalance.momentum_score <= 1.0
    def test_liquidity_score_calculation(self, metrics_calculator, sample_orderbook):
        """Test liquidity score calculation"""
        liquidity_score = metrics_calculator.calculate_liquidity_score(sample_orderbook)
        assert 0.0 <= liquidity_score <= 1.0
        assert liquidity_score > 0.5  # Good order book should have decent liquidity
 class TestStandardDataProcessor:
    """Test cases for StandardDataProcessor"""
    def test_data_validation(self, data_processor, sample_orderbook, sample_trade):
        """Test data validation"""
        # Valid data should pass validation
        assert data_processor.validate_data(sample_orderbook) is True
        assert data_processor.validate_data(sample_trade) is True
    def test_metrics_calculation(self, data_processor, sample_orderbook):
        """Test metrics calculation through processor"""
        metrics = data_processor.calculate_metrics(sample_orderbook)
        assert metrics.symbol == "BTCUSDT"
        assert metrics.mid_price > 0
        assert metrics.spread > 0
    def test_anomaly_detection(self, data_processor, sample_orderbook, sample_trade):
        """Test anomaly detection through processor"""
        orderbook_anomalies = data_processor.detect_anomalies(sample_orderbook)
        trade_anomalies = data_processor.detect_anomalies(sample_trade)
        assert isinstance(orderbook_anomalies, list)
        assert isinstance(trade_anomalies, list)
    def test_data_filtering(self, data_processor, sample_orderbook, sample_trade):
        """Test data filtering"""
        # Test symbol filter
        criteria = {'symbols': ['BTCUSDT']}
        assert data_processor.filter_data(sample_orderbook, criteria) is True
        assert data_processor.filter_data(sample_trade, criteria) is True
        criteria = {'symbols': ['ETHUSDT']}
        assert data_processor.filter_data(sample_orderbook, criteria) is False
        assert data_processor.filter_data(sample_trade, criteria) is False
        # Test price range filter
        criteria = {'price_range': (40000, 60000)}
        assert data_processor.filter_data(sample_orderbook, criteria) is True
        assert data_processor.filter_data(sample_trade, criteria) is True
        criteria = {'price_range': (60000, 70000)}
        assert data_processor.filter_data(sample_orderbook, criteria) is False
        assert data_processor.filter_data(sample_trade, criteria) is False
    def test_data_enrichment(self, data_processor, sample_orderbook, sample_trade):
        """Test data enrichment"""
        orderbook_enriched = data_processor.enrich_data(sample_orderbook)
        trade_enriched = data_processor.enrich_data(sample_trade)
        # Check enriched data structure
        assert 'original_data' in orderbook_enriched
        assert 'quality_score' in orderbook_enriched
        assert 'anomalies' in orderbook_enriched
        assert 'processing_timestamp' in orderbook_enriched
        assert 'original_data' in trade_enriched
        assert 'quality_score' in trade_enriched
        assert 'anomalies' in trade_enriched
        assert 'trade_value' in trade_enriched
    def test_quality_score_calculation(self, data_processor, sample_orderbook, sample_trade):
        """Test quality score calculation"""
        orderbook_score = data_processor.get_data_quality_score(sample_orderbook)
        trade_score = data_processor.get_data_quality_score(sample_trade)
        assert 0.0 <= orderbook_score <= 1.0
        assert 0.0 <= trade_score <= 1.0
        # Good data should have high quality scores
        assert orderbook_score > 0.8
        assert trade_score > 0.8
    def test_processing_stats(self, data_processor, sample_orderbook, sample_trade):
        """Test processing statistics"""
        # Process some data
        data_processor.validate_data(sample_orderbook)
        data_processor.validate_data(sample_trade)
        stats = data_processor.get_processing_stats()
        assert 'processed_orderbooks' in stats
        assert 'processed_trades' in stats
        assert 'quality_failures' in stats
        assert 'anomalies_detected' in stats
        assert stats['processed_orderbooks'] >= 1
        assert stats['processed_trades'] >= 1
 if __name__ == "__main__":
    # Run simple tests
    processor = StandardDataProcessor()
    # Test with sample data
    orderbook = OrderBookSnapshot(
        symbol="BTCUSDT",
        exchange="test",
        timestamp=datetime.now(timezone.utc),
        bids=[PriceLevel(price=50000.0, size=1.0)],
        asks=[PriceLevel(price=50001.0, size=1.0)]
    )
    # Test validation
    is_valid = processor.validate_data(orderbook)
    print(f"Order book validation: {'PASSED' if is_valid else 'FAILED'}")
    # Test metrics
    metrics = processor.calculate_metrics(orderbook)
    print(f"Metrics calculation: mid_price={metrics.mid_price}, spread={metrics.spread}")
    # Test quality score
    quality_score = processor.get_data_quality_score(orderbook)
    print(f"Quality score: {quality_score:.2f}")
    print("Simple data processor test completed")