"""
Price bucketing system for order book aggregation.
"""

import math
from typing import Dict, List, Tuple, Optional
from collections import defaultdict

from ..models.core import OrderBookSnapshot, PriceBuckets, PriceLevel
from ..config import config
from ..utils.logging import get_logger
from ..utils.validation import validate_price, validate_volume

logger = get_logger(__name__)


class PriceBucketer:
    """
    Converts order book data into price buckets for heatmap visualization.

    The bucket width is taken from the constructor argument or, when that is
    omitted, from ``config.get_bucket_size()``. The instance also keeps two
    running counters (bucket sets created, total volume processed) that can be
    read with ``get_processing_stats`` and cleared with ``reset_stats``.
    """

    def __init__(self, bucket_size: Optional[float] = None):
        """
        Initialize price bucketer.

        Args:
            bucket_size: Size of price buckets in USD (defaults to config value).
                Any falsy value (``None`` or ``0``) selects the config value.
        """
        # `or` rather than an `is None` test: an explicit 0 is not a usable
        # bucket width, so it also falls back to the configured size.
        self.bucket_size = bucket_size or config.get_bucket_size()

        # Statistics
        self.buckets_created = 0
        self.total_volume_processed = 0.0

        logger.info(f"Price bucketer initialized with ${self.bucket_size} buckets")

    def create_price_buckets(self, orderbook: OrderBookSnapshot) -> PriceBuckets:
        """
        Convert order book data to price buckets.

        Every bid/ask level whose price passes ``validate_price`` and whose
        size passes ``validate_volume`` is added to the bucket set; levels
        failing either check are skipped.

        Args:
            orderbook: Order book snapshot

        Returns:
            PriceBuckets: Aggregated price bucket data

        Raises:
            Exception: Any error raised while building the buckets is logged
                and re-raised unchanged. The processing counters are left
                untouched in that case.
        """
        try:
            # Create price buckets object
            buckets = PriceBuckets(
                symbol=orderbook.symbol,
                timestamp=orderbook.timestamp,
                bucket_size=self.bucket_size
            )

            # Accumulate locally so a failure part-way through does not leave
            # total_volume_processed inflated without a matching
            # buckets_created increment.
            accepted_volume = 0.0

            # Process bids (aggregate into buckets)
            for bid in orderbook.bids:
                if validate_price(bid.price) and validate_volume(bid.size):
                    buckets.add_bid(bid.price, bid.size)
                    accepted_volume += bid.size

            # Process asks (aggregate into buckets)
            for ask in orderbook.asks:
                if validate_price(ask.price) and validate_volume(ask.size):
                    buckets.add_ask(ask.price, ask.size)
                    accepted_volume += ask.size

            # Commit both counters together once all levels were processed.
            self.total_volume_processed += accepted_volume
            self.buckets_created += 1

            logger.debug(
                f"Created price buckets for {orderbook.symbol}: "
                f"{len(buckets.bid_buckets)} bid buckets, {len(buckets.ask_buckets)} ask buckets"
            )

            return buckets

        except Exception as e:
            logger.error(f"Error creating price buckets: {e}")
            raise

    def aggregate_buckets(self, bucket_list: List[PriceBuckets]) -> PriceBuckets:
        """
        Aggregate multiple price buckets into a single bucket set.

        The result takes its symbol and timestamp from the first element of
        ``bucket_list`` and uses this bucketer's ``bucket_size``. Each source
        price is re-keyed through the result's ``get_bucket_price`` before its
        volume is summed.

        Args:
            bucket_list: List of price buckets to aggregate

        Returns:
            PriceBuckets: Aggregated buckets

        Raises:
            ValueError: If ``bucket_list`` is empty.
        """
        if not bucket_list:
            raise ValueError("Cannot aggregate empty bucket list")

        # Use first bucket as template
        first_bucket = bucket_list[0]
        aggregated = PriceBuckets(
            symbol=first_bucket.symbol,
            timestamp=first_bucket.timestamp,
            bucket_size=self.bucket_size
        )

        for buckets in bucket_list:
            # Aggregate bid buckets
            for price, volume in buckets.bid_buckets.items():
                bucket_price = aggregated.get_bucket_price(price)
                aggregated.bid_buckets[bucket_price] = (
                    aggregated.bid_buckets.get(bucket_price, 0) + volume
                )

            # Aggregate ask buckets
            for price, volume in buckets.ask_buckets.items():
                bucket_price = aggregated.get_bucket_price(price)
                aggregated.ask_buckets[bucket_price] = (
                    aggregated.ask_buckets.get(bucket_price, 0) + volume
                )

        logger.debug(f"Aggregated {len(bucket_list)} bucket sets")
        return aggregated

    def get_bucket_range(self, center_price: float, depth: int) -> Tuple[float, float]:
        """
        Get price range for buckets around a center price.

        Args:
            center_price: Center price for the range
            depth: Number of buckets on each side. Not validated here; a
                negative value yields a range whose bounds are inverted.

        Returns:
            Tuple[float, float]: (min_price, max_price), with min_price
            floored at 0.0.
        """
        half_range = depth * self.bucket_size
        min_price = center_price - half_range
        max_price = center_price + half_range

        # 0.0 (not 0) so the lower bound is a float, as the annotation says.
        return (max(0.0, min_price), max_price)

    def filter_buckets_by_range(self, buckets: PriceBuckets,
                                min_price: float, max_price: float) -> PriceBuckets:
        """
        Filter buckets to only include those within a price range.

        Both bounds are inclusive. The input is not modified; a new
        ``PriceBuckets`` with the same symbol, timestamp and bucket size is
        returned.

        Args:
            buckets: Original price buckets
            min_price: Minimum price to include
            max_price: Maximum price to include

        Returns:
            PriceBuckets: Filtered buckets
        """
        filtered = PriceBuckets(
            symbol=buckets.symbol,
            timestamp=buckets.timestamp,
            bucket_size=buckets.bucket_size
        )

        # Filter bid buckets
        for price, volume in buckets.bid_buckets.items():
            if min_price <= price <= max_price:
                filtered.bid_buckets[price] = volume

        # Filter ask buckets
        for price, volume in buckets.ask_buckets.items():
            if min_price <= price <= max_price:
                filtered.ask_buckets[price] = volume

        return filtered

    def get_top_buckets(self, buckets: PriceBuckets, count: int) -> PriceBuckets:
        """
        Get top N buckets by volume.

        The selection is made independently for bids and asks, so the result
        holds up to ``count`` bid buckets and up to ``count`` ask buckets.

        Args:
            buckets: Original price buckets
            count: Number of top buckets to return per side. Zero or a
                negative value returns no buckets.

        Returns:
            PriceBuckets: Top buckets by volume
        """
        top_buckets = PriceBuckets(
            symbol=buckets.symbol,
            timestamp=buckets.timestamp,
            bucket_size=buckets.bucket_size
        )

        # A negative slice bound would drop entries from the *end* of the
        # ranking instead of returning nothing, so clamp to zero first.
        limit = max(count, 0)

        # Get top bid buckets
        top_bids = sorted(
            buckets.bid_buckets.items(),
            key=lambda item: item[1],  # Sort by volume
            reverse=True
        )[:limit]

        for price, volume in top_bids:
            top_buckets.bid_buckets[price] = volume

        # Get top ask buckets
        top_asks = sorted(
            buckets.ask_buckets.items(),
            key=lambda item: item[1],  # Sort by volume
            reverse=True
        )[:limit]

        for price, volume in top_asks:
            top_buckets.ask_buckets[price] = volume

        return top_buckets

    def calculate_bucket_statistics(self, buckets: PriceBuckets) -> Dict[str, float]:
        """
        Calculate statistics for price buckets.

        Per side (bid/ask) the result contains the bucket count, total, max
        and average volume, and the spread between the highest and lowest
        bucket price. Sides with no buckets report 0 for all of these. The
        combined ``total_volume`` and ``volume_imbalance``
        ((bid - ask) / total) are added last.

        Args:
            buckets: Price buckets to analyze

        Returns:
            Dict[str, float]: Bucket statistics
        """
        bid_volumes = list(buckets.bid_buckets.values())
        ask_volumes = list(buckets.ask_buckets.values())
        total_bid_volume = sum(bid_volumes)
        total_ask_volume = sum(ask_volumes)

        stats = {
            'total_bid_buckets': len(buckets.bid_buckets),
            'total_ask_buckets': len(buckets.ask_buckets),
            'total_bid_volume': total_bid_volume,
            'total_ask_volume': total_ask_volume,
            'bid_price_range': 0.0,
            'ask_price_range': 0.0,
            'max_bid_volume': 0.0,
            'max_ask_volume': 0.0,
            'avg_bid_volume': 0.0,
            'avg_ask_volume': 0.0
        }

        # Calculate bid statistics
        if buckets.bid_buckets:
            bid_prices = list(buckets.bid_buckets.keys())
            stats['bid_price_range'] = max(bid_prices) - min(bid_prices)
            stats['max_bid_volume'] = max(bid_volumes)
            stats['avg_bid_volume'] = total_bid_volume / len(bid_volumes)

        # Calculate ask statistics
        if buckets.ask_buckets:
            ask_prices = list(buckets.ask_buckets.keys())
            stats['ask_price_range'] = max(ask_prices) - min(ask_prices)
            stats['max_ask_volume'] = max(ask_volumes)
            stats['avg_ask_volume'] = total_ask_volume / len(ask_volumes)

        # Calculate combined statistics
        stats['total_volume'] = total_bid_volume + total_ask_volume
        # The 1e-10 floor avoids dividing by zero when there is no volume.
        stats['volume_imbalance'] = (
            (total_bid_volume - total_ask_volume) /
            max(stats['total_volume'], 1e-10)
        )

        return stats

    def merge_adjacent_buckets(self, buckets: PriceBuckets, merge_factor: int = 2) -> PriceBuckets:
        """
        Merge adjacent buckets to create larger bucket sizes.

        A new ``PriceBuckets`` is created whose bucket size is the original
        size multiplied by ``merge_factor``; every source price is re-keyed
        through the new object's ``get_bucket_price`` and volumes landing on
        the same key are summed.

        Args:
            buckets: Original price buckets
            merge_factor: Number of adjacent buckets to merge (must be >= 1)

        Returns:
            PriceBuckets: Merged buckets with larger bucket size

        Raises:
            ValueError: If ``merge_factor`` is less than 1, which would give
                the merged set a zero or negative bucket size.
        """
        if merge_factor < 1:
            raise ValueError(f"merge_factor must be >= 1, got {merge_factor}")

        merged = PriceBuckets(
            symbol=buckets.symbol,
            timestamp=buckets.timestamp,
            bucket_size=buckets.bucket_size * merge_factor
        )

        # Merge bid buckets
        bid_groups = defaultdict(float)
        for price, volume in buckets.bid_buckets.items():
            bid_groups[merged.get_bucket_price(price)] += volume
        merged.bid_buckets = dict(bid_groups)

        # Merge ask buckets
        ask_groups = defaultdict(float)
        for price, volume in buckets.ask_buckets.items():
            ask_groups[merged.get_bucket_price(price)] += volume
        merged.ask_buckets = dict(ask_groups)

        logger.debug(f"Merged buckets with factor {merge_factor}")
        return merged

    def get_bucket_depth_profile(self, buckets: PriceBuckets,
                                 center_price: float) -> Dict[str, List[Tuple[float, float]]]:
        """
        Get depth profile showing volume at different distances from center price.

        Distance is the absolute difference between a bucket's price and
        ``center_price``; each side's list is ordered by ascending distance.

        Args:
            buckets: Price buckets
            center_price: Center price for depth calculation

        Returns:
            Dict: Depth profile with 'bids' and 'asks' lists of (distance, volume) tuples
        """
        profile = {
            'bids': [
                (abs(center_price - price), volume)
                for price, volume in buckets.bid_buckets.items()
            ],
            'asks': [
                (abs(price - center_price), volume)
                for price, volume in buckets.ask_buckets.items()
            ],
        }

        # Sort by distance
        profile['bids'].sort(key=lambda entry: entry[0])
        profile['asks'].sort(key=lambda entry: entry[0])

        return profile

    def get_processing_stats(self) -> Dict[str, float]:
        """Get processing statistics"""
        return {
            'bucket_size': self.bucket_size,
            'buckets_created': self.buckets_created,
            'total_volume_processed': self.total_volume_processed,
            # max(..., 1) keeps the average defined before any bucket set exists.
            'avg_volume_per_bucket': (
                self.total_volume_processed / max(self.buckets_created, 1)
            )
        }

    def reset_stats(self) -> None:
        """Reset processing statistics"""
        self.buckets_created = 0
        self.total_volume_processed = 0.0
        logger.info("Price bucketer statistics reset")