"""
Data Cache Manager for unified storage system.
Provides low-latency in-memory caching for real-time data access.
"""
import time
import logging
from collections import deque
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Deque, Any
from threading import Lock

import pandas as pd

from .unified_data_models import OHLCVCandle, OrderBookDataFrame, TradeEvent

logger = logging.getLogger(__name__)


class DataCacheManager:
    """
    Manages in-memory cache for real-time data.
    Provides <10ms latency for latest data access.

    Cache Structure:
    - OHLCV: Last 5 minutes per symbol per timeframe
    - Order book: Last 5 minutes per symbol
    - Imbalances: Last 5 minutes per symbol
    - Trades: Last 5 minutes per symbol

    All public methods are thread-safe (guarded by a single non-reentrant
    ``Lock``). Timestamps are naive local ``datetime.now()`` values; mixing
    in timezone-aware timestamps from callers would break eviction
    comparisons — TODO confirm callers always supply naive datetimes.
    """

    # Expected arrival rates (events per second) used to size the bounded
    # deques so each holds roughly ``cache_duration`` worth of data.
    _ORDERBOOK_RATE = 1    # ~1 snapshot per second
    _IMBALANCE_RATE = 1    # ~1 entry per second
    _TRADE_RATE = 10       # ~10 trades per second

    def __init__(self, cache_duration_seconds: int = 300):
        """
        Initialize cache manager.

        Args:
            cache_duration_seconds: Duration to keep data in cache (default 5 minutes)
        """
        self.cache_duration = cache_duration_seconds
        self.cache_duration_td = timedelta(seconds=cache_duration_seconds)

        # Single lock guarding every cache structure and statistic below.
        self.lock = Lock()

        # OHLCV cache: {symbol: {timeframe: deque of candles}}
        self.ohlcv_cache: Dict[str, Dict[str, Deque[Dict]]] = {}
        # Order book cache: {symbol: deque of snapshots}
        self.orderbook_cache: Dict[str, Deque[Dict]] = {}
        # Imbalance cache: {symbol: deque of imbalance metrics}
        self.imbalance_cache: Dict[str, Deque[Dict]] = {}
        # Trade cache: {symbol: deque of trades}
        self.trade_cache: Dict[str, Deque[Dict]] = {}

        # Cache statistics (all mutated under self.lock).
        self.cache_hits = 0
        self.cache_misses = 0
        self.total_inserts = 0
        self.total_evictions = 0

        # Time-based eviction bookkeeping.
        self.last_eviction = datetime.now()
        self.eviction_interval = timedelta(seconds=10)  # Evict every 10 seconds

        logger.info(f"DataCacheManager initialized with {cache_duration_seconds}s cache duration")

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _append_with_timestamp(self, cache: Deque[Dict], item: Dict) -> None:
        """
        Append a shallow copy of *item* to *cache*, stamping a 'timestamp'
        key with the current time if the item doesn't carry one.

        Caller must hold ``self.lock``.
        """
        entry = item.copy()
        entry.setdefault('timestamp', datetime.now())
        cache.append(entry)
        self.total_inserts += 1

    @staticmethod
    def _evict_deque(cache: Deque[Dict], cutoff_time: datetime) -> int:
        """
        Pop entries older than *cutoff_time* from the left of *cache*.

        Entries are assumed to be in insertion (chronological) order, so we
        stop at the first entry that is still fresh. Returns the number of
        entries removed. Caller must hold ``self.lock``.
        """
        evicted = 0
        while cache and cache[0]['timestamp'] < cutoff_time:
            cache.popleft()
            evicted += 1
        return evicted

    # ------------------------------------------------------------------
    # Writers
    # ------------------------------------------------------------------

    def add_ohlcv_candle(self, symbol: str, timeframe: str, candle: Dict):
        """
        Add OHLCV candle to cache.

        Args:
            symbol: Trading symbol
            timeframe: Timeframe (1s, 1m, etc.)
            candle: Candle dictionary with OHLCV data
        """
        with self.lock:
            try:
                # Lazily create the per-symbol / per-timeframe deque, sized
                # so it holds ~cache_duration worth of candles.
                tf_caches = self.ohlcv_cache.setdefault(symbol, {})
                if timeframe not in tf_caches:
                    tf_caches[timeframe] = deque(maxlen=self._calculate_max_items(timeframe))

                self._append_with_timestamp(tf_caches[timeframe], candle)
                logger.debug(f"Added OHLCV candle to cache: {symbol} {timeframe}")

            except Exception as e:
                logger.error(f"Error adding OHLCV candle to cache: {e}")

    def add_orderbook_snapshot(self, symbol: str, snapshot: Dict):
        """
        Add order book snapshot to cache.

        Args:
            symbol: Trading symbol
            snapshot: Order book snapshot dictionary
        """
        with self.lock:
            try:
                if symbol not in self.orderbook_cache:
                    # ~1 snapshot per second over the cache window
                    # (default 300s -> 300 snapshots).
                    max_items = max(1, self.cache_duration * self._ORDERBOOK_RATE)
                    self.orderbook_cache[symbol] = deque(maxlen=max_items)

                self._append_with_timestamp(self.orderbook_cache[symbol], snapshot)
                logger.debug(f"Added order book snapshot to cache: {symbol}")

            except Exception as e:
                logger.error(f"Error adding order book snapshot to cache: {e}")

    def add_imbalance_data(self, symbol: str, imbalance: Dict):
        """
        Add imbalance metrics to cache.

        Args:
            symbol: Trading symbol
            imbalance: Imbalance metrics dictionary
        """
        with self.lock:
            try:
                if symbol not in self.imbalance_cache:
                    # ~1 entry per second over the cache window
                    # (default 300s -> 300 entries).
                    max_items = max(1, self.cache_duration * self._IMBALANCE_RATE)
                    self.imbalance_cache[symbol] = deque(maxlen=max_items)

                self._append_with_timestamp(self.imbalance_cache[symbol], imbalance)
                logger.debug(f"Added imbalance data to cache: {symbol}")

            except Exception as e:
                logger.error(f"Error adding imbalance data to cache: {e}")

    def add_trade(self, symbol: str, trade: Dict):
        """
        Add trade event to cache.

        Args:
            symbol: Trading symbol
            trade: Trade event dictionary
        """
        with self.lock:
            try:
                if symbol not in self.trade_cache:
                    # ~10 trades per second over the cache window
                    # (default 300s -> 3000 trades).
                    max_items = max(1, self.cache_duration * self._TRADE_RATE)
                    self.trade_cache[symbol] = deque(maxlen=max_items)

                self._append_with_timestamp(self.trade_cache[symbol], trade)
                logger.debug(f"Added trade to cache: {symbol}")

            except Exception as e:
                logger.error(f"Error adding trade to cache: {e}")

    # ------------------------------------------------------------------
    # Readers
    # ------------------------------------------------------------------

    def get_latest_ohlcv(self, symbol: str, timeframe: str, limit: int = 100) -> List[Dict]:
        """
        Get latest OHLCV candles from cache.

        Args:
            symbol: Trading symbol
            timeframe: Timeframe
            limit: Maximum number of candles to return

        Returns:
            List of candle dictionaries (most recent last). NOTE: the dicts
            are the cached objects themselves, not copies — callers must not
            mutate them.
        """
        start_time = time.time()

        with self.lock:
            try:
                if symbol not in self.ohlcv_cache or timeframe not in self.ohlcv_cache[symbol]:
                    self.cache_misses += 1
                    return []

                # Snapshot the deque, then take the newest `limit` entries.
                result = list(self.ohlcv_cache[symbol][timeframe])[-limit:]
                self.cache_hits += 1

                latency_ms = (time.time() - start_time) * 1000
                logger.debug(f"Retrieved {len(result)} OHLCV candles from cache in {latency_ms:.2f}ms")

                return result

            except Exception as e:
                logger.error(f"Error getting OHLCV from cache: {e}")
                self.cache_misses += 1
                return []

    def get_latest_orderbook(self, symbol: str) -> Optional[Dict]:
        """
        Get latest order book snapshot from cache.

        Args:
            symbol: Trading symbol

        Returns:
            Shallow copy of the latest order book snapshot, or None if the
            symbol has no cached snapshots.
        """
        start_time = time.time()

        with self.lock:
            try:
                if symbol not in self.orderbook_cache or not self.orderbook_cache[symbol]:
                    self.cache_misses += 1
                    return None

                result = self.orderbook_cache[symbol][-1].copy()
                self.cache_hits += 1

                latency_ms = (time.time() - start_time) * 1000
                logger.debug(f"Retrieved order book from cache in {latency_ms:.2f}ms")

                return result

            except Exception as e:
                logger.error(f"Error getting order book from cache: {e}")
                self.cache_misses += 1
                return None

    def get_latest_imbalances(self, symbol: str, limit: int = 60) -> List[Dict]:
        """
        Get latest imbalance metrics from cache.

        Args:
            symbol: Trading symbol
            limit: Maximum number of entries to return

        Returns:
            List of imbalance dictionaries (most recent last).
        """
        start_time = time.time()

        with self.lock:
            try:
                if symbol not in self.imbalance_cache:
                    self.cache_misses += 1
                    return []

                result = list(self.imbalance_cache[symbol])[-limit:]
                self.cache_hits += 1

                latency_ms = (time.time() - start_time) * 1000
                logger.debug(f"Retrieved {len(result)} imbalances from cache in {latency_ms:.2f}ms")

                return result

            except Exception as e:
                logger.error(f"Error getting imbalances from cache: {e}")
                self.cache_misses += 1
                return []

    def get_latest_trades(self, symbol: str, limit: int = 100) -> List[Dict]:
        """
        Get latest trades from cache.

        Args:
            symbol: Trading symbol
            limit: Maximum number of trades to return

        Returns:
            List of trade dictionaries (most recent last).
        """
        start_time = time.time()

        with self.lock:
            try:
                if symbol not in self.trade_cache:
                    self.cache_misses += 1
                    return []

                result = list(self.trade_cache[symbol])[-limit:]
                self.cache_hits += 1

                latency_ms = (time.time() - start_time) * 1000
                logger.debug(f"Retrieved {len(result)} trades from cache in {latency_ms:.2f}ms")

                return result

            except Exception as e:
                logger.error(f"Error getting trades from cache: {e}")
                self.cache_misses += 1
                return []

    def get_ohlcv_dataframe(self, symbol: str, timeframe: str, limit: int = 100) -> pd.DataFrame:
        """
        Get OHLCV data as pandas DataFrame.

        Args:
            symbol: Trading symbol
            timeframe: Timeframe
            limit: Maximum number of candles

        Returns:
            DataFrame with OHLCV data (empty DataFrame on cache miss).
        """
        candles = self.get_latest_ohlcv(symbol, timeframe, limit)
        if not candles:
            return pd.DataFrame()
        return pd.DataFrame(candles)

    # ------------------------------------------------------------------
    # Eviction
    # ------------------------------------------------------------------

    def evict_old_data(self):
        """Remove data older than cache duration from every cache."""
        with self.lock:
            try:
                now = datetime.now()
                cutoff_time = now - self.cache_duration_td
                eviction_count = 0

                # OHLCV is nested one level deeper than the other caches;
                # drop empty timeframe deques, then empty symbol dicts.
                for symbol in list(self.ohlcv_cache.keys()):
                    for timeframe in list(self.ohlcv_cache[symbol].keys()):
                        cache = self.ohlcv_cache[symbol][timeframe]
                        eviction_count += self._evict_deque(cache, cutoff_time)
                        if not cache:
                            del self.ohlcv_cache[symbol][timeframe]
                    if not self.ohlcv_cache[symbol]:
                        del self.ohlcv_cache[symbol]

                # Flat per-symbol caches all evict the same way.
                for symbol_cache in (self.orderbook_cache, self.imbalance_cache, self.trade_cache):
                    for symbol in list(symbol_cache.keys()):
                        cache = symbol_cache[symbol]
                        eviction_count += self._evict_deque(cache, cutoff_time)
                        if not cache:
                            del symbol_cache[symbol]

                self.total_evictions += eviction_count
                self.last_eviction = now

                if eviction_count > 0:
                    logger.debug(f"Evicted {eviction_count} old entries from cache")

            except Exception as e:
                logger.error(f"Error evicting old data: {e}")

    def auto_evict_if_needed(self):
        """Automatically evict old data if the eviction interval has passed."""
        # NOTE: last_eviction is read without the lock. Taking self.lock here
        # and then calling evict_old_data() would deadlock (Lock is not
        # reentrant); an occasionally stale read only delays eviction by one
        # call, which is harmless.
        now = datetime.now()
        if now - self.last_eviction >= self.eviction_interval:
            self.evict_old_data()

    def clear_cache(self, symbol: Optional[str] = None):
        """
        Clear cache data.

        Args:
            symbol: Symbol to clear (None = clear all)
        """
        with self.lock:
            if symbol:
                # Drop the symbol from each cache it appears in.
                for symbol_cache in (self.ohlcv_cache, self.orderbook_cache,
                                     self.imbalance_cache, self.trade_cache):
                    symbol_cache.pop(symbol, None)
                logger.info(f"Cleared cache for symbol: {symbol}")
            else:
                self.ohlcv_cache.clear()
                self.orderbook_cache.clear()
                self.imbalance_cache.clear()
                self.trade_cache.clear()
                logger.info("Cleared all cache data")

    # ------------------------------------------------------------------
    # Statistics / reporting
    # ------------------------------------------------------------------

    def get_cache_stats(self) -> Dict[str, Any]:
        """Get cache statistics as a plain dictionary."""
        with self.lock:
            # Entry counts per cache family.
            ohlcv_count = sum(
                sum(len(tf_cache) for tf_cache in symbol_cache.values())
                for symbol_cache in self.ohlcv_cache.values()
            )
            orderbook_count = sum(len(cache) for cache in self.orderbook_cache.values())
            imbalance_count = sum(len(cache) for cache in self.imbalance_cache.values())
            trade_count = sum(len(cache) for cache in self.trade_cache.values())

            total_requests = self.cache_hits + self.cache_misses
            hit_rate = (self.cache_hits / total_requests * 100) if total_requests > 0 else 0

            return {
                'cache_duration_seconds': self.cache_duration,
                'ohlcv_entries': ohlcv_count,
                'orderbook_entries': orderbook_count,
                'imbalance_entries': imbalance_count,
                'trade_entries': trade_count,
                'total_entries': ohlcv_count + orderbook_count + imbalance_count + trade_count,
                'cache_hits': self.cache_hits,
                'cache_misses': self.cache_misses,
                'hit_rate_percent': round(hit_rate, 2),
                'total_inserts': self.total_inserts,
                'total_evictions': self.total_evictions,
                'last_eviction': self.last_eviction.isoformat(),
                'symbols_cached': {
                    'ohlcv': list(self.ohlcv_cache.keys()),
                    'orderbook': list(self.orderbook_cache.keys()),
                    'imbalance': list(self.imbalance_cache.keys()),
                    'trade': list(self.trade_cache.keys())
                }
            }

    def _calculate_max_items(self, timeframe: str) -> int:
        """
        Calculate maximum cache items for a timeframe.

        Args:
            timeframe: Timeframe string (e.g. '1s', '1m'); unknown values
                default to 60 seconds per candle.

        Returns:
            Maximum number of items to cache (at least 10).
        """
        timeframe_seconds = {
            '1s': 1,
            '1m': 60,
            '5m': 300,
            '15m': 900,
            '30m': 1800,
            '1h': 3600,
            '4h': 14400,
            '1d': 86400
        }
        seconds = timeframe_seconds.get(timeframe, 60)

        # How many candles of this timeframe fit in the cache window,
        # floored at 10 so coarse timeframes still keep some history.
        max_items = self.cache_duration // seconds
        return max(10, max_items)

    def get_cache_summary(self) -> str:
        """Get human-readable cache summary."""
        stats = self.get_cache_stats()

        summary = f"""
Cache Summary:
--------------
Duration: {stats['cache_duration_seconds']}s
Total Entries: {stats['total_entries']}
- OHLCV: {stats['ohlcv_entries']}
- Order Book: {stats['orderbook_entries']}
- Imbalances: {stats['imbalance_entries']}
- Trades: {stats['trade_entries']}

Performance:
- Cache Hits: {stats['cache_hits']}
- Cache Misses: {stats['cache_misses']}
- Hit Rate: {stats['hit_rate_percent']}%
- Total Inserts: {stats['total_inserts']}
- Total Evictions: {stats['total_evictions']}

Last Eviction: {stats['last_eviction']}
"""
        return summary