"""
Simplified Data Cache System

Replaces complex FIFO queues with a simple current state cache.
Supports unordered updates and extensible data types.
"""

import logging
import threading
import time
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Any, Callable, Dict, List, Optional

import numpy as np
import pandas as pd

logger = logging.getLogger(__name__)


@dataclass
class DataCacheEntry:
    """Single cache entry with metadata.

    Attributes:
        data: The cached payload (any type).
        timestamp: Local naive ``datetime.now()`` taken when the entry was stored.
        source: Free-form label of whoever produced the update.
        version: Counter that starts at 1 and is incremented on every
            overwrite of the same data_type/symbol slot.
    """

    data: Any
    timestamp: datetime
    source: str = "unknown"
    version: int = 1


class DataCache:
    """
    Simplified data cache that stores only the latest data for each type.

    Every data type is guarded by its own re-entrant lock, so updates may
    arrive in any order from multiple sources. Read accessors never create
    cache buckets for keys that have not been written.
    """

    def __init__(self):
        # {data_type: {symbol: entry}}
        self.cache: Dict[str, Dict[str, DataCacheEntry]] = defaultdict(dict)
        # Per data_type locks
        self.locks: Dict[str, threading.RLock] = defaultdict(threading.RLock)
        # Update notifications: {data_type: [callback, ...]}
        self.update_callbacks: Dict[str, List[Callable]] = defaultdict(list)

        # Historical data storage (loaded once): {symbol: {timeframe: df}}
        self.historical_data: Dict[str, Dict[str, pd.DataFrame]] = defaultdict(dict)
        self.historical_locks: Dict[str, threading.RLock] = defaultdict(threading.RLock)

        logger.info("DataCache initialized with simplified architecture")

    def update(self, data_type: str, symbol: str, data: Any, source: str = "unknown") -> bool:
        """
        Update cache with latest data (thread-safe, unordered updates supported)

        Registered callbacks for ``data_type`` are invoked as
        ``callback(symbol, data, source)`` while the data type's lock is
        still held; an exception raised by a callback is logged and does not
        abort the update.

        Args:
            data_type: Type of data ('ohlcv_1s', 'technical_indicators', etc.)
            symbol: Trading symbol
            data: New data to store
            source: Source of the update

        Returns:
            bool: True if updated successfully
        """
        try:
            with self.locks[data_type]:
                # Create or update entry
                bucket = self.cache[data_type]
                old_entry = bucket.get(symbol)
                new_version = (old_entry.version + 1) if old_entry else 1

                bucket[symbol] = DataCacheEntry(
                    data=data,
                    timestamp=datetime.now(),
                    source=source,
                    version=new_version,
                )

                # Notify callbacks. Iterate a snapshot so that a callback
                # registering further callbacks cannot extend this loop, and
                # use .get() so that no empty list is created per data type.
                for callback in tuple(self.update_callbacks.get(data_type, ())):
                    try:
                        callback(symbol, data, source)
                    except Exception as e:
                        logger.error(f"Error in update callback: {e}")

                return True

        except Exception as e:
            logger.error(f"Error updating cache {data_type}/{symbol}: {e}")
            return False

    def get(self, data_type: str, symbol: str) -> Optional[Any]:
        """Get latest data for a type/symbol, or None if nothing is cached."""
        try:
            with self.locks[data_type]:
                # .get() on the outer dict: a lookup must not create a bucket.
                entry = self.cache.get(data_type, {}).get(symbol)
                return entry.data if entry else None
        except Exception as e:
            logger.error(f"Error getting cache {data_type}/{symbol}: {e}")
            return None

    def get_with_metadata(self, data_type: str, symbol: str) -> Optional[DataCacheEntry]:
        """Get latest data with metadata, or None if nothing is cached."""
        try:
            with self.locks[data_type]:
                return self.cache.get(data_type, {}).get(symbol)
        except Exception as e:
            logger.error(f"Error getting cache metadata {data_type}/{symbol}: {e}")
            return None

    def get_all(self, data_type: str) -> Dict[str, Any]:
        """Get all data for a data type as a new ``{symbol: data}`` dict."""
        try:
            with self.locks[data_type]:
                bucket = self.cache.get(data_type, {})
                return {symbol: entry.data for symbol, entry in bucket.items()}
        except Exception as e:
            logger.error(f"Error getting all cache data for {data_type}: {e}")
            return {}

    def has_data(self, data_type: str, symbol: str, max_age_seconds: Optional[int] = None) -> bool:
        """Check if we have recent data

        Args:
            data_type: Type of data to look up.
            symbol: Trading symbol.
            max_age_seconds: Maximum allowed age of the entry in seconds.
                ``None`` disables the age check; ``0`` is honoured as a real
                limit rather than being treated as "no limit".

        Returns:
            bool: True if an entry exists and satisfies the age limit.
        """
        try:
            with self.locks[data_type]:
                entry = self.cache.get(data_type, {}).get(symbol)
                if not entry:
                    return False

                # Explicit None test so that max_age_seconds=0 is not
                # mistaken for "no limit".
                if max_age_seconds is not None:
                    age = (datetime.now() - entry.timestamp).total_seconds()
                    return age <= max_age_seconds

                return True
        except Exception as e:
            logger.error(f"Error checking cache data {data_type}/{symbol}: {e}")
            return False

    def register_callback(self, data_type: str, callback: Callable[[str, Any, str], None]):
        """Register callback for data updates

        The callback is called as ``callback(symbol, data, source)`` on every
        successful store for ``data_type``.
        """
        self.update_callbacks[data_type].append(callback)

    def get_status(self) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """Get cache status for monitoring

        Returns:
            ``{data_type: {symbol: info}}`` where ``info`` holds the keys
            'timestamp' (ISO string), 'age_seconds', 'source', 'version' and
            'has_data'.
        """
        status: Dict[str, Dict[str, Dict[str, Any]]] = {}

        # Snapshot the keys: another thread may add a data type while we
        # walk the cache, which would otherwise raise
        # "dictionary changed size during iteration".
        for data_type in list(self.cache):
            with self.locks[data_type]:
                now = datetime.now()
                type_status: Dict[str, Dict[str, Any]] = {}
                for symbol, entry in self.cache.get(data_type, {}).items():
                    type_status[symbol] = {
                        'timestamp': entry.timestamp.isoformat(),
                        'age_seconds': (now - entry.timestamp).total_seconds(),
                        'source': entry.source,
                        'version': entry.version,
                        'has_data': entry.data is not None,
                    }
                status[data_type] = type_status

        return status

    # Historical data management
    def store_historical_data(self, symbol: str, timeframe: str, df: pd.DataFrame):
        """Store historical data (loaded once at startup)

        A copy of ``df`` is stored, so later changes to the caller's frame do
        not affect the cache. Errors are logged, not raised.
        """
        try:
            with self.historical_locks[symbol]:
                self.historical_data[symbol][timeframe] = df.copy()
                logger.info(f"Stored {len(df)} historical bars for {symbol} {timeframe}")
        except Exception as e:
            logger.error(f"Error storing historical data {symbol}/{timeframe}: {e}")

    def get_historical_data(self, symbol: str, timeframe: str) -> Optional[pd.DataFrame]:
        """Get historical data

        Returns the stored DataFrame object itself (not a copy), or None when
        nothing is stored for ``symbol``/``timeframe``.
        """
        try:
            with self.historical_locks[symbol]:
                return self.historical_data.get(symbol, {}).get(timeframe)
        except Exception as e:
            logger.error(f"Error getting historical data {symbol}/{timeframe}: {e}")
            return None

    def has_historical_data(self, symbol: str, timeframe: str, min_bars: int = 100) -> bool:
        """Check if we have sufficient historical data

        Returns:
            bool: True if a frame is stored and has at least ``min_bars`` rows.
        """
        try:
            with self.historical_locks[symbol]:
                df = self.historical_data.get(symbol, {}).get(timeframe)
                return df is not None and len(df) >= min_bars
        except Exception as e:
            # Log like the sibling accessors instead of failing silently.
            logger.error(f"Error checking historical data {symbol}/{timeframe}: {e}")
            return False


# Global cache instance
_data_cache_instance: Optional[DataCache] = None
# Serialises first-time construction of the global instance.
_data_cache_lock = threading.Lock()


def get_data_cache() -> DataCache:
    """Get the global data cache instance

    The instance is created lazily on first use. Creation is guarded by a
    lock and re-checked inside it so that concurrent first callers all
    receive the same DataCache.
    """
    global _data_cache_instance

    if _data_cache_instance is None:
        with _data_cache_lock:
            if _data_cache_instance is None:
                _data_cache_instance = DataCache()

    return _data_cache_instance