""" TimescaleDB connection manager and database operations. Provides connection pooling, schema management, and optimized time-series operations. """ import asyncio import logging from datetime import datetime from typing import Dict, List, Optional, Any from dataclasses import asdict import asyncpg from asyncpg import Pool, Connection import json from ..models.core import OrderBookSnapshot, TradeEvent, HeatmapData, PriceBuckets from ..utils.exceptions import DatabaseError, ConnectionError from ..config import Config logger = logging.getLogger(__name__) class TimescaleManager: """Manages TimescaleDB connections and operations for time-series data.""" def __init__(self, config: Config): self.config = config self.pool: Optional[Pool] = None self._connection_string = self._build_connection_string() def _build_connection_string(self) -> str: """Build PostgreSQL connection string from config.""" return ( f"postgresql://{self.config.database.user}:{self.config.database.password}" f"@{self.config.database.host}:{self.config.database.port}/{self.config.database.name}" ) async def initialize(self) -> None: """Initialize connection pool and database schema.""" try: logger.info("Initializing TimescaleDB connection pool...") self.pool = await asyncpg.create_pool( self._connection_string, min_size=5, max_size=self.config.database.pool_size, command_timeout=60, server_settings={ 'jit': 'off', # Disable JIT for better performance with time-series 'timezone': 'UTC' } ) # Test connection async with self.pool.acquire() as conn: await conn.execute('SELECT 1') logger.info("TimescaleDB connection pool initialized successfully") # Initialize database schema await self.setup_database_schema() except Exception as e: logger.error(f"Failed to initialize TimescaleDB: {e}") raise ConnectionError(f"TimescaleDB initialization failed: {e}") async def close(self) -> None: """Close connection pool.""" if self.pool: await self.pool.close() logger.info("TimescaleDB connection pool closed") async def setup_database_schema(self) -> None: """Create database schema with hypertables and indexes.""" try: async with self.pool.acquire() as conn: # Enable TimescaleDB extension await conn.execute("CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;") # Create order book snapshots table await conn.execute(""" CREATE TABLE IF NOT EXISTS order_book_snapshots ( id BIGSERIAL, symbol VARCHAR(20) NOT NULL, exchange VARCHAR(20) NOT NULL, timestamp TIMESTAMPTZ NOT NULL, bids JSONB NOT NULL, asks JSONB NOT NULL, sequence_id BIGINT, mid_price DECIMAL(20,8), spread DECIMAL(20,8), bid_volume DECIMAL(30,8), ask_volume DECIMAL(30,8), PRIMARY KEY (timestamp, symbol, exchange) ); """) # Convert to hypertable if not already try: await conn.execute(""" SELECT create_hypertable('order_book_snapshots', 'timestamp', if_not_exists => TRUE); """) except Exception as e: if "already a hypertable" not in str(e): logger.warning(f"Could not create hypertable for order_book_snapshots: {e}") # Create trade events table await conn.execute(""" CREATE TABLE IF NOT EXISTS trade_events ( id BIGSERIAL, symbol VARCHAR(20) NOT NULL, exchange VARCHAR(20) NOT NULL, timestamp TIMESTAMPTZ NOT NULL, price DECIMAL(20,8) NOT NULL, size DECIMAL(30,8) NOT NULL, side VARCHAR(4) NOT NULL, trade_id VARCHAR(100) NOT NULL, PRIMARY KEY (timestamp, symbol, exchange, trade_id) ); """) # Convert to hypertable if not already try: await conn.execute(""" SELECT create_hypertable('trade_events', 'timestamp', if_not_exists => TRUE); """) except Exception as e: if "already a hypertable" not in 
    async def setup_database_schema(self) -> None:
        """Create database schema with hypertables and indexes."""
        try:
            async with self.pool.acquire() as conn:
                # Enable TimescaleDB extension
                await conn.execute("CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;")

                # Create order book snapshots table
                await conn.execute("""
                    CREATE TABLE IF NOT EXISTS order_book_snapshots (
                        id BIGSERIAL,
                        symbol VARCHAR(20) NOT NULL,
                        exchange VARCHAR(20) NOT NULL,
                        timestamp TIMESTAMPTZ NOT NULL,
                        bids JSONB NOT NULL,
                        asks JSONB NOT NULL,
                        sequence_id BIGINT,
                        mid_price DECIMAL(20,8),
                        spread DECIMAL(20,8),
                        bid_volume DECIMAL(30,8),
                        ask_volume DECIMAL(30,8),
                        PRIMARY KEY (timestamp, symbol, exchange)
                    );
                """)

                # Convert to hypertable if not already
                try:
                    await conn.execute("""
                        SELECT create_hypertable('order_book_snapshots', 'timestamp', if_not_exists => TRUE);
                    """)
                except Exception as e:
                    if "already a hypertable" not in str(e):
                        logger.warning(f"Could not create hypertable for order_book_snapshots: {e}")

                # Create trade events table
                await conn.execute("""
                    CREATE TABLE IF NOT EXISTS trade_events (
                        id BIGSERIAL,
                        symbol VARCHAR(20) NOT NULL,
                        exchange VARCHAR(20) NOT NULL,
                        timestamp TIMESTAMPTZ NOT NULL,
                        price DECIMAL(20,8) NOT NULL,
                        size DECIMAL(30,8) NOT NULL,
                        side VARCHAR(4) NOT NULL,
                        trade_id VARCHAR(100) NOT NULL,
                        PRIMARY KEY (timestamp, symbol, exchange, trade_id)
                    );
                """)

                # Convert to hypertable if not already
                try:
                    await conn.execute("""
                        SELECT create_hypertable('trade_events', 'timestamp', if_not_exists => TRUE);
                    """)
                except Exception as e:
                    if "already a hypertable" not in str(e):
                        logger.warning(f"Could not create hypertable for trade_events: {e}")

                # Create heatmap data table
                await conn.execute("""
                    CREATE TABLE IF NOT EXISTS heatmap_data (
                        symbol VARCHAR(20) NOT NULL,
                        timestamp TIMESTAMPTZ NOT NULL,
                        bucket_size DECIMAL(10,2) NOT NULL,
                        price_bucket DECIMAL(20,8) NOT NULL,
                        volume DECIMAL(30,8) NOT NULL,
                        side VARCHAR(3) NOT NULL,
                        exchange_count INTEGER NOT NULL,
                        PRIMARY KEY (timestamp, symbol, bucket_size, price_bucket, side)
                    );
                """)

                # Convert to hypertable if not already
                try:
                    await conn.execute("""
                        SELECT create_hypertable('heatmap_data', 'timestamp', if_not_exists => TRUE);
                    """)
                except Exception as e:
                    if "already a hypertable" not in str(e):
                        logger.warning(f"Could not create hypertable for heatmap_data: {e}")

                # Create OHLCV data table
                await conn.execute("""
                    CREATE TABLE IF NOT EXISTS ohlcv_data (
                        symbol VARCHAR(20) NOT NULL,
                        timestamp TIMESTAMPTZ NOT NULL,
                        timeframe VARCHAR(10) NOT NULL,
                        open_price DECIMAL(20,8) NOT NULL,
                        high_price DECIMAL(20,8) NOT NULL,
                        low_price DECIMAL(20,8) NOT NULL,
                        close_price DECIMAL(20,8) NOT NULL,
                        volume DECIMAL(30,8) NOT NULL,
                        trade_count INTEGER,
                        PRIMARY KEY (timestamp, symbol, timeframe)
                    );
                """)

                # Convert to hypertable if not already
                try:
                    await conn.execute("""
                        SELECT create_hypertable('ohlcv_data', 'timestamp', if_not_exists => TRUE);
                    """)
                except Exception as e:
                    if "already a hypertable" not in str(e):
                        logger.warning(f"Could not create hypertable for ohlcv_data: {e}")

                # Create indexes for better query performance
                await self._create_indexes(conn)

                # Set up data retention policies
                await self._setup_retention_policies(conn)

                logger.info("Database schema setup completed successfully")

        except Exception as e:
            logger.error(f"Failed to setup database schema: {e}")
            raise DatabaseError(f"Schema setup failed: {e}")

    async def _create_indexes(self, conn: Connection) -> None:
        """Create indexes for optimized queries."""
        indexes = [
            # Order book snapshots indexes
            "CREATE INDEX IF NOT EXISTS idx_obs_symbol_time ON order_book_snapshots (symbol, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_obs_exchange_time ON order_book_snapshots (exchange, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_obs_symbol_exchange ON order_book_snapshots (symbol, exchange, timestamp DESC);",

            # Trade events indexes
            "CREATE INDEX IF NOT EXISTS idx_trades_symbol_time ON trade_events (symbol, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_trades_exchange_time ON trade_events (exchange, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_trades_price ON trade_events (symbol, price, timestamp DESC);",

            # Heatmap data indexes
            "CREATE INDEX IF NOT EXISTS idx_heatmap_symbol_time ON heatmap_data (symbol, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_heatmap_bucket ON heatmap_data (symbol, bucket_size, timestamp DESC);",

            # OHLCV indexes
            "CREATE INDEX IF NOT EXISTS idx_ohlcv_symbol_timeframe ON ohlcv_data (symbol, timeframe, timestamp DESC);"
        ]

        for index_sql in indexes:
            try:
                await conn.execute(index_sql)
            except Exception as e:
                logger.warning(f"Could not create index: {e}")
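    # Note on the indexes above (illustrative): the (symbol, timestamp DESC)
    # composites target "most recent rows for one symbol" access patterns,
    # which is exactly what get_latest_orderbook() and the heatmap reads
    # below issue. A representative query they serve (hypothetical symbol):
    #
    #     SELECT * FROM order_book_snapshots
    #     WHERE symbol = 'BTCUSDT'
    #     ORDER BY timestamp DESC
    #     LIMIT 1;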
    async def _setup_retention_policies(self, conn: Connection) -> None:
        """Set up data retention policies for automatic cleanup."""
        try:
            # Keep raw order book data for 30 days
            await conn.execute("""
                SELECT add_retention_policy('order_book_snapshots', INTERVAL '30 days', if_not_exists => TRUE);
            """)

            # Keep trade events for 90 days
            await conn.execute("""
                SELECT add_retention_policy('trade_events', INTERVAL '90 days', if_not_exists => TRUE);
            """)

            # Keep heatmap data for 60 days
            await conn.execute("""
                SELECT add_retention_policy('heatmap_data', INTERVAL '60 days', if_not_exists => TRUE);
            """)

            # Keep OHLCV data for 1 year
            await conn.execute("""
                SELECT add_retention_policy('ohlcv_data', INTERVAL '1 year', if_not_exists => TRUE);
            """)

            logger.info("Data retention policies configured")

        except Exception as e:
            logger.warning(f"Could not set up retention policies: {e}")

    async def store_orderbook(self, data: OrderBookSnapshot) -> bool:
        """Store order book snapshot in database."""
        try:
            async with self.pool.acquire() as conn:
                # Calculate derived metrics
                mid_price = None
                spread = None
                bid_volume = sum(level.size for level in data.bids)
                ask_volume = sum(level.size for level in data.asks)

                if data.bids and data.asks:
                    best_bid = max(data.bids, key=lambda x: x.price).price
                    best_ask = min(data.asks, key=lambda x: x.price).price
                    mid_price = (best_bid + best_ask) / 2
                    spread = best_ask - best_bid

                # Convert price levels to JSON
                bids_json = json.dumps([asdict(level) for level in data.bids])
                asks_json = json.dumps([asdict(level) for level in data.asks])

                await conn.execute("""
                    INSERT INTO order_book_snapshots
                    (symbol, exchange, timestamp, bids, asks, sequence_id,
                     mid_price, spread, bid_volume, ask_volume)
                    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
                    ON CONFLICT (timestamp, symbol, exchange)
                    DO UPDATE SET
                        bids = EXCLUDED.bids,
                        asks = EXCLUDED.asks,
                        sequence_id = EXCLUDED.sequence_id,
                        mid_price = EXCLUDED.mid_price,
                        spread = EXCLUDED.spread,
                        bid_volume = EXCLUDED.bid_volume,
                        ask_volume = EXCLUDED.ask_volume
                """, data.symbol, data.exchange, data.timestamp, bids_json, asks_json,
                    data.sequence_id, mid_price, spread, bid_volume, ask_volume)

                return True

        except Exception as e:
            logger.error(f"Failed to store order book data: {e}")
            raise DatabaseError(f"Order book storage failed: {e}")

    async def store_trade(self, data: TradeEvent) -> bool:
        """Store trade event in database."""
        try:
            async with self.pool.acquire() as conn:
                await conn.execute("""
                    INSERT INTO trade_events
                    (symbol, exchange, timestamp, price, size, side, trade_id)
                    VALUES ($1, $2, $3, $4, $5, $6, $7)
                    ON CONFLICT (timestamp, symbol, exchange, trade_id) DO NOTHING
                """, data.symbol, data.exchange, data.timestamp, data.price,
                    data.size, data.side, data.trade_id)

                return True

        except Exception as e:
            logger.error(f"Failed to store trade data: {e}")
            raise DatabaseError(f"Trade storage failed: {e}")

    async def store_heatmap_data(self, data: HeatmapData) -> bool:
        """Store heatmap data in database."""
        try:
            async with self.pool.acquire() as conn:
                # Prepare batch insert data
                insert_data = []
                for point in data.data:
                    insert_data.append((
                        data.symbol, data.timestamp, data.bucket_size,
                        point.price, point.volume, point.side,
                        1  # exchange_count
                    ))

                if insert_data:
                    await conn.executemany("""
                        INSERT INTO heatmap_data
                        (symbol, timestamp, bucket_size, price_bucket, volume, side, exchange_count)
                        VALUES ($1, $2, $3, $4, $5, $6, $7)
                        ON CONFLICT (timestamp, symbol, bucket_size, price_bucket, side)
                        DO UPDATE SET
                            volume = heatmap_data.volume + EXCLUDED.volume,
                            exchange_count = heatmap_data.exchange_count + EXCLUDED.exchange_count
                    """, insert_data)

                return True

        except Exception as e:
            logger.error(f"Failed to store heatmap data: {e}")
            raise DatabaseError(f"Heatmap storage failed: {e}")
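    # Reader-side note (a hedged sketch): bids/asks are written as JSON text
    # into JSONB columns, and asyncpg returns JSONB values as str unless a
    # type codec is registered, so callers typically decode on read, e.g.
    #
    #     row = await manager.get_latest_orderbook('BTCUSDT', 'binance')
    #     bids = json.loads(row['bids']) if row else []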
    async def get_latest_orderbook(self, symbol: str, exchange: Optional[str] = None) -> Optional[Dict]:
        """Get latest order book snapshot for a symbol."""
        try:
            async with self.pool.acquire() as conn:
                if exchange:
                    query = """
                        SELECT * FROM order_book_snapshots
                        WHERE symbol = $1 AND exchange = $2
                        ORDER BY timestamp DESC
                        LIMIT 1
                    """
                    row = await conn.fetchrow(query, symbol, exchange)
                else:
                    query = """
                        SELECT * FROM order_book_snapshots
                        WHERE symbol = $1
                        ORDER BY timestamp DESC
                        LIMIT 1
                    """
                    row = await conn.fetchrow(query, symbol)

                if row:
                    return dict(row)
                return None

        except Exception as e:
            logger.error(f"Failed to get latest order book: {e}")
            raise DatabaseError(f"Order book retrieval failed: {e}")

    async def get_historical_data(self, symbol: str, start: datetime, end: datetime,
                                  data_type: str = 'orderbook') -> List[Dict]:
        """Get historical data for a symbol within time range."""
        try:
            async with self.pool.acquire() as conn:
                if data_type == 'orderbook':
                    query = """
                        SELECT * FROM order_book_snapshots
                        WHERE symbol = $1 AND timestamp >= $2 AND timestamp <= $3
                        ORDER BY timestamp ASC
                    """
                elif data_type == 'trades':
                    query = """
                        SELECT * FROM trade_events
                        WHERE symbol = $1 AND timestamp >= $2 AND timestamp <= $3
                        ORDER BY timestamp ASC
                    """
                elif data_type == 'heatmap':
                    query = """
                        SELECT * FROM heatmap_data
                        WHERE symbol = $1 AND timestamp >= $2 AND timestamp <= $3
                        ORDER BY timestamp ASC
                    """
                else:
                    raise ValueError(f"Unknown data type: {data_type}")

                rows = await conn.fetch(query, symbol, start, end)
                return [dict(row) for row in rows]

        except Exception as e:
            logger.error(f"Failed to get historical data: {e}")
            raise DatabaseError(f"Historical data retrieval failed: {e}")

    async def get_heatmap_data(self, symbol: str, bucket_size: float,
                               start: Optional[datetime] = None) -> List[Dict]:
        """Get heatmap data for visualization."""
        try:
            async with self.pool.acquire() as conn:
                if start:
                    query = """
                        SELECT price_bucket, volume, side, timestamp
                        FROM heatmap_data
                        WHERE symbol = $1 AND bucket_size = $2 AND timestamp >= $3
                        ORDER BY timestamp DESC, price_bucket ASC
                    """
                    rows = await conn.fetch(query, symbol, bucket_size, start)
                else:
                    # Get latest heatmap data
                    query = """
                        SELECT price_bucket, volume, side, timestamp
                        FROM heatmap_data
                        WHERE symbol = $1 AND bucket_size = $2
                          AND timestamp = (
                              SELECT MAX(timestamp) FROM heatmap_data
                              WHERE symbol = $1 AND bucket_size = $2
                          )
                        ORDER BY price_bucket ASC
                    """
                    rows = await conn.fetch(query, symbol, bucket_size)

                return [dict(row) for row in rows]

        except Exception as e:
            logger.error(f"Failed to get heatmap data: {e}")
            raise DatabaseError(f"Heatmap data retrieval failed: {e}")
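    # The batch writers below wrap executemany() in a single transaction so a
    # large buffer of snapshots/trades costs one commit instead of one per
    # row. Illustrative call site (pending_snapshots is a hypothetical
    # caller-side buffer, not part of this module):
    #
    #     if len(pending_snapshots) >= 500:
    #         await manager.batch_store_orderbooks(pending_snapshots)
    #         pending_snapshots.clear()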
    async def batch_store_orderbooks(self, data_list: List[OrderBookSnapshot]) -> bool:
        """Store multiple order book snapshots in a single transaction."""
        if not data_list:
            return True

        try:
            async with self.pool.acquire() as conn:
                async with conn.transaction():
                    insert_data = []
                    for data in data_list:
                        # Calculate derived metrics
                        mid_price = None
                        spread = None
                        bid_volume = sum(level.size for level in data.bids)
                        ask_volume = sum(level.size for level in data.asks)

                        if data.bids and data.asks:
                            best_bid = max(data.bids, key=lambda x: x.price).price
                            best_ask = min(data.asks, key=lambda x: x.price).price
                            mid_price = (best_bid + best_ask) / 2
                            spread = best_ask - best_bid

                        # Convert price levels to JSON
                        bids_json = json.dumps([asdict(level) for level in data.bids])
                        asks_json = json.dumps([asdict(level) for level in data.asks])

                        insert_data.append((
                            data.symbol, data.exchange, data.timestamp, bids_json, asks_json,
                            data.sequence_id, mid_price, spread, bid_volume, ask_volume
                        ))

                    await conn.executemany("""
                        INSERT INTO order_book_snapshots
                        (symbol, exchange, timestamp, bids, asks, sequence_id,
                         mid_price, spread, bid_volume, ask_volume)
                        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
                        ON CONFLICT (timestamp, symbol, exchange)
                        DO UPDATE SET
                            bids = EXCLUDED.bids,
                            asks = EXCLUDED.asks,
                            sequence_id = EXCLUDED.sequence_id,
                            mid_price = EXCLUDED.mid_price,
                            spread = EXCLUDED.spread,
                            bid_volume = EXCLUDED.bid_volume,
                            ask_volume = EXCLUDED.ask_volume
                    """, insert_data)

            return True

        except Exception as e:
            logger.error(f"Failed to batch store order books: {e}")
            raise DatabaseError(f"Batch order book storage failed: {e}")

    async def batch_store_trades(self, data_list: List[TradeEvent]) -> bool:
        """Store multiple trade events in a single transaction."""
        if not data_list:
            return True

        try:
            async with self.pool.acquire() as conn:
                async with conn.transaction():
                    insert_data = [(
                        data.symbol, data.exchange, data.timestamp, data.price,
                        data.size, data.side, data.trade_id
                    ) for data in data_list]

                    await conn.executemany("""
                        INSERT INTO trade_events
                        (symbol, exchange, timestamp, price, size, side, trade_id)
                        VALUES ($1, $2, $3, $4, $5, $6, $7)
                        ON CONFLICT (timestamp, symbol, exchange, trade_id) DO NOTHING
                    """, insert_data)

            return True

        except Exception as e:
            logger.error(f"Failed to batch store trades: {e}")
            raise DatabaseError(f"Batch trade storage failed: {e}")

    async def get_database_stats(self) -> Dict[str, Any]:
        """Get database statistics and health information."""
        try:
            async with self.pool.acquire() as conn:
                stats = {}

                # Get table sizes. Interpolating the table name into the query
                # is safe here because the names come from this fixed list.
                tables = ['order_book_snapshots', 'trade_events', 'heatmap_data', 'ohlcv_data']
                for table in tables:
                    row = await conn.fetchrow(f"""
                        SELECT
                            COUNT(*) as row_count,
                            pg_size_pretty(pg_total_relation_size('{table}')) as size
                        FROM {table}
                    """)
                    stats[table] = dict(row)

                # Get connection pool stats
                stats['connection_pool'] = {
                    'size': self.pool.get_size(),
                    'max_size': self.pool.get_max_size(),
                    'min_size': self.pool.get_min_size()
                }

                return stats

        except Exception as e:
            logger.error(f"Failed to get database stats: {e}")
            return {}
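

if __name__ == "__main__":
    # Minimal smoke-test sketch, not part of the original module. It assumes
    # Config() can be constructed with defaults and that a TimescaleDB
    # instance is reachable with the configured credentials.
    async def _demo() -> None:
        manager = TimescaleManager(Config())
        await manager.initialize()
        try:
            stats = await manager.get_database_stats()
            logger.info("Database stats: %s", stats)
        finally:
            await manager.close()

    asyncio.run(_demo())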