gogo2/COBY/storage/timescale_manager.py

"""
TimescaleDB connection manager and database operations.
Provides connection pooling, schema management, and optimized time-series operations.
"""
import asyncio
import logging
from datetime import datetime
from typing import Dict, List, Optional, Any
from dataclasses import asdict
import asyncpg
from asyncpg import Pool, Connection
import json
from ..models.core import OrderBookSnapshot, TradeEvent, HeatmapData, PriceBuckets
from ..utils.exceptions import DatabaseError, ConnectionError
from ..config import Config
logger = logging.getLogger(__name__)
class TimescaleManager:
    """Manages TimescaleDB connections and operations for time-series data."""

    def __init__(self, config: Config):
        self.config = config
        self.pool: Optional[Pool] = None
        self._connection_string = self._build_connection_string()

    def _build_connection_string(self) -> str:
        """Build PostgreSQL connection string from config."""
        return (
            f"postgresql://{self.config.database.user}:{self.config.database.password}"
            f"@{self.config.database.host}:{self.config.database.port}/{self.config.database.name}"
        )
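
    # The resulting DSN looks like the following (credentials illustrative only):
    #     postgresql://coby:secret@localhost:5432/market_data
    # asyncpg.create_pool() also accepts the components as keyword arguments
    # (host=, port=, user=, password=, database=) if building a DSN string
    # is not desired.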

    async def initialize(self) -> None:
        """Initialize connection pool and database schema."""
        try:
            logger.info("Initializing TimescaleDB connection pool...")
            self.pool = await asyncpg.create_pool(
                self._connection_string,
                min_size=5,
                max_size=self.config.database.pool_size,
                command_timeout=60,
                server_settings={
                    'jit': 'off',  # Disable JIT for better performance with time-series
                    'timezone': 'UTC'
                }
            )

            # Test connection
            async with self.pool.acquire() as conn:
                await conn.execute('SELECT 1')

            logger.info("TimescaleDB connection pool initialized successfully")

            # Initialize database schema
            await self.setup_database_schema()

        except Exception as e:
            logger.error(f"Failed to initialize TimescaleDB: {e}")
            raise ConnectionError(f"TimescaleDB initialization failed: {e}")

    async def close(self) -> None:
        """Close connection pool."""
        if self.pool:
            await self.pool.close()
            logger.info("TimescaleDB connection pool closed")
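
    # Typical lifecycle (sketch; assumes a populated Config instance):
    #
    #     manager = TimescaleManager(config)
    #     await manager.initialize()   # pool + schema + retention policies
    #     try:
    #         ...                      # store_* / get_* calls
    #     finally:
    #         await manager.close()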

    async def setup_database_schema(self) -> None:
        """Create database schema with hypertables and indexes."""
        try:
            async with self.pool.acquire() as conn:
                # Enable TimescaleDB extension
                await conn.execute("CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;")

                # Create order book snapshots table
                await conn.execute("""
                    CREATE TABLE IF NOT EXISTS order_book_snapshots (
                        id BIGSERIAL,
                        symbol VARCHAR(20) NOT NULL,
                        exchange VARCHAR(20) NOT NULL,
                        timestamp TIMESTAMPTZ NOT NULL,
                        bids JSONB NOT NULL,
                        asks JSONB NOT NULL,
                        sequence_id BIGINT,
                        mid_price DECIMAL(20,8),
                        spread DECIMAL(20,8),
                        bid_volume DECIMAL(30,8),
                        ask_volume DECIMAL(30,8),
                        PRIMARY KEY (timestamp, symbol, exchange)
                    );
                """)

                # Convert to hypertable if not already
                try:
                    await conn.execute("""
                        SELECT create_hypertable('order_book_snapshots', 'timestamp',
                                                 if_not_exists => TRUE);
                    """)
                except Exception as e:
                    if "already a hypertable" not in str(e):
                        logger.warning(f"Could not create hypertable for order_book_snapshots: {e}")

                # Create trade events table
                await conn.execute("""
                    CREATE TABLE IF NOT EXISTS trade_events (
                        id BIGSERIAL,
                        symbol VARCHAR(20) NOT NULL,
                        exchange VARCHAR(20) NOT NULL,
                        timestamp TIMESTAMPTZ NOT NULL,
                        price DECIMAL(20,8) NOT NULL,
                        size DECIMAL(30,8) NOT NULL,
                        side VARCHAR(4) NOT NULL,
                        trade_id VARCHAR(100) NOT NULL,
                        PRIMARY KEY (timestamp, symbol, exchange, trade_id)
                    );
                """)

                # Convert to hypertable if not already
                try:
                    await conn.execute("""
                        SELECT create_hypertable('trade_events', 'timestamp',
                                                 if_not_exists => TRUE);
                    """)
                except Exception as e:
                    if "already a hypertable" not in str(e):
                        logger.warning(f"Could not create hypertable for trade_events: {e}")

                # Create heatmap data table
                await conn.execute("""
                    CREATE TABLE IF NOT EXISTS heatmap_data (
                        symbol VARCHAR(20) NOT NULL,
                        timestamp TIMESTAMPTZ NOT NULL,
                        bucket_size DECIMAL(10,2) NOT NULL,
                        price_bucket DECIMAL(20,8) NOT NULL,
                        volume DECIMAL(30,8) NOT NULL,
                        side VARCHAR(3) NOT NULL,
                        exchange_count INTEGER NOT NULL,
                        PRIMARY KEY (timestamp, symbol, bucket_size, price_bucket, side)
                    );
                """)

                # Convert to hypertable if not already
                try:
                    await conn.execute("""
                        SELECT create_hypertable('heatmap_data', 'timestamp',
                                                 if_not_exists => TRUE);
                    """)
                except Exception as e:
                    if "already a hypertable" not in str(e):
                        logger.warning(f"Could not create hypertable for heatmap_data: {e}")

                # Create OHLCV data table
                await conn.execute("""
                    CREATE TABLE IF NOT EXISTS ohlcv_data (
                        symbol VARCHAR(20) NOT NULL,
                        timestamp TIMESTAMPTZ NOT NULL,
                        timeframe VARCHAR(10) NOT NULL,
                        open_price DECIMAL(20,8) NOT NULL,
                        high_price DECIMAL(20,8) NOT NULL,
                        low_price DECIMAL(20,8) NOT NULL,
                        close_price DECIMAL(20,8) NOT NULL,
                        volume DECIMAL(30,8) NOT NULL,
                        trade_count INTEGER,
                        PRIMARY KEY (timestamp, symbol, timeframe)
                    );
                """)

                # Convert to hypertable if not already
                try:
                    await conn.execute("""
                        SELECT create_hypertable('ohlcv_data', 'timestamp',
                                                 if_not_exists => TRUE);
                    """)
                except Exception as e:
                    if "already a hypertable" not in str(e):
                        logger.warning(f"Could not create hypertable for ohlcv_data: {e}")

                # Create indexes for better query performance
                await self._create_indexes(conn)

                # Set up data retention policies
                await self._setup_retention_policies(conn)

                logger.info("Database schema setup completed successfully")

        except Exception as e:
            logger.error(f"Failed to setup database schema: {e}")
            raise DatabaseError(f"Schema setup failed: {e}")

    async def _create_indexes(self, conn: Connection) -> None:
        """Create indexes for optimized queries."""
        indexes = [
            # Order book snapshots indexes
            "CREATE INDEX IF NOT EXISTS idx_obs_symbol_time ON order_book_snapshots (symbol, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_obs_exchange_time ON order_book_snapshots (exchange, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_obs_symbol_exchange ON order_book_snapshots (symbol, exchange, timestamp DESC);",
            # Trade events indexes
            "CREATE INDEX IF NOT EXISTS idx_trades_symbol_time ON trade_events (symbol, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_trades_exchange_time ON trade_events (exchange, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_trades_price ON trade_events (symbol, price, timestamp DESC);",
            # Heatmap data indexes
            "CREATE INDEX IF NOT EXISTS idx_heatmap_symbol_time ON heatmap_data (symbol, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_heatmap_bucket ON heatmap_data (symbol, bucket_size, timestamp DESC);",
            # OHLCV indexes
            "CREATE INDEX IF NOT EXISTS idx_ohlcv_symbol_timeframe ON ohlcv_data (symbol, timeframe, timestamp DESC);"
        ]

        for index_sql in indexes:
            try:
                await conn.execute(index_sql)
            except Exception as e:
                logger.warning(f"Could not create index: {e}")

    async def _setup_retention_policies(self, conn: Connection) -> None:
        """Set up data retention policies for automatic cleanup."""
        try:
            # Keep raw order book data for 30 days
            await conn.execute("""
                SELECT add_retention_policy('order_book_snapshots', INTERVAL '30 days',
                                            if_not_exists => TRUE);
            """)

            # Keep trade events for 90 days
            await conn.execute("""
                SELECT add_retention_policy('trade_events', INTERVAL '90 days',
                                            if_not_exists => TRUE);
            """)

            # Keep heatmap data for 60 days
            await conn.execute("""
                SELECT add_retention_policy('heatmap_data', INTERVAL '60 days',
                                            if_not_exists => TRUE);
            """)

            # Keep OHLCV data for 1 year
            await conn.execute("""
                SELECT add_retention_policy('ohlcv_data', INTERVAL '1 year',
                                            if_not_exists => TRUE);
            """)

            logger.info("Data retention policies configured")
        except Exception as e:
            logger.warning(f"Could not set up retention policies: {e}")

    async def store_orderbook(self, data: OrderBookSnapshot) -> bool:
        """Store order book snapshot in database."""
        try:
            async with self.pool.acquire() as conn:
                # Calculate derived metrics
                mid_price = None
                spread = None
                bid_volume = sum(level.size for level in data.bids)
                ask_volume = sum(level.size for level in data.asks)

                if data.bids and data.asks:
                    best_bid = max(data.bids, key=lambda x: x.price).price
                    best_ask = min(data.asks, key=lambda x: x.price).price
                    mid_price = (best_bid + best_ask) / 2
                    spread = best_ask - best_bid

                # Convert price levels to JSON
                bids_json = json.dumps([asdict(level) for level in data.bids])
                asks_json = json.dumps([asdict(level) for level in data.asks])

                await conn.execute("""
                    INSERT INTO order_book_snapshots
                    (symbol, exchange, timestamp, bids, asks, sequence_id,
                     mid_price, spread, bid_volume, ask_volume)
                    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
                    ON CONFLICT (timestamp, symbol, exchange) DO UPDATE SET
                        bids = EXCLUDED.bids,
                        asks = EXCLUDED.asks,
                        sequence_id = EXCLUDED.sequence_id,
                        mid_price = EXCLUDED.mid_price,
                        spread = EXCLUDED.spread,
                        bid_volume = EXCLUDED.bid_volume,
                        ask_volume = EXCLUDED.ask_volume
                """, data.symbol, data.exchange, data.timestamp, bids_json, asks_json,
                     data.sequence_id, mid_price, spread, bid_volume, ask_volume)

                return True
        except Exception as e:
            logger.error(f"Failed to store order book data: {e}")
            raise DatabaseError(f"Order book storage failed: {e}")

    async def store_trade(self, data: TradeEvent) -> bool:
        """Store trade event in database."""
        try:
            async with self.pool.acquire() as conn:
                await conn.execute("""
                    INSERT INTO trade_events
                    (symbol, exchange, timestamp, price, size, side, trade_id)
                    VALUES ($1, $2, $3, $4, $5, $6, $7)
                    ON CONFLICT (timestamp, symbol, exchange, trade_id) DO NOTHING
                """, data.symbol, data.exchange, data.timestamp, data.price,
                     data.size, data.side, data.trade_id)

                return True
        except Exception as e:
            logger.error(f"Failed to store trade data: {e}")
            raise DatabaseError(f"Trade storage failed: {e}")

    async def store_heatmap_data(self, data: HeatmapData) -> bool:
        """Store heatmap data in database."""
        try:
            async with self.pool.acquire() as conn:
                # Prepare batch insert data
                insert_data = []
                for point in data.data:
                    insert_data.append((
                        data.symbol, data.timestamp, data.bucket_size,
                        point.price, point.volume, point.side, 1  # exchange_count
                    ))

                if insert_data:
                    await conn.executemany("""
                        INSERT INTO heatmap_data
                        (symbol, timestamp, bucket_size, price_bucket, volume, side, exchange_count)
                        VALUES ($1, $2, $3, $4, $5, $6, $7)
                        ON CONFLICT (timestamp, symbol, bucket_size, price_bucket, side)
                        DO UPDATE SET
                            volume = heatmap_data.volume + EXCLUDED.volume,
                            exchange_count = heatmap_data.exchange_count + EXCLUDED.exchange_count
                    """, insert_data)

                return True
        except Exception as e:
            logger.error(f"Failed to store heatmap data: {e}")
            raise DatabaseError(f"Heatmap storage failed: {e}")

    async def get_latest_orderbook(self, symbol: str, exchange: Optional[str] = None) -> Optional[Dict]:
        """Get latest order book snapshot for a symbol."""
        try:
            async with self.pool.acquire() as conn:
                if exchange:
                    query = """
                        SELECT * FROM order_book_snapshots
                        WHERE symbol = $1 AND exchange = $2
                        ORDER BY timestamp DESC LIMIT 1
                    """
                    row = await conn.fetchrow(query, symbol, exchange)
                else:
                    query = """
                        SELECT * FROM order_book_snapshots
                        WHERE symbol = $1
                        ORDER BY timestamp DESC LIMIT 1
                    """
                    row = await conn.fetchrow(query, symbol)

                if row:
                    return dict(row)
                return None
        except Exception as e:
            logger.error(f"Failed to get latest order book: {e}")
            raise DatabaseError(f"Order book retrieval failed: {e}")

    async def get_historical_data(self, symbol: str, start: datetime, end: datetime,
                                  data_type: str = 'orderbook') -> List[Dict]:
        """Get historical data for a symbol within time range."""
        try:
            async with self.pool.acquire() as conn:
                if data_type == 'orderbook':
                    query = """
                        SELECT * FROM order_book_snapshots
                        WHERE symbol = $1 AND timestamp >= $2 AND timestamp <= $3
                        ORDER BY timestamp ASC
                    """
                elif data_type == 'trades':
                    query = """
                        SELECT * FROM trade_events
                        WHERE symbol = $1 AND timestamp >= $2 AND timestamp <= $3
                        ORDER BY timestamp ASC
                    """
                elif data_type == 'heatmap':
                    query = """
                        SELECT * FROM heatmap_data
                        WHERE symbol = $1 AND timestamp >= $2 AND timestamp <= $3
                        ORDER BY timestamp ASC
                    """
                else:
                    raise ValueError(f"Unknown data type: {data_type}")

                rows = await conn.fetch(query, symbol, start, end)
                return [dict(row) for row in rows]
        except Exception as e:
            logger.error(f"Failed to get historical data: {e}")
            raise DatabaseError(f"Historical data retrieval failed: {e}")

    async def get_heatmap_data(self, symbol: str, bucket_size: float,
                               start: Optional[datetime] = None) -> List[Dict]:
        """Get heatmap data for visualization."""
        try:
            async with self.pool.acquire() as conn:
                if start:
                    query = """
                        SELECT price_bucket, volume, side, timestamp
                        FROM heatmap_data
                        WHERE symbol = $1 AND bucket_size = $2 AND timestamp >= $3
                        ORDER BY timestamp DESC, price_bucket ASC
                    """
                    rows = await conn.fetch(query, symbol, bucket_size, start)
                else:
                    # Get latest heatmap data
                    query = """
                        SELECT price_bucket, volume, side, timestamp
                        FROM heatmap_data
                        WHERE symbol = $1 AND bucket_size = $2
                        AND timestamp = (
                            SELECT MAX(timestamp) FROM heatmap_data
                            WHERE symbol = $1 AND bucket_size = $2
                        )
                        ORDER BY price_bucket ASC
                    """
                    rows = await conn.fetch(query, symbol, bucket_size)

                return [dict(row) for row in rows]
        except Exception as e:
            logger.error(f"Failed to get heatmap data: {e}")
            raise DatabaseError(f"Heatmap data retrieval failed: {e}")

    async def batch_store_orderbooks(self, data_list: List[OrderBookSnapshot]) -> bool:
        """Store multiple order book snapshots in a single transaction."""
        if not data_list:
            return True

        try:
            async with self.pool.acquire() as conn:
                async with conn.transaction():
                    insert_data = []
                    for data in data_list:
                        # Calculate derived metrics
                        mid_price = None
                        spread = None
                        bid_volume = sum(level.size for level in data.bids)
                        ask_volume = sum(level.size for level in data.asks)

                        if data.bids and data.asks:
                            best_bid = max(data.bids, key=lambda x: x.price).price
                            best_ask = min(data.asks, key=lambda x: x.price).price
                            mid_price = (best_bid + best_ask) / 2
                            spread = best_ask - best_bid

                        # Convert price levels to JSON
                        bids_json = json.dumps([asdict(level) for level in data.bids])
                        asks_json = json.dumps([asdict(level) for level in data.asks])

                        insert_data.append((
                            data.symbol, data.exchange, data.timestamp, bids_json, asks_json,
                            data.sequence_id, mid_price, spread, bid_volume, ask_volume
                        ))

                    await conn.executemany("""
                        INSERT INTO order_book_snapshots
                        (symbol, exchange, timestamp, bids, asks, sequence_id,
                         mid_price, spread, bid_volume, ask_volume)
                        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
                        ON CONFLICT (timestamp, symbol, exchange) DO UPDATE SET
                            bids = EXCLUDED.bids,
                            asks = EXCLUDED.asks,
                            sequence_id = EXCLUDED.sequence_id,
                            mid_price = EXCLUDED.mid_price,
                            spread = EXCLUDED.spread,
                            bid_volume = EXCLUDED.bid_volume,
                            ask_volume = EXCLUDED.ask_volume
                    """, insert_data)

            return True
        except Exception as e:
            logger.error(f"Failed to batch store order books: {e}")
            raise DatabaseError(f"Batch order book storage failed: {e}")

    async def batch_store_trades(self, data_list: List[TradeEvent]) -> bool:
        """Store multiple trade events in a single transaction."""
        if not data_list:
            return True

        try:
            async with self.pool.acquire() as conn:
                async with conn.transaction():
                    insert_data = [(
                        data.symbol, data.exchange, data.timestamp, data.price,
                        data.size, data.side, data.trade_id
                    ) for data in data_list]

                    await conn.executemany("""
                        INSERT INTO trade_events
                        (symbol, exchange, timestamp, price, size, side, trade_id)
                        VALUES ($1, $2, $3, $4, $5, $6, $7)
                        ON CONFLICT (timestamp, symbol, exchange, trade_id) DO NOTHING
                    """, insert_data)

            return True
        except Exception as e:
            logger.error(f"Failed to batch store trades: {e}")
            raise DatabaseError(f"Batch trade storage failed: {e}")

    async def get_database_stats(self) -> Dict[str, Any]:
        """Get database statistics and health information."""
        try:
            async with self.pool.acquire() as conn:
                stats = {}

                # Get table sizes
                tables = ['order_book_snapshots', 'trade_events', 'heatmap_data', 'ohlcv_data']
                for table in tables:
                    row = await conn.fetchrow(f"""
                        SELECT
                            COUNT(*) as row_count,
                            pg_size_pretty(pg_total_relation_size('{table}')) as size
                        FROM {table}
                    """)
                    stats[table] = dict(row)

                # Get connection pool stats
                stats['connection_pool'] = {
                    'size': self.pool.get_size(),
                    'max_size': self.pool.get_max_size(),
                    'min_size': self.pool.get_min_size()
                }

                return stats
        except Exception as e:
            logger.error(f"Failed to get database stats: {e}")
            return {}
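

# Minimal smoke-test sketch. This assumes Config() can be constructed with
# defaults that point at a reachable TimescaleDB instance; adjust for your
# environment before running.
if __name__ == "__main__":
    async def _demo() -> None:
        manager = TimescaleManager(Config())
        await manager.initialize()
        try:
            stats = await manager.get_database_stats()
            print(json.dumps(stats, indent=2, default=str))
        finally:
            await manager.close()

    asyncio.run(_demo())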