"""
|
|
TimescaleDB connection manager and database operations.
|
|
Provides connection pooling, schema management, and optimized time-series operations.
|
|
"""

import asyncio
import json
import logging
from datetime import datetime
from typing import Dict, List, Optional, Any
from dataclasses import asdict

import asyncpg
from asyncpg import Pool, Connection

from ..models.core import OrderBookSnapshot, TradeEvent, HeatmapData, PriceBuckets
from ..utils.exceptions import DatabaseError, ConnectionError
from ..config import Config

logger = logging.getLogger(__name__)


class TimescaleManager:
    """Manages TimescaleDB connections and operations for time-series data."""

    def __init__(self, config: Config):
        self.config = config
        self.pool: Optional[Pool] = None
        self._connection_string = self._build_connection_string()

    def _build_connection_string(self) -> str:
        """Build PostgreSQL connection string from config."""
        return (
            f"postgresql://{self.config.database.user}:{self.config.database.password}"
            f"@{self.config.database.host}:{self.config.database.port}/{self.config.database.name}"
        )

    async def initialize(self) -> None:
        """Initialize connection pool and database schema."""
        try:
            logger.info("Initializing TimescaleDB connection pool...")
            self.pool = await asyncpg.create_pool(
                self._connection_string,
                min_size=5,
                max_size=self.config.database.pool_size,
                command_timeout=60,
                server_settings={
                    'jit': 'off',  # Disable JIT for better performance with time-series
                    'timezone': 'UTC'
                }
            )

            # Test connection
            async with self.pool.acquire() as conn:
                await conn.execute('SELECT 1')

            logger.info("TimescaleDB connection pool initialized successfully")

            # Initialize database schema
            await self.setup_database_schema()

        except Exception as e:
            logger.error(f"Failed to initialize TimescaleDB: {e}")
            raise ConnectionError(f"TimescaleDB initialization failed: {e}")

    async def close(self) -> None:
        """Close connection pool."""
        if self.pool:
            await self.pool.close()
            logger.info("TimescaleDB connection pool closed")

    async def setup_database_schema(self) -> None:
        """Create database schema with hypertables and indexes."""
        try:
            async with self.pool.acquire() as conn:
                # Enable TimescaleDB extension
                await conn.execute("CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;")

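                # TimescaleDB requires any unique constraint on a hypertable to
                # include the partitioning column, which is why every PRIMARY KEY
                # below starts with `timestamp`.
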
                # Create order book snapshots table
                await conn.execute("""
                    CREATE TABLE IF NOT EXISTS order_book_snapshots (
                        id BIGSERIAL,
                        symbol VARCHAR(20) NOT NULL,
                        exchange VARCHAR(20) NOT NULL,
                        timestamp TIMESTAMPTZ NOT NULL,
                        bids JSONB NOT NULL,
                        asks JSONB NOT NULL,
                        sequence_id BIGINT,
                        mid_price DECIMAL(20,8),
                        spread DECIMAL(20,8),
                        bid_volume DECIMAL(30,8),
                        ask_volume DECIMAL(30,8),
                        PRIMARY KEY (timestamp, symbol, exchange)
                    );
                """)

                # Convert to hypertable if not already
                try:
                    await conn.execute("""
                        SELECT create_hypertable('order_book_snapshots', 'timestamp',
                                                 if_not_exists => TRUE);
                    """)
                except Exception as e:
                    if "already a hypertable" not in str(e):
                        logger.warning(f"Could not create hypertable for order_book_snapshots: {e}")

                # Create trade events table
                await conn.execute("""
                    CREATE TABLE IF NOT EXISTS trade_events (
                        id BIGSERIAL,
                        symbol VARCHAR(20) NOT NULL,
                        exchange VARCHAR(20) NOT NULL,
                        timestamp TIMESTAMPTZ NOT NULL,
                        price DECIMAL(20,8) NOT NULL,
                        size DECIMAL(30,8) NOT NULL,
                        side VARCHAR(4) NOT NULL,
                        trade_id VARCHAR(100) NOT NULL,
                        PRIMARY KEY (timestamp, symbol, exchange, trade_id)
                    );
                """)

                # Convert to hypertable if not already
                try:
                    await conn.execute("""
                        SELECT create_hypertable('trade_events', 'timestamp',
                                                 if_not_exists => TRUE);
                    """)
                except Exception as e:
                    if "already a hypertable" not in str(e):
                        logger.warning(f"Could not create hypertable for trade_events: {e}")

                # Create heatmap data table
                await conn.execute("""
                    CREATE TABLE IF NOT EXISTS heatmap_data (
                        symbol VARCHAR(20) NOT NULL,
                        timestamp TIMESTAMPTZ NOT NULL,
                        bucket_size DECIMAL(10,2) NOT NULL,
                        price_bucket DECIMAL(20,8) NOT NULL,
                        volume DECIMAL(30,8) NOT NULL,
                        side VARCHAR(3) NOT NULL,
                        exchange_count INTEGER NOT NULL,
                        PRIMARY KEY (timestamp, symbol, bucket_size, price_bucket, side)
                    );
                """)

                # Convert to hypertable if not already
                try:
                    await conn.execute("""
                        SELECT create_hypertable('heatmap_data', 'timestamp',
                                                 if_not_exists => TRUE);
                    """)
                except Exception as e:
                    if "already a hypertable" not in str(e):
                        logger.warning(f"Could not create hypertable for heatmap_data: {e}")

                # Create OHLCV data table
                await conn.execute("""
                    CREATE TABLE IF NOT EXISTS ohlcv_data (
                        symbol VARCHAR(20) NOT NULL,
                        timestamp TIMESTAMPTZ NOT NULL,
                        timeframe VARCHAR(10) NOT NULL,
                        open_price DECIMAL(20,8) NOT NULL,
                        high_price DECIMAL(20,8) NOT NULL,
                        low_price DECIMAL(20,8) NOT NULL,
                        close_price DECIMAL(20,8) NOT NULL,
                        volume DECIMAL(30,8) NOT NULL,
                        trade_count INTEGER,
                        PRIMARY KEY (timestamp, symbol, timeframe)
                    );
                """)

                # Convert to hypertable if not already
                try:
                    await conn.execute("""
                        SELECT create_hypertable('ohlcv_data', 'timestamp',
                                                 if_not_exists => TRUE);
                    """)
                except Exception as e:
                    if "already a hypertable" not in str(e):
                        logger.warning(f"Could not create hypertable for ohlcv_data: {e}")

                # Create indexes for better query performance
                await self._create_indexes(conn)

                # Set up data retention policies
                await self._setup_retention_policies(conn)

                logger.info("Database schema setup completed successfully")

        except Exception as e:
            logger.error(f"Failed to setup database schema: {e}")
            raise DatabaseError(f"Schema setup failed: {e}")

    async def _create_indexes(self, conn: Connection) -> None:
        """Create indexes for optimized queries."""
        indexes = [
            # Order book snapshots indexes
            "CREATE INDEX IF NOT EXISTS idx_obs_symbol_time ON order_book_snapshots (symbol, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_obs_exchange_time ON order_book_snapshots (exchange, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_obs_symbol_exchange ON order_book_snapshots (symbol, exchange, timestamp DESC);",

            # Trade events indexes
            "CREATE INDEX IF NOT EXISTS idx_trades_symbol_time ON trade_events (symbol, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_trades_exchange_time ON trade_events (exchange, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_trades_price ON trade_events (symbol, price, timestamp DESC);",

            # Heatmap data indexes
            "CREATE INDEX IF NOT EXISTS idx_heatmap_symbol_time ON heatmap_data (symbol, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_heatmap_bucket ON heatmap_data (symbol, bucket_size, timestamp DESC);",

            # OHLCV indexes
            "CREATE INDEX IF NOT EXISTS idx_ohlcv_symbol_timeframe ON ohlcv_data (symbol, timeframe, timestamp DESC);"
        ]

        for index_sql in indexes:
            try:
                await conn.execute(index_sql)
            except Exception as e:
                logger.warning(f"Could not create index: {e}")

    async def _setup_retention_policies(self, conn: Connection) -> None:
        """Set up data retention policies for automatic cleanup."""
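        # add_retention_policy() requires TimescaleDB 2.x; a background job then
        # drops chunks older than the configured interval.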
        try:
            # Keep raw order book data for 30 days
            await conn.execute("""
                SELECT add_retention_policy('order_book_snapshots', INTERVAL '30 days',
                                            if_not_exists => TRUE);
            """)

            # Keep trade events for 90 days
            await conn.execute("""
                SELECT add_retention_policy('trade_events', INTERVAL '90 days',
                                            if_not_exists => TRUE);
            """)

            # Keep heatmap data for 60 days
            await conn.execute("""
                SELECT add_retention_policy('heatmap_data', INTERVAL '60 days',
                                            if_not_exists => TRUE);
            """)

            # Keep OHLCV data for 1 year
            await conn.execute("""
                SELECT add_retention_policy('ohlcv_data', INTERVAL '1 year',
                                            if_not_exists => TRUE);
            """)

            logger.info("Data retention policies configured")

        except Exception as e:
            logger.warning(f"Could not set up retention policies: {e}")

    async def store_orderbook(self, data: OrderBookSnapshot) -> bool:
        """Store order book snapshot in database."""
        try:
            async with self.pool.acquire() as conn:
                # Calculate derived metrics
                mid_price = None
                spread = None
                bid_volume = sum(level.size for level in data.bids)
                ask_volume = sum(level.size for level in data.asks)

                if data.bids and data.asks:
                    best_bid = max(data.bids, key=lambda x: x.price).price
                    best_ask = min(data.asks, key=lambda x: x.price).price
                    mid_price = (best_bid + best_ask) / 2
                    spread = best_ask - best_bid

                # Convert price levels to JSON
                bids_json = json.dumps([asdict(level) for level in data.bids])
                asks_json = json.dumps([asdict(level) for level in data.asks])

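                # asyncpg's default json/jsonb codec exchanges values as text,
                # so the JSONB bid/ask columns are bound as JSON strings here.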
                await conn.execute("""
                    INSERT INTO order_book_snapshots
                        (symbol, exchange, timestamp, bids, asks, sequence_id,
                         mid_price, spread, bid_volume, ask_volume)
                    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
                    ON CONFLICT (timestamp, symbol, exchange) DO UPDATE SET
                        bids = EXCLUDED.bids,
                        asks = EXCLUDED.asks,
                        sequence_id = EXCLUDED.sequence_id,
                        mid_price = EXCLUDED.mid_price,
                        spread = EXCLUDED.spread,
                        bid_volume = EXCLUDED.bid_volume,
                        ask_volume = EXCLUDED.ask_volume
                """, data.symbol, data.exchange, data.timestamp, bids_json, asks_json,
                    data.sequence_id, mid_price, spread, bid_volume, ask_volume)

            return True

        except Exception as e:
            logger.error(f"Failed to store order book data: {e}")
            raise DatabaseError(f"Order book storage failed: {e}")

    async def store_trade(self, data: TradeEvent) -> bool:
        """Store trade event in database."""
        try:
            async with self.pool.acquire() as conn:
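                # DO NOTHING on the primary-key conflict makes re-delivered
                # trades idempotent.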
                await conn.execute("""
                    INSERT INTO trade_events
                        (symbol, exchange, timestamp, price, size, side, trade_id)
                    VALUES ($1, $2, $3, $4, $5, $6, $7)
                    ON CONFLICT (timestamp, symbol, exchange, trade_id) DO NOTHING
                """, data.symbol, data.exchange, data.timestamp, data.price,
                    data.size, data.side, data.trade_id)

            return True

        except Exception as e:
            logger.error(f"Failed to store trade data: {e}")
            raise DatabaseError(f"Trade storage failed: {e}")

    async def store_heatmap_data(self, data: HeatmapData) -> bool:
        """Store heatmap data in database."""
        try:
            async with self.pool.acquire() as conn:
                # Prepare batch insert data
                insert_data = []
                for point in data.data:
                    insert_data.append((
                        data.symbol, data.timestamp, data.bucket_size,
                        point.price, point.volume, point.side, 1  # exchange_count
                    ))

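                # Note: ON CONFLICT ... DO UPDATE raises "cannot affect row a
                # second time" if one batch repeats the same conflict key, so a
                # single HeatmapData should not contain duplicate buckets per side.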
                if insert_data:
                    await conn.executemany("""
                        INSERT INTO heatmap_data
                            (symbol, timestamp, bucket_size, price_bucket, volume, side, exchange_count)
                        VALUES ($1, $2, $3, $4, $5, $6, $7)
                        ON CONFLICT (timestamp, symbol, bucket_size, price_bucket, side)
                        DO UPDATE SET
                            volume = heatmap_data.volume + EXCLUDED.volume,
                            exchange_count = heatmap_data.exchange_count + EXCLUDED.exchange_count
                    """, insert_data)

            return True

        except Exception as e:
            logger.error(f"Failed to store heatmap data: {e}")
            raise DatabaseError(f"Heatmap storage failed: {e}")

    async def get_latest_orderbook(self, symbol: str, exchange: Optional[str] = None) -> Optional[Dict]:
        """Get latest order book snapshot for a symbol."""
        try:
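            # Without an exchange filter this returns the newest snapshot across
            # all exchanges for the symbol.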
            async with self.pool.acquire() as conn:
                if exchange:
                    query = """
                        SELECT * FROM order_book_snapshots
                        WHERE symbol = $1 AND exchange = $2
                        ORDER BY timestamp DESC LIMIT 1
                    """
                    row = await conn.fetchrow(query, symbol, exchange)
                else:
                    query = """
                        SELECT * FROM order_book_snapshots
                        WHERE symbol = $1
                        ORDER BY timestamp DESC LIMIT 1
                    """
                    row = await conn.fetchrow(query, symbol)

                if row:
                    return dict(row)
                return None

        except Exception as e:
            logger.error(f"Failed to get latest order book: {e}")
            raise DatabaseError(f"Order book retrieval failed: {e}")

    async def get_historical_data(self, symbol: str, start: datetime, end: datetime,
                                  data_type: str = 'orderbook') -> List[Dict]:
        """Get historical data for a symbol within a time range.

        data_type selects the table: 'orderbook', 'trades', or 'heatmap'.
        """
        try:
            async with self.pool.acquire() as conn:
                if data_type == 'orderbook':
                    query = """
                        SELECT * FROM order_book_snapshots
                        WHERE symbol = $1 AND timestamp >= $2 AND timestamp <= $3
                        ORDER BY timestamp ASC
                    """
                elif data_type == 'trades':
                    query = """
                        SELECT * FROM trade_events
                        WHERE symbol = $1 AND timestamp >= $2 AND timestamp <= $3
                        ORDER BY timestamp ASC
                    """
                elif data_type == 'heatmap':
                    query = """
                        SELECT * FROM heatmap_data
                        WHERE symbol = $1 AND timestamp >= $2 AND timestamp <= $3
                        ORDER BY timestamp ASC
                    """
                else:
                    raise ValueError(f"Unknown data type: {data_type}")

                rows = await conn.fetch(query, symbol, start, end)
                return [dict(row) for row in rows]

        except Exception as e:
            logger.error(f"Failed to get historical data: {e}")
            raise DatabaseError(f"Historical data retrieval failed: {e}")

    async def get_heatmap_data(self, symbol: str, bucket_size: float,
                               start: Optional[datetime] = None) -> List[Dict]:
        """Get heatmap data for visualization."""
        try:
            async with self.pool.acquire() as conn:
                if start:
                    query = """
                        SELECT price_bucket, volume, side, timestamp
                        FROM heatmap_data
                        WHERE symbol = $1 AND bucket_size = $2 AND timestamp >= $3
                        ORDER BY timestamp DESC, price_bucket ASC
                    """
                    rows = await conn.fetch(query, symbol, bucket_size, start)
                else:
                    # Get latest heatmap data
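                    # The subquery pins the result to the single most recent
                    # heatmap timestamp for this symbol and bucket size.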
query = """
|
|
SELECT price_bucket, volume, side, timestamp
|
|
FROM heatmap_data
|
|
WHERE symbol = $1 AND bucket_size = $2
|
|
AND timestamp = (
|
|
SELECT MAX(timestamp) FROM heatmap_data
|
|
WHERE symbol = $1 AND bucket_size = $2
|
|
)
|
|
ORDER BY price_bucket ASC
|
|
"""
|
|
rows = await conn.fetch(query, symbol, bucket_size)
|
|
|
|
return [dict(row) for row in rows]
|
|
|
|
except Exception as e:
|
|
logger.error(f"Failed to get heatmap data: {e}")
|
|
raise DatabaseError(f"Heatmap data retrieval failed: {e}")
|
|
|
|
    async def batch_store_orderbooks(self, data_list: List[OrderBookSnapshot]) -> bool:
        """Store multiple order book snapshots in a single transaction."""
        if not data_list:
            return True

        try:
            async with self.pool.acquire() as conn:
                async with conn.transaction():
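                    # One transaction per batch: either every snapshot lands or
                    # none do.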
                    insert_data = []
                    for data in data_list:
                        # Calculate derived metrics
                        mid_price = None
                        spread = None
                        bid_volume = sum(level.size for level in data.bids)
                        ask_volume = sum(level.size for level in data.asks)

                        if data.bids and data.asks:
                            best_bid = max(data.bids, key=lambda x: x.price).price
                            best_ask = min(data.asks, key=lambda x: x.price).price
                            mid_price = (best_bid + best_ask) / 2
                            spread = best_ask - best_bid

                        # Convert price levels to JSON
                        bids_json = json.dumps([asdict(level) for level in data.bids])
                        asks_json = json.dumps([asdict(level) for level in data.asks])

                        insert_data.append((
                            data.symbol, data.exchange, data.timestamp, bids_json, asks_json,
                            data.sequence_id, mid_price, spread, bid_volume, ask_volume
                        ))

                    await conn.executemany("""
                        INSERT INTO order_book_snapshots
                            (symbol, exchange, timestamp, bids, asks, sequence_id,
                             mid_price, spread, bid_volume, ask_volume)
                        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
                        ON CONFLICT (timestamp, symbol, exchange) DO UPDATE SET
                            bids = EXCLUDED.bids,
                            asks = EXCLUDED.asks,
                            sequence_id = EXCLUDED.sequence_id,
                            mid_price = EXCLUDED.mid_price,
                            spread = EXCLUDED.spread,
                            bid_volume = EXCLUDED.bid_volume,
                            ask_volume = EXCLUDED.ask_volume
                    """, insert_data)

            return True

        except Exception as e:
            logger.error(f"Failed to batch store order books: {e}")
            raise DatabaseError(f"Batch order book storage failed: {e}")

    async def batch_store_trades(self, data_list: List[TradeEvent]) -> bool:
        """Store multiple trade events in a single transaction."""
        if not data_list:
            return True

        try:
            async with self.pool.acquire() as conn:
                async with conn.transaction():
                    insert_data = [(
                        data.symbol, data.exchange, data.timestamp, data.price,
                        data.size, data.side, data.trade_id
                    ) for data in data_list]

                    await conn.executemany("""
                        INSERT INTO trade_events
                            (symbol, exchange, timestamp, price, size, side, trade_id)
                        VALUES ($1, $2, $3, $4, $5, $6, $7)
                        ON CONFLICT (timestamp, symbol, exchange, trade_id) DO NOTHING
                    """, insert_data)

            return True

        except Exception as e:
            logger.error(f"Failed to batch store trades: {e}")
            raise DatabaseError(f"Batch trade storage failed: {e}")

    async def get_database_stats(self) -> Dict[str, Any]:
        """Get database statistics and health information."""
        try:
            async with self.pool.acquire() as conn:
                stats = {}

                # Get table sizes
                tables = ['order_book_snapshots', 'trade_events', 'heatmap_data', 'ohlcv_data']
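                # COUNT(*) scans the full table, which can be slow on large
                # hypertables; treat this as an occasional health check.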
                for table in tables:
                    row = await conn.fetchrow(f"""
                        SELECT
                            COUNT(*) as row_count,
                            pg_size_pretty(pg_total_relation_size('{table}')) as size
                        FROM {table}
                    """)
                    stats[table] = dict(row)

                # Get connection pool stats
                stats['connection_pool'] = {
                    'size': self.pool.get_size(),
                    'max_size': self.pool.get_max_size(),
                    'min_size': self.pool.get_min_size()
                }

                return stats

        except Exception as e:
            logger.error(f"Failed to get database stats: {e}")
            return {}