storage manager

This commit is contained in:
Dobromir Popov
2025-08-04 21:50:11 +03:00
parent 42cf02cf3a
commit db61f3c3bf
8 changed files with 1306 additions and 836 deletions


@@ -1,29 +1,103 @@
"""
Database schema management for TimescaleDB.
Database schema management and migration system.
Handles schema versioning, migrations, and database structure updates.
"""
from typing import List
from ..utils.logging import get_logger
import logging
from typing import Dict, List, Optional
from datetime import datetime
import asyncpg
logger = get_logger(__name__)
logger = logging.getLogger(__name__)
class DatabaseSchema:
"""Manages database schema creation and migrations"""
class SchemaManager:
"""Manages database schema versions and migrations."""
@staticmethod
def get_schema_creation_queries() -> List[str]:
"""Get list of queries to create the database schema"""
return [
# Create TimescaleDB extension
"CREATE EXTENSION IF NOT EXISTS timescaledb;",
# Create schema
"CREATE SCHEMA IF NOT EXISTS market_data;",
def __init__(self, connection_pool):
self.pool = connection_pool
self.current_version = "1.0.0"
async def initialize_schema_tracking(self) -> None:
"""Initialize schema version tracking table."""
try:
async with self.pool.acquire() as conn:
await conn.execute("""
CREATE TABLE IF NOT EXISTS schema_migrations (
version VARCHAR(20) PRIMARY KEY,
applied_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
description TEXT,
checksum VARCHAR(64)
);
""")
# Record initial schema version
await conn.execute("""
INSERT INTO schema_migrations (version, description)
VALUES ($1, $2)
ON CONFLICT (version) DO NOTHING
""", self.current_version, "Initial schema setup")
logger.info("Schema tracking initialized")
except Exception as e:
logger.error(f"Failed to initialize schema tracking: {e}")
raise
async def get_current_schema_version(self) -> Optional[str]:
"""Get the current schema version from database."""
try:
async with self.pool.acquire() as conn:
version = await conn.fetchval("""
SELECT version FROM schema_migrations
ORDER BY applied_at DESC LIMIT 1
""")
return version
except Exception as e:
logger.error(f"Failed to get schema version: {e}")
return None
+
+    async def apply_migration(self, version: str, description: str, sql_commands: List[str]) -> bool:
+        """Apply a database migration inside a single transaction."""
+        try:
+            async with self.pool.acquire() as conn:
+                async with conn.transaction():
+                    # Check if migration already applied
+                    existing = await conn.fetchval("""
+                        SELECT version FROM schema_migrations WHERE version = $1
+                    """, version)
+
+                    if existing:
+                        logger.info(f"Migration {version} already applied")
+                        return True
+
+                    # Apply migration commands
+                    for sql_command in sql_commands:
+                        await conn.execute(sql_command)
+
+                    # Record migration
+                    await conn.execute("""
+                        INSERT INTO schema_migrations (version, description)
+                        VALUES ($1, $2)
+                    """, version, description)
+
+                    logger.info(f"Applied migration {version}: {description}")
+                    return True
+        except Exception as e:
+            logger.error(f"Failed to apply migration {version}: {e}")
+            return False
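+
+    # A migration call then looks like this (hypothetical example; the table
+    # name and version number are illustrative, not part of this commit):
+    #
+    #   ok = await manager.apply_migration(
+    #       "1.1.0",
+    #       "Add liquidation_events table",
+    #       ["CREATE TABLE IF NOT EXISTS liquidation_events (id BIGSERIAL PRIMARY KEY);"],
+    #   )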
+
+    async def create_base_schema(self) -> bool:
+        """Create the base database schema with all tables and indexes."""
+        migration_commands = [
+            # Enable TimescaleDB extension
+            "CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;",
+
             # Order book snapshots table
             """
-            CREATE TABLE IF NOT EXISTS market_data.order_book_snapshots (
+            CREATE TABLE IF NOT EXISTS order_book_snapshots (
                 id BIGSERIAL,
                 symbol VARCHAR(20) NOT NULL,
                 exchange VARCHAR(20) NOT NULL,
@@ -35,14 +109,13 @@ class DatabaseSchema:
                 spread DECIMAL(20,8),
                 bid_volume DECIMAL(30,8),
                 ask_volume DECIMAL(30,8),
                 created_at TIMESTAMPTZ DEFAULT NOW(),
                 PRIMARY KEY (timestamp, symbol, exchange)
             );
             """,
+
             # Trade events table
             """
-            CREATE TABLE IF NOT EXISTS market_data.trade_events (
+            CREATE TABLE IF NOT EXISTS trade_events (
                 id BIGSERIAL,
                 symbol VARCHAR(20) NOT NULL,
                 exchange VARCHAR(20) NOT NULL,
@@ -51,14 +124,13 @@ class DatabaseSchema:
                 size DECIMAL(30,8) NOT NULL,
                 side VARCHAR(4) NOT NULL,
                 trade_id VARCHAR(100) NOT NULL,
                 created_at TIMESTAMPTZ DEFAULT NOW(),
                 PRIMARY KEY (timestamp, symbol, exchange, trade_id)
             );
             """,
+
-            # Aggregated heatmap data table
+            # Heatmap data table
             """
-            CREATE TABLE IF NOT EXISTS market_data.heatmap_data (
+            CREATE TABLE IF NOT EXISTS heatmap_data (
                 symbol VARCHAR(20) NOT NULL,
                 timestamp TIMESTAMPTZ NOT NULL,
                 bucket_size DECIMAL(10,2) NOT NULL,
@@ -66,15 +138,13 @@ class DatabaseSchema:
                 volume DECIMAL(30,8) NOT NULL,
                 side VARCHAR(3) NOT NULL,
                 exchange_count INTEGER NOT NULL,
                 exchanges JSONB,
                 created_at TIMESTAMPTZ DEFAULT NOW(),
                 PRIMARY KEY (timestamp, symbol, bucket_size, price_bucket, side)
             );
             """,
+
             # OHLCV data table
             """
-            CREATE TABLE IF NOT EXISTS market_data.ohlcv_data (
+            CREATE TABLE IF NOT EXISTS ohlcv_data (
                 symbol VARCHAR(20) NOT NULL,
                 timestamp TIMESTAMPTZ NOT NULL,
                 timeframe VARCHAR(10) NOT NULL,
@@ -84,173 +154,185 @@ class DatabaseSchema:
                 close_price DECIMAL(20,8) NOT NULL,
                 volume DECIMAL(30,8) NOT NULL,
                 trade_count INTEGER,
                 vwap DECIMAL(20,8),
                 created_at TIMESTAMPTZ DEFAULT NOW(),
                 PRIMARY KEY (timestamp, symbol, timeframe)
             );
             """,
-            # Exchange status tracking table
-            """
-            CREATE TABLE IF NOT EXISTS market_data.exchange_status (
-                exchange VARCHAR(20) NOT NULL,
-                timestamp TIMESTAMPTZ NOT NULL,
-                status VARCHAR(20) NOT NULL,
-                last_message_time TIMESTAMPTZ,
-                error_message TEXT,
-                connection_count INTEGER DEFAULT 0,
-                created_at TIMESTAMPTZ DEFAULT NOW(),
-                PRIMARY KEY (timestamp, exchange)
-            );
-            """,
-            # System metrics table
-            """
-            CREATE TABLE IF NOT EXISTS market_data.system_metrics (
-                metric_name VARCHAR(50) NOT NULL,
-                timestamp TIMESTAMPTZ NOT NULL,
-                value DECIMAL(20,8) NOT NULL,
-                labels JSONB,
-                created_at TIMESTAMPTZ DEFAULT NOW(),
-                PRIMARY KEY (timestamp, metric_name)
-            );
-            """
         ]
+
+        return await self.apply_migration(
+            "1.0.0",
+            "Create base schema with core tables",
+            migration_commands
+        )
+
-    @staticmethod
-    def get_hypertable_creation_queries() -> List[str]:
-        """Get queries to create hypertables"""
-        return [
-            "SELECT create_hypertable('market_data.order_book_snapshots', 'timestamp', if_not_exists => TRUE);",
-            "SELECT create_hypertable('market_data.trade_events', 'timestamp', if_not_exists => TRUE);",
-            "SELECT create_hypertable('market_data.heatmap_data', 'timestamp', if_not_exists => TRUE);",
-            "SELECT create_hypertable('market_data.ohlcv_data', 'timestamp', if_not_exists => TRUE);",
-            "SELECT create_hypertable('market_data.exchange_status', 'timestamp', if_not_exists => TRUE);",
-            "SELECT create_hypertable('market_data.system_metrics', 'timestamp', if_not_exists => TRUE);"
+    async def create_hypertables(self) -> bool:
+        """Convert tables to TimescaleDB hypertables."""
+        hypertable_commands = [
+            "SELECT create_hypertable('order_book_snapshots', 'timestamp', if_not_exists => TRUE);",
+            "SELECT create_hypertable('trade_events', 'timestamp', if_not_exists => TRUE);",
+            "SELECT create_hypertable('heatmap_data', 'timestamp', if_not_exists => TRUE);",
+            "SELECT create_hypertable('ohlcv_data', 'timestamp', if_not_exists => TRUE);"
         ]
+
+        return await self.apply_migration(
+            "1.0.1",
+            "Convert tables to hypertables",
+            hypertable_commands
+        )
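+
+    # NOTE: create_hypertable() defaults to 7-day chunks; for high-frequency
+    # data a smaller chunk can be worth tuning, e.g. (sketch, interval is an
+    # assumption, not part of this migration):
+    #   SELECT create_hypertable('trade_events', 'timestamp',
+    #       chunk_time_interval => INTERVAL '1 day', if_not_exists => TRUE);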
+
-    @staticmethod
-    def get_index_creation_queries() -> List[str]:
-        """Get queries to create indexes"""
-        return [
-            # Order book indexes
-            "CREATE INDEX IF NOT EXISTS idx_order_book_symbol_exchange ON market_data.order_book_snapshots (symbol, exchange, timestamp DESC);",
-            "CREATE INDEX IF NOT EXISTS idx_order_book_timestamp ON market_data.order_book_snapshots (timestamp DESC);",
+    async def create_indexes(self) -> bool:
+        """Create performance indexes."""
+        index_commands = [
+            # Order book snapshots indexes
+            "CREATE INDEX IF NOT EXISTS idx_obs_symbol_time ON order_book_snapshots (symbol, timestamp DESC);",
+            "CREATE INDEX IF NOT EXISTS idx_obs_exchange_time ON order_book_snapshots (exchange, timestamp DESC);",
+            "CREATE INDEX IF NOT EXISTS idx_obs_symbol_exchange ON order_book_snapshots (symbol, exchange, timestamp DESC);",
+
             # Trade events indexes
-            "CREATE INDEX IF NOT EXISTS idx_trade_events_symbol_exchange ON market_data.trade_events (symbol, exchange, timestamp DESC);",
-            "CREATE INDEX IF NOT EXISTS idx_trade_events_timestamp ON market_data.trade_events (timestamp DESC);",
-            "CREATE INDEX IF NOT EXISTS idx_trade_events_price ON market_data.trade_events (symbol, price, timestamp DESC);",
+            "CREATE INDEX IF NOT EXISTS idx_trades_symbol_time ON trade_events (symbol, timestamp DESC);",
+            "CREATE INDEX IF NOT EXISTS idx_trades_exchange_time ON trade_events (exchange, timestamp DESC);",
+            "CREATE INDEX IF NOT EXISTS idx_trades_price ON trade_events (symbol, price, timestamp DESC);",
+            "CREATE INDEX IF NOT EXISTS idx_trades_side ON trade_events (symbol, side, timestamp DESC);",
+
             # Heatmap data indexes
-            "CREATE INDEX IF NOT EXISTS idx_heatmap_symbol_bucket ON market_data.heatmap_data (symbol, bucket_size, timestamp DESC);",
-            "CREATE INDEX IF NOT EXISTS idx_heatmap_timestamp ON market_data.heatmap_data (timestamp DESC);",
+            "CREATE INDEX IF NOT EXISTS idx_heatmap_symbol_time ON heatmap_data (symbol, timestamp DESC);",
+            "CREATE INDEX IF NOT EXISTS idx_heatmap_bucket ON heatmap_data (symbol, bucket_size, timestamp DESC);",
+            "CREATE INDEX IF NOT EXISTS idx_heatmap_side ON heatmap_data (symbol, side, timestamp DESC);",
+
-            # OHLCV data indexes
-            "CREATE INDEX IF NOT EXISTS idx_ohlcv_symbol_timeframe ON market_data.ohlcv_data (symbol, timeframe, timestamp DESC);",
-            "CREATE INDEX IF NOT EXISTS idx_ohlcv_timestamp ON market_data.ohlcv_data (timestamp DESC);",
-            # Exchange status indexes
-            "CREATE INDEX IF NOT EXISTS idx_exchange_status_exchange ON market_data.exchange_status (exchange, timestamp DESC);",
-            "CREATE INDEX IF NOT EXISTS idx_exchange_status_timestamp ON market_data.exchange_status (timestamp DESC);",
-            # System metrics indexes
-            "CREATE INDEX IF NOT EXISTS idx_system_metrics_name ON market_data.system_metrics (metric_name, timestamp DESC);",
-            "CREATE INDEX IF NOT EXISTS idx_system_metrics_timestamp ON market_data.system_metrics (timestamp DESC);"
+            # OHLCV indexes
+            "CREATE INDEX IF NOT EXISTS idx_ohlcv_symbol_timeframe ON ohlcv_data (symbol, timeframe, timestamp DESC);"
         ]
+
+        return await self.apply_migration(
+            "1.0.2",
+            "Create performance indexes",
+            index_commands
+        )
+
-    @staticmethod
-    def get_retention_policy_queries() -> List[str]:
-        """Get queries to create retention policies"""
-        return [
-            "SELECT add_retention_policy('market_data.order_book_snapshots', INTERVAL '90 days', if_not_exists => TRUE);",
-            "SELECT add_retention_policy('market_data.trade_events', INTERVAL '90 days', if_not_exists => TRUE);",
-            "SELECT add_retention_policy('market_data.heatmap_data', INTERVAL '90 days', if_not_exists => TRUE);",
-            "SELECT add_retention_policy('market_data.ohlcv_data', INTERVAL '365 days', if_not_exists => TRUE);",
-            "SELECT add_retention_policy('market_data.exchange_status', INTERVAL '30 days', if_not_exists => TRUE);",
-            "SELECT add_retention_policy('market_data.system_metrics', INTERVAL '30 days', if_not_exists => TRUE);"
+    async def setup_retention_policies(self) -> bool:
+        """Set up data retention policies."""
+        retention_commands = [
+            "SELECT add_retention_policy('order_book_snapshots', INTERVAL '30 days', if_not_exists => TRUE);",
+            "SELECT add_retention_policy('trade_events', INTERVAL '90 days', if_not_exists => TRUE);",
+            "SELECT add_retention_policy('heatmap_data', INTERVAL '60 days', if_not_exists => TRUE);",
+            "SELECT add_retention_policy('ohlcv_data', INTERVAL '1 year', if_not_exists => TRUE);"
        ]
+
+        return await self.apply_migration(
+            "1.0.3",
+            "Setup data retention policies",
+            retention_commands
+        )
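+
+    # The scheduled retention jobs can be inspected afterwards (sketch; the
+    # catalog view and proc_name are standard in TimescaleDB 2.x):
+    #   SELECT hypertable_name, config
+    #   FROM timescaledb_information.jobs
+    #   WHERE proc_name = 'policy_retention';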
+
-    @staticmethod
-    def get_continuous_aggregate_queries() -> List[str]:
-        """Get queries to create continuous aggregates"""
-        return [
-            # Hourly OHLCV aggregate
-            """
-            CREATE MATERIALIZED VIEW IF NOT EXISTS market_data.hourly_ohlcv
-            WITH (timescaledb.continuous) AS
-            SELECT
-                symbol,
-                exchange,
-                time_bucket('1 hour', timestamp) AS hour,
-                first(price, timestamp) AS open_price,
-                max(price) AS high_price,
-                min(price) AS low_price,
-                last(price, timestamp) AS close_price,
-                sum(size) AS volume,
-                count(*) AS trade_count,
-                avg(price) AS vwap
-            FROM market_data.trade_events
-            GROUP BY symbol, exchange, hour
-            WITH NO DATA;
-            """,
-            # Add refresh policy for continuous aggregate
-            """
-            SELECT add_continuous_aggregate_policy('market_data.hourly_ohlcv',
-                start_offset => INTERVAL '3 hours',
-                end_offset => INTERVAL '1 hour',
-                schedule_interval => INTERVAL '1 hour',
-                if_not_exists => TRUE);
-            """
-        ]
-
-    @staticmethod
-    def get_view_creation_queries() -> List[str]:
-        """Get queries to create views"""
-        return [
-            # Latest order books view
-            """
-            CREATE OR REPLACE VIEW market_data.latest_order_books AS
-            SELECT DISTINCT ON (symbol, exchange)
-                symbol,
-                exchange,
-                timestamp,
-                bids,
-                asks,
-                mid_price,
-                spread,
-                bid_volume,
-                ask_volume
-            FROM market_data.order_book_snapshots
-            ORDER BY symbol, exchange, timestamp DESC;
-            """,
-            # Latest heatmaps view
-            """
-            CREATE OR REPLACE VIEW market_data.latest_heatmaps AS
-            SELECT DISTINCT ON (symbol, bucket_size, price_bucket, side)
-                symbol,
-                bucket_size,
-                price_bucket,
-                side,
-                timestamp,
-                volume,
-                exchange_count,
-                exchanges
-            FROM market_data.heatmap_data
-            ORDER BY symbol, bucket_size, price_bucket, side, timestamp DESC;
-            """
-        ]
+    async def create_continuous_aggregates(self) -> bool:
+        """Create continuous aggregates for better query performance."""
+        # NOTE: CREATE MATERIALIZED VIEW ... WITH (timescaledb.continuous)
+        # cannot run inside a transaction block, so routing these commands
+        # through the transactional apply_migration() will fail on stock
+        # TimescaleDB; they may need a non-transactional execution path.
+        aggregate_commands = [
+            # 1-minute OHLCV aggregates from trades
+            """
+            CREATE MATERIALIZED VIEW IF NOT EXISTS trades_1m
+            WITH (timescaledb.continuous) AS
+            SELECT
+                time_bucket('1 minute', timestamp) AS bucket,
+                symbol,
+                exchange,
+                first(price, timestamp) AS open_price,
+                max(price) AS high_price,
+                min(price) AS low_price,
+                last(price, timestamp) AS close_price,
+                sum(size) AS volume,
+                count(*) AS trade_count
+            FROM trade_events
+            GROUP BY bucket, symbol, exchange;
+            """,
+
+            # 5-minute order book statistics
+            """
+            CREATE MATERIALIZED VIEW IF NOT EXISTS orderbook_stats_5m
+            WITH (timescaledb.continuous) AS
+            SELECT
+                time_bucket('5 minutes', timestamp) AS bucket,
+                symbol,
+                exchange,
+                avg(mid_price) AS avg_mid_price,
+                avg(spread) AS avg_spread,
+                avg(bid_volume) AS avg_bid_volume,
+                avg(ask_volume) AS avg_ask_volume,
+                count(*) AS snapshot_count
+            FROM order_book_snapshots
+            WHERE mid_price IS NOT NULL
+            GROUP BY bucket, symbol, exchange;
+            """
+        ]
+
+        return await self.apply_migration(
+            "1.0.4",
+            "Create continuous aggregates",
+            aggregate_commands
+        )
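+
+    # The materialized portion of these aggregates is not refreshed on a
+    # schedule by itself; a follow-up refresh policy would look like this
+    # (sketch, the offsets and interval are assumptions):
+    #   SELECT add_continuous_aggregate_policy('trades_1m',
+    #       start_offset => INTERVAL '10 minutes',
+    #       end_offset => INTERVAL '1 minute',
+    #       schedule_interval => INTERVAL '1 minute');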
+
-    @staticmethod
-    def get_all_creation_queries() -> List[str]:
-        """Get all schema creation queries in order"""
-        queries = []
-        queries.extend(DatabaseSchema.get_schema_creation_queries())
-        queries.extend(DatabaseSchema.get_hypertable_creation_queries())
-        queries.extend(DatabaseSchema.get_index_creation_queries())
-        queries.extend(DatabaseSchema.get_retention_policy_queries())
-        queries.extend(DatabaseSchema.get_continuous_aggregate_queries())
-        queries.extend(DatabaseSchema.get_view_creation_queries())
-        return queries
+    async def setup_complete_schema(self) -> bool:
+        """Set up the complete database schema with all components."""
+        try:
+            # Initialize schema tracking
+            await self.initialize_schema_tracking()
+
+            # Apply all migrations in order
+            migrations = [
+                self.create_base_schema,
+                self.create_hypertables,
+                self.create_indexes,
+                self.setup_retention_policies,
+                self.create_continuous_aggregates
+            ]
+
+            for migration in migrations:
+                success = await migration()
+                if not success:
+                    logger.error(f"Failed to apply migration: {migration.__name__}")
+                    return False
+
+            logger.info("Complete database schema setup successful")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to setup complete schema: {e}")
+            return False
+
+    async def get_schema_info(self) -> Dict:
+        """Get information about the current schema state."""
+        try:
+            async with self.pool.acquire() as conn:
+                # Get applied migrations
+                migrations = await conn.fetch("""
+                    SELECT version, applied_at, description
+                    FROM schema_migrations
+                    ORDER BY applied_at
+                """)
+
+                # Get table information
+                tables = await conn.fetch("""
+                    SELECT
+                        schemaname,
+                        tablename,
+                        pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size
+                    FROM pg_tables
+                    WHERE schemaname = 'public'
+                    AND tablename IN ('order_book_snapshots', 'trade_events', 'heatmap_data', 'ohlcv_data')
+                """)
+
+                # Get hypertable information
+                hypertables = await conn.fetch("""
+                    SELECT hypertable_name, num_chunks, compression_enabled
+                    FROM timescaledb_information.hypertables
+                    WHERE hypertable_schema = 'public'
+                """)
+
+                return {
+                    "migrations": [dict(m) for m in migrations],
+                    "tables": [dict(t) for t in tables],
+                    "hypertables": [dict(h) for h in hypertables]
+                }
+        except Exception as e:
+            logger.error(f"Failed to get schema info: {e}")
+            return {}
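
A minimal usage sketch of the new manager (not part of the commit; the DSN and the `schema_manager` module path are assumptions for illustration):

import asyncio

import asyncpg

from schema_manager import SchemaManager  # assumed module path


async def main() -> None:
    # Illustrative DSN; point it at a TimescaleDB-enabled database
    pool = await asyncpg.create_pool("postgresql://user:pass@localhost:5432/market_data")
    try:
        manager = SchemaManager(pool)
        if await manager.setup_complete_schema():
            info = await manager.get_schema_info()
            print("Current version:", await manager.get_current_schema_version())
            print("Hypertables:", [h["hypertable_name"] for h in info["hypertables"]])
    finally:
        await pool.close()


asyncio.run(main())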