storage manager
"""
|
||||
Database schema management for TimescaleDB.
|
||||
Database schema management and migration system.
|
||||
Handles schema versioning, migrations, and database structure updates.
|
||||
"""

import logging
from typing import Dict, List, Optional

import asyncpg

logger = logging.getLogger(__name__)


class SchemaManager:
    """Manages database schema versions and migrations."""

    def __init__(self, connection_pool: asyncpg.Pool):
        self.pool = connection_pool
        self.current_version = "1.0.0"

    async def initialize_schema_tracking(self) -> None:
        """Initialize schema version tracking table."""
        try:
            async with self.pool.acquire() as conn:
                await conn.execute("""
                    CREATE TABLE IF NOT EXISTS schema_migrations (
                        version VARCHAR(20) PRIMARY KEY,
                        applied_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
                        description TEXT,
                        checksum VARCHAR(64)
                    );
                """)

                # Record initial schema version
                await conn.execute("""
                    INSERT INTO schema_migrations (version, description)
                    VALUES ($1, $2)
                    ON CONFLICT (version) DO NOTHING
                """, self.current_version, "Initial schema setup")

                logger.info("Schema tracking initialized")

        except Exception as e:
            logger.error(f"Failed to initialize schema tracking: {e}")
            raise

    async def get_current_schema_version(self) -> Optional[str]:
        """Get the current schema version from the database."""
        try:
            async with self.pool.acquire() as conn:
                version = await conn.fetchval("""
                    SELECT version FROM schema_migrations
                    ORDER BY applied_at DESC LIMIT 1
                """)
                return version

        except Exception as e:
            logger.error(f"Failed to get schema version: {e}")
            return None

    async def apply_migration(self, version: str, description: str, sql_commands: List[str]) -> bool:
        """Apply a database migration."""
        try:
            async with self.pool.acquire() as conn:
                async with conn.transaction():
                    # Check if migration already applied
                    existing = await conn.fetchval("""
                        SELECT version FROM schema_migrations WHERE version = $1
                    """, version)

                    if existing:
                        logger.info(f"Migration {version} already applied")
                        return True

                    # Apply migration commands
                    for sql_command in sql_commands:
                        await conn.execute(sql_command)

                    # Record migration
                    await conn.execute("""
                        INSERT INTO schema_migrations (version, description)
                        VALUES ($1, $2)
                    """, version, description)

                    logger.info(f"Applied migration {version}: {description}")
                    return True

        except Exception as e:
            logger.error(f"Failed to apply migration {version}: {e}")
            return False
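
    # Usage sketch (hypothetical version number and SQL): a one-off migration
    # can be applied directly through this method, e.g.
    #
    #   await manager.apply_migration(
    #       "1.1.0",
    #       "Add note column to trade_events",
    #       ["ALTER TABLE trade_events ADD COLUMN IF NOT EXISTS note TEXT;"],
    #   )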

    async def create_base_schema(self) -> bool:
        """Create the base database schema with all tables and indexes."""
        migration_commands = [
            # Enable TimescaleDB extension
            "CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;",

            # Order book snapshots table
            """
            CREATE TABLE IF NOT EXISTS order_book_snapshots (
                id BIGSERIAL,
                symbol VARCHAR(20) NOT NULL,
                exchange VARCHAR(20) NOT NULL,
                timestamp TIMESTAMPTZ NOT NULL,
                bids JSONB,
                asks JSONB,
                mid_price DECIMAL(20,8),
                spread DECIMAL(20,8),
                bid_volume DECIMAL(30,8),
                ask_volume DECIMAL(30,8),
                created_at TIMESTAMPTZ DEFAULT NOW(),
                PRIMARY KEY (timestamp, symbol, exchange)
            );
            """,

            # Trade events table
            """
            CREATE TABLE IF NOT EXISTS trade_events (
                id BIGSERIAL,
                symbol VARCHAR(20) NOT NULL,
                exchange VARCHAR(20) NOT NULL,
                timestamp TIMESTAMPTZ NOT NULL,
                price DECIMAL(20,8) NOT NULL,
                size DECIMAL(30,8) NOT NULL,
                side VARCHAR(4) NOT NULL,
                trade_id VARCHAR(100) NOT NULL,
                created_at TIMESTAMPTZ DEFAULT NOW(),
                PRIMARY KEY (timestamp, symbol, exchange, trade_id)
            );
            """,

            # Heatmap data table
            """
            CREATE TABLE IF NOT EXISTS heatmap_data (
                symbol VARCHAR(20) NOT NULL,
                timestamp TIMESTAMPTZ NOT NULL,
                bucket_size DECIMAL(10,2) NOT NULL,
                price_bucket DECIMAL(20,8) NOT NULL,
                volume DECIMAL(30,8) NOT NULL,
                side VARCHAR(3) NOT NULL,
                exchange_count INTEGER NOT NULL,
                exchanges JSONB,
                created_at TIMESTAMPTZ DEFAULT NOW(),
                PRIMARY KEY (timestamp, symbol, bucket_size, price_bucket, side)
            );
            """,

            # OHLCV data table
            """
            CREATE TABLE IF NOT EXISTS ohlcv_data (
                symbol VARCHAR(20) NOT NULL,
                timestamp TIMESTAMPTZ NOT NULL,
                timeframe VARCHAR(10) NOT NULL,
                open_price DECIMAL(20,8) NOT NULL,
                high_price DECIMAL(20,8) NOT NULL,
                low_price DECIMAL(20,8) NOT NULL,
                close_price DECIMAL(20,8) NOT NULL,
                volume DECIMAL(30,8) NOT NULL,
                trade_count INTEGER,
                vwap DECIMAL(20,8),
                created_at TIMESTAMPTZ DEFAULT NOW(),
                PRIMARY KEY (timestamp, symbol, timeframe)
            );
            """,

            # Exchange status tracking table
            """
            CREATE TABLE IF NOT EXISTS exchange_status (
                exchange VARCHAR(20) NOT NULL,
                timestamp TIMESTAMPTZ NOT NULL,
                status VARCHAR(20) NOT NULL,
                last_message_time TIMESTAMPTZ,
                error_message TEXT,
                connection_count INTEGER DEFAULT 0,
                created_at TIMESTAMPTZ DEFAULT NOW(),
                PRIMARY KEY (timestamp, exchange)
            );
            """,

            # System metrics table
            """
            CREATE TABLE IF NOT EXISTS system_metrics (
                metric_name VARCHAR(50) NOT NULL,
                timestamp TIMESTAMPTZ NOT NULL,
                value DECIMAL(20,8) NOT NULL,
                labels JSONB,
                created_at TIMESTAMPTZ DEFAULT NOW(),
                PRIMARY KEY (timestamp, metric_name)
            );
            """
        ]

        return await self.apply_migration(
            "1.0.0",
            "Create base schema with core tables",
            migration_commands
        )

    async def create_hypertables(self) -> bool:
        """Convert tables to TimescaleDB hypertables."""
        hypertable_commands = [
            "SELECT create_hypertable('order_book_snapshots', 'timestamp', if_not_exists => TRUE);",
            "SELECT create_hypertable('trade_events', 'timestamp', if_not_exists => TRUE);",
            "SELECT create_hypertable('heatmap_data', 'timestamp', if_not_exists => TRUE);",
            "SELECT create_hypertable('ohlcv_data', 'timestamp', if_not_exists => TRUE);"
        ]

        return await self.apply_migration(
            "1.0.1",
            "Convert tables to hypertables",
            hypertable_commands
        )

    async def create_indexes(self) -> bool:
        """Create performance indexes."""
        index_commands = [
            # Order book snapshots indexes
            "CREATE INDEX IF NOT EXISTS idx_obs_symbol_time ON order_book_snapshots (symbol, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_obs_exchange_time ON order_book_snapshots (exchange, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_obs_symbol_exchange ON order_book_snapshots (symbol, exchange, timestamp DESC);",

            # Trade events indexes
            "CREATE INDEX IF NOT EXISTS idx_trades_symbol_time ON trade_events (symbol, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_trades_exchange_time ON trade_events (exchange, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_trades_price ON trade_events (symbol, price, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_trades_side ON trade_events (symbol, side, timestamp DESC);",

            # Heatmap data indexes
            "CREATE INDEX IF NOT EXISTS idx_heatmap_symbol_time ON heatmap_data (symbol, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_heatmap_bucket ON heatmap_data (symbol, bucket_size, timestamp DESC);",
            "CREATE INDEX IF NOT EXISTS idx_heatmap_side ON heatmap_data (symbol, side, timestamp DESC);",

            # OHLCV indexes
            "CREATE INDEX IF NOT EXISTS idx_ohlcv_symbol_timeframe ON ohlcv_data (symbol, timeframe, timestamp DESC);"
        ]

        return await self.apply_migration(
            "1.0.2",
            "Create performance indexes",
            index_commands
        )

    async def setup_retention_policies(self) -> bool:
        """Set up data retention policies."""
        retention_commands = [
            "SELECT add_retention_policy('order_book_snapshots', INTERVAL '30 days', if_not_exists => TRUE);",
            "SELECT add_retention_policy('trade_events', INTERVAL '90 days', if_not_exists => TRUE);",
            "SELECT add_retention_policy('heatmap_data', INTERVAL '60 days', if_not_exists => TRUE);",
            "SELECT add_retention_policy('ohlcv_data', INTERVAL '1 year', if_not_exists => TRUE);"
        ]

        return await self.apply_migration(
            "1.0.3",
            "Setup data retention policies",
            retention_commands
        )

    async def create_continuous_aggregates(self) -> bool:
        """Create continuous aggregates for better query performance."""
        # NOTE: TimescaleDB does not allow CREATE MATERIALIZED VIEW ... WITH
        # (timescaledb.continuous) inside a transaction block, so this
        # migration may fail under apply_migration's transaction; if it does,
        # these commands need to run on a plain, non-transactional connection.
        aggregate_commands = [
            # 1-minute OHLCV aggregates from trades
            """
            CREATE MATERIALIZED VIEW IF NOT EXISTS trades_1m
            WITH (timescaledb.continuous) AS
            SELECT
                time_bucket('1 minute', timestamp) AS bucket,
                symbol,
                exchange,
                first(price, timestamp) AS open_price,
                max(price) AS high_price,
                min(price) AS low_price,
                last(price, timestamp) AS close_price,
                sum(size) AS volume,
                count(*) AS trade_count
            FROM trade_events
            GROUP BY bucket, symbol, exchange;
            """,

            # 5-minute order book statistics
            """
            CREATE MATERIALIZED VIEW IF NOT EXISTS orderbook_stats_5m
            WITH (timescaledb.continuous) AS
            SELECT
                time_bucket('5 minutes', timestamp) AS bucket,
                symbol,
                exchange,
                avg(mid_price) AS avg_mid_price,
                avg(spread) AS avg_spread,
                avg(bid_volume) AS avg_bid_volume,
                avg(ask_volume) AS avg_ask_volume,
                count(*) AS snapshot_count
            FROM order_book_snapshots
            WHERE mid_price IS NOT NULL
            GROUP BY bucket, symbol, exchange;
            """
        ]

        return await self.apply_migration(
            "1.0.4",
            "Create continuous aggregates",
            aggregate_commands
        )

    async def setup_complete_schema(self) -> bool:
        """Set up the complete database schema with all components."""
        try:
            # Initialize schema tracking
            await self.initialize_schema_tracking()

            # Apply all migrations in order
            migrations = [
                self.create_base_schema,
                self.create_hypertables,
                self.create_indexes,
                self.setup_retention_policies,
                self.create_continuous_aggregates
            ]

            for migration in migrations:
                success = await migration()
                if not success:
                    logger.error(f"Failed to apply migration: {migration.__name__}")
                    return False

            logger.info("Complete database schema setup successful")
            return True

        except Exception as e:
            logger.error(f"Failed to setup complete schema: {e}")
            return False

    async def get_schema_info(self) -> Dict:
        """Get information about the current schema state."""
        try:
            async with self.pool.acquire() as conn:
                # Get applied migrations
                migrations = await conn.fetch("""
                    SELECT version, applied_at, description
                    FROM schema_migrations
                    ORDER BY applied_at
                """)

                # Get table information
                tables = await conn.fetch("""
                    SELECT
                        schemaname,
                        tablename,
                        pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size
                    FROM pg_tables
                    WHERE schemaname = 'public'
                    AND tablename IN ('order_book_snapshots', 'trade_events', 'heatmap_data', 'ohlcv_data')
                """)

                # Get hypertable information
                hypertables = await conn.fetch("""
                    SELECT hypertable_name, num_chunks, compression_enabled
                    FROM timescaledb_information.hypertables
                    WHERE hypertable_schema = 'public'
                """)

                return {
                    "migrations": [dict(m) for m in migrations],
                    "tables": [dict(t) for t in tables],
                    "hypertables": [dict(h) for h in hypertables]
                }

        except Exception as e:
            logger.error(f"Failed to get schema info: {e}")
            return {}
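

if __name__ == "__main__":
    # Minimal usage sketch (illustrative; the DSN below is a placeholder):
    # create an asyncpg pool, run the full migration chain, and print the
    # resulting schema state.
    import asyncio

    async def _demo() -> None:
        pool = await asyncpg.create_pool(
            "postgresql://postgres:postgres@localhost:5432/market_data"
        )
        try:
            manager = SchemaManager(pool)
            if await manager.setup_complete_schema():
                version = await manager.get_current_schema_version()
                info = await manager.get_schema_info()
                print(f"Schema version: {version}")
                print(f"Hypertables: {info.get('hypertables', [])}")
        finally:
            await pool.close()

    asyncio.run(_demo())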