cob integration scaffold

Dobromir Popov
2025-08-04 17:12:26 +03:00
parent de9fa4a421
commit 504736c0f7
14 changed files with 2665 additions and 0 deletions

11
COBY/storage/__init__.py Normal file

@@ -0,0 +1,11 @@
"""
Storage layer for the COBY system.
"""
from .timescale_manager import TimescaleManager
from .connection_pool import DatabaseConnectionPool

__all__ = [
    'TimescaleManager',
    'DatabaseConnectionPool'
]
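
With these re-exports in place, downstream code can pull both classes straight from the package. A one-line sketch, assuming COBY is importable from the project root:

from COBY.storage import TimescaleManager, DatabaseConnectionPool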

140
COBY/storage/connection_pool.py Normal file

@@ -0,0 +1,140 @@
"""
Database connection pool management for TimescaleDB.
"""
import asyncio
import asyncpg
from typing import Optional, Dict, Any
from contextlib import asynccontextmanager
from ..config import config
from ..utils.logging import get_logger
from ..utils.exceptions import StorageError
logger = get_logger(__name__)
class DatabaseConnectionPool:
"""Manages database connection pool for TimescaleDB"""
def __init__(self):
self._pool: Optional[asyncpg.Pool] = None
self._is_initialized = False
async def initialize(self) -> None:
"""Initialize the connection pool"""
if self._is_initialized:
return
try:
# Build connection string
dsn = (
f"postgresql://{config.database.user}:{config.database.password}"
f"@{config.database.host}:{config.database.port}/{config.database.name}"
)
# Create connection pool
self._pool = await asyncpg.create_pool(
dsn,
min_size=5,
max_size=config.database.pool_size,
max_queries=50000,
max_inactive_connection_lifetime=300,
command_timeout=config.database.pool_timeout,
server_settings={
'search_path': config.database.schema,
'timezone': 'UTC'
}
)
self._is_initialized = True
logger.info(f"Database connection pool initialized with {config.database.pool_size} connections")
# Test connection
await self.health_check()
except Exception as e:
logger.error(f"Failed to initialize database connection pool: {e}")
raise StorageError(f"Database connection failed: {e}", "DB_INIT_ERROR")
async def close(self) -> None:
"""Close the connection pool"""
if self._pool:
await self._pool.close()
self._pool = None
self._is_initialized = False
logger.info("Database connection pool closed")
@asynccontextmanager
async def get_connection(self):
"""Get a database connection from the pool"""
if not self._is_initialized:
await self.initialize()
if not self._pool:
raise StorageError("Connection pool not initialized", "POOL_NOT_READY")
async with self._pool.acquire() as connection:
try:
yield connection
except Exception as e:
logger.error(f"Database operation failed: {e}")
raise
@asynccontextmanager
async def get_transaction(self):
"""Get a database transaction"""
async with self.get_connection() as conn:
async with conn.transaction():
yield conn
async def execute_query(self, query: str, *args) -> Any:
"""Execute a query and return results"""
async with self.get_connection() as conn:
return await conn.fetch(query, *args)
async def execute_command(self, command: str, *args) -> str:
"""Execute a command and return status"""
async with self.get_connection() as conn:
return await conn.execute(command, *args)
async def execute_many(self, command: str, args_list) -> None:
"""Execute a command multiple times with different arguments"""
async with self.get_connection() as conn:
await conn.executemany(command, args_list)
async def health_check(self) -> bool:
"""Check database health"""
try:
async with self.get_connection() as conn:
result = await conn.fetchval("SELECT 1")
if result == 1:
logger.debug("Database health check passed")
return True
else:
logger.warning("Database health check returned unexpected result")
return False
except Exception as e:
logger.error(f"Database health check failed: {e}")
return False
async def get_pool_stats(self) -> Dict[str, Any]:
"""Get connection pool statistics"""
if not self._pool:
return {}
return {
'size': self._pool.get_size(),
'min_size': self._pool.get_min_size(),
'max_size': self._pool.get_max_size(),
'idle_size': self._pool.get_idle_size(),
'is_closing': self._pool.is_closing()
}
@property
def is_initialized(self) -> bool:
"""Check if pool is initialized"""
return self._is_initialized
# Global connection pool instance
db_pool = DatabaseConnectionPool()
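
A minimal usage sketch for the pool, assuming `config.database` is populated; everything referenced here is defined in the file above:

import asyncio

from COBY.storage.connection_pool import db_pool


async def main():
    # Explicit initialize() surfaces configuration errors early,
    # though get_connection() would also trigger it lazily.
    await db_pool.initialize()

    # Convenience wrapper for a one-off query
    rows = await db_pool.execute_query("SELECT 1 AS ok")
    print(rows[0]['ok'])

    # Several statements under a single transaction
    async with db_pool.get_transaction() as conn:
        await conn.execute("SELECT 1")

    print(await db_pool.get_pool_stats())
    await db_pool.close()


asyncio.run(main())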

271
COBY/storage/migrations.py Normal file

@@ -0,0 +1,271 @@
"""
Database migration system for schema updates.
"""
from typing import List, Dict, Any
from datetime import datetime
from ..utils.logging import get_logger
from ..utils.exceptions import StorageError
from .connection_pool import db_pool
logger = get_logger(__name__)
class Migration:
"""Base class for database migrations"""
def __init__(self, version: str, description: str):
self.version = version
self.description = description
async def up(self) -> None:
"""Apply the migration"""
raise NotImplementedError
async def down(self) -> None:
"""Rollback the migration"""
raise NotImplementedError
class MigrationManager:
"""Manages database schema migrations"""
def __init__(self):
self.migrations: List[Migration] = []
def register_migration(self, migration: Migration) -> None:
"""Register a migration"""
self.migrations.append(migration)
# Sort by version
self.migrations.sort(key=lambda m: m.version)
async def initialize_migration_table(self) -> None:
"""Create migration tracking table"""
query = """
CREATE TABLE IF NOT EXISTS market_data.schema_migrations (
version VARCHAR(50) PRIMARY KEY,
description TEXT NOT NULL,
applied_at TIMESTAMPTZ DEFAULT NOW()
);
"""
await db_pool.execute_command(query)
logger.debug("Migration table initialized")
async def get_applied_migrations(self) -> List[str]:
"""Get list of applied migration versions"""
try:
query = "SELECT version FROM market_data.schema_migrations ORDER BY version"
rows = await db_pool.execute_query(query)
return [row['version'] for row in rows]
except Exception:
# Table might not exist yet
return []
async def apply_migration(self, migration: Migration) -> bool:
"""Apply a single migration"""
try:
logger.info(f"Applying migration {migration.version}: {migration.description}")
async with db_pool.get_transaction() as conn:
# Apply the migration
await migration.up()
# Record the migration
await conn.execute(
"INSERT INTO market_data.schema_migrations (version, description) VALUES ($1, $2)",
migration.version,
migration.description
)
logger.info(f"Migration {migration.version} applied successfully")
return True
except Exception as e:
logger.error(f"Failed to apply migration {migration.version}: {e}")
return False
async def rollback_migration(self, migration: Migration) -> bool:
"""Rollback a single migration"""
try:
logger.info(f"Rolling back migration {migration.version}: {migration.description}")
async with db_pool.get_transaction() as conn:
# Rollback the migration
await migration.down()
# Remove the migration record
await conn.execute(
"DELETE FROM market_data.schema_migrations WHERE version = $1",
migration.version
)
logger.info(f"Migration {migration.version} rolled back successfully")
return True
except Exception as e:
logger.error(f"Failed to rollback migration {migration.version}: {e}")
return False
async def migrate_up(self, target_version: str = None) -> bool:
"""Apply all pending migrations up to target version"""
try:
await self.initialize_migration_table()
applied_migrations = await self.get_applied_migrations()
pending_migrations = [
m for m in self.migrations
if m.version not in applied_migrations
]
if target_version:
pending_migrations = [
m for m in pending_migrations
if m.version <= target_version
]
if not pending_migrations:
logger.info("No pending migrations to apply")
return True
logger.info(f"Applying {len(pending_migrations)} pending migrations")
for migration in pending_migrations:
if not await self.apply_migration(migration):
return False
logger.info("All migrations applied successfully")
return True
except Exception as e:
logger.error(f"Migration failed: {e}")
return False
async def migrate_down(self, target_version: str) -> bool:
"""Rollback migrations down to target version"""
try:
applied_migrations = await self.get_applied_migrations()
migrations_to_rollback = [
m for m in reversed(self.migrations)
if m.version in applied_migrations and m.version > target_version
]
if not migrations_to_rollback:
logger.info("No migrations to rollback")
return True
logger.info(f"Rolling back {len(migrations_to_rollback)} migrations")
for migration in migrations_to_rollback:
if not await self.rollback_migration(migration):
return False
logger.info("All migrations rolled back successfully")
return True
except Exception as e:
logger.error(f"Migration rollback failed: {e}")
return False
async def get_migration_status(self) -> Dict[str, Any]:
"""Get current migration status"""
try:
applied_migrations = await self.get_applied_migrations()
status = {
'total_migrations': len(self.migrations),
'applied_migrations': len(applied_migrations),
'pending_migrations': len(self.migrations) - len(applied_migrations),
'current_version': applied_migrations[-1] if applied_migrations else None,
'latest_version': self.migrations[-1].version if self.migrations else None,
'migrations': []
}
for migration in self.migrations:
status['migrations'].append({
'version': migration.version,
'description': migration.description,
'applied': migration.version in applied_migrations
})
return status
except Exception as e:
logger.error(f"Failed to get migration status: {e}")
return {}
# Example migrations
class InitialSchemaMigration(Migration):
"""Initial schema creation migration"""
def __init__(self):
super().__init__("001", "Create initial schema and tables")
async def up(self) -> None:
"""Create initial schema"""
from .schema import DatabaseSchema
queries = DatabaseSchema.get_all_creation_queries()
for query in queries:
await db_pool.execute_command(query)
async def down(self) -> None:
"""Drop initial schema"""
# Drop tables in reverse order
tables = [
'system_metrics',
'exchange_status',
'ohlcv_data',
'heatmap_data',
'trade_events',
'order_book_snapshots'
]
for table in tables:
await db_pool.execute_command(f"DROP TABLE IF EXISTS market_data.{table} CASCADE")
class AddIndexesMigration(Migration):
"""Add performance indexes migration"""
def __init__(self):
super().__init__("002", "Add performance indexes")
async def up(self) -> None:
"""Add indexes"""
from .schema import DatabaseSchema
queries = DatabaseSchema.get_index_creation_queries()
for query in queries:
await db_pool.execute_command(query)
async def down(self) -> None:
"""Drop indexes"""
indexes = [
'idx_order_book_symbol_exchange',
'idx_order_book_timestamp',
'idx_trade_events_symbol_exchange',
'idx_trade_events_timestamp',
'idx_trade_events_price',
'idx_heatmap_symbol_bucket',
'idx_heatmap_timestamp',
'idx_ohlcv_symbol_timeframe',
'idx_ohlcv_timestamp',
'idx_exchange_status_exchange',
'idx_exchange_status_timestamp',
'idx_system_metrics_name',
'idx_system_metrics_timestamp'
]
for index in indexes:
await db_pool.execute_command(f"DROP INDEX IF EXISTS market_data.{index}")
# Global migration manager
migration_manager = MigrationManager()
# Register default migrations
migration_manager.register_migration(InitialSchemaMigration())
migration_manager.register_migration(AddIndexesMigration())
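
A sketch of a deploy-time entry point driving the manager (the wrapper function is hypothetical; the module itself only registers the two migrations above):

import asyncio

from COBY.storage.migrations import migration_manager


async def migrate():
    # Applies every migration not yet recorded in market_data.schema_migrations
    if not await migration_manager.migrate_up():
        raise SystemExit("migration failed")
    status = await migration_manager.get_migration_status()
    print(f"current version: {status['current_version']}, "
          f"pending: {status['pending_migrations']}")


asyncio.run(migrate())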

256
COBY/storage/schema.py Normal file

@@ -0,0 +1,256 @@
"""
Database schema management for TimescaleDB.
"""
from typing import List
from ..utils.logging import get_logger
logger = get_logger(__name__)
class DatabaseSchema:
"""Manages database schema creation and migrations"""
@staticmethod
def get_schema_creation_queries() -> List[str]:
"""Get list of queries to create the database schema"""
return [
# Create TimescaleDB extension
"CREATE EXTENSION IF NOT EXISTS timescaledb;",
# Create schema
"CREATE SCHEMA IF NOT EXISTS market_data;",
# Order book snapshots table
"""
CREATE TABLE IF NOT EXISTS market_data.order_book_snapshots (
id BIGSERIAL,
symbol VARCHAR(20) NOT NULL,
exchange VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
bids JSONB NOT NULL,
asks JSONB NOT NULL,
sequence_id BIGINT,
mid_price DECIMAL(20,8),
spread DECIMAL(20,8),
bid_volume DECIMAL(30,8),
ask_volume DECIMAL(30,8),
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, symbol, exchange)
);
""",
# Trade events table
"""
CREATE TABLE IF NOT EXISTS market_data.trade_events (
id BIGSERIAL,
symbol VARCHAR(20) NOT NULL,
exchange VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
price DECIMAL(20,8) NOT NULL,
size DECIMAL(30,8) NOT NULL,
side VARCHAR(4) NOT NULL,
trade_id VARCHAR(100) NOT NULL,
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, symbol, exchange, trade_id)
);
""",
# Aggregated heatmap data table
"""
CREATE TABLE IF NOT EXISTS market_data.heatmap_data (
symbol VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
bucket_size DECIMAL(10,2) NOT NULL,
price_bucket DECIMAL(20,8) NOT NULL,
volume DECIMAL(30,8) NOT NULL,
side VARCHAR(3) NOT NULL,
exchange_count INTEGER NOT NULL,
exchanges JSONB,
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, symbol, bucket_size, price_bucket, side)
);
""",
# OHLCV data table
"""
CREATE TABLE IF NOT EXISTS market_data.ohlcv_data (
symbol VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
timeframe VARCHAR(10) NOT NULL,
open_price DECIMAL(20,8) NOT NULL,
high_price DECIMAL(20,8) NOT NULL,
low_price DECIMAL(20,8) NOT NULL,
close_price DECIMAL(20,8) NOT NULL,
volume DECIMAL(30,8) NOT NULL,
trade_count INTEGER,
vwap DECIMAL(20,8),
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, symbol, timeframe)
);
""",
# Exchange status tracking table
"""
CREATE TABLE IF NOT EXISTS market_data.exchange_status (
exchange VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
status VARCHAR(20) NOT NULL,
last_message_time TIMESTAMPTZ,
error_message TEXT,
connection_count INTEGER DEFAULT 0,
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, exchange)
);
""",
# System metrics table
"""
CREATE TABLE IF NOT EXISTS market_data.system_metrics (
metric_name VARCHAR(50) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
value DECIMAL(20,8) NOT NULL,
labels JSONB,
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, metric_name)
);
"""
]
@staticmethod
def get_hypertable_creation_queries() -> List[str]:
"""Get queries to create hypertables"""
return [
"SELECT create_hypertable('market_data.order_book_snapshots', 'timestamp', if_not_exists => TRUE);",
"SELECT create_hypertable('market_data.trade_events', 'timestamp', if_not_exists => TRUE);",
"SELECT create_hypertable('market_data.heatmap_data', 'timestamp', if_not_exists => TRUE);",
"SELECT create_hypertable('market_data.ohlcv_data', 'timestamp', if_not_exists => TRUE);",
"SELECT create_hypertable('market_data.exchange_status', 'timestamp', if_not_exists => TRUE);",
"SELECT create_hypertable('market_data.system_metrics', 'timestamp', if_not_exists => TRUE);"
]
@staticmethod
def get_index_creation_queries() -> List[str]:
"""Get queries to create indexes"""
return [
# Order book indexes
"CREATE INDEX IF NOT EXISTS idx_order_book_symbol_exchange ON market_data.order_book_snapshots (symbol, exchange, timestamp DESC);",
"CREATE INDEX IF NOT EXISTS idx_order_book_timestamp ON market_data.order_book_snapshots (timestamp DESC);",
# Trade events indexes
"CREATE INDEX IF NOT EXISTS idx_trade_events_symbol_exchange ON market_data.trade_events (symbol, exchange, timestamp DESC);",
"CREATE INDEX IF NOT EXISTS idx_trade_events_timestamp ON market_data.trade_events (timestamp DESC);",
"CREATE INDEX IF NOT EXISTS idx_trade_events_price ON market_data.trade_events (symbol, price, timestamp DESC);",
# Heatmap data indexes
"CREATE INDEX IF NOT EXISTS idx_heatmap_symbol_bucket ON market_data.heatmap_data (symbol, bucket_size, timestamp DESC);",
"CREATE INDEX IF NOT EXISTS idx_heatmap_timestamp ON market_data.heatmap_data (timestamp DESC);",
# OHLCV data indexes
"CREATE INDEX IF NOT EXISTS idx_ohlcv_symbol_timeframe ON market_data.ohlcv_data (symbol, timeframe, timestamp DESC);",
"CREATE INDEX IF NOT EXISTS idx_ohlcv_timestamp ON market_data.ohlcv_data (timestamp DESC);",
# Exchange status indexes
"CREATE INDEX IF NOT EXISTS idx_exchange_status_exchange ON market_data.exchange_status (exchange, timestamp DESC);",
"CREATE INDEX IF NOT EXISTS idx_exchange_status_timestamp ON market_data.exchange_status (timestamp DESC);",
# System metrics indexes
"CREATE INDEX IF NOT EXISTS idx_system_metrics_name ON market_data.system_metrics (metric_name, timestamp DESC);",
"CREATE INDEX IF NOT EXISTS idx_system_metrics_timestamp ON market_data.system_metrics (timestamp DESC);"
]
@staticmethod
def get_retention_policy_queries() -> List[str]:
"""Get queries to create retention policies"""
return [
"SELECT add_retention_policy('market_data.order_book_snapshots', INTERVAL '90 days', if_not_exists => TRUE);",
"SELECT add_retention_policy('market_data.trade_events', INTERVAL '90 days', if_not_exists => TRUE);",
"SELECT add_retention_policy('market_data.heatmap_data', INTERVAL '90 days', if_not_exists => TRUE);",
"SELECT add_retention_policy('market_data.ohlcv_data', INTERVAL '365 days', if_not_exists => TRUE);",
"SELECT add_retention_policy('market_data.exchange_status', INTERVAL '30 days', if_not_exists => TRUE);",
"SELECT add_retention_policy('market_data.system_metrics', INTERVAL '30 days', if_not_exists => TRUE);"
]
@staticmethod
def get_continuous_aggregate_queries() -> List[str]:
"""Get queries to create continuous aggregates"""
return [
# Hourly OHLCV aggregate
"""
CREATE MATERIALIZED VIEW IF NOT EXISTS market_data.hourly_ohlcv
WITH (timescaledb.continuous) AS
SELECT
symbol,
exchange,
time_bucket('1 hour', timestamp) AS hour,
first(price, timestamp) AS open_price,
max(price) AS high_price,
min(price) AS low_price,
last(price, timestamp) AS close_price,
sum(size) AS volume,
count(*) AS trade_count,
avg(price) AS vwap
FROM market_data.trade_events
GROUP BY symbol, exchange, hour
WITH NO DATA;
""",
# Add refresh policy for continuous aggregate
"""
SELECT add_continuous_aggregate_policy('market_data.hourly_ohlcv',
start_offset => INTERVAL '3 hours',
end_offset => INTERVAL '1 hour',
schedule_interval => INTERVAL '1 hour',
if_not_exists => TRUE);
"""
]
@staticmethod
def get_view_creation_queries() -> List[str]:
"""Get queries to create views"""
return [
# Latest order books view
"""
CREATE OR REPLACE VIEW market_data.latest_order_books AS
SELECT DISTINCT ON (symbol, exchange)
symbol,
exchange,
timestamp,
bids,
asks,
mid_price,
spread,
bid_volume,
ask_volume
FROM market_data.order_book_snapshots
ORDER BY symbol, exchange, timestamp DESC;
""",
# Latest heatmaps view
"""
CREATE OR REPLACE VIEW market_data.latest_heatmaps AS
SELECT DISTINCT ON (symbol, bucket_size, price_bucket, side)
symbol,
bucket_size,
price_bucket,
side,
timestamp,
volume,
exchange_count,
exchanges
FROM market_data.heatmap_data
ORDER BY symbol, bucket_size, price_bucket, side, timestamp DESC;
"""
]
@staticmethod
def get_all_creation_queries() -> List[str]:
"""Get all schema creation queries in order"""
queries = []
queries.extend(DatabaseSchema.get_schema_creation_queries())
queries.extend(DatabaseSchema.get_hypertable_creation_queries())
queries.extend(DatabaseSchema.get_index_creation_queries())
queries.extend(DatabaseSchema.get_retention_policy_queries())
queries.extend(DatabaseSchema.get_continuous_aggregate_queries())
queries.extend(DatabaseSchema.get_view_creation_queries())
return queries
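
Once these creation queries have run, the hourly continuous aggregate reads like a regular table. A query sketch using the pool from connection_pool.py (the symbol literal is illustrative):

import asyncio

from COBY.storage.connection_pool import db_pool


async def show_hourly(symbol: str):
    rows = await db_pool.execute_query(
        """
        SELECT hour, open_price, high_price, low_price, close_price, volume, vwap
        FROM market_data.hourly_ohlcv
        WHERE symbol = $1
        ORDER BY hour DESC
        LIMIT 24
        """,
        symbol
    )
    for row in rows:
        print(dict(row))


asyncio.run(show_hourly('BTCUSDT'))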

604
COBY/storage/timescale_manager.py Normal file

@@ -0,0 +1,604 @@
"""
TimescaleDB storage manager implementation.
"""
import json
from datetime import datetime
from typing import List, Dict, Optional, Any
from ..interfaces.storage_manager import StorageManager
from ..models.core import OrderBookSnapshot, TradeEvent, HeatmapData, SystemMetrics, PriceLevel
from ..utils.logging import get_logger, set_correlation_id
from ..utils.exceptions import StorageError, ValidationError
from ..utils.timing import get_current_timestamp
from .connection_pool import db_pool
from .schema import DatabaseSchema
logger = get_logger(__name__)
class TimescaleManager(StorageManager):
"""TimescaleDB implementation of StorageManager interface"""
def __init__(self):
self._schema_initialized = False
async def initialize(self) -> None:
"""Initialize the storage manager"""
await db_pool.initialize()
await self.setup_database_schema()
logger.info("TimescaleDB storage manager initialized")
async def close(self) -> None:
"""Close the storage manager"""
await db_pool.close()
logger.info("TimescaleDB storage manager closed")
def setup_database_schema(self) -> None:
"""Set up database schema and tables"""
async def _setup():
if self._schema_initialized:
return
try:
queries = DatabaseSchema.get_all_creation_queries()
for query in queries:
try:
await db_pool.execute_command(query)
logger.debug(f"Executed schema query: {query[:50]}...")
except Exception as e:
# Log but continue - some queries might fail if already exists
logger.warning(f"Schema query failed (continuing): {e}")
self._schema_initialized = True
logger.info("Database schema setup completed")
except Exception as e:
logger.error(f"Failed to setup database schema: {e}")
raise StorageError(f"Schema setup failed: {e}", "SCHEMA_SETUP_ERROR")
# Run async setup
import asyncio
if asyncio.get_event_loop().is_running():
asyncio.create_task(_setup())
else:
asyncio.run(_setup())
async def store_orderbook(self, data: OrderBookSnapshot) -> bool:
"""Store order book snapshot to database"""
try:
set_correlation_id()
# Convert price levels to JSON
bids_json = json.dumps([
{"price": float(level.price), "size": float(level.size), "count": level.count}
for level in data.bids
])
asks_json = json.dumps([
{"price": float(level.price), "size": float(level.size), "count": level.count}
for level in data.asks
])
query = """
INSERT INTO market_data.order_book_snapshots
(symbol, exchange, timestamp, bids, asks, sequence_id, mid_price, spread, bid_volume, ask_volume)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
"""
await db_pool.execute_command(
query,
data.symbol,
data.exchange,
data.timestamp,
bids_json,
asks_json,
data.sequence_id,
float(data.mid_price) if data.mid_price else None,
float(data.spread) if data.spread else None,
float(data.bid_volume),
float(data.ask_volume)
)
logger.debug(f"Stored order book: {data.symbol}@{data.exchange}")
return True
except Exception as e:
logger.error(f"Failed to store order book: {e}")
return False
async def store_trade(self, data: TradeEvent) -> bool:
"""Store trade event to database"""
try:
set_correlation_id()
query = """
INSERT INTO market_data.trade_events
(symbol, exchange, timestamp, price, size, side, trade_id)
VALUES ($1, $2, $3, $4, $5, $6, $7)
"""
await db_pool.execute_command(
query,
data.symbol,
data.exchange,
data.timestamp,
float(data.price),
float(data.size),
data.side,
data.trade_id
)
logger.debug(f"Stored trade: {data.symbol}@{data.exchange} - {data.trade_id}")
return True
except Exception as e:
logger.error(f"Failed to store trade: {e}")
return False
async def store_heatmap(self, data: HeatmapData) -> bool:
"""Store heatmap data to database"""
try:
set_correlation_id()
# Store each heatmap point
for point in data.data:
query = """
INSERT INTO market_data.heatmap_data
(symbol, timestamp, bucket_size, price_bucket, volume, side, exchange_count, exchanges)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
ON CONFLICT (timestamp, symbol, bucket_size, price_bucket, side)
DO UPDATE SET
volume = EXCLUDED.volume,
exchange_count = EXCLUDED.exchange_count,
exchanges = EXCLUDED.exchanges
"""
await db_pool.execute_command(
query,
data.symbol,
data.timestamp,
float(data.bucket_size),
float(point.price),
float(point.volume),
point.side,
1, # exchange_count - will be updated by aggregation
json.dumps([]) # exchanges - will be updated by aggregation
)
logger.debug(f"Stored heatmap: {data.symbol} with {len(data.data)} points")
return True
except Exception as e:
logger.error(f"Failed to store heatmap: {e}")
return False
async def store_metrics(self, data: SystemMetrics) -> bool:
"""Store system metrics to database"""
try:
set_correlation_id()
# Store multiple metrics
metrics = [
('cpu_usage', data.cpu_usage),
('memory_usage', data.memory_usage),
('disk_usage', data.disk_usage),
('database_connections', data.database_connections),
('redis_connections', data.redis_connections),
('active_websockets', data.active_websockets),
('messages_per_second', data.messages_per_second),
('processing_latency', data.processing_latency)
]
query = """
INSERT INTO market_data.system_metrics
(metric_name, timestamp, value, labels)
VALUES ($1, $2, $3, $4)
"""
for metric_name, value in metrics:
await db_pool.execute_command(
query,
metric_name,
data.timestamp,
float(value),
json.dumps(data.network_io)
)
logger.debug("Stored system metrics")
return True
except Exception as e:
logger.error(f"Failed to store metrics: {e}")
return False
async def get_historical_orderbooks(self, symbol: str, exchange: str,
start: datetime, end: datetime,
limit: Optional[int] = None) -> List[OrderBookSnapshot]:
"""Retrieve historical order book data"""
try:
query = """
SELECT symbol, exchange, timestamp, bids, asks, sequence_id, mid_price, spread
FROM market_data.order_book_snapshots
WHERE symbol = $1 AND exchange = $2 AND timestamp >= $3 AND timestamp <= $4
ORDER BY timestamp DESC
"""
if limit:
query += f" LIMIT {limit}"
rows = await db_pool.execute_query(query, symbol, exchange, start, end)
orderbooks = []
for row in rows:
# Parse JSON bid/ask data
bids_data = json.loads(row['bids'])
asks_data = json.loads(row['asks'])
bids = [PriceLevel(price=b['price'], size=b['size'], count=b.get('count'))
for b in bids_data]
asks = [PriceLevel(price=a['price'], size=a['size'], count=a.get('count'))
for a in asks_data]
orderbook = OrderBookSnapshot(
symbol=row['symbol'],
exchange=row['exchange'],
timestamp=row['timestamp'],
bids=bids,
asks=asks,
sequence_id=row['sequence_id']
)
orderbooks.append(orderbook)
logger.debug(f"Retrieved {len(orderbooks)} historical order books")
return orderbooks
except Exception as e:
logger.error(f"Failed to get historical order books: {e}")
return []
async def get_historical_trades(self, symbol: str, exchange: str,
start: datetime, end: datetime,
limit: Optional[int] = None) -> List[TradeEvent]:
"""Retrieve historical trade data"""
try:
query = """
SELECT symbol, exchange, timestamp, price, size, side, trade_id
FROM market_data.trade_events
WHERE symbol = $1 AND exchange = $2 AND timestamp >= $3 AND timestamp <= $4
ORDER BY timestamp DESC
"""
if limit:
query += f" LIMIT {limit}"
rows = await db_pool.execute_query(query, symbol, exchange, start, end)
trades = []
for row in rows:
trade = TradeEvent(
symbol=row['symbol'],
exchange=row['exchange'],
timestamp=row['timestamp'],
price=float(row['price']),
size=float(row['size']),
side=row['side'],
trade_id=row['trade_id']
)
trades.append(trade)
logger.debug(f"Retrieved {len(trades)} historical trades")
return trades
except Exception as e:
logger.error(f"Failed to get historical trades: {e}")
return []
async def get_latest_orderbook(self, symbol: str, exchange: str) -> Optional[OrderBookSnapshot]:
"""Get latest order book snapshot"""
try:
query = """
SELECT symbol, exchange, timestamp, bids, asks, sequence_id
FROM market_data.order_book_snapshots
WHERE symbol = $1 AND exchange = $2
ORDER BY timestamp DESC
LIMIT 1
"""
rows = await db_pool.execute_query(query, symbol, exchange)
if not rows:
return None
row = rows[0]
bids_data = json.loads(row['bids'])
asks_data = json.loads(row['asks'])
bids = [PriceLevel(price=b['price'], size=b['size'], count=b.get('count'))
for b in bids_data]
asks = [PriceLevel(price=a['price'], size=a['size'], count=a.get('count'))
for a in asks_data]
return OrderBookSnapshot(
symbol=row['symbol'],
exchange=row['exchange'],
timestamp=row['timestamp'],
bids=bids,
asks=asks,
sequence_id=row['sequence_id']
)
except Exception as e:
logger.error(f"Failed to get latest order book: {e}")
return None
async def get_latest_heatmap(self, symbol: str, bucket_size: float) -> Optional[HeatmapData]:
"""Get latest heatmap data"""
try:
query = """
SELECT price_bucket, volume, side, timestamp
FROM market_data.heatmap_data
WHERE symbol = $1 AND bucket_size = $2
AND timestamp = (
SELECT MAX(timestamp)
FROM market_data.heatmap_data
WHERE symbol = $1 AND bucket_size = $2
)
ORDER BY price_bucket
"""
rows = await db_pool.execute_query(query, symbol, bucket_size)
if not rows:
return None
from ..models.core import HeatmapPoint
heatmap = HeatmapData(
symbol=symbol,
timestamp=rows[0]['timestamp'],
bucket_size=bucket_size
)
# Calculate max volume for intensity
max_volume = max(float(row['volume']) for row in rows)
for row in rows:
volume = float(row['volume'])
intensity = volume / max_volume if max_volume > 0 else 0.0
point = HeatmapPoint(
price=float(row['price_bucket']),
volume=volume,
intensity=intensity,
side=row['side']
)
heatmap.data.append(point)
return heatmap
except Exception as e:
logger.error(f"Failed to get latest heatmap: {e}")
return None
async def get_ohlcv_data(self, symbol: str, exchange: str, timeframe: str,
start: datetime, end: datetime) -> List[Dict[str, Any]]:
"""Get OHLCV candlestick data"""
try:
query = """
SELECT timestamp, open_price, high_price, low_price, close_price, volume, trade_count, vwap
FROM market_data.ohlcv_data
WHERE symbol = $1 AND exchange = $2 AND timeframe = $3
AND timestamp >= $4 AND timestamp <= $5
ORDER BY timestamp
"""
rows = await db_pool.execute_query(query, symbol, exchange, timeframe, start, end)
ohlcv_data = []
for row in rows:
ohlcv_data.append({
'timestamp': row['timestamp'],
'open': float(row['open_price']),
'high': float(row['high_price']),
'low': float(row['low_price']),
'close': float(row['close_price']),
'volume': float(row['volume']),
'trade_count': row['trade_count'],
'vwap': float(row['vwap']) if row['vwap'] else None
})
logger.debug(f"Retrieved {len(ohlcv_data)} OHLCV records")
return ohlcv_data
except Exception as e:
logger.error(f"Failed to get OHLCV data: {e}")
return []
async def batch_store_orderbooks(self, data: List[OrderBookSnapshot]) -> int:
"""Store multiple order book snapshots in batch"""
if not data:
return 0
try:
set_correlation_id()
# Prepare batch data
batch_data = []
for orderbook in data:
bids_json = json.dumps([
{"price": float(level.price), "size": float(level.size), "count": level.count}
for level in orderbook.bids
])
asks_json = json.dumps([
{"price": float(level.price), "size": float(level.size), "count": level.count}
for level in orderbook.asks
])
batch_data.append((
orderbook.symbol,
orderbook.exchange,
orderbook.timestamp,
bids_json,
asks_json,
orderbook.sequence_id,
float(orderbook.mid_price) if orderbook.mid_price else None,
float(orderbook.spread) if orderbook.spread else None,
float(orderbook.bid_volume),
float(orderbook.ask_volume)
))
query = """
INSERT INTO market_data.order_book_snapshots
(symbol, exchange, timestamp, bids, asks, sequence_id, mid_price, spread, bid_volume, ask_volume)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
"""
await db_pool.execute_many(query, batch_data)
logger.debug(f"Batch stored {len(data)} order books")
return len(data)
except Exception as e:
logger.error(f"Failed to batch store order books: {e}")
return 0
async def batch_store_trades(self, data: List[TradeEvent]) -> int:
"""Store multiple trade events in batch"""
if not data:
return 0
try:
set_correlation_id()
# Prepare batch data
batch_data = [
(trade.symbol, trade.exchange, trade.timestamp, float(trade.price),
float(trade.size), trade.side, trade.trade_id)
for trade in data
]
query = """
INSERT INTO market_data.trade_events
(symbol, exchange, timestamp, price, size, side, trade_id)
VALUES ($1, $2, $3, $4, $5, $6, $7)
"""
await db_pool.execute_many(query, batch_data)
logger.debug(f"Batch stored {len(data)} trades")
return len(data)
except Exception as e:
logger.error(f"Failed to batch store trades: {e}")
return 0
async def cleanup_old_data(self, retention_days: int) -> int:
"""Clean up old data based on retention policy"""
try:
cutoff_time = get_current_timestamp().replace(
day=get_current_timestamp().day - retention_days
)
tables = [
'order_book_snapshots',
'trade_events',
'heatmap_data',
'exchange_status',
'system_metrics'
]
total_deleted = 0
for table in tables:
query = f"""
DELETE FROM market_data.{table}
WHERE timestamp < $1
"""
result = await db_pool.execute_command(query, cutoff_time)
# Extract number from result like "DELETE 1234"
deleted = int(result.split()[-1]) if result.split()[-1].isdigit() else 0
total_deleted += deleted
logger.debug(f"Cleaned up {deleted} records from {table}")
logger.info(f"Cleaned up {total_deleted} total records older than {retention_days} days")
return total_deleted
except Exception as e:
logger.error(f"Failed to cleanup old data: {e}")
return 0
async def get_storage_stats(self) -> Dict[str, Any]:
"""Get storage statistics"""
try:
stats = {}
# Table sizes
size_query = """
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size,
pg_total_relation_size(schemaname||'.'||tablename) as size_bytes
FROM pg_tables
WHERE schemaname = 'market_data'
ORDER BY size_bytes DESC
"""
size_rows = await db_pool.execute_query(size_query)
stats['table_sizes'] = [
{
'table': row['tablename'],
'size': row['size'],
'size_bytes': row['size_bytes']
}
for row in size_rows
]
# Record counts
tables = ['order_book_snapshots', 'trade_events', 'heatmap_data',
'ohlcv_data', 'exchange_status', 'system_metrics']
record_counts = {}
for table in tables:
count_query = f"SELECT COUNT(*) as count FROM market_data.{table}"
count_rows = await db_pool.execute_query(count_query)
record_counts[table] = count_rows[0]['count'] if count_rows else 0
stats['record_counts'] = record_counts
# Connection pool stats
stats['connection_pool'] = await db_pool.get_pool_stats()
return stats
except Exception as e:
logger.error(f"Failed to get storage stats: {e}")
return {}
async def health_check(self) -> bool:
"""Check storage system health"""
try:
# Check database connection
if not await db_pool.health_check():
return False
# Check if tables exist
query = """
SELECT COUNT(*) as count
FROM information_schema.tables
WHERE table_schema = 'market_data'
"""
rows = await db_pool.execute_query(query)
table_count = rows[0]['count'] if rows else 0
if table_count < 6: # We expect 6 main tables
logger.warning(f"Expected 6 tables, found {table_count}")
return False
logger.debug("Storage health check passed")
return True
except Exception as e:
logger.error(f"Storage health check failed: {e}")
return False
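
End to end, the manager is exercised roughly as below. The OrderBookSnapshot construction is a sketch: the model lives in COBY/models/core.py and presumably derives the mid_price, spread, and bid/ask volumes that store_orderbook reads.

import asyncio
from datetime import datetime, timedelta, timezone

from COBY.storage import TimescaleManager
from COBY.models.core import OrderBookSnapshot, PriceLevel


async def main():
    manager = TimescaleManager()
    await manager.initialize()  # connects the pool and runs schema setup

    # Hypothetical snapshot; field values are illustrative only
    snapshot = OrderBookSnapshot(
        symbol='BTCUSDT',
        exchange='binance',
        timestamp=datetime.now(timezone.utc),
        bids=[PriceLevel(price=50000.0, size=1.5, count=3)],
        asks=[PriceLevel(price=50001.0, size=2.0, count=5)],
        sequence_id=1,
    )
    await manager.store_orderbook(snapshot)

    now = datetime.now(timezone.utc)
    latest = await manager.get_latest_orderbook('BTCUSDT', 'binance')
    history = await manager.get_historical_orderbooks(
        'BTCUSDT', 'binance', start=now - timedelta(hours=1), end=now, limit=100
    )
    print(latest, len(history))
    await manager.close()


asyncio.run(main())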