wip wip wip
This commit is contained in:
371
core/timescale_storage.py
Normal file
371
core/timescale_storage.py
Normal file
@@ -0,0 +1,371 @@
|
||||
"""
|
||||
TimescaleDB Storage for OHLCV Candle Data
|
||||
|
||||
Provides long-term storage for all candle data without limits.
|
||||
Replaces capped deques with unlimited database storage.
|
||||
|
||||
CRITICAL POLICY: NO SYNTHETIC DATA ALLOWED
|
||||
This module MUST ONLY store real market data from exchanges.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import pandas as pd
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, List
|
||||
import psycopg2
|
||||
from psycopg2.extras import execute_values
|
||||
from contextlib import contextmanager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TimescaleDBStorage:
    """
    TimescaleDB storage for OHLCV candle data

    Features:
    - Unlimited storage (no caps)
    - Fast time-range queries
    - Automatic compression
    - Multi-symbol, multi-timeframe support

    Every public method opens its own short-lived connection through
    ``get_connection``; no connection is kept on the instance.
    """

    # Fallback DSN used when the caller does not supply one.
    # NOTE(review): this embeds a default password; real deployments should
    # always pass an explicit connection string.
    DEFAULT_CONNECTION_STRING = \
        "postgresql://postgres:password@localhost:5432/trading_data"

    # Price/volume columns, in the order they are written to ohlcv_candles.
    _VALUE_COLUMNS = ('open', 'high', 'low', 'close', 'volume')

    def __init__(self, connection_string: Optional[str] = None):
        """
        Initialize TimescaleDB storage and verify the database is reachable

        Args:
            connection_string: PostgreSQL connection string
                Default: postgresql://postgres:password@localhost:5432/trading_data

        Raises:
            Exception: whatever the connection attempt raised; it is logged
                and then re-raised unchanged.
        """
        self.connection_string = connection_string or self.DEFAULT_CONNECTION_STRING

        # Test connection
        try:
            with self.get_connection() as conn:
                with conn.cursor() as cur:
                    cur.execute("SELECT version();")
                    version = cur.fetchone()
                    logger.info(f"Connected to TimescaleDB: {version[0]}")
        except Exception as e:
            logger.error(f"Failed to connect to TimescaleDB: {e}")
            logger.warning("TimescaleDB storage will not be available")
            raise

    @contextmanager
    def get_connection(self):
        """
        Get database connection with automatic cleanup

        Yields a new psycopg2 connection. The transaction is committed when
        the ``with`` body finishes normally and rolled back when it raises;
        the connection is closed in both cases.
        """
        conn = psycopg2.connect(self.connection_string)
        try:
            yield conn
            conn.commit()
        except Exception:
            conn.rollback()
            # Bare raise keeps the original traceback intact.
            raise
        finally:
            conn.close()

    @staticmethod
    def _run_optional_step(cur, sql: str, success_message: str,
                           failure_message: str) -> bool:
        """
        Run a best-effort statement without poisoning the open transaction

        PostgreSQL aborts the whole transaction when a statement fails, so a
        plain try/except around ``cur.execute`` would make every later
        statement fail and the final commit would silently roll back. The
        savepoint confines a failure to this single step.

        Returns:
            bool: True when the statement succeeded, False when it failed
            and was rolled back to the savepoint.
        """
        cur.execute("SAVEPOINT optional_step")
        try:
            cur.execute(sql)
        except psycopg2.Error as e:
            cur.execute("ROLLBACK TO SAVEPOINT optional_step")
            logger.debug(f"{failure_message}: {e}")
            return False
        cur.execute("RELEASE SAVEPOINT optional_step")
        logger.info(success_message)
        return True

    def create_tables(self):
        """
        Create TimescaleDB tables and hypertables

        The extension, the ``ohlcv_candles`` table and its index are
        mandatory: a failure there propagates to the caller. Hypertable
        conversion, compression settings and the compression policy are
        best-effort steps whose failures are logged at debug level.
        """
        with self.get_connection() as conn:
            with conn.cursor() as cur:
                # Create extension if not exists
                cur.execute("CREATE EXTENSION IF NOT EXISTS timescaledb;")

                # Create ohlcv_candles table
                cur.execute("""
                    CREATE TABLE IF NOT EXISTS ohlcv_candles (
                        time TIMESTAMPTZ NOT NULL,
                        symbol TEXT NOT NULL,
                        timeframe TEXT NOT NULL,
                        open DOUBLE PRECISION NOT NULL,
                        high DOUBLE PRECISION NOT NULL,
                        low DOUBLE PRECISION NOT NULL,
                        close DOUBLE PRECISION NOT NULL,
                        volume DOUBLE PRECISION NOT NULL,
                        PRIMARY KEY (time, symbol, timeframe)
                    );
                """)

                # Convert to hypertable (if not already)
                self._run_optional_step(
                    cur,
                    """
                    SELECT create_hypertable('ohlcv_candles', 'time',
                        if_not_exists => TRUE);
                    """,
                    "Created hypertable: ohlcv_candles",
                    "Hypertable may already exist",
                )

                # Create indexes for fast queries
                cur.execute("""
                    CREATE INDEX IF NOT EXISTS idx_symbol_timeframe_time
                    ON ohlcv_candles (symbol, timeframe, time DESC);
                """)

                # Enable compression (saves 10-20x space)
                self._run_optional_step(
                    cur,
                    """
                    ALTER TABLE ohlcv_candles SET (
                        timescaledb.compress,
                        timescaledb.compress_segmentby = 'symbol,timeframe'
                    );
                    """,
                    "Enabled compression on ohlcv_candles",
                    "Compression may already be enabled",
                )

                # Add compression policy (compress data older than 7 days).
                # if_not_exists makes repeated start-ups a no-op, not an error.
                self._run_optional_step(
                    cur,
                    """
                    SELECT add_compression_policy('ohlcv_candles', INTERVAL '7 days',
                        if_not_exists => TRUE);
                    """,
                    "Added compression policy (7 days)",
                    "Compression policy may already exist",
                )

        # Logged only after get_connection() has committed the transaction.
        logger.info("TimescaleDB tables created successfully")

    @staticmethod
    def _frame_to_rows(symbol: str, timeframe: str, df: pd.DataFrame) -> list:
        """
        Convert an OHLCV DataFrame into row tuples for insertion

        Returns one ``(time, symbol, timeframe, open, high, low, close,
        volume)`` tuple per DataFrame row, with the five values converted to
        Python floats and ``time`` taken from the index.

        Raises:
            KeyError: if one of the OHLCV columns is missing.
        """
        # Column-wise conversion avoids the per-row Series that iterrows() builds.
        columns = [
            df[name].astype(float).tolist()
            for name in TimescaleDBStorage._VALUE_COLUMNS
        ]
        return [
            (timestamp, symbol, timeframe, *values)
            for timestamp, *values in zip(df.index, *columns)
        ]

    def store_candles(self, symbol: str, timeframe: str, df: pd.DataFrame):
        """
        Store OHLCV candles in TimescaleDB

        Args:
            symbol: Trading symbol (e.g., 'ETH/USDT')
            timeframe: Timeframe (e.g., '1s', '1m', '1h', '1d')
            df: DataFrame with columns: open, high, low, close, volume
                Index must be DatetimeIndex (timestamps)

        Returns:
            int: Number of candles submitted for storage. Rows skipped by
            ON CONFLICT DO NOTHING because they already exist are still
            counted. Returns 0 for empty input or when any error occurs
            (the error is logged, not raised).
        """
        if df is None or df.empty:
            logger.warning(f"No data to store for {symbol} {timeframe}")
            return 0

        try:
            # Prepare data for insertion
            rows = self._frame_to_rows(symbol, timeframe, df)

            # Insert data (ON CONFLICT DO NOTHING to avoid duplicates)
            with self.get_connection() as conn:
                with conn.cursor() as cur:
                    execute_values(
                        cur,
                        """
                        INSERT INTO ohlcv_candles
                            (time, symbol, timeframe, open, high, low, close, volume)
                        VALUES %s
                        ON CONFLICT (time, symbol, timeframe) DO NOTHING
                        """,
                        rows
                    )

            logger.info(f"Stored {len(rows)} candles for {symbol} {timeframe}")
            return len(rows)

        except Exception as e:
            logger.error(f"Error storing candles for {symbol} {timeframe}: {e}")
            return 0

    @staticmethod
    def _build_candles_query(symbol: str, timeframe: str,
                             start_time: Optional[datetime] = None,
                             end_time: Optional[datetime] = None,
                             limit: Optional[int] = None):
        """
        Build the parameterized SELECT used by get_candles

        Returns:
            tuple: ``(query, params)`` where ``params`` lines up with the
            ``%s`` placeholders in ``query``.
        """
        query = (
            "SELECT time, open, high, low, close, volume "
            "FROM ohlcv_candles "
            "WHERE symbol = %s AND timeframe = %s"
        )
        params = [symbol, timeframe]

        # Add time range filter
        if start_time is not None:
            query += " AND time >= %s"
            params.append(start_time)
        if end_time is not None:
            query += " AND time <= %s"
            params.append(end_time)

        # Newest first so that LIMIT keeps the most recent candles
        query += " ORDER BY time DESC"

        # Explicit None check: limit=0 must yield zero rows, not the full history
        if limit is not None:
            query += " LIMIT %s"
            params.append(limit)

        return query, params

    def get_candles(self, symbol: str, timeframe: str,
                    start_time: datetime = None, end_time: datetime = None,
                    limit: int = None) -> Optional[pd.DataFrame]:
        """
        Retrieve OHLCV candles from TimescaleDB

        Args:
            symbol: Trading symbol
            timeframe: Timeframe
            start_time: Start of time range, inclusive (optional)
            end_time: End of time range, inclusive (optional)
            limit: Maximum number of candles to return (optional); when
                given, the most recent candles in the range are kept

        Returns:
            DataFrame with OHLCV data, indexed by timestamp and sorted
            oldest first, or None if the query failed (the error is logged).
        """
        try:
            query, params = self._build_candles_query(
                symbol, timeframe, start_time, end_time, limit
            )

            # Execute query
            with self.get_connection() as conn:
                df = pd.read_sql(query, conn, params=params, index_col='time')

            # Sort by time ascending (oldest first)
            if not df.empty:
                df = df.sort_index()

            logger.debug(f"Retrieved {len(df)} candles for {symbol} {timeframe}")
            return df

        except Exception as e:
            logger.error(f"Error retrieving candles for {symbol} {timeframe}: {e}")
            return None

    def get_recent_candles(self, symbol: str, timeframe: str,
                           limit: int = 1000) -> Optional[pd.DataFrame]:
        """
        Get most recent candles

        Args:
            symbol: Trading symbol
            timeframe: Timeframe
            limit: Number of recent candles to retrieve

        Returns:
            DataFrame with recent OHLCV data (oldest first), or None if the
            query failed
        """
        return self.get_candles(symbol, timeframe, limit=limit)

    def get_candles_count(self, symbol: str = None, timeframe: str = None) -> int:
        """
        Get count of stored candles

        Args:
            symbol: Optional symbol filter
            timeframe: Optional timeframe filter

        Returns:
            Number of candles stored, or 0 if the query failed (the error
            is logged)
        """
        try:
            query = "SELECT COUNT(*) FROM ohlcv_candles WHERE 1=1"
            params = []

            if symbol:
                query += " AND symbol = %s"
                params.append(symbol)
            if timeframe:
                query += " AND timeframe = %s"
                params.append(timeframe)

            with self.get_connection() as conn:
                with conn.cursor() as cur:
                    cur.execute(query, params)
                    return cur.fetchone()[0]

        except Exception as e:
            logger.error(f"Error getting candles count: {e}")
            return 0

    def get_storage_stats(self) -> dict:
        """
        Get storage statistics

        Returns:
            Dictionary with keys ``total_candles``, ``by_symbol``,
            ``by_timeframe``, ``oldest_candle``, ``newest_candle`` and
            ``table_size``; an empty dict if any query failed (the error is
            logged).
        """
        try:
            with self.get_connection() as conn:
                with conn.cursor() as cur:
                    # Total candles
                    cur.execute("SELECT COUNT(*) FROM ohlcv_candles")
                    total_candles = cur.fetchone()[0]

                    # Candles by symbol
                    cur.execute("""
                        SELECT symbol, COUNT(*) as count
                        FROM ohlcv_candles
                        GROUP BY symbol
                        ORDER BY count DESC
                    """)
                    by_symbol = dict(cur.fetchall())

                    # Candles by timeframe
                    cur.execute("""
                        SELECT timeframe, COUNT(*) as count
                        FROM ohlcv_candles
                        GROUP BY timeframe
                        ORDER BY count DESC
                    """)
                    by_timeframe = dict(cur.fetchall())

                    # Time range
                    cur.execute("""
                        SELECT MIN(time) as oldest, MAX(time) as newest
                        FROM ohlcv_candles
                    """)
                    oldest, newest = cur.fetchone()

                    # Table size
                    cur.execute("""
                        SELECT pg_size_pretty(pg_total_relation_size('ohlcv_candles'))
                    """)
                    table_size = cur.fetchone()[0]

            return {
                'total_candles': total_candles,
                'by_symbol': by_symbol,
                'by_timeframe': by_timeframe,
                'oldest_candle': oldest,
                'newest_candle': newest,
                'table_size': table_size
            }

        except Exception as e:
            logger.error(f"Error getting storage stats: {e}")
            return {}
|
||||
|
||||
|
||||
# Process-wide storage instance, created on first successful request
_timescale_storage = None


def get_timescale_storage(connection_string: str = None) -> Optional[TimescaleDBStorage]:
    """
    Return the shared TimescaleDB storage instance, creating it on demand

    On the first call (and on every call after a failed attempt) a
    TimescaleDBStorage is constructed and its tables are created. Once an
    instance exists it is returned as-is and ``connection_string`` is not
    consulted again.

    Args:
        connection_string: PostgreSQL connection string (optional)

    Returns:
        TimescaleDBStorage instance or None if unavailable
    """
    global _timescale_storage

    # Fast path: already initialized
    if _timescale_storage is not None:
        return _timescale_storage

    try:
        _timescale_storage = TimescaleDBStorage(connection_string)
        _timescale_storage.create_tables()
    except Exception as exc:
        logger.warning(f"TimescaleDB storage not available: {exc}")
        # Drop any half-initialized instance so the next call retries
        _timescale_storage = None
    else:
        logger.info("TimescaleDB storage initialized successfully")

    return _timescale_storage
|
||||
Reference in New Issue
Block a user