unified cache. LLM report
This commit is contained in:
@@ -0,0 +1,432 @@
|
||||
"""
|
||||
Unified Data Models for the storage system.
|
||||
Standardized data structures for all components.
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
import json
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class InferenceDataFrame:
    """
    Complete inference data for a single timestamp.

    Bundles everything needed for one model inference call: multi-timeframe
    OHLCV frames, order book data, imbalance metrics and technical
    indicators for a single symbol.
    """
    symbol: str
    timestamp: datetime

    # Multi-timeframe OHLCV data
    ohlcv_1s: pd.DataFrame = field(default_factory=pd.DataFrame)
    ohlcv_1m: pd.DataFrame = field(default_factory=pd.DataFrame)
    ohlcv_5m: pd.DataFrame = field(default_factory=pd.DataFrame)
    ohlcv_15m: pd.DataFrame = field(default_factory=pd.DataFrame)
    ohlcv_1h: pd.DataFrame = field(default_factory=pd.DataFrame)
    ohlcv_1d: pd.DataFrame = field(default_factory=pd.DataFrame)

    # Order book data
    orderbook_snapshot: Optional[Dict] = None
    orderbook_1s_agg: pd.DataFrame = field(default_factory=pd.DataFrame)

    # Imbalance metrics (multi-timeframe)
    imbalances: pd.DataFrame = field(default_factory=pd.DataFrame)

    # Technical indicators (pre-calculated from latest candle)
    indicators: Dict[str, float] = field(default_factory=dict)

    # Context window data (±N minutes around timestamp)
    context_data: Optional[pd.DataFrame] = None

    # Metadata
    data_source: str = 'unknown'  # 'cache' or 'database'
    query_latency_ms: float = 0.0

    # Supported timeframes, in serialization order. Each one maps to the
    # field named ``ohlcv_<timeframe>``. Unannotated on purpose so the
    # dataclass machinery does not treat it as a field.
    _TIMEFRAMES = ('1s', '1m', '5m', '15m', '1h', '1d')

    @staticmethod
    def _frame_to_records(frame: Optional[pd.DataFrame]) -> List[Dict[str, Any]]:
        """Return ``frame`` as a list of row dicts, or ``[]`` if None/empty."""
        if frame is None or frame.empty:
            return []
        return frame.to_dict('records')

    @staticmethod
    def _parse_timestamp(value: Any) -> datetime:
        """Coerce a serialized timestamp into a ``datetime``.

        ``datetime`` instances pass through unchanged, non-empty strings are
        parsed with ``datetime.fromisoformat`` and a missing/empty value
        falls back to ``datetime.now()``.
        """
        if isinstance(value, datetime):
            return value
        if value:
            return datetime.fromisoformat(value)
        return datetime.now()

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization.

        DataFrames become lists of row dicts (``[]`` when empty) and the
        timestamp becomes an ISO-8601 string. ``context_data`` is ``None``
        when it is absent or empty.
        """
        result: Dict[str, Any] = {
            'symbol': self.symbol,
            'timestamp': self.timestamp.isoformat() if self.timestamp else None,
        }
        for timeframe in self._TIMEFRAMES:
            key = f'ohlcv_{timeframe}'
            result[key] = self._frame_to_records(getattr(self, key))

        result['orderbook_snapshot'] = self.orderbook_snapshot
        result['orderbook_1s_agg'] = self._frame_to_records(self.orderbook_1s_agg)
        result['imbalances'] = self._frame_to_records(self.imbalances)
        result['indicators'] = self.indicators
        # An empty record list collapses to None, as for a missing frame.
        result['context_data'] = self._frame_to_records(self.context_data) or None
        result['data_source'] = self.data_source
        result['query_latency_ms'] = self.query_latency_ms
        return result

    def to_json(self) -> str:
        """Convert to JSON string.

        Values json cannot encode natively are stringified via ``str``.
        """
        return json.dumps(self.to_dict(), default=str)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'InferenceDataFrame':
        """Create from dictionary.

        Args:
            data: Mapping shaped like the output of ``to_dict``. Only
                ``symbol`` is required. ``timestamp`` may be an ISO-8601
                string or a ``datetime``; when missing, the current time
                is used.

        Raises:
            KeyError: If ``symbol`` is missing.
        """
        frames = {
            f'ohlcv_{timeframe}': pd.DataFrame(data.get(f'ohlcv_{timeframe}', []))
            for timeframe in cls._TIMEFRAMES
        }
        context_records = data.get('context_data')
        return cls(
            symbol=data['symbol'],
            timestamp=cls._parse_timestamp(data.get('timestamp')),
            orderbook_snapshot=data.get('orderbook_snapshot'),
            orderbook_1s_agg=pd.DataFrame(data.get('orderbook_1s_agg', [])),
            imbalances=pd.DataFrame(data.get('imbalances', [])),
            # An explicit None would break len(self.indicators) later on.
            indicators=data.get('indicators') or {},
            context_data=pd.DataFrame(context_records) if context_records else None,
            data_source=data.get('data_source', 'unknown'),
            query_latency_ms=data.get('query_latency_ms', 0.0),
            **frames,
        )

    def get_latest_price(self) -> Optional[float]:
        """Get the latest close price from 1s data.

        Returns:
            The ``close_price`` of the last 1s row, or ``None`` when the
            frame is empty, has no ``close_price`` column, or the value is
            missing (NaN).
        """
        frame = self.ohlcv_1s
        if frame.empty or 'close_price' not in frame.columns:
            return None
        latest = frame['close_price'].iloc[-1]
        if pd.isna(latest):
            return None
        return float(latest)

    def get_timeframe_data(self, timeframe: str) -> pd.DataFrame:
        """Get OHLCV data for a specific timeframe.

        Returns the stored frame for '1s', '1m', '5m', '15m', '1h' or '1d';
        any other value yields a new empty DataFrame.
        """
        if timeframe in self._TIMEFRAMES:
            return getattr(self, f'ohlcv_{timeframe}')
        return pd.DataFrame()

    def has_complete_data(self) -> bool:
        """Check if all required data is present.

        Required: non-empty 1s and 1m OHLCV, non-empty imbalances and an
        order book snapshot.
        """
        return (
            not self.ohlcv_1s.empty
            and not self.ohlcv_1m.empty
            and not self.imbalances.empty
            and self.orderbook_snapshot is not None
        )

    def get_data_summary(self) -> Dict[str, Any]:
        """Get summary of available data (row counts and presence flags)."""
        summary: Dict[str, Any] = {
            'symbol': self.symbol,
            'timestamp': self.timestamp.isoformat() if self.timestamp else None,
        }
        for timeframe in self._TIMEFRAMES:
            summary[f'ohlcv_{timeframe}_rows'] = len(getattr(self, f'ohlcv_{timeframe}'))

        summary['has_orderbook'] = self.orderbook_snapshot is not None
        summary['orderbook_1s_agg_rows'] = len(self.orderbook_1s_agg)
        summary['imbalances_rows'] = len(self.imbalances)
        summary['indicators_count'] = len(self.indicators)
        summary['has_context_data'] = self.context_data is not None
        summary['data_source'] = self.data_source
        summary['query_latency_ms'] = self.query_latency_ms
        summary['is_complete'] = self.has_complete_data()
        return summary
|
||||
|
||||
|
||||
@dataclass
class OrderBookDataFrame:
    """
    Order book data with imbalances and aggregations.

    Contains raw order book levels, price buckets, and multi-timeframe
    imbalance metrics for a single symbol and timestamp.
    """
    symbol: str
    timestamp: datetime

    # Raw order book (top levels)
    bids: List[Tuple[float, float]] = field(default_factory=list)  # (price, size)
    asks: List[Tuple[float, float]] = field(default_factory=list)  # (price, size)

    # Aggregated data (price buckets)
    price_buckets: pd.DataFrame = field(default_factory=pd.DataFrame)

    # Multi-timeframe imbalance metrics
    imbalance_1s: float = 0.0
    imbalance_5s: float = 0.0
    imbalance_15s: float = 0.0
    imbalance_60s: float = 0.0

    # Volume-weighted imbalances
    volume_imbalance_1s: float = 0.0
    volume_imbalance_5s: float = 0.0
    volume_imbalance_15s: float = 0.0
    volume_imbalance_60s: float = 0.0

    # Order book statistics
    mid_price: float = 0.0
    spread: float = 0.0
    bid_volume: float = 0.0
    ask_volume: float = 0.0

    # Metadata
    exchange: str = 'binance'
    sequence_id: Optional[int] = None

    # Field-name groups used to (de)serialize without repeating each name.
    # Unannotated on purpose so the dataclass does not treat them as fields.
    _IMBALANCE_FIELDS = (
        'imbalance_1s', 'imbalance_5s', 'imbalance_15s', 'imbalance_60s',
        'volume_imbalance_1s', 'volume_imbalance_5s',
        'volume_imbalance_15s', 'volume_imbalance_60s',
    )
    _STATISTIC_FIELDS = ('mid_price', 'spread', 'bid_volume', 'ask_volume')

    @staticmethod
    def _parse_timestamp(value: Any) -> datetime:
        """Coerce a serialized timestamp into a ``datetime``.

        ``datetime`` instances pass through unchanged, non-empty strings are
        parsed with ``datetime.fromisoformat`` and a missing/empty value
        falls back to ``datetime.now()``.
        """
        if isinstance(value, datetime):
            return value
        if value:
            return datetime.fromisoformat(value)
        return datetime.now()

    @staticmethod
    def _normalize_levels(levels: Any) -> List[Tuple[float, float]]:
        """Return ``levels`` as a list of ``(price, size)`` float tuples.

        JSON decoding turns tuples into lists, and raw payloads may carry
        numeric strings. Normalizing here keeps the declared field type and
        ensures best-bid/ask selection compares numbers, not strings.
        """
        return [(float(level[0]), float(level[1])) for level in levels or []]

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization.

        The timestamp becomes an ISO-8601 string and ``price_buckets`` a
        list of row dicts (``[]`` when empty).
        """
        result: Dict[str, Any] = {
            'symbol': self.symbol,
            'timestamp': self.timestamp.isoformat() if self.timestamp else None,
            'bids': self.bids,
            'asks': self.asks,
            'price_buckets': (
                [] if self.price_buckets.empty
                else self.price_buckets.to_dict('records')
            ),
        }
        for name in self._IMBALANCE_FIELDS + self._STATISTIC_FIELDS:
            result[name] = getattr(self, name)
        result['exchange'] = self.exchange
        result['sequence_id'] = self.sequence_id
        return result

    def to_json(self) -> str:
        """Convert to JSON string.

        Values json cannot encode natively are stringified via ``str``.
        """
        return json.dumps(self.to_dict(), default=str)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'OrderBookDataFrame':
        """Create from dictionary.

        Args:
            data: Mapping shaped like the output of ``to_dict``. Only
                ``symbol`` is required. ``timestamp`` may be an ISO-8601
                string or a ``datetime``; when missing, the current time is
                used. ``bids``/``asks`` levels are converted to
                ``(float, float)`` tuples.

        Raises:
            KeyError: If ``symbol`` is missing.
            ValueError: If a bid/ask price or size is not numeric.
        """
        scalars = {
            name: data.get(name, 0.0)
            for name in cls._IMBALANCE_FIELDS + cls._STATISTIC_FIELDS
        }
        return cls(
            symbol=data['symbol'],
            timestamp=cls._parse_timestamp(data.get('timestamp')),
            bids=cls._normalize_levels(data.get('bids')),
            asks=cls._normalize_levels(data.get('asks')),
            price_buckets=pd.DataFrame(data.get('price_buckets', [])),
            exchange=data.get('exchange', 'binance'),
            sequence_id=data.get('sequence_id'),
            **scalars,
        )

    def calculate_statistics(self) -> None:
        """Calculate order book statistics from bids and asks.

        Updates ``mid_price``, ``spread``, ``bid_volume`` and ``ask_volume``
        in place. Nothing is changed unless both sides have at least one
        level; mid price and spread are only updated when both best prices
        are positive.
        """
        if not (self.bids and self.asks):
            return

        best_bid = max(level[0] for level in self.bids)
        best_ask = min(level[0] for level in self.asks)

        # Mid price and spread
        if best_bid > 0 and best_ask > 0:
            self.mid_price = (best_bid + best_ask) / 2
            self.spread = best_ask - best_bid

        # Total volumes
        self.bid_volume = sum(size for _, size in self.bids)
        self.ask_volume = sum(size for _, size in self.asks)

    def get_best_bid(self) -> Optional[Tuple[float, float]]:
        """Get best bid (highest price), or ``None`` if there are no bids."""
        if not self.bids:
            return None
        return max(self.bids, key=lambda level: level[0])

    def get_best_ask(self) -> Optional[Tuple[float, float]]:
        """Get best ask (lowest price), or ``None`` if there are no asks."""
        if not self.asks:
            return None
        return min(self.asks, key=lambda level: level[0])

    def get_spread_bps(self) -> float:
        """Get spread in basis points.

        Uses the stored ``mid_price`` and ``spread``; returns 0.0 unless
        both are positive.
        """
        if self.mid_price > 0 and self.spread > 0:
            return (self.spread / self.mid_price) * 10000
        return 0.0

    def get_imbalance_summary(self) -> Dict[str, float]:
        """Get summary of all imbalance metrics keyed by field name."""
        return {name: getattr(self, name) for name in self._IMBALANCE_FIELDS}

    def is_valid(self) -> bool:
        """Check if order book data is valid.

        Valid means both sides are non-empty and the best bid price is
        strictly below the best ask price.
        """
        if not self.bids or not self.asks:
            return False

        # Bid must be less than ask
        return self.get_best_bid()[0] < self.get_best_ask()[0]
|
||||
|
||||
|
||||
@dataclass
class TradeEvent:
    """Individual trade event."""
    symbol: str
    timestamp: datetime
    price: float
    size: float
    side: str  # 'buy' or 'sell'
    trade_id: str
    exchange: str = 'binance'
    is_buyer_maker: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary.

        Field values are returned as-is, so ``timestamp`` stays a
        ``datetime`` object.
        """
        return asdict(self)

    def to_json(self) -> str:
        """Convert to JSON string.

        The timestamp is written in ISO-8601 form (``T`` separator) rather
        than relying on ``str(datetime)``, which uses a space. ``from_dict``
        accepts both forms.
        """
        payload = self.to_dict()
        stamp = payload.get('timestamp')
        if isinstance(stamp, datetime):
            payload['timestamp'] = stamp.isoformat()
        return json.dumps(payload, default=str)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'TradeEvent':
        """Create from dictionary.

        Args:
            data: Mapping with ``symbol``, ``timestamp`` (ISO-8601 string
                or ``datetime``), ``price``, ``size``, ``side`` and
                ``trade_id``; ``exchange`` and ``is_buyer_maker`` are
                optional. ``price``/``size`` are coerced to ``float`` and
                ``trade_id`` to ``str``.

        Raises:
            KeyError: If a required key is missing.
        """
        stamp = data['timestamp']
        if isinstance(stamp, str):
            stamp = datetime.fromisoformat(stamp)
        return cls(
            symbol=data['symbol'],
            timestamp=stamp,
            price=float(data['price']),
            size=float(data['size']),
            side=data['side'],
            trade_id=str(data['trade_id']),
            exchange=data.get('exchange', 'binance'),
            is_buyer_maker=data.get('is_buyer_maker', False),
        )
|
||||
|
||||
|
||||
@dataclass
class OHLCVCandle:
    """Single OHLCV candlestick."""
    symbol: str
    timestamp: datetime
    timeframe: str
    open_price: float
    high_price: float
    low_price: float
    close_price: float
    volume: float
    trade_count: int = 0

    # Technical indicators (optional)
    rsi_14: Optional[float] = None
    macd: Optional[float] = None
    macd_signal: Optional[float] = None
    macd_histogram: Optional[float] = None
    bb_upper: Optional[float] = None
    bb_middle: Optional[float] = None
    bb_lower: Optional[float] = None
    ema_12: Optional[float] = None
    ema_26: Optional[float] = None
    sma_20: Optional[float] = None

    # Names of the optional indicator fields, used by from_dict. Unannotated
    # on purpose so the dataclass does not treat it as a field.
    _INDICATOR_FIELDS = (
        'rsi_14', 'macd', 'macd_signal', 'macd_histogram',
        'bb_upper', 'bb_middle', 'bb_lower',
        'ema_12', 'ema_26', 'sma_20',
    )

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary.

        Field values are returned as-is, so ``timestamp`` stays a
        ``datetime`` object.
        """
        return asdict(self)

    def to_json(self) -> str:
        """Convert to JSON string.

        The timestamp is written in ISO-8601 form (``T`` separator) rather
        than relying on ``str(datetime)``, which uses a space. ``from_dict``
        accepts both forms.
        """
        payload = self.to_dict()
        stamp = payload.get('timestamp')
        if isinstance(stamp, datetime):
            payload['timestamp'] = stamp.isoformat()
        return json.dumps(payload, default=str)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'OHLCVCandle':
        """Create from dictionary.

        Args:
            data: Mapping with ``symbol``, ``timestamp`` (ISO-8601 string
                or ``datetime``), ``timeframe``, the four prices and
                ``volume``. ``trade_count`` defaults to 0 when missing or
                ``None``; indicator keys are optional and stay ``None``
                when absent.

        Raises:
            KeyError: If a required key is missing.
        """
        stamp = data['timestamp']
        if isinstance(stamp, str):
            stamp = datetime.fromisoformat(stamp)

        indicators: Dict[str, Optional[float]] = {}
        for name in cls._INDICATOR_FIELDS:
            raw = data.get(name)
            indicators[name] = None if raw is None else float(raw)

        return cls(
            symbol=data['symbol'],
            timestamp=stamp,
            timeframe=data['timeframe'],
            open_price=float(data['open_price']),
            high_price=float(data['high_price']),
            low_price=float(data['low_price']),
            close_price=float(data['close_price']),
            volume=float(data['volume']),
            # `or 0` also covers an explicit None, which int() would reject.
            trade_count=int(data.get('trade_count') or 0),
            **indicators,
        )

    def is_valid(self) -> bool:
        """Check if candle data is valid.

        Valid means ``low <= open <= high``, ``low <= close <= high`` and
        ``volume >= 0``. The checks are written as positive comparisons so
        that a NaN in any price or in the volume makes the candle invalid
        (every comparison against NaN is False).
        """
        return (
            self.low_price <= self.open_price <= self.high_price
            and self.low_price <= self.close_price <= self.high_price
            and self.volume >= 0
        )

    def get_price_change(self) -> float:
        """Get price change (close - open)."""
        return self.close_price - self.open_price

    def get_price_change_percent(self) -> float:
        """Get price change percentage relative to open (0.0 if open <= 0)."""
        if self.open_price > 0:
            return ((self.close_price - self.open_price) / self.open_price) * 100
        return 0.0

    def get_range(self) -> float:
        """Get price range (high - low)."""
        return self.high_price - self.low_price

    def is_bullish(self) -> bool:
        """Check if candle is bullish (close > open)."""
        return self.close_price > self.open_price

    def is_bearish(self) -> bool:
        """Check if candle is bearish (close < open)."""
        return self.close_price < self.open_price
|
||||
|
||||
Reference in New Issue
Block a user