COBY : specs + task 1
# Design Document

## Overview

The Multi-Exchange Data Aggregation System is a comprehensive data collection and processing subsystem designed to serve as the foundational data layer for the trading orchestrator. The system will collect real-time order book and OHLCV data from the top 10 cryptocurrency exchanges, aggregate it into standardized formats, store it in a TimescaleDB time-series database, and provide both live data feeds and historical replay capabilities.

The system follows a microservices architecture with containerized components, ensuring scalability, maintainability, and seamless integration with the existing trading infrastructure.

We implement it in the `.\COBY` subfolder for easy integration with the existing system.

## Architecture

### High-Level Architecture

```mermaid
graph TB
    subgraph "Exchange Connectors"
        E1[Binance WebSocket]
        E2[Coinbase WebSocket]
        E3[Kraken WebSocket]
        E4[Bybit WebSocket]
        E5[OKX WebSocket]
        E6[Huobi WebSocket]
        E7[KuCoin WebSocket]
        E8[Gate.io WebSocket]
        E9[Bitfinex WebSocket]
        E10[MEXC WebSocket]
    end

    subgraph "Data Processing Layer"
        DP[Data Processor]
        AGG[Aggregation Engine]
        NORM[Data Normalizer]
    end

    subgraph "Storage Layer"
        TSDB[(TimescaleDB)]
        CACHE[Redis Cache]
    end

    subgraph "API Layer"
        LIVE[Live Data API]
        REPLAY[Replay API]
        WEB[Web Dashboard]
    end

    subgraph "Integration Layer"
        ORCH[Orchestrator Interface]
        ADAPTER[Data Adapter]
    end

    E1 --> DP
    E2 --> DP
    E3 --> DP
    E4 --> DP
    E5 --> DP
    E6 --> DP
    E7 --> DP
    E8 --> DP
    E9 --> DP
    E10 --> DP

    DP --> NORM
    NORM --> AGG
    AGG --> TSDB
    AGG --> CACHE

    CACHE --> LIVE
    TSDB --> REPLAY
    LIVE --> WEB
    REPLAY --> WEB

    LIVE --> ADAPTER
    REPLAY --> ADAPTER
    ADAPTER --> ORCH
```

### Component Architecture

The system is organized into several key components:

1. **Exchange Connectors**: WebSocket clients for each exchange
2. **Data Processing Engine**: Normalizes and validates incoming data
3. **Aggregation Engine**: Creates price buckets and heatmaps
4. **Storage Layer**: TimescaleDB for persistence, Redis for caching
5. **API Layer**: REST and WebSocket APIs for data access
6. **Web Dashboard**: Real-time visualization interface
7. **Integration Layer**: Orchestrator-compatible interface

## Components and Interfaces

### Exchange Connector Interface

```python
from __future__ import annotations  # signatures only; referenced types live elsewhere in the codebase

class ExchangeConnector:
    """Base interface for exchange WebSocket connectors"""

    async def connect(self) -> bool: ...
    async def disconnect(self) -> None: ...
    async def subscribe_orderbook(self, symbol: str) -> None: ...
    async def subscribe_trades(self, symbol: str) -> None: ...
    def get_connection_status(self) -> ConnectionStatus: ...
    def add_data_callback(self, callback: Callable) -> None: ...
```
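
As a concrete illustration, a stripped-down Binance connector is sketched below. It assumes the third-party `websockets` package and Binance's public partial depth stream (`<symbol>@depth20@100ms`); the class name and callback wiring are illustrative, not the final implementation.

```python
# Minimal connector sketch, assuming `pip install websockets`.
import asyncio
import json
import websockets

BINANCE_WS = "wss://stream.binance.com:9443/ws"

class BinanceConnector:
    def __init__(self) -> None:
        self._callbacks = []

    def add_data_callback(self, callback) -> None:
        self._callbacks.append(callback)

    async def subscribe_orderbook(self, symbol: str) -> None:
        # Binance stream names are lowercase, e.g. btcusdt@depth20@100ms
        url = f"{BINANCE_WS}/{symbol.lower()}@depth20@100ms"
        async with websockets.connect(url) as ws:
            async for message in ws:
                raw = json.loads(message)
                for callback in self._callbacks:
                    callback(raw)  # hand the raw payload to the data processor

if __name__ == "__main__":
    connector = BinanceConnector()
    connector.add_data_callback(lambda raw: print(len(raw["bids"]), "bid levels"))
    asyncio.run(connector.subscribe_orderbook("BTCUSDT"))
```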

### Data Processing Interface

```python
from __future__ import annotations

class DataProcessor:
    """Processes and normalizes raw exchange data"""

    def normalize_orderbook(self, raw_data: Dict, exchange: str) -> OrderBookSnapshot: ...
    def normalize_trade(self, raw_data: Dict, exchange: str) -> TradeEvent: ...
    def validate_data(self, data: Union[OrderBookSnapshot, TradeEvent]) -> bool: ...
    def calculate_metrics(self, orderbook: OrderBookSnapshot) -> OrderBookMetrics: ...
```
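
For example, `normalize_orderbook` for Binance would map the raw partial-depth payload onto the `OrderBookSnapshot`/`PriceLevel` models defined under Data Models. A minimal sketch, assuming the payload shape `{"lastUpdateId": ..., "bids": [["price", "qty"], ...], "asks": [...]}`, with the symbol hardcoded as a placeholder:

```python
from datetime import datetime, timezone

# OrderBookSnapshot and PriceLevel as defined in the Data Models section.
def normalize_binance_orderbook(raw_data: dict, exchange: str = "binance") -> OrderBookSnapshot:
    return OrderBookSnapshot(
        symbol="BTCUSDT",  # placeholder; in practice carried alongside the message
        exchange=exchange,
        timestamp=datetime.now(timezone.utc),  # partial depth carries no event time
        bids=[PriceLevel(price=float(p), size=float(s)) for p, s in raw_data["bids"]],
        asks=[PriceLevel(price=float(p), size=float(s)) for p, s in raw_data["asks"]],
        sequence_id=raw_data.get("lastUpdateId"),
    )
```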

### Aggregation Engine Interface

```python
from __future__ import annotations

class AggregationEngine:
    """Aggregates data into price buckets and heatmaps"""

    def create_price_buckets(self, orderbook: OrderBookSnapshot, bucket_size: float) -> PriceBuckets: ...
    def update_heatmap(self, symbol: str, buckets: PriceBuckets) -> HeatmapData: ...
    def calculate_imbalances(self, orderbook: OrderBookSnapshot) -> ImbalanceMetrics: ...
    def aggregate_across_exchanges(self, symbol: str) -> ConsolidatedOrderBook: ...
```
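
The bucketing step itself is plain rounding arithmetic: each price is floored to a multiple of `bucket_size` and the sizes landing in the same bucket are summed. A minimal sketch of `create_price_buckets` under that assumption:

```python
import math
from collections import defaultdict

# Uses the OrderBookSnapshot and PriceBuckets models from the Data Models section.
def create_price_buckets(orderbook: OrderBookSnapshot, bucket_size: float) -> PriceBuckets:
    bid_buckets: dict = defaultdict(float)
    ask_buckets: dict = defaultdict(float)
    for level in orderbook.bids:
        # e.g. price 50007.3 with $10 buckets lands in bucket 50000.0
        bid_buckets[math.floor(level.price / bucket_size) * bucket_size] += level.size
    for level in orderbook.asks:
        ask_buckets[math.floor(level.price / bucket_size) * bucket_size] += level.size
    return PriceBuckets(
        symbol=orderbook.symbol,
        timestamp=orderbook.timestamp,
        bucket_size=bucket_size,
        bid_buckets=dict(bid_buckets),
        ask_buckets=dict(ask_buckets),
    )
```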

### Storage Interface

```python
from __future__ import annotations

class StorageManager:
    """Manages data persistence and retrieval"""

    async def store_orderbook(self, data: OrderBookSnapshot) -> bool: ...
    async def store_trade(self, data: TradeEvent) -> bool: ...
    async def get_historical_data(self, symbol: str, start: datetime, end: datetime) -> List[Dict]: ...
    async def get_latest_data(self, symbol: str) -> Dict: ...
    def setup_database_schema(self) -> None: ...
```
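
One plausible way to honor the `batch_write_size` setting (see Configuration Management below) is to buffer trades in memory and flush them as one multi-row insert. A sketch using `asyncpg`, which is an assumed driver choice rather than a stated requirement:

```python
import asyncpg

class BatchedTradeWriter:
    """Buffers TradeEvent rows (see Data Models) and writes them in batches."""

    def __init__(self, dsn: str, batch_size: int = 1000) -> None:
        self._dsn = dsn
        self._batch_size = batch_size
        self._buffer: list = []
        self._pool = None

    async def connect(self) -> None:
        self._pool = await asyncpg.create_pool(self._dsn)

    async def store_trade(self, t: TradeEvent) -> bool:
        self._buffer.append((t.symbol, t.exchange, t.timestamp, t.price, t.size, t.side, t.trade_id))
        if len(self._buffer) >= self._batch_size:
            await self._flush()
        return True

    async def _flush(self) -> None:
        batch, self._buffer = self._buffer, []
        async with self._pool.acquire() as conn:
            await conn.executemany(
                """INSERT INTO trade_events
                       (symbol, exchange, timestamp, price, size, side, trade_id)
                   VALUES ($1, $2, $3, $4, $5, $6, $7)
                   ON CONFLICT DO NOTHING""",  # duplicate detection at the database layer
                batch,
            )
```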

### Replay Interface

```python
from __future__ import annotations

class ReplayManager:
    """Provides historical data replay functionality"""

    def create_replay_session(self, start_time: datetime, end_time: datetime, speed: float) -> str: ...
    async def start_replay(self, session_id: str) -> None: ...
    async def pause_replay(self, session_id: str) -> None: ...
    async def stop_replay(self, session_id: str) -> None: ...
    def get_replay_status(self, session_id: str) -> ReplayStatus: ...
```
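
The heart of replay is the pacing loop: the time gap between consecutive rows is divided by the session's speed multiplier before the next row is emitted (so `speed=2.0` plays back at twice real time). A minimal sketch, assuming rows are dicts ordered by `timestamp`:

```python
import asyncio

async def replay_rows(rows, emit, speed: float = 1.0) -> None:
    previous_ts = None
    for row in rows:  # rows assumed sorted by timestamp ascending
        if previous_ts is not None:
            delay = (row["timestamp"] - previous_ts).total_seconds() / speed
            if delay > 0:
                await asyncio.sleep(delay)
        emit(row)  # e.g. push to the same callbacks the live feed uses
        previous_ts = row["timestamp"]
```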

## Data Models

### Core Data Structures

```python
from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime
from typing import Dict, List, Optional

@dataclass
class OrderBookSnapshot:
    """Standardized order book snapshot"""
    symbol: str
    exchange: str
    timestamp: datetime
    bids: List[PriceLevel]
    asks: List[PriceLevel]
    sequence_id: Optional[int] = None

@dataclass
class PriceLevel:
    """Individual price level in order book"""
    price: float
    size: float
    count: Optional[int] = None

@dataclass
class TradeEvent:
    """Standardized trade event"""
    symbol: str
    exchange: str
    timestamp: datetime
    price: float
    size: float
    side: str  # 'buy' or 'sell'
    trade_id: str

@dataclass
class PriceBuckets:
    """Aggregated price buckets for heatmap"""
    symbol: str
    timestamp: datetime
    bucket_size: float
    bid_buckets: Dict[float, float]  # price -> volume
    ask_buckets: Dict[float, float]  # price -> volume

@dataclass
class HeatmapData:
    """Heatmap visualization data"""
    symbol: str
    timestamp: datetime
    bucket_size: float
    data: List[HeatmapPoint]

@dataclass
class HeatmapPoint:
    """Individual heatmap data point"""
    price: float
    volume: float
    intensity: float  # 0.0 to 1.0
    side: str  # 'bid' or 'ask'
```
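
The models leave open how `intensity` is computed. One plausible convention (an assumption, not specified above) normalizes each bucket's volume by the largest bucket in the same snapshot:

```python
def to_heatmap(buckets: PriceBuckets) -> HeatmapData:
    points = [(p, v, "bid") for p, v in buckets.bid_buckets.items()] + \
             [(p, v, "ask") for p, v in buckets.ask_buckets.items()]
    max_volume = max((v for _, v, _ in points), default=1.0)  # avoid division by zero
    return HeatmapData(
        symbol=buckets.symbol,
        timestamp=buckets.timestamp,
        bucket_size=buckets.bucket_size,
        data=[
            HeatmapPoint(price=p, volume=v, intensity=v / max_volume, side=side)
            for p, v, side in points
        ],
    )
```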

### Database Schema

#### TimescaleDB Tables

```sql
-- Order book snapshots table
CREATE TABLE order_book_snapshots (
    id BIGSERIAL,
    symbol VARCHAR(20) NOT NULL,
    exchange VARCHAR(20) NOT NULL,
    timestamp TIMESTAMPTZ NOT NULL,
    bids JSONB NOT NULL,
    asks JSONB NOT NULL,
    sequence_id BIGINT,
    mid_price DECIMAL(20,8),
    spread DECIMAL(20,8),
    bid_volume DECIMAL(30,8),
    ask_volume DECIMAL(30,8),
    PRIMARY KEY (timestamp, symbol, exchange)
);

-- Convert to hypertable
SELECT create_hypertable('order_book_snapshots', 'timestamp');

-- Trade events table
CREATE TABLE trade_events (
    id BIGSERIAL,
    symbol VARCHAR(20) NOT NULL,
    exchange VARCHAR(20) NOT NULL,
    timestamp TIMESTAMPTZ NOT NULL,
    price DECIMAL(20,8) NOT NULL,
    size DECIMAL(30,8) NOT NULL,
    side VARCHAR(4) NOT NULL,
    trade_id VARCHAR(100) NOT NULL,
    PRIMARY KEY (timestamp, symbol, exchange, trade_id)
);

-- Convert to hypertable
SELECT create_hypertable('trade_events', 'timestamp');

-- Aggregated heatmap data table
CREATE TABLE heatmap_data (
    symbol VARCHAR(20) NOT NULL,
    timestamp TIMESTAMPTZ NOT NULL,
    bucket_size DECIMAL(10,2) NOT NULL,
    price_bucket DECIMAL(20,8) NOT NULL,
    volume DECIMAL(30,8) NOT NULL,
    side VARCHAR(3) NOT NULL,
    exchange_count INTEGER NOT NULL,
    PRIMARY KEY (timestamp, symbol, bucket_size, price_bucket, side)
);

-- Convert to hypertable
SELECT create_hypertable('heatmap_data', 'timestamp');

-- OHLCV data table
CREATE TABLE ohlcv_data (
    symbol VARCHAR(20) NOT NULL,
    timestamp TIMESTAMPTZ NOT NULL,
    timeframe VARCHAR(10) NOT NULL,
    open_price DECIMAL(20,8) NOT NULL,
    high_price DECIMAL(20,8) NOT NULL,
    low_price DECIMAL(20,8) NOT NULL,
    close_price DECIMAL(20,8) NOT NULL,
    volume DECIMAL(30,8) NOT NULL,
    trade_count INTEGER,
    PRIMARY KEY (timestamp, symbol, timeframe)
);

-- Convert to hypertable
SELECT create_hypertable('ohlcv_data', 'timestamp');
```
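
For orientation, a typical consumer query against this schema could build one-minute OHLCV bars from raw trades using TimescaleDB's `time_bucket`, `first`, and `last` aggregates (illustrative only):

```sql
-- Illustrative: 1-minute OHLCV bars for one symbol from raw trades.
SELECT time_bucket('1 minute', timestamp) AS bucket,
       first(price, timestamp)            AS open_price,
       max(price)                         AS high_price,
       min(price)                         AS low_price,
       last(price, timestamp)             AS close_price,
       sum(size)                          AS volume,
       count(*)                           AS trade_count
FROM trade_events
WHERE symbol = 'BTCUSDT'
  AND timestamp > now() - INTERVAL '1 hour'
GROUP BY bucket
ORDER BY bucket;
```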

## Error Handling

### Connection Management

The system implements robust error handling for exchange connections:

1. **Exponential Backoff**: Failed connections retry with increasing delays (see the sketch below)
2. **Circuit Breaker**: Temporarily disable problematic exchanges
3. **Graceful Degradation**: Continue operation with available exchanges
4. **Health Monitoring**: Continuous monitoring of connection status
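
A minimal sketch of the reconnect policy: the delay doubles on each failure, is capped, and gets jitter so reconnecting clients do not stampede an exchange that just recovered. The initial delay and cap are illustrative assumptions:

```python
import asyncio
import logging
import random

logger = logging.getLogger(__name__)

async def run_with_reconnect(connector, symbol: str, max_delay: float = 60.0) -> None:
    delay = 1.0
    while True:
        try:
            if await connector.connect():
                delay = 1.0  # reset backoff after a successful connection
                await connector.subscribe_orderbook(symbol)  # runs until the stream drops
        except Exception:
            logger.exception("connection to exchange failed")
        # exponential backoff with jitter, capped at max_delay
        await asyncio.sleep(delay + random.uniform(0, delay / 2))
        delay = min(delay * 2, max_delay)
```
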
### Data Validation

All incoming data undergoes validation (a combined sketch follows this list):

1. **Schema Validation**: Ensure data structure compliance
2. **Range Validation**: Check price and volume ranges
3. **Timestamp Validation**: Verify temporal consistency
4. **Duplicate Detection**: Prevent duplicate data storage
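
A sketch combining the range and timestamp checks for order book snapshots; the staleness tolerance and the assumption that bid/ask levels arrive sorted best-first are illustrative:

```python
from datetime import datetime, timedelta, timezone

MAX_CLOCK_SKEW = timedelta(seconds=30)  # assumed tolerance

def validate_orderbook(ob: OrderBookSnapshot) -> bool:
    # Range validation: prices and sizes must be positive.
    if any(lvl.price <= 0 or lvl.size <= 0 for lvl in ob.bids + ob.asks):
        return False
    # Consistency: best bid must not cross best ask (levels assumed best-first).
    if ob.bids and ob.asks and ob.bids[0].price >= ob.asks[0].price:
        return False
    # Timestamp validation: reject stale or future-dated snapshots.
    if abs(datetime.now(timezone.utc) - ob.timestamp) > MAX_CLOCK_SKEW:
        return False
    return True
```
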
### Database Resilience

Database operations include comprehensive error handling:

1. **Connection Pooling**: Maintain multiple database connections
2. **Transaction Management**: Ensure data consistency
3. **Retry Logic**: Automatic retry for transient failures
4. **Backup Strategies**: Regular data backups and recovery procedures

## Testing Strategy

### Unit Testing

Each component will have comprehensive unit tests:

1. **Exchange Connectors**: Mock WebSocket responses
2. **Data Processing**: Test normalization and validation
3. **Aggregation Engine**: Verify bucket calculations
4. **Storage Layer**: Test database operations
5. **API Layer**: Test endpoint responses

### Integration Testing

End-to-end testing scenarios:

1. **Multi-Exchange Data Flow**: Test the complete data pipeline
2. **Database Integration**: Verify TimescaleDB operations
3. **API Integration**: Test orchestrator interface compatibility
4. **Performance Testing**: Load testing with high-frequency data

### Performance Testing

Performance benchmarks and testing:

1. **Throughput Testing**: Measure data processing capacity
2. **Latency Testing**: Measure end-to-end data latency
3. **Memory Usage**: Monitor memory consumption patterns
4. **Database Performance**: Query performance optimization

### Monitoring and Observability

Comprehensive monitoring system:

1. **Metrics Collection**: Prometheus-compatible metrics (see the sketch below)
2. **Logging**: Structured logging with correlation IDs
3. **Alerting**: Real-time alerts for system issues
4. **Dashboards**: Grafana dashboards for system monitoring
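
A sketch of point 1 using the `prometheus_client` package; the metric names and port are assumptions:

```python
from prometheus_client import Counter, Gauge, start_http_server

# Assumed metric names; values appear at http://localhost:9090/metrics
MESSAGES_TOTAL = Counter(
    "coby_exchange_messages_total",
    "Raw messages received from exchanges",
    ["exchange", "symbol"],
)
CONNECTED_EXCHANGES = Gauge(
    "coby_connected_exchanges",
    "Exchanges with a live WebSocket connection",
)

start_http_server(9090)  # scrape endpoint for Prometheus
MESSAGES_TOTAL.labels(exchange="binance", symbol="BTCUSDT").inc()
CONNECTED_EXCHANGES.set(1)
```
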
## Deployment Architecture

### Docker Containerization

The system will be deployed using Docker containers:

```yaml
# docker-compose.yml
version: '3.8'
services:
  timescaledb:
    image: timescale/timescaledb:latest-pg14
    environment:
      POSTGRES_DB: market_data
      POSTGRES_USER: market_user
      POSTGRES_PASSWORD: ${DB_PASSWORD}
    volumes:
      - timescale_data:/var/lib/postgresql/data
    ports:
      - "5432:5432"

  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data

  data-aggregator:
    build: ./data-aggregator
    environment:
      - DB_HOST=timescaledb
      - REDIS_HOST=redis
      - LOG_LEVEL=INFO
    depends_on:
      - timescaledb
      - redis

  web-dashboard:
    build: ./web-dashboard
    ports:
      - "8080:8080"
    environment:
      - API_HOST=data-aggregator
    depends_on:
      - data-aggregator

volumes:
  timescale_data:
  redis_data:
```

### Configuration Management

Environment-based configuration:

```python
# config.py
import os
from dataclasses import dataclass, field
from typing import List

@dataclass
class Config:
    # Database settings
    db_host: str = os.getenv('DB_HOST', 'localhost')
    db_port: int = int(os.getenv('DB_PORT', '5432'))
    db_name: str = os.getenv('DB_NAME', 'market_data')
    db_user: str = os.getenv('DB_USER', 'market_user')
    db_password: str = os.getenv('DB_PASSWORD', '')

    # Redis settings
    redis_host: str = os.getenv('REDIS_HOST', 'localhost')
    redis_port: int = int(os.getenv('REDIS_PORT', '6379'))

    # Exchange settings
    exchanges: List[str] = field(default_factory=lambda: [
        'binance', 'coinbase', 'kraken', 'bybit', 'okx',
        'huobi', 'kucoin', 'gateio', 'bitfinex', 'mexc'
    ])

    # Aggregation settings
    btc_bucket_size: float = 10.0  # $10 USD buckets for BTC
    eth_bucket_size: float = 1.0   # $1 USD buckets for ETH

    # Performance settings
    max_connections_per_exchange: int = 5
    data_buffer_size: int = 10000
    batch_write_size: int = 1000

    # API settings
    api_host: str = os.getenv('API_HOST', '0.0.0.0')
    api_port: int = int(os.getenv('API_PORT', '8080'))
    websocket_port: int = int(os.getenv('WS_PORT', '8081'))
```

This design provides a robust, scalable foundation for multi-exchange data aggregation that seamlessly integrates with the existing trading orchestrator while providing the flexibility for future enhancements and additional exchange integrations.