153 Commits

Author SHA1 Message Date
42cf02cf3a web dash 2025-08-04 20:28:48 +03:00
fd6ec4eb40 api 2025-08-04 18:38:51 +03:00
ff75af566c caching 2025-08-04 17:55:00 +03:00
8ee9b7a90c wip 2025-08-04 17:40:30 +03:00
de77b0afa8 bucket aggregation 2025-08-04 17:28:55 +03:00
504736c0f7 cob integration scaffold 2025-08-04 17:12:26 +03:00
de9fa4a421 COBY : specs + task 1 2025-08-04 15:50:54 +03:00
e223bc90e9 inference_enabled, cleanup 2025-08-04 14:24:39 +03:00
29382ac0db price vector predictions 2025-07-29 23:45:57 +03:00
3fad2caeb8 decision model card 2025-07-29 23:42:46 +03:00
a204362df2 model cards back 2025-07-29 23:14:00 +03:00
ab5784b890 normalize by unified price range 2025-07-29 22:05:28 +03:00
aa2a1bf7ee fixed CNN training 2025-07-29 20:11:22 +03:00
b1ae557843 models overhaul 2025-07-29 19:22:04 +03:00
0b5fa07498 ui fixes 2025-07-29 19:02:44 +03:00
ac4068c168 suppress_callback_exceptions 2025-07-29 18:20:07 +03:00
5f7032937e UI dash fix 2025-07-29 17:49:25 +03:00
3a532a1220 PnL in reward, show leveraged power in dash (broken) 2025-07-29 17:42:00 +03:00
d35530a9e9 win uni toggle 2025-07-29 16:10:45 +03:00
ecbbabc0c1 inf/trn toggles UI 2025-07-29 15:51:18 +03:00
ff41f0a278 training wip 2025-07-29 15:25:36 +03:00
b3e3a7673f TZ wip, UI model stats fix 2025-07-29 15:12:48 +03:00
afde58bc40 wip model CP storage/loading,
models are aware of current position
fix kill stale procc task
2025-07-29 14:51:40 +03:00
f34b2a46a2 better decision details 2025-07-29 09:49:09 +03:00
e2ededcdf0 fuse decision fusion 2025-07-29 09:09:11 +03:00
f4ac504963 fix model toggle 2025-07-29 00:52:58 +03:00
b44216ae1e UI: fix models info 2025-07-29 00:46:16 +03:00
aefc460082 wip dqn state 2025-07-29 00:25:31 +03:00
ea4db519de more info at signals 2025-07-29 00:20:07 +03:00
e1e453c204 dqn model data fix 2025-07-29 00:09:13 +03:00
548c0d5e0f ui state, models toggle 2025-07-28 23:49:47 +03:00
a341fade80 wip 2025-07-28 22:09:15 +03:00
bc4b72c6de add decision fusion. training but not enabled.
reports cleanup
2025-07-28 18:22:13 +03:00
233bb9935c fixed trading and leverage 2025-07-28 16:57:02 +03:00
db23ad10da trading risk management 2025-07-28 16:42:11 +03:00
44821b2a89 UI and stability 2025-07-28 14:05:37 +03:00
25b2d3840a ui fix 2025-07-28 12:15:26 +03:00
fb72c93743 stability 2025-07-28 12:10:52 +03:00
9219b78241 UI 2025-07-28 11:44:01 +03:00
7c508ab536 cob 2025-07-28 11:12:42 +03:00
1084b7f5b5 cob buffered 2025-07-28 10:31:24 +03:00
619e39ac9b binance WS api enhanced 2025-07-28 10:26:47 +03:00
f5416c4f1e cob update fix 2025-07-28 09:46:49 +03:00
240d2b7877 stats, standartized data provider 2025-07-28 08:35:08 +03:00
6efaa27c33 dix price ccalls 2025-07-28 00:14:03 +03:00
b4076241c9 training wip 2025-07-27 23:45:57 +03:00
39267697f3 predict price direction 2025-07-27 23:20:47 +03:00
dfa18035f1 untrack sqlite 2025-07-27 22:46:19 +03:00
368c49df50 device fix , TZ fix 2025-07-27 22:13:28 +03:00
9e1684f9f8 cb ws 2025-07-27 20:56:37 +03:00
bd986f4534 beef up DQN model, fix training issues 2025-07-27 20:48:44 +03:00
1894d453c9 timezones 2025-07-27 20:43:28 +03:00
1636082ba3 CNN adapter retired 2025-07-27 20:38:04 +03:00
d333681447 wip train 2025-07-27 20:34:51 +03:00
ff66cb8b79 fix TA warning 2025-07-27 20:11:37 +03:00
64dbfa3780 training fix 2025-07-27 20:08:33 +03:00
86373fd5a7 training 2025-07-27 19:45:16 +03:00
87c0dc8ac4 wip training and inference stats 2025-07-27 19:20:23 +03:00
2a21878ed5 wip training 2025-07-27 19:07:34 +03:00
e2c495d83c cleanup 2025-07-27 18:31:30 +03:00
a94b80c1f4 decouple external API and local data consumption 2025-07-27 17:28:07 +03:00
fec6acb783 wip UI clear session 2025-07-27 17:21:16 +03:00
74e98709ad stats 2025-07-27 00:31:50 +03:00
13155197f8 inference works 2025-07-27 00:24:32 +03:00
36a8e256a8 fix DQN RL inference, rebuild model 2025-07-26 23:57:03 +03:00
87942d3807 cleanup and removed dummy data 2025-07-26 23:35:14 +03:00
3eb6335169 inrefence predictions fix 2025-07-26 23:34:36 +03:00
7c61c12b70 stability fixes, lower updates 2025-07-26 22:32:45 +03:00
9576c52039 optimize updates, remove fifo for simple cache 2025-07-26 22:17:29 +03:00
c349ff6f30 fifo n1 que 2025-07-26 21:34:16 +03:00
a3828c708c fix netwrk rebuild 2025-07-25 23:59:51 +03:00
43ed694917 fix checkpoints wip 2025-07-25 23:59:28 +03:00
50c6dae485 UI 2025-07-25 23:37:34 +03:00
22524b0389 cache fix 2025-07-25 22:46:23 +03:00
dd9f4b63ba sqlite for checkpoints, cleanup 2025-07-25 22:34:13 +03:00
130a52fb9b improved reward/penalty 2025-07-25 14:15:43 +03:00
26eeb9b35b ACTUAL TRAINING WORKING (WIP) 2025-07-25 14:08:25 +03:00
1f60c80d67 device tensor fix 2025-07-25 13:59:33 +03:00
78b4bb0f06 wip, training still disabled 2025-07-24 16:20:37 +03:00
045780758a wip symbols tidy up 2025-07-24 16:08:58 +03:00
d17af5ca4b inference data storage 2025-07-24 15:31:57 +03:00
fa07265a16 wip training 2025-07-24 15:27:32 +03:00
b3edd21f1b cnn training stats on dash 2025-07-24 14:28:28 +03:00
5437495003 wip cnn training and cob 2025-07-23 23:33:36 +03:00
8677c4c01c cob wip 2025-07-23 23:10:54 +03:00
8ba52640bd wip cob test 2025-07-23 22:56:28 +03:00
4765b1b1e1 cob data providers tests 2025-07-23 22:49:54 +03:00
c30267bf0b COB tests and data analysis 2025-07-23 22:39:10 +03:00
94ee7389c4 CNN training first working 2025-07-23 22:39:00 +03:00
26e6ba2e1d integrate CNN, fix COB data 2025-07-23 22:12:10 +03:00
45a62443a0 checkpoint manager 2025-07-23 22:11:19 +03:00
bab39fa68f dash inference fixes 2025-07-23 17:37:11 +03:00
2a0f8f5199 integratoin fixes - COB and CNN 2025-07-23 17:33:43 +03:00
f1d63f9da6 integrating new CNN model 2025-07-23 16:59:35 +03:00
1be270cc5c using new data probider and StandardizedCNN 2025-07-23 16:27:16 +03:00
735ee255bc new cnn model 2025-07-23 16:13:41 +03:00
dbb918ea92 wip 2025-07-23 15:52:40 +03:00
2b3c6abdeb refine design 2025-07-23 15:00:08 +03:00
55ea3bce93 feat: Add an improved orchestrator implementation per the requirements in the design document
Co-authored-by: aider (openai/Qwen/Qwen3-Coder-480B-A35B-Instruct) <aider@aider.chat>
2025-07-23 14:08:27 +03:00
56b35bd362 more design 2025-07-23 13:48:31 +03:00
f759eac04b updated design 2025-07-23 13:39:50 +03:00
df17a99247 wip 2025-07-23 13:39:41 +03:00
944a7b79e6 aider 2025-07-23 13:09:19 +03:00
8ad153aab5 aider 2025-07-23 11:23:15 +03:00
f515035ea0 use hyperbolic direactly instead of openrouter 2025-07-23 11:15:31 +03:00
3914ba40cf aider openrouter 2025-07-23 11:08:41 +03:00
7c8f52c07a aider 2025-07-23 10:28:19 +03:00
b0bc6c2a65 misc 2025-07-23 10:17:09 +03:00
630bc644fa wip 2025-07-22 20:23:17 +03:00
9b72b18eb7 references 2025-07-22 16:53:36 +03:00
1d224e5b8c references 2025-07-22 16:28:16 +03:00
a68df64b83 code structure 2025-07-22 16:23:13 +03:00
cc0c783411 cp man 2025-07-22 16:13:42 +03:00
c63dc11c14 cleanup 2025-07-22 16:08:58 +03:00
1a54fb1d56 fix model mappings,dash updates, trading 2025-07-22 15:44:59 +03:00
3e35b9cddb leverage calc fix 2025-07-20 22:41:37 +03:00
0838a828ce refactoring cob ws 2025-07-20 21:23:27 +03:00
330f0de053 COB WS fix 2025-07-20 20:38:42 +03:00
9c56ea238e dynamic profitabiliy reward 2025-07-20 18:08:37 +03:00
a2c07a1f3e dash working 2025-07-20 14:27:11 +03:00
0bb4409c30 fix syntax 2025-07-20 12:39:34 +03:00
12865fd3ef replay system 2025-07-20 12:37:02 +03:00
469269e809 working with errors 2025-07-20 01:52:36 +03:00
92919cb1ef adjust weights 2025-07-17 21:50:27 +03:00
23f0caea74 safety measures - 5 consequtive losses 2025-07-17 21:06:49 +03:00
26d440f772 artificially doule fees to promote more profitable trades 2025-07-17 19:22:35 +03:00
6d55061e86 wip training 2025-07-17 02:51:20 +03:00
c3010a6737 dash fixes 2025-07-17 02:25:52 +03:00
6b9482d2be pivots 2025-07-17 02:15:24 +03:00
b4e592b406 kiro tasks 2025-07-17 01:02:16 +03:00
f73cd17dfc kiro design and requirements 2025-07-17 00:57:50 +03:00
8023dae18f wip 2025-07-15 11:12:30 +03:00
e586d850f1 trading sim agin while training 2025-07-15 03:04:34 +03:00
0b07825be0 limit max positions 2025-07-15 02:27:33 +03:00
439611cf88 trading works! 2025-07-15 01:10:37 +03:00
24230f7f79 leverae tweak 2025-07-15 00:51:42 +03:00
154fa75c93 revert broken changes - indentations 2025-07-15 00:39:26 +03:00
a7905ce4e9 test bybit opening/closing orders 2025-07-15 00:03:59 +03:00
5b2dd3b0b8 bybit ballance working 2025-07-14 23:20:01 +03:00
02804ee64f bybit REST api 2025-07-14 22:57:02 +03:00
ee2e6478d8 bybit 2025-07-14 22:23:27 +03:00
4a55c5ff03 deribit 2025-07-14 17:56:09 +03:00
d53a2ba75d live position sync for LIMIT orders 2025-07-14 14:50:30 +03:00
f861559319 work with order execution - we are forced to do limit orders over the API 2025-07-14 13:36:07 +03:00
d7205a9745 lock with timeout 2025-07-14 13:03:42 +03:00
ab232a1262 in the bussiness -but wip 2025-07-14 12:58:16 +03:00
c651ae585a mexc debug files 2025-07-14 12:32:06 +03:00
0c54899fef MEXC INTEGRATION WORKS!!! 2025-07-14 11:23:13 +03:00
d42c9ada8c mexc interface integrations REST API fixes 2025-07-14 11:15:11 +03:00
e74f1393c4 training fixes and enhancements wip 2025-07-14 10:00:42 +03:00
e76b1b16dc training fixes 2025-07-14 00:47:44 +03:00
ebf65494a8 try to fix input dimentions 2025-07-13 23:41:47 +03:00
bcc13a5db3 training wip 2025-07-13 11:29:01 +03:00
376 changed files with 76908 additions and 120007 deletions

.aider.conf.yml (new file, 25 lines)
@@ -0,0 +1,25 @@
# Aider configuration file
# For more information, see: https://aider.chat/docs/config/aider_conf.html
# Configure for Hyperbolic API (OpenAI-compatible endpoint)
# hyperbolic
model: openai/Qwen/Qwen3-Coder-480B-A35B-Instruct
openai-api-base: https://api.hyperbolic.xyz/v1
openai-api-key: "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJkb2Jyb21pci5wb3BvdkB5YWhvby5jb20iLCJpYXQiOjE3NTMyMzE0MjZ9.fCbv2pUmDO9xxjVqfSKru4yz1vtrNvuGIXHibWZWInE"
# setx OPENAI_API_BASE https://api.hyperbolic.xyz/v1
# setx OPENAI_API_KEY eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJkb2Jyb21pci5wb3BvdkB5YWhvby5jb20iLCJpYXQiOjE3NTMyMzE0MjZ9.fCbv2pUmDO9xxjVqfSKru4yz1vtrNvuGIXHibWZWInE
# Environment variables for litellm to recognize Hyperbolic provider
set-env:
#setx HYPERBOLIC_API_KEY eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJkb2Jyb21pci5wb3BvdkB5YWhvby5jb20iLCJpYXQiOjE3NTMyMzE0MjZ9.fCbv2pUmDO9xxjVqfSKru4yz1vtrNvuGIXHibWZWInE
- HYPERBOLIC_API_KEY=eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJkb2Jyb21pci5wb3BvdkB5YWhvby5jb20iLCJpYXQiOjE3NTMyMzE0MjZ9.fCbv2pUmDO9xxjVqfSKru4yz1vtrNvuGIXHibWZWInE
# - HYPERBOLIC_API_BASE=https://api.hyperbolic.xyz/v1
# Set encoding to UTF-8 (default)
encoding: utf-8
gitignore: false
# The metadata file is still needed to inform aider about the
# context window and costs for this custom model.
model-metadata-file: .aider.model.metadata.json

.aider.model.metadata.json (new file, 7 lines)
@@ -0,0 +1,7 @@
{
"hyperbolic/Qwen/Qwen3-Coder-480B-A35B-Instruct": {
"context_window": 262144,
"input_cost_per_token": 0.000002,
"output_cost_per_token": 0.000002
}
}

.env (4 changed lines)
@@ -1,6 +1,10 @@
# MEXC API Configuration (Spot Trading)
MEXC_API_KEY=mx0vglhVPZeIJ32Qw1
MEXC_SECRET_KEY=3bfe4bd99d5541e4a1bca87ab257cc7e
DERBIT_API_CLIENTID=me1yf6K0
DERBIT_API_SECRET=PxdvEHmJ59FrguNVIt45-iUBj3lPXbmlA7OQUeINE9s
BYBIT_API_KEY=GQ50IkgZKkR3ljlbPx
BYBIT_API_SECRET=0GWpva5lYrhzsUqZCidQpO5TxYwaEmdiEDyc
#3bfe4bd99d5541e4a1bca87ab257cc7e 45d0b3c26f2644f19bfb98b07741b2f5
# BASE ENDPOINTS: https://api.mexc.com wss://wbs-api.mexc.com/ws !!! DO NOT CHANGE THIS

.gitignore (vendored, 11 changed lines)
@@ -16,7 +16,7 @@ models/trading_agent_final.pt.backup
*.pt
*.backup
logs/
trade_logs/
# trade_logs/
*.csv
cache/
realtime_chart.log
@@ -42,3 +42,12 @@ data/cnn_training/cnn_training_data*
testcases/*
testcases/negative/case_index.json
chrome_user_data/*
.aider*
!.aider.conf.yml
!.aider.model.metadata.json
.env
.env
training_data/*
data/trading_system.db
/data/trading_system.db

@@ -0,0 +1,448 @@
# Design Document
## Overview
The Multi-Exchange Data Aggregation System is a comprehensive data collection and processing subsystem designed to serve as the foundational data layer for the trading orchestrator. The system will collect real-time order book and OHLCV data from the top 10 cryptocurrency exchanges, aggregate it into standardized formats, store it in a TimescaleDB time-series database, and provide both live data feeds and historical replay capabilities.
The system follows a microservices architecture with containerized components, ensuring scalability, maintainability, and seamless integration with the existing trading infrastructure.
We implement it in the `.\COBY` subfolder for easy integration with the existing system.
## Architecture
### High-Level Architecture
```mermaid
graph TB
subgraph "Exchange Connectors"
E1[Binance WebSocket]
E2[Coinbase WebSocket]
E3[Kraken WebSocket]
E4[Bybit WebSocket]
E5[OKX WebSocket]
E6[Huobi WebSocket]
E7[KuCoin WebSocket]
E8[Gate.io WebSocket]
E9[Bitfinex WebSocket]
E10[MEXC WebSocket]
end
subgraph "Data Processing Layer"
DP[Data Processor]
AGG[Aggregation Engine]
NORM[Data Normalizer]
end
subgraph "Storage Layer"
TSDB[(TimescaleDB)]
CACHE[Redis Cache]
end
subgraph "API Layer"
LIVE[Live Data API]
REPLAY[Replay API]
WEB[Web Dashboard]
end
subgraph "Integration Layer"
ORCH[Orchestrator Interface]
ADAPTER[Data Adapter]
end
E1 --> DP
E2 --> DP
E3 --> DP
E4 --> DP
E5 --> DP
E6 --> DP
E7 --> DP
E8 --> DP
E9 --> DP
E10 --> DP
DP --> NORM
NORM --> AGG
AGG --> TSDB
AGG --> CACHE
CACHE --> LIVE
TSDB --> REPLAY
LIVE --> WEB
REPLAY --> WEB
LIVE --> ADAPTER
REPLAY --> ADAPTER
ADAPTER --> ORCH
```
### Component Architecture
The system is organized into several key components:
1. **Exchange Connectors**: WebSocket clients for each exchange
2. **Data Processing Engine**: Normalizes and validates incoming data
3. **Aggregation Engine**: Creates price buckets and heatmaps
4. **Storage Layer**: TimescaleDB for persistence, Redis for caching
5. **API Layer**: REST and WebSocket APIs for data access
6. **Web Dashboard**: Real-time visualization interface
7. **Integration Layer**: Orchestrator-compatible interface
## Components and Interfaces
### Exchange Connector Interface
```python
class ExchangeConnector:
    """Base interface for exchange WebSocket connectors"""
    async def connect(self) -> bool: ...
    async def disconnect(self) -> None: ...
    async def subscribe_orderbook(self, symbol: str) -> None: ...
    async def subscribe_trades(self, symbol: str) -> None: ...
    def get_connection_status(self) -> ConnectionStatus: ...
    def add_data_callback(self, callback: Callable) -> None: ...
```
### Data Processing Interface
```python
class DataProcessor:
    """Processes and normalizes raw exchange data"""
    def normalize_orderbook(self, raw_data: Dict, exchange: str) -> OrderBookSnapshot: ...
    def normalize_trade(self, raw_data: Dict, exchange: str) -> TradeEvent: ...
    def validate_data(self, data: Union[OrderBookSnapshot, TradeEvent]) -> bool: ...
    def calculate_metrics(self, orderbook: OrderBookSnapshot) -> OrderBookMetrics: ...
```
### Aggregation Engine Interface
```python
class AggregationEngine:
    """Aggregates data into price buckets and heatmaps"""
    def create_price_buckets(self, orderbook: OrderBookSnapshot, bucket_size: float) -> PriceBuckets: ...
    def update_heatmap(self, symbol: str, buckets: PriceBuckets) -> HeatmapData: ...
    def calculate_imbalances(self, orderbook: OrderBookSnapshot) -> ImbalanceMetrics: ...
    def aggregate_across_exchanges(self, symbol: str) -> ConsolidatedOrderBook: ...
```
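To make the bucket sizes concrete ($10 for BTC, $1 for ETH), here is a minimal sketch of how `create_price_buckets` could round raw order book levels into fixed-size buckets. It assumes the `OrderBookSnapshot`, `PriceLevel`, and `PriceBuckets` dataclasses from the Data Models section below; the real engine would additionally update heatmaps and consolidate across exchanges.
```python
import math
from collections import defaultdict

def create_price_buckets(orderbook: "OrderBookSnapshot", bucket_size: float) -> "PriceBuckets":
    """Aggregate raw order book levels into fixed-size price buckets (sketch)."""
    bid_buckets = defaultdict(float)
    ask_buckets = defaultdict(float)

    for level in orderbook.bids:
        # Bids are bucketed down to the lower bucket boundary
        bucket = math.floor(level.price / bucket_size) * bucket_size
        bid_buckets[round(bucket, 8)] += level.size

    for level in orderbook.asks:
        # Asks are bucketed up so bid and ask buckets do not overlap
        bucket = math.ceil(level.price / bucket_size) * bucket_size
        ask_buckets[round(bucket, 8)] += level.size

    return PriceBuckets(
        symbol=orderbook.symbol,
        timestamp=orderbook.timestamp,
        bucket_size=bucket_size,
        bid_buckets=dict(bid_buckets),
        ask_buckets=dict(ask_buckets),
    )
```
Heatmap intensity per bucket can then be derived by normalizing each bucket's volume against the largest bucket in the snapshot.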
### Storage Interface
```python
class StorageManager:
    """Manages data persistence and retrieval"""
    async def store_orderbook(self, data: OrderBookSnapshot) -> bool: ...
    async def store_trade(self, data: TradeEvent) -> bool: ...
    async def get_historical_data(self, symbol: str, start: datetime, end: datetime) -> List[Dict]: ...
    async def get_latest_data(self, symbol: str) -> Dict: ...
    def setup_database_schema(self) -> None: ...
```
### Replay Interface
```python
class ReplayManager:
    """Provides historical data replay functionality"""
    def create_replay_session(self, start_time: datetime, end_time: datetime, speed: float) -> str: ...
    async def start_replay(self, session_id: str) -> None: ...
    async def pause_replay(self, session_id: str) -> None: ...
    async def stop_replay(self, session_id: str) -> None: ...
    def get_replay_status(self, session_id: str) -> ReplayStatus: ...
```
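To preserve original timing relationships while honouring a configurable playback speed, the replay loop can sleep for each historical inter-event gap divided by the speed multiplier. A minimal sketch, assuming events are dicts with a `timestamp` field fetched from storage and `emit` has the same callback shape as the live feed:
```python
import asyncio
from datetime import datetime
from typing import Callable, Dict, List, Optional

async def replay_events(events: List[Dict], speed: float,
                        emit: Callable[[Dict], None]) -> None:
    """Replay stored events, scaling the original inter-event gaps by `speed`."""
    previous_ts: Optional[datetime] = None
    for event in events:
        ts: datetime = event["timestamp"]
        if previous_ts is not None:
            gap = (ts - previous_ts).total_seconds() / max(speed, 1e-9)
            if gap > 0:
                await asyncio.sleep(gap)
        emit(event)  # same callback shape as the live data feed
        previous_ts = ts
```
At `speed=1.0` this reproduces historical pacing exactly; higher values compress it proportionally.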
## Data Models
### Core Data Structures
```python
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, List, Optional

@dataclass
class OrderBookSnapshot:
    """Standardized order book snapshot"""
    symbol: str
    exchange: str
    timestamp: datetime
    bids: List[PriceLevel]
    asks: List[PriceLevel]
    sequence_id: Optional[int] = None

@dataclass
class PriceLevel:
    """Individual price level in order book"""
    price: float
    size: float
    count: Optional[int] = None

@dataclass
class TradeEvent:
    """Standardized trade event"""
    symbol: str
    exchange: str
    timestamp: datetime
    price: float
    size: float
    side: str  # 'buy' or 'sell'
    trade_id: str

@dataclass
class PriceBuckets:
    """Aggregated price buckets for heatmap"""
    symbol: str
    timestamp: datetime
    bucket_size: float
    bid_buckets: Dict[float, float]  # price -> volume
    ask_buckets: Dict[float, float]  # price -> volume

@dataclass
class HeatmapData:
    """Heatmap visualization data"""
    symbol: str
    timestamp: datetime
    bucket_size: float
    data: List[HeatmapPoint]

@dataclass
class HeatmapPoint:
    """Individual heatmap data point"""
    price: float
    volume: float
    intensity: float  # 0.0 to 1.0
    side: str  # 'bid' or 'ask'
```
### Database Schema
#### TimescaleDB Tables
```sql
-- Order book snapshots table
CREATE TABLE order_book_snapshots (
id BIGSERIAL,
symbol VARCHAR(20) NOT NULL,
exchange VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
bids JSONB NOT NULL,
asks JSONB NOT NULL,
sequence_id BIGINT,
mid_price DECIMAL(20,8),
spread DECIMAL(20,8),
bid_volume DECIMAL(30,8),
ask_volume DECIMAL(30,8),
PRIMARY KEY (timestamp, symbol, exchange)
);
-- Convert to hypertable
SELECT create_hypertable('order_book_snapshots', 'timestamp');
-- Trade events table
CREATE TABLE trade_events (
id BIGSERIAL,
symbol VARCHAR(20) NOT NULL,
exchange VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
price DECIMAL(20,8) NOT NULL,
size DECIMAL(30,8) NOT NULL,
side VARCHAR(4) NOT NULL,
trade_id VARCHAR(100) NOT NULL,
PRIMARY KEY (timestamp, symbol, exchange, trade_id)
);
-- Convert to hypertable
SELECT create_hypertable('trade_events', 'timestamp');
-- Aggregated heatmap data table
CREATE TABLE heatmap_data (
symbol VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
bucket_size DECIMAL(10,2) NOT NULL,
price_bucket DECIMAL(20,8) NOT NULL,
volume DECIMAL(30,8) NOT NULL,
side VARCHAR(3) NOT NULL,
exchange_count INTEGER NOT NULL,
PRIMARY KEY (timestamp, symbol, bucket_size, price_bucket, side)
);
-- Convert to hypertable
SELECT create_hypertable('heatmap_data', 'timestamp');
-- OHLCV data table
CREATE TABLE ohlcv_data (
symbol VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
timeframe VARCHAR(10) NOT NULL,
open_price DECIMAL(20,8) NOT NULL,
high_price DECIMAL(20,8) NOT NULL,
low_price DECIMAL(20,8) NOT NULL,
close_price DECIMAL(20,8) NOT NULL,
volume DECIMAL(30,8) NOT NULL,
trade_count INTEGER,
PRIMARY KEY (timestamp, symbol, timeframe)
);
-- Convert to hypertable
SELECT create_hypertable('ohlcv_data', 'timestamp');
```
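Because snapshots arrive far faster than it is efficient to commit row-by-row, writes to the hypertables above should be batched. A minimal sketch using `asyncpg` against `order_book_snapshots`; the buffer layout and flush policy are illustrative assumptions, not the final storage manager:
```python
import json
from typing import List

import asyncpg

class OrderBookWriter:
    """Buffers order book snapshots and flushes them to TimescaleDB in batches."""
    def __init__(self, pool: asyncpg.Pool, batch_size: int = 1000):
        self.pool = pool
        self.batch_size = batch_size
        self.buffer: List[tuple] = []

    def add(self, snapshot) -> None:
        self.buffer.append((
            snapshot.symbol, snapshot.exchange, snapshot.timestamp,
            json.dumps([[l.price, l.size] for l in snapshot.bids]),
            json.dumps([[l.price, l.size] for l in snapshot.asks]),
            snapshot.sequence_id,
        ))

    async def flush(self) -> None:
        if not self.buffer:
            return
        batch, self.buffer = self.buffer, []
        async with self.pool.acquire() as conn:
            await conn.executemany(
                """INSERT INTO order_book_snapshots
                       (symbol, exchange, timestamp, bids, asks, sequence_id)
                   VALUES ($1, $2, $3, $4, $5, $6)""",
                batch,
            )
```
A background task could call `flush()` on a short interval or whenever the buffer reaches `batch_size`.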
## Error Handling
### Connection Management
The system implements robust error handling for exchange connections (a combined sketch follows this list):
1. **Exponential Backoff**: Failed connections retry with increasing delays
2. **Circuit Breaker**: Temporarily disable problematic exchanges
3. **Graceful Degradation**: Continue operation with available exchanges
4. **Health Monitoring**: Continuous monitoring of connection status
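A compact sketch of how the backoff and circuit-breaker policies above could be combined in a connector's reconnect loop; the thresholds, cooldown, and the injected `connect()` coroutine are illustrative assumptions:
```python
import asyncio
import logging
import random

logger = logging.getLogger(__name__)

async def reconnect_loop(connect, max_delay: float = 60.0,
                         failure_threshold: int = 5, cooldown: float = 300.0) -> None:
    """Retry `connect()` with exponential backoff; trip a circuit breaker after repeated failures."""
    delay = 1.0
    consecutive_failures = 0
    while True:
        try:
            await connect()                          # returns when the connection drops
            delay, consecutive_failures = 1.0, 0     # a healthy session resets the policy
        except Exception as exc:
            consecutive_failures += 1
            logger.warning("connection failed (%s), retry in %.1fs", exc, delay)
            if consecutive_failures >= failure_threshold:
                logger.error("circuit breaker open, cooling down %.0fs", cooldown)
                await asyncio.sleep(cooldown)
                consecutive_failures = 0
                delay = 1.0
                continue
            await asyncio.sleep(delay + random.uniform(0, delay * 0.1))  # jittered backoff
            delay = min(delay * 2, max_delay)
```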
### Data Validation
All incoming data undergoes validation:
1. **Schema Validation**: Ensure data structure compliance
2. **Range Validation**: Check price and volume ranges
3. **Timestamp Validation**: Verify temporal consistency
4. **Duplicate Detection**: Prevent duplicate data storage
### Database Resilience
Database operations include comprehensive error handling:
1. **Connection Pooling**: Maintain multiple database connections
2. **Transaction Management**: Ensure data consistency
3. **Retry Logic**: Automatic retry for transient failures
4. **Backup Strategies**: Regular data backups and recovery procedures
## Testing Strategy
### Unit Testing
Each component will have comprehensive unit tests:
1. **Exchange Connectors**: Mock WebSocket responses
2. **Data Processing**: Test normalization and validation
3. **Aggregation Engine**: Verify bucket calculations
4. **Storage Layer**: Test database operations
5. **API Layer**: Test endpoint responses
### Integration Testing
End-to-end testing scenarios:
1. **Multi-Exchange Data Flow**: Test complete data pipeline
2. **Database Integration**: Verify TimescaleDB operations
3. **API Integration**: Test orchestrator interface compatibility
4. **Performance Testing**: Load testing with high-frequency data
### Performance Testing
Performance benchmarks and testing:
1. **Throughput Testing**: Measure data processing capacity
2. **Latency Testing**: Measure end-to-end data latency
3. **Memory Usage**: Monitor memory consumption patterns
4. **Database Performance**: Query performance optimization
### Monitoring and Observability
Comprehensive monitoring system:
1. **Metrics Collection**: Prometheus-compatible metrics
2. **Logging**: Structured logging with correlation IDs
3. **Alerting**: Real-time alerts for system issues
4. **Dashboards**: Grafana dashboards for system monitoring
## Deployment Architecture
### Docker Containerization
The system will be deployed using Docker containers:
```yaml
# docker-compose.yml
version: '3.8'

services:
  timescaledb:
    image: timescale/timescaledb:latest-pg14
    environment:
      POSTGRES_DB: market_data
      POSTGRES_USER: market_user
      POSTGRES_PASSWORD: ${DB_PASSWORD}
    volumes:
      - timescale_data:/var/lib/postgresql/data
    ports:
      - "5432:5432"

  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data

  data-aggregator:
    build: ./data-aggregator
    environment:
      - DB_HOST=timescaledb
      - REDIS_HOST=redis
      - LOG_LEVEL=INFO
    depends_on:
      - timescaledb
      - redis

  web-dashboard:
    build: ./web-dashboard
    ports:
      - "8080:8080"
    environment:
      - API_HOST=data-aggregator
    depends_on:
      - data-aggregator

volumes:
  timescale_data:
  redis_data:
```
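For the health check endpoints required for container orchestration (Requirement 7.4), a minimal sketch using `aiohttp`; the individual status checks are placeholders for real connector, database, and cache probes:
```python
from aiohttp import web

async def health(request: web.Request) -> web.Response:
    # In the real service these would query connector, DB, and cache status
    status = {
        "exchanges_connected": True,
        "database": True,
        "cache": True,
    }
    healthy = all(status.values())
    return web.json_response(status, status=200 if healthy else 503)

app = web.Application()
app.router.add_get("/health", health)

if __name__ == "__main__":
    web.run_app(app, port=8080)
```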
### Configuration Management
Environment-based configuration:
```python
# config.py
import os
from dataclasses import dataclass, field
from typing import List

@dataclass
class Config:
    # Database settings
    db_host: str = os.getenv('DB_HOST', 'localhost')
    db_port: int = int(os.getenv('DB_PORT', '5432'))
    db_name: str = os.getenv('DB_NAME', 'market_data')
    db_user: str = os.getenv('DB_USER', 'market_user')
    db_password: str = os.getenv('DB_PASSWORD', '')

    # Redis settings
    redis_host: str = os.getenv('REDIS_HOST', 'localhost')
    redis_port: int = int(os.getenv('REDIS_PORT', '6379'))

    # Exchange settings
    exchanges: List[str] = field(default_factory=lambda: [
        'binance', 'coinbase', 'kraken', 'bybit', 'okx',
        'huobi', 'kucoin', 'gateio', 'bitfinex', 'mexc'
    ])

    # Aggregation settings
    btc_bucket_size: float = 10.0  # $10 USD buckets for BTC
    eth_bucket_size: float = 1.0   # $1 USD buckets for ETH

    # Performance settings
    max_connections_per_exchange: int = 5
    data_buffer_size: int = 10000
    batch_write_size: int = 1000

    # API settings
    api_host: str = os.getenv('API_HOST', '0.0.0.0')
    api_port: int = int(os.getenv('API_PORT', '8080'))
    websocket_port: int = int(os.getenv('WS_PORT', '8081'))
```
This design provides a robust, scalable foundation for multi-exchange data aggregation that seamlessly integrates with the existing trading orchestrator while providing the flexibility for future enhancements and additional exchange integrations.

@@ -0,0 +1,103 @@
# Requirements Document
## Introduction
This document outlines the requirements for a comprehensive data collection and aggregation subsystem that will serve as a foundational component for the trading orchestrator. The system will collect, aggregate, and store real-time order book and OHLCV data from multiple cryptocurrency exchanges, providing both live data feeds and historical replay capabilities for model training and backtesting.
## Requirements
### Requirement 1
**User Story:** As a trading system developer, I want to collect real-time order book data from top 10 cryptocurrency exchanges, so that I can have comprehensive market data for analysis and trading decisions.
#### Acceptance Criteria
1. WHEN the system starts THEN it SHALL establish WebSocket connections to up to 10 major cryptocurrency exchanges
2. WHEN order book updates are received THEN the system SHALL process and store raw order book events in real-time
3. WHEN processing order book data THEN the system SHALL handle connection failures gracefully and automatically reconnect
4. WHEN multiple exchanges provide data THEN the system SHALL normalize data formats to a consistent structure
5. IF an exchange connection fails THEN the system SHALL log the failure and attempt reconnection with exponential backoff
### Requirement 2
**User Story:** As a trading analyst, I want order book data aggregated into price buckets with heatmap visualization, so that I can quickly identify market depth and liquidity patterns.
#### Acceptance Criteria
1. WHEN processing BTC order book data THEN the system SHALL aggregate orders into $10 USD price range buckets
2. WHEN processing ETH order book data THEN the system SHALL aggregate orders into $1 USD price range buckets
3. WHEN aggregating order data THEN the system SHALL maintain separate bid and ask heatmaps
4. WHEN building heatmaps THEN the system SHALL update distribution data at high frequency (sub-second)
5. WHEN displaying heatmaps THEN the system SHALL show volume intensity using color gradients or progress bars
### Requirement 3
**User Story:** As a system architect, I want all market data stored in a TimescaleDB database, so that I can efficiently query time-series data and maintain historical records.
#### Acceptance Criteria
1. WHEN the system initializes THEN it SHALL connect to a TimescaleDB instance running in a Docker container
2. WHEN storing order book events THEN the system SHALL use TimescaleDB's time-series optimized storage
3. WHEN storing OHLCV data THEN the system SHALL create appropriate time-series tables with proper indexing
4. WHEN writing to database THEN the system SHALL batch writes for optimal performance
5. IF database connection fails THEN the system SHALL queue data in memory and retry with backoff strategy
### Requirement 4
**User Story:** As a trading system operator, I want a web-based dashboard to monitor real-time order book heatmaps, so that I can visualize market conditions across multiple exchanges.
#### Acceptance Criteria
1. WHEN accessing the web dashboard THEN it SHALL display real-time order book heatmaps for BTC and ETH
2. WHEN viewing heatmaps THEN the dashboard SHALL show aggregated data from all connected exchanges
3. WHEN displaying progress bars THEN they SHALL always show aggregated values across price buckets
4. WHEN updating the display THEN the dashboard SHALL refresh data at least once per second
5. WHEN an exchange goes offline THEN the dashboard SHALL indicate the status change visually
### Requirement 5
**User Story:** As a model trainer, I want a replay interface that can provide historical data in the same format as live data, so that I can train models on past market events.
#### Acceptance Criteria
1. WHEN requesting historical data THEN the replay interface SHALL provide data in the same structure as live feeds
2. WHEN replaying data THEN the system SHALL maintain original timing relationships between events
3. WHEN using replay mode THEN the interface SHALL support configurable playback speeds
4. WHEN switching between live and replay modes THEN the orchestrator SHALL receive data through the same interface
5. IF replay data is requested for unavailable time periods THEN the system SHALL return appropriate error messages
### Requirement 6
**User Story:** As a trading system integrator, I want the data aggregation system to follow the same interface as the current orchestrator data provider, so that I can seamlessly integrate it into existing workflows.
#### Acceptance Criteria
1. WHEN the orchestrator requests data THEN the aggregation system SHALL provide data in the expected format
2. WHEN integrating with existing systems THEN the interface SHALL be compatible with current data provider contracts
3. WHEN providing aggregated data THEN the system SHALL include metadata about data sources and quality
4. WHEN the orchestrator switches data sources THEN it SHALL work without code changes
5. IF data quality issues are detected THEN the system SHALL provide quality indicators in the response
### Requirement 7
**User Story:** As a system administrator, I want the data collection system to be containerized and easily deployable, so that I can manage it alongside other system components.
#### Acceptance Criteria
1. WHEN deploying the system THEN it SHALL run in Docker containers with proper resource allocation
2. WHEN starting services THEN TimescaleDB SHALL be automatically provisioned in its own container
3. WHEN configuring the system THEN all settings SHALL be externalized through environment variables or config files
4. WHEN monitoring the system THEN it SHALL provide health check endpoints for container orchestration
5. IF containers need to be restarted THEN the system SHALL recover gracefully without data loss
### Requirement 8
**User Story:** As a performance engineer, I want the system to handle high-frequency data efficiently, so that it can process order book updates from multiple exchanges without latency issues.
#### Acceptance Criteria
1. WHEN processing order book updates THEN the system SHALL handle at least 10 updates per second per exchange
2. WHEN aggregating data THEN processing latency SHALL be less than 10 milliseconds per update
3. WHEN storing data THEN the system SHALL use efficient batching to minimize database overhead
4. WHEN memory usage grows THEN the system SHALL implement appropriate cleanup and garbage collection
5. IF processing falls behind THEN the system SHALL prioritize recent data and log performance warnings

@@ -0,0 +1,184 @@
# Implementation Plan
- [x] 1. Set up project structure and core interfaces
- Create directory structure in `.\COBY` subfolder for the multi-exchange data aggregation system
- Define base interfaces and data models for exchange connectors, data processing, and storage
- Implement configuration management system with environment variable support
- _Requirements: 1.1, 6.1, 7.3_
- [ ] 2. Implement TimescaleDB integration and database schema
- Create TimescaleDB connection manager with connection pooling
- Implement database schema creation with hypertables for time-series optimization
- Write database operations for storing order book snapshots and trade events
- Create database migration system for schema updates
- _Requirements: 3.1, 3.2, 3.3, 3.4_
- [ ] 3. Create base exchange connector framework
- Implement abstract base class for exchange WebSocket connectors
- Create connection management with exponential backoff and circuit breaker patterns
- Implement WebSocket message handling with proper error recovery
- Add connection status monitoring and health checks
- _Requirements: 1.1, 1.3, 1.4, 8.5_
- [ ] 4. Implement Binance exchange connector
- Create Binance-specific WebSocket connector extending the base framework
- Implement order book depth stream subscription and processing
- Add trade stream subscription for volume analysis
- Implement data normalization from Binance format to standard format
- Write unit tests for Binance connector functionality
- _Requirements: 1.1, 1.2, 1.4, 6.2_
- [ ] 5. Create data processing and normalization engine
- Implement data processor for normalizing raw exchange data
- Create validation logic for order book and trade data
- Implement data quality checks and filtering
- Add metrics calculation for order book statistics
- Write comprehensive unit tests for data processing logic
- _Requirements: 1.4, 6.3, 8.1_
- [ ] 6. Implement price bucket aggregation system
- Create aggregation engine for converting order book data to price buckets
- Implement configurable bucket sizes ($10 for BTC, $1 for ETH)
- Create heatmap data structure generation from price buckets
- Implement real-time aggregation with high-frequency updates
- Add volume-weighted aggregation calculations
- _Requirements: 2.1, 2.2, 2.3, 2.4, 8.1, 8.2_
- [ ] 7. Build Redis caching layer
- Implement Redis connection manager with connection pooling
- Create caching strategies for latest order book data and heatmaps
- Implement cache invalidation and TTL management
- Add cache performance monitoring and metrics
- Write tests for caching functionality
- _Requirements: 8.2, 8.3_
- [ ] 8. Create live data API endpoints
- Implement REST API for accessing current order book data
- Create WebSocket API for real-time data streaming
- Add endpoints for heatmap data retrieval
- Implement API rate limiting and authentication
- Create comprehensive API documentation
- _Requirements: 4.1, 4.2, 4.4, 6.3_
- [ ] 9. Implement web dashboard for visualization
- Create HTML/CSS/JavaScript dashboard for real-time heatmap visualization
- Implement WebSocket client for receiving real-time updates
- Create progress bar visualization for aggregated price buckets
- Add exchange status indicators and connection monitoring
- Implement responsive design for different screen sizes
- _Requirements: 4.1, 4.2, 4.3, 4.5_
- [ ] 10. Build historical data replay system
- Create replay manager for historical data playback
- Implement configurable playback speeds and time range selection
- Create replay session management with start/pause/stop controls
- Implement data streaming interface compatible with live data format
- Add replay status monitoring and progress tracking
- _Requirements: 5.1, 5.2, 5.3, 5.4, 5.5_
- [ ] 11. Create orchestrator integration interface
- Implement data adapter that matches existing orchestrator interface
- Create compatibility layer for seamless integration with current data provider
- Add data quality indicators and metadata in responses
- Implement switching mechanism between live and replay modes
- Write integration tests with existing orchestrator code
- _Requirements: 6.1, 6.2, 6.3, 6.4, 6.5_
- [ ] 12. Add additional exchange connectors (Coinbase, Kraken)
- Implement Coinbase Pro WebSocket connector with proper authentication
- Create Kraken WebSocket connector with their specific message format
- Add exchange-specific data normalization for both exchanges
- Implement proper error handling for each exchange's quirks
- Write unit tests for both new exchange connectors
- _Requirements: 1.1, 1.2, 1.4_
- [ ] 13. Implement remaining exchange connectors (Bybit, OKX, Huobi)
- Create Bybit WebSocket connector with unified trading account support
- Implement OKX connector with their V5 API WebSocket streams
- Add Huobi Global connector with proper symbol mapping
- Ensure all connectors follow the same interface and error handling patterns
- Write comprehensive tests for all three exchange connectors
- _Requirements: 1.1, 1.2, 1.4_
- [ ] 14. Complete exchange connector suite (KuCoin, Gate.io, Bitfinex, MEXC)
- Implement KuCoin connector with proper token-based authentication
- Create Gate.io connector with their WebSocket v4 API
- Add Bitfinex connector with proper channel subscription management
- Implement MEXC connector with their WebSocket streams
- Ensure all 10 exchanges are properly integrated and tested
- _Requirements: 1.1, 1.2, 1.4_
- [ ] 15. Implement cross-exchange data consolidation
- Create consolidation engine that merges order book data from multiple exchanges
- Implement weighted aggregation based on exchange liquidity and reliability
- Add conflict resolution for price discrepancies between exchanges
- Create consolidated heatmap that shows combined market depth
- Write tests for multi-exchange aggregation scenarios
- _Requirements: 2.5, 4.2_
- [ ] 16. Add performance monitoring and optimization
- Implement comprehensive metrics collection for all system components
- Create performance monitoring dashboard with key system metrics
- Add latency tracking for end-to-end data processing
- Implement memory usage monitoring and garbage collection optimization
- Create alerting system for performance degradation
- _Requirements: 8.1, 8.2, 8.3, 8.4, 8.5_
- [ ] 17. Create Docker containerization and deployment
- Write Dockerfiles for all system components
- Create docker-compose configuration for local development
- Implement health check endpoints for container orchestration
- Add environment variable configuration for all services
- Create deployment scripts and documentation
- _Requirements: 7.1, 7.2, 7.3, 7.4, 7.5_
- [ ] 18. Implement comprehensive testing suite
- Create integration tests for complete data pipeline from exchanges to storage
- Implement load testing for high-frequency data scenarios
- Add end-to-end tests for web dashboard functionality
- Create performance benchmarks and regression tests
- Write documentation for running and maintaining tests
- _Requirements: 8.1, 8.2, 8.3, 8.4_
- [ ] 19. Add system monitoring and alerting
- Implement structured logging with correlation IDs across all components
- Create Prometheus metrics exporters for system monitoring
- Add Grafana dashboards for system visualization
- Implement alerting rules for system failures and performance issues
- Create runbook documentation for common operational scenarios
- _Requirements: 7.4, 8.5_
- [ ] 20. Final integration and system testing
- Integrate the complete system with existing trading orchestrator
- Perform end-to-end testing with real market data
- Validate replay functionality with historical data scenarios
- Test failover scenarios and system resilience
- Create user documentation and operational guides
- _Requirements: 6.1, 6.2, 6.4, 5.1, 5.2_

@@ -0,0 +1,713 @@
# Multi-Modal Trading System Design Document
## Overview
The Multi-Modal Trading System is designed as an advanced algorithmic trading platform that combines Convolutional Neural Networks (CNN) and Reinforcement Learning (RL) models orchestrated by a decision-making module. The system processes multi-timeframe and multi-symbol market data (primarily ETH and BTC) to generate trading actions.
This design document outlines the architecture, components, data flow, and implementation details for the system based on the requirements and existing codebase.
## Architecture
The system follows a modular architecture with clear separation of concerns:
```mermaid
graph TD
A[Data Provider] --> B["Data Processor (calculates pivot points)"]
B --> C[CNN Model]
B --> D[RL(DQN) Model]
C --> E[Orchestrator]
D --> E
E --> F[Trading Executor]
E --> G[Dashboard]
F --> G
H[Risk Manager] --> F
H --> G
```
### Key Components
1. **Data Provider**: Centralized component responsible for collecting, processing, and distributing market data from multiple sources.
2. **Data Processor**: Processes raw market data, calculates technical indicators, and identifies pivot points.
3. **CNN Model**: Analyzes patterns in market data and predicts pivot points across multiple timeframes.
4. **RL Model**: Learns optimal trading strategies based on market data and CNN predictions.
5. **Orchestrator**: Makes final trading decisions based on inputs from both CNN and RL models.
6. **Trading Executor**: Executes trading actions through brokerage APIs.
7. **Risk Manager**: Implements risk management features like stop-loss and position sizing.
8. **Dashboard**: Provides a user interface for monitoring and controlling the system.
## Components and Interfaces
### 1. Data Provider
The Data Provider is the foundation of the system, responsible for collecting, processing, and distributing market data to all other components.
#### Key Classes and Interfaces
- **DataProvider**: Central class that manages data collection, processing, and distribution.
- **MarketTick**: Data structure for standardized market tick data.
- **DataSubscriber**: Interface for components that subscribe to market data.
- **PivotBounds**: Data structure for pivot-based normalization bounds.
#### Implementation Details
The DataProvider class will:
- Collect data from multiple sources (Binance, MEXC)
- Support multiple timeframes (1s, 1m, 1h, 1d)
- Support multiple symbols (ETH, BTC)
- Calculate technical indicators
- Identify pivot points
- Normalize data
- Distribute data to subscribers
- Perform any other algorithmic manipulations/calculations on the data
- Cache up to 3x the model input window (300 OHLCV ticks, etc.) so proper backtesting can extend up to 2x further into the future
Based on the existing implementation in `core/data_provider.py`, we'll enhance it to:
- Improve pivot point calculation using the recursive Williams Market Structure (see the sketch after this list)
- Optimize data caching for better performance
- Enhance real-time data streaming
- Implement better error handling and fallback mechanisms
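To illustrate the recursive Williams Market Structure idea referenced above: level-1 pivots are local swing highs/lows of the price series, and each higher level is computed from the pivot prices of the level below. A minimal sketch with illustrative names, not the actual `core/data_provider.py` API:
```python
from dataclasses import dataclass
from typing import List

@dataclass
class Pivot:
    index: int    # position in the series the pivot was found in
    price: float
    kind: str     # 'high' or 'low'

def find_pivots(series: List[float], strength: int = 2) -> List[Pivot]:
    """Level-1 swing points: a value that is the max/min of `strength` neighbours on each side."""
    pivots = []
    for i in range(strength, len(series) - strength):
        window = series[i - strength:i + strength + 1]
        if series[i] == max(window):
            pivots.append(Pivot(i, series[i], 'high'))
        elif series[i] == min(window):
            pivots.append(Pivot(i, series[i], 'low'))
    return pivots

def williams_pivot_levels(prices: List[float], levels: int = 5) -> List[List[Pivot]]:
    """Recursive structure: level N pivots are the pivots of the level N-1 pivot prices."""
    all_levels = []
    series = prices
    for _ in range(levels):
        pivots = find_pivots(series)
        if not pivots:
            break
        all_levels.append(pivots)
        # note: at higher levels, `index` refers to a position in the previous pivot list
        series = [p.price for p in pivots]
    return all_levels
```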
### BASE FOR ALL MODELS ###
- ***INPUTS***: COB + OHLCV data frame as described:
- OHLCV: 300 frames of (1s, 1m, 1h, 1d) ETH + 300 s of 1s BTC
- COB: for each 1s OHLCV frame, ±20 buckets of COB amounts in USD
- 1, 5, 15 and 60 s MAs of the COB imbalance computed over the ±5 nearest COB buckets
- ***OUTPUTS***:
- suggested trade action (BUY/SELL/HOLD), paired with a confidence score
- immediate price movement direction vector (-1: vertical down, 1: vertical up, 0: horizontal), linear, with its own confidence
```python
# Standardized input for all models:
{
    'primary_symbol': 'ETH/USDT',
    'reference_symbol': 'BTC/USDT',
    'eth_data': {'ETH_1s': df, 'ETH_1m': df, 'ETH_1h': df, 'ETH_1d': df},
    'btc_data': {'BTC_1s': df},
    'current_prices': {'ETH': price, 'BTC': price},
    'data_completeness': {...}
}
```
### 2. CNN Model
The CNN Model is responsible for analyzing patterns in market data and predicting pivot points across multiple timeframes.
#### Key Classes and Interfaces
- **CNNModel**: Main class for the CNN model.
- **PivotPointPredictor**: Interface for predicting pivot points.
- **CNNTrainer**: Class for training the CNN model.
- ***INPUTS***: COB + OHLCV + old pivots (5 levels of pivots)
- ***OUTPUTS***: the next pivot point for each level as a price-time vector (can be plotted as a trend line) + suggested trade action (BUY/SELL)
#### Implementation Details
The CNN Model will:
- Accept multi-timeframe and multi-symbol data as input
- Output predicted pivot points for each timeframe (1s, 1m, 1h, 1d)
- Provide confidence scores for each prediction
- Make hidden layer states available for the RL model
Architecture:
- Input layer: Multi-channel input for different timeframes and symbols
- Convolutional layers: Extract patterns from time series data
- LSTM/GRU layers: Capture temporal dependencies
- Attention mechanism: Focus on relevant parts of the input
- Output layer: Predict pivot points and confidence scores
Training:
- Use programmatically calculated pivot points as ground truth
- Train on historical data
- Update model when new pivot points are detected
- Use backpropagation to optimize weights
### 3. RL Model
The RL Model is responsible for learning optimal trading strategies based on market data and CNN predictions.
#### Key Classes and Interfaces
- **RLModel**: Main class for the RL model.
- **TradingActionGenerator**: Interface for generating trading actions.
- **RLTrainer**: Class for training the RL model.
#### Implementation Details
The RL Model will:
- Accept market data, CNN model predictions (output), and CNN hidden layer states as input
- Output trading action recommendations (buy/sell)
- Provide confidence scores for each action
- Learn from past experiences to adapt to the current market environment
Architecture:
- State representation: Market data, CNN model predictions (output), CNN hidden layer states
- Action space: Buy, Sell
- Reward function: PnL, risk-adjusted returns
- Policy network: Deep neural network
- Value network: Estimate expected returns
Training:
- Use reinforcement learning algorithms (DQN, PPO, A3C)
- Train on historical data
- Update model based on trading outcomes
- Use experience replay to improve sample efficiency (see the buffer sketch below)
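The experience replay mentioned above can be a bounded buffer of transitions sampled uniformly at random. A minimal sketch; the transition layout is an assumption rather than the project's actual DQN agent interface:
```python
import random
from collections import deque
from typing import Deque, List, Tuple

Transition = Tuple[list, int, float, list, bool]  # (state, action, reward, next_state, done)

class ReplayBuffer:
    """Fixed-size experience replay buffer with uniform random sampling."""
    def __init__(self, capacity: int = 100_000):
        self.buffer: Deque[Transition] = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done) -> None:
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size: int) -> List[Transition]:
        return random.sample(self.buffer, min(batch_size, len(self.buffer)))

    def __len__(self) -> int:
        return len(self.buffer)
```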
### 4. Orchestrator
The Orchestrator serves as the central coordination hub of the multi-modal trading system, responsible for data subscription management, model inference coordination, output storage, training pipeline orchestration, and inference-training feedback loop management.
#### Key Classes and Interfaces
- **Orchestrator**: Main class for the orchestrator.
- **DataSubscriptionManager**: Manages subscriptions to multiple data streams with different refresh rates.
- **ModelInferenceCoordinator**: Coordinates inference across all models.
- **ModelOutputStore**: Stores and manages model outputs for cross-model feeding.
- **TrainingPipelineManager**: Manages training pipelines for all models.
- **DecisionMaker**: Interface for making trading decisions.
- **MoEGateway**: Mixture of Experts gateway for model integration.
#### Core Responsibilities
##### 1. Data Subscription and Management
The Orchestrator subscribes to the Data Provider and manages multiple data streams with varying refresh rates:
- **10Hz COB (Cumulative Order Book) Data**: High-frequency order book updates for real-time market depth analysis
- **OHLCV Data**: Traditional candlestick data at multiple timeframes (1s, 1m, 1h, 1d)
- **Market Tick Data**: Individual trade executions and price movements
- **Technical Indicators**: Calculated indicators that update at different frequencies
- **Pivot Points**: Market structure analysis data
**Data Stream Management**:
- Maintains separate buffers for each data type with appropriate retention policies
- Ensures thread-safe access to data streams from multiple models
- Implements intelligent caching to serve "last updated" data efficiently
- Maintains full base dataframe that stays current for any model requesting data
- Handles data synchronization across different refresh rates
**Enhanced 1s Timeseries Data Combination**:
- Combines OHLCV data with COB (Cumulative Order Book) data for 1s timeframes
- Implements price bucket aggregation: ±20 buckets around current price
- ETH: $1 bucket size (e.g., $3000-$3040 range = 40 buckets) when current price is 3020
- BTC: $10 bucket size (e.g., $50000-$50400 range = 40 buckets) when price is 50200
- Creates unified base data input that includes:
- Traditional OHLCV metrics (Open, High, Low, Close, Volume)
- Order book depth and liquidity at each price level
- Bid/ask imbalances for the ±5 nearest buckets, with moving averages over 5, 15, and 60 s (see the sketch after this list)
- Volume-weighted average prices within buckets
- Order flow dynamics and market microstructure data
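A minimal sketch of the ±5-bucket imbalance and its moving averages referenced above, assuming the `price_buckets` layout of the `COBData` structure defined later (price -> {'bid_volume', 'ask_volume', ...}); names are illustrative:
```python
from collections import deque
from typing import Deque, Dict

def bucket_imbalance(price_buckets: Dict[float, Dict[str, float]],
                     current_price: float, bucket_size: float) -> float:
    """Bid/ask imbalance over the +-5 buckets nearest the current price."""
    bid = ask = 0.0
    base = (current_price // bucket_size) * bucket_size   # bucket containing the current price
    for offset in range(-5, 6):
        vols = price_buckets.get(round(base + offset * bucket_size, 8), {})
        bid += vols.get('bid_volume', 0.0)
        ask += vols.get('ask_volume', 0.0)
    total = bid + ask
    return (bid - ask) / total if total else 0.0

class ImbalanceMAs:
    """Rolling 5/15/60-second moving averages of the 1s imbalance series."""
    def __init__(self) -> None:
        self.history: Deque[float] = deque(maxlen=60)   # one imbalance value per second

    def update(self, imbalance: float) -> Dict[str, float]:
        self.history.append(imbalance)
        values = list(self.history)
        def ma(n: int) -> float:
            window = values[-n:]
            return sum(window) / len(window)
        return {'imbalance_1s': imbalance, 'ma_5s': ma(5), 'ma_15s': ma(15), 'ma_60s': ma(60)}
```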
##### 2. Model Inference Coordination
The Orchestrator coordinates inference across all models in the system:
**Inference Pipeline**:
- Triggers model inference when relevant data updates occur
- Manages inference scheduling based on data availability and model requirements
- Coordinates parallel inference execution for independent models
- Handles model dependencies (e.g., RL model waiting for CNN hidden states)
**Model Input Management**:
- Assembles appropriate input data for each model based on their requirements
- Ensures models receive the most current data available at inference time
- Manages feature engineering and data preprocessing for each model
- Handles different input formats and requirements across models
##### 3. Model Output Storage and Cross-Feeding
The Orchestrator maintains a centralized store for all model outputs and manages cross-model data feeding:
**Output Storage**:
- Stores CNN predictions, confidence scores, and hidden layer states
- Stores RL action recommendations and value estimates
- Stores outputs from all models in extensible format supporting future models (LSTM, Transformer, etc.)
- Maintains historical output sequences for temporal analysis
- Implements efficient retrieval mechanisms for real-time access
- Uses standardized ModelOutput format for easy extension and cross-model compatibility
**Cross-Model Feeding**:
- Feeds CNN hidden layer states into RL model inputs
- Provides CNN predictions as context for RL decision-making
- Includes "last predictions" from each available model as part of base data input
- Stores model outputs that become inputs for subsequent inference cycles
- Manages circular dependencies and feedback loops between models
- Supports dynamic model addition without requiring system architecture changes
##### 4. Training Pipeline Management
The Orchestrator coordinates training for all models by managing the prediction-result feedback loop:
**Training Coordination**:
- Calls each model's training pipeline when new inference results are available
- Provides previous predictions alongside new results for supervised learning
- Manages training data collection and labeling
- Coordinates online learning updates based on real-time performance
**Training Data Management**:
- Maintains training datasets with prediction-result pairs
- Implements data quality checks and filtering
- Manages training data retention and archival policies
- Provides training data statistics and monitoring
**Performance Tracking**:
- Tracks prediction accuracy for each model over time
- Monitors model performance degradation and triggers retraining
- Maintains performance metrics for model comparison and selection
**Training Progress and Checkpoint Persistence**
- Uses the checkpoint manager to store checkpoints of each model over time as training progresses and improvements are observed
- The checkpoint manager ensures only the top 5-10 best checkpoints are kept per model, deleting the least performant ones; it stores metadata alongside the checkpoints to rank their performance (see the sketch below)
- The best stored checkpoint is automatically loaded at startup when one exists
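A minimal sketch of the top-N retention policy described above; the class and method names are illustrative, not the project's actual checkpoint manager API:
```python
import heapq
import os
from dataclasses import dataclass
from typing import Dict, List, Optional

@dataclass(order=True)
class CheckpointMeta:
    score: float   # performance metric used for ranking (higher is better)
    path: str

class CheckpointManager:
    """Keeps only the top-N checkpoints per model, ranked by a stored performance metric."""
    def __init__(self, keep: int = 5):
        self.keep = keep
        self._checkpoints: Dict[str, List[CheckpointMeta]] = {}

    def save(self, model_name: str, path: str, score: float) -> None:
        entries = self._checkpoints.setdefault(model_name, [])
        heapq.heappush(entries, CheckpointMeta(score, path))   # min-heap keyed on score
        while len(entries) > self.keep:
            worst = heapq.heappop(entries)                     # least performant checkpoint
            if os.path.exists(worst.path):
                os.remove(worst.path)

    def best(self, model_name: str) -> Optional[CheckpointMeta]:
        entries = self._checkpoints.get(model_name, [])
        return max(entries, key=lambda c: c.score) if entries else None
```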
##### 5. Inference Data Validation and Storage
The Orchestrator implements comprehensive inference data validation and persistent storage:
**Input Data Validation**:
- Validates complete OHLCV dataframes for all required timeframes before inference
- Checks input data dimensions against model requirements
- Logs missing components and prevents prediction on incomplete data
- Raises validation errors with specific details about expected vs actual dimensions
**Inference History Storage**:
- Stores complete input data packages with each prediction in persistent storage
- Includes timestamp, symbol, input features, prediction outputs, confidence scores, and model internal states
- Maintains compressed storage to minimize footprint while preserving accessibility
- Implements efficient query mechanisms by symbol, timeframe, and date range
**Storage Management**:
- Applies configurable retention policies to manage storage limits
- Archives or removes oldest entries when limits are reached
- Prioritizes keeping most recent and valuable training examples during storage pressure
- Provides data completeness metrics and validation results in logs
##### 6. Inference-Training Feedback Loop
The Orchestrator manages the continuous learning cycle through inference-training feedback:
**Prediction Outcome Evaluation**:
- Evaluates prediction accuracy against actual price movements after sufficient time has passed
- Creates training examples using stored inference data paired with actual market outcomes
- Feeds prediction-result pairs back to respective models for learning
**Adaptive Learning Signals**:
- Provides positive reinforcement signals for accurate predictions
- Delivers corrective training signals for inaccurate predictions to help models learn from mistakes
- Retrieves last inference data for each model to compare predictions against actual outcomes
**Continuous Improvement Tracking**:
- Tracks and reports accuracy improvements or degradations over time
- Monitors model learning progress through the feedback loop
- Alerts administrators when data flow issues are detected with specific error details and remediation suggestions
##### 7. Decision Making and Trading Actions
Beyond coordination, the Orchestrator makes final trading decisions:
**Decision Integration**:
- Combines outputs from CNN and RL models using Mixture of Experts approach
- Applies confidence-based filtering to avoid uncertain trades
- Implements configurable thresholds for buy/sell decisions
- Considers market conditions and risk parameters
#### Implementation Details
**Architecture**:
```python
class Orchestrator:
    def __init__(self):
        self.data_subscription_manager = DataSubscriptionManager()
        self.model_inference_coordinator = ModelInferenceCoordinator()
        self.model_output_store = ModelOutputStore()
        self.training_pipeline_manager = TrainingPipelineManager()
        self.decision_maker = DecisionMaker()
        self.moe_gateway = MoEGateway()

    async def run(self):
        # Subscribe to data streams
        await self.data_subscription_manager.subscribe_to_data_provider()
        # Start inference coordination loop
        await self.model_inference_coordinator.start()
        # Start training pipeline management
        await self.training_pipeline_manager.start()
```
**Data Flow Management**:
- Implements event-driven architecture for data updates
- Uses async/await patterns for non-blocking operations
- Maintains data freshness timestamps for each stream
- Implements backpressure handling for high-frequency data
**Model Coordination**:
- Manages model lifecycle (loading, inference, training, updating)
- Implements model versioning and rollback capabilities
- Handles model failures and fallback mechanisms
- Provides model performance monitoring and alerting
**Training Integration**:
- Implements incremental learning strategies
- Manages training batch composition and scheduling
- Provides training progress monitoring and control
- Handles training failures and recovery
### 5. Trading Executor
The Trading Executor is responsible for executing trading actions through brokerage APIs.
#### Key Classes and Interfaces
- **TradingExecutor**: Main class for the trading executor.
- **BrokerageAPI**: Interface for interacting with brokerages.
- **OrderManager**: Class for managing orders.
#### Implementation Details
The Trading Executor will:
- Accept trading actions from the orchestrator
- Execute orders through brokerage APIs
- Manage order lifecycle
- Handle errors and retries
- Provide feedback on order execution
Supported brokerages:
- MEXC
- Binance
- Bybit (future extension)
Order types:
- Market orders
- Limit orders
- Stop-loss orders
### 6. Risk Manager
The Risk Manager is responsible for implementing risk management features like stop-loss and position sizing.
#### Key Classes and Interfaces
- **RiskManager**: Main class for the risk manager.
- **StopLossManager**: Class for managing stop-loss orders.
- **PositionSizer**: Class for determining position sizes.
#### Implementation Details
The Risk Manager will:
- Implement configurable stop-loss functionality
- Implement configurable position sizing based on risk parameters
- Implement configurable maximum drawdown limits
- Provide real-time risk metrics
- Provide alerts for high-risk situations
Risk parameters:
- Maximum position size
- Maximum drawdown
- Risk per trade
- Maximum leverage
### 7. Dashboard
The Dashboard provides a user interface for monitoring and controlling the system.
#### Key Classes and Interfaces
- **Dashboard**: Main class for the dashboard.
- **ChartManager**: Class for managing charts.
- **ControlPanel**: Class for managing controls.
#### Implementation Details
The Dashboard will:
- Display real-time market data for all symbols and timeframes
- Display OHLCV charts for all timeframes
- Display CNN pivot point predictions and confidence levels
- Display RL and orchestrator trading actions and confidence levels
- Display system status and model performance metrics
- Provide start/stop toggles for all system processes
- Provide sliders to adjust buy/sell thresholds for the orchestrator
Implementation:
- Web-based dashboard using Flask/Dash
- Real-time updates using WebSockets
- Interactive charts using Plotly
- Server-side processing for all models
## Data Models
### Market Data
```python
@dataclass
class MarketTick:
    symbol: str
    timestamp: datetime
    price: float
    volume: float
    quantity: float
    side: str  # 'buy' or 'sell'
    trade_id: str
    is_buyer_maker: bool
    raw_data: Dict[str, Any] = field(default_factory=dict)
```
### OHLCV Data
```python
@dataclass
class OHLCVBar:
    symbol: str
    timestamp: datetime
    open: float
    high: float
    low: float
    close: float
    volume: float
    timeframe: str
    indicators: Dict[str, float] = field(default_factory=dict)
```
### Pivot Points
```python
@dataclass
class PivotPoint:
    symbol: str
    timestamp: datetime
    price: float
    type: str  # 'high' or 'low'
    level: int  # Pivot level (1, 2, 3, etc.)
    confidence: float = 1.0
```
### Trading Actions
```python
@dataclass
class TradingAction:
    symbol: str
    timestamp: datetime
    action: str  # 'buy' or 'sell'
    confidence: float
    source: str  # 'rl', 'cnn', 'orchestrator'
    price: Optional[float] = None
    quantity: Optional[float] = None
    reason: Optional[str] = None
```
### Model Predictions
```python
@dataclass
class ModelOutput:
    """Extensible model output format supporting all model types"""
    model_type: str  # 'cnn', 'rl', 'lstm', 'transformer', 'orchestrator'
    model_name: str  # Specific model identifier
    symbol: str
    timestamp: datetime
    confidence: float
    predictions: Dict[str, Any]  # Model-specific predictions
    hidden_states: Optional[Dict[str, Any]] = None  # For cross-model feeding
    metadata: Dict[str, Any] = field(default_factory=dict)  # Additional info
```
```python
@dataclass
class CNNPrediction:
    symbol: str
    timestamp: datetime
    pivot_points: List[PivotPoint]
    hidden_states: Dict[str, Any]
    confidence: float
```
```python
@dataclass
class RLPrediction:
    symbol: str
    timestamp: datetime
    action: str  # 'buy' or 'sell'
    confidence: float
    expected_reward: float
```
### Enhanced Base Data Input
```python
@dataclass
class BaseDataInput:
    """Unified base data input for all models"""
    symbol: str
    timestamp: datetime
    ohlcv_data: Dict[str, OHLCVBar]  # Multi-timeframe OHLCV
    cob_data: Optional[Dict[str, float]] = None  # COB buckets for 1s timeframe
    technical_indicators: Dict[str, float] = field(default_factory=dict)
    pivot_points: List[PivotPoint] = field(default_factory=list)
    last_predictions: Dict[str, ModelOutput] = field(default_factory=dict)  # From all models
    market_microstructure: Dict[str, Any] = field(default_factory=dict)  # Order flow, etc.
```
### COB Data Structure
```python
@dataclass
class COBData:
    """Cumulative Order Book data for price buckets"""
    symbol: str
    timestamp: datetime
    current_price: float
    bucket_size: float  # $1 for ETH, $10 for BTC
    price_buckets: Dict[float, Dict[str, float]]  # price -> {bid_volume, ask_volume, etc.}
    bid_ask_imbalance: Dict[float, float]  # price -> imbalance ratio
    volume_weighted_prices: Dict[float, float]  # price -> VWAP within bucket
    order_flow_metrics: Dict[str, float]  # Various order flow indicators
```
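As an illustration of how `bid_ask_imbalance` could be derived from `price_buckets` (the exact formula used by the data provider may differ):
```python
from typing import Dict


def compute_bid_ask_imbalance(price_buckets: Dict[float, Dict[str, float]]) -> Dict[float, float]:
    """Imbalance in [-1, 1] per bucket: +1 = all bid volume, -1 = all ask volume."""
    imbalance = {}
    for price, bucket in price_buckets.items():
        bid = bucket.get("bid_volume", 0.0)
        ask = bucket.get("ask_volume", 0.0)
        total = bid + ask
        imbalance[price] = (bid - ask) / total if total > 0 else 0.0
    return imbalance
```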
## Error Handling
### Data Collection Errors
- Implement retry mechanisms for API failures
- Use fallback data sources when primary sources are unavailable
- Log all errors with detailed information
- Notify users through the dashboard
### Model Errors
- Implement model validation before deployment
- Use fallback models when primary models fail
- Log all errors with detailed information
- Notify users through the dashboard
### Trading Errors
- Implement order validation before submission
- Use retry mechanisms for order failures
- Implement circuit breakers for extreme market conditions
- Log all errors with detailed information
- Notify users through the dashboard
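A minimal sketch of the circuit-breaker idea above; thresholds and naming are illustrative, not the implemented API.
```python
import time
from typing import Optional


class CircuitBreaker:
    """Blocks order submission after repeated failures, re-opening after a cooldown."""

    def __init__(self, failure_threshold: int = 5, cooldown_seconds: float = 60.0):
        self.failure_threshold = failure_threshold
        self.cooldown_seconds = cooldown_seconds
        self.failure_count = 0
        self.opened_at: Optional[float] = None

    def allow_request(self) -> bool:
        if self.opened_at is None:
            return True
        if time.monotonic() - self.opened_at >= self.cooldown_seconds:
            # Cooldown elapsed: close the breaker and allow a trial request.
            self.opened_at = None
            self.failure_count = 0
            return True
        return False

    def record_success(self) -> None:
        self.failure_count = 0

    def record_failure(self) -> None:
        self.failure_count += 1
        if self.failure_count >= self.failure_threshold:
            self.opened_at = time.monotonic()
```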
## Testing Strategy
### Unit Testing
- Test individual components in isolation
- Use mock objects for dependencies
- Focus on edge cases and error handling
### Integration Testing
- Test interactions between components
- Use real data for testing
- Focus on data flow and error propagation
### System Testing
- Test the entire system end-to-end
- Use real data for testing
- Focus on performance and reliability
### Backtesting
- Test trading strategies on historical data
- Measure performance metrics (PnL, Sharpe ratio, etc.)
- Compare against benchmarks
### Live Testing
- Test the system in a live environment with small position sizes
- Monitor performance and stability
- Gradually increase position sizes as confidence grows
## Implementation Plan
The implementation will follow a phased approach:
1. **Phase 1: Data Provider**
- Implement the enhanced data provider
- Implement pivot point calculation
- Implement technical indicator calculation
- Implement data normalization
2. **Phase 2: CNN Model**
- Implement the CNN model architecture
- Implement the training pipeline
- Implement the inference pipeline
- Implement the pivot point prediction
3. **Phase 3: RL Model**
- Implement the RL model architecture
- Implement the training pipeline
- Implement the inference pipeline
- Implement the trading action generation
4. **Phase 4: Orchestrator**
- Implement the orchestrator architecture
- Implement the decision-making logic
- Implement the MoE gateway
- Implement the confidence-based filtering
5. **Phase 5: Trading Executor**
- Implement the trading executor
- Implement the brokerage API integrations
- Implement the order management
- Implement the error handling
6. **Phase 6: Risk Manager**
- Implement the risk manager
- Implement the stop-loss functionality
- Implement the position sizing
- Implement the risk metrics
7. **Phase 7: Dashboard**
- Implement the dashboard UI
- Implement the chart management
- Implement the control panel
- Implement the real-time updates
8. **Phase 8: Integration and Testing**
- Integrate all components
- Implement comprehensive testing
- Fix bugs and optimize performance
- Deploy to production
## Monitoring and Visualization
### TensorBoard Integration (Future Enhancement)
A comprehensive TensorBoard integration has been designed to provide detailed training visualization and monitoring capabilities:
#### Features
- **Training Metrics Visualization**: Real-time tracking of model losses, rewards, and performance metrics
- **Feature Distribution Analysis**: Histograms and statistics of input features to validate data quality
- **State Quality Monitoring**: Tracking of comprehensive state building (13,400 features) success rates
- **Reward Component Analysis**: Detailed breakdown of reward calculations including PnL, confidence, volatility, and order flow
- **Model Performance Comparison**: Side-by-side comparison of CNN, RL, and orchestrator performance
#### Implementation Status
- **Completed**: TensorBoardLogger utility class with comprehensive logging methods
- **Completed**: Integration points in enhanced_rl_training_integration.py
- **Completed**: Enhanced run_tensorboard.py with improved visualization options
- **Status**: Ready for deployment when system stability is achieved
#### Usage
```bash
# Start TensorBoard dashboard
python run_tensorboard.py
# Access at http://localhost:6006
# View training metrics, feature distributions, and model performance
```
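The snippet below is only a sketch of the kind of wrapper the TensorBoardLogger utility provides, built on `torch.utils.tensorboard.SummaryWriter`; the actual class in the repository exposes richer, domain-specific methods.
```python
from torch.utils.tensorboard import SummaryWriter


class SimpleTrainingLogger:
    """Illustrative wrapper; the real TensorBoardLogger exposes richer methods."""

    def __init__(self, log_dir: str = "runs/trading"):
        self.writer = SummaryWriter(log_dir=log_dir)

    def log_scalars(self, step: int, **metrics: float) -> None:
        # e.g. logger.log_scalars(step, loss=0.42, reward=1.3, confidence=0.87)
        for name, value in metrics.items():
            self.writer.add_scalar(name, value, global_step=step)

    def log_feature_histogram(self, step: int, name: str, values) -> None:
        # values: numpy array or tensor of input features for distribution analysis
        self.writer.add_histogram(name, values, global_step=step)

    def close(self) -> None:
        self.writer.close()
```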
#### Benefits
- Real-time validation of training process
- Early detection of training issues
- Feature importance analysis
- Model performance comparison
- Historical training progress tracking
**Note**: TensorBoard integration is currently deprioritized in favor of system stability and core model improvements. It will be activated once the core training system is stable and performing optimally.
## Conclusion
This design document outlines the architecture, components, data flow, and implementation details for the Multi-Modal Trading System. The system is designed to be modular, extensible, and robust, with a focus on performance, reliability, and user experience.
The implementation will follow a phased approach, with each phase building on the previous one. The system will be thoroughly tested at each phase to ensure that it meets the requirements and performs as expected.
The final system will provide traders with a powerful tool for analyzing market data, identifying trading opportunities, and executing trades with confidence.


@ -0,0 +1,175 @@
# Requirements Document
## Introduction
The Multi-Modal Trading System is an advanced algorithmic trading platform that combines Convolutional Neural Networks (CNN) and Reinforcement Learning (RL) models orchestrated by a decision-making module. The system processes multi-timeframe and multi-symbol market data (primarily ETH and BTC) to generate trading actions. The system is designed to adapt to current market conditions through continuous learning from past experiences, with the CNN module trained on historical data to predict pivot points and the RL module optimizing trading decisions based on these predictions and market data.
## Requirements
### Requirement 1: Data Collection and Processing
**User Story:** As a trader, I want the system to collect and process multi-timeframe and multi-symbol market data, so that the models have comprehensive market information for making accurate trading decisions.
#### Acceptance Criteria
0. NEVER USE GENERATED/SYNTHETIC DATA, mock implementations, or mock UI. If something is not implemented yet, it should be obvious.
1. WHEN the system starts THEN it SHALL collect and process data for both ETH and BTC symbols.
2. WHEN collecting data THEN the system SHALL store the following for the primary symbol (ETH):
- 300 seconds of raw tick data: price and COB snapshot for all prices within ±1% of the current price, on fine-resolution buckets ($1 for ETH, $10 for BTC)
- 300 seconds of 1-second OHLCV data + 1s aggregated COB data
- 300 bars of OHLCV + indicators for each timeframe (1s, 1m, 1h, 1d)
3. WHEN collecting data THEN the system SHALL store similar data for the reference symbol (BTC).
4. WHEN processing data THEN the system SHALL calculate standard technical indicators for all timeframes.
5. WHEN processing data THEN the system SHALL calculate pivot points for all timeframes according to the specified methodology.
6. WHEN new data arrives THEN the system SHALL update its data cache in real-time.
7. IF tick data is not available THEN the system SHALL substitute with the lowest available timeframe data.
8. WHEN normalizing data THEN the system SHALL normalize to the max and min of the highest timeframe to maintain relationships between different timeframes.
9. Data SHALL be cached for longer than the model input window (initially double the input length, i.e. 600 bars) to support backtesting once the outcomes of current predictions are known, so that test cases can be generated.
10. All models SHALL have access to the full data collected by the central data provider implementation; only some models are specialized. Every model SHALL also receive the last output of every other model (cached in the data provider), and the model input format SHALL leave room for additional models so the system can be extended without losing existing models and their trained W&B.
### Requirement 2: CNN Model Implementation
**User Story:** As a trader, I want the system to implement a CNN model that can identify patterns and predict pivot points across multiple timeframes, so that I can anticipate market direction changes.
#### Acceptance Criteria
1. WHEN the CNN model is initialized THEN it SHALL accept multi-timeframe and multi-symbol data as input.
2. WHEN processing input data THEN the CNN model SHALL output predicted pivot points for each timeframe (1s, 1m, 1h, 1d).
3. WHEN predicting pivot points THEN the CNN model SHALL provide both the predicted pivot point value and the timestamp when it is expected to occur.
4. WHEN a pivot point is detected THEN the system SHALL trigger a training round for the CNN model using historical data.
5. WHEN training the CNN model THEN the system SHALL use programmatically calculated pivot points from historical data as ground truth.
6. WHEN outputting predictions THEN the CNN model SHALL include a confidence score for each prediction.
7. WHEN calculating pivot points THEN the system SHALL implement both standard pivot points and the recursive Williams market structure pivot points as described.
8. WHEN processing data THEN the CNN model SHALL make available its hidden layer states for use by the RL model.
### Requirement 3: RL Model Implementation
**User Story:** As a trader, I want the system to implement an RL model that can learn optimal trading strategies based on market data and CNN predictions, so that the system can adapt to changing market conditions.
#### Acceptance Criteria
1. WHEN the RL model is initialized THEN it SHALL accept market data, CNN predictions, and CNN hidden layer states as input.
2. WHEN processing input data THEN the RL model SHALL output trading action recommendations (buy/sell).
3. WHEN evaluating trading actions THEN the RL model SHALL learn from past experiences to adapt to the current market environment.
4. WHEN making decisions THEN the RL model SHALL consider the confidence levels of CNN predictions.
5. WHEN uncertain about market direction THEN the RL model SHALL learn to avoid entering positions.
6. WHEN training the RL model THEN the system SHALL use a reward function that incentivizes high risk/reward setups.
7. WHEN outputting trading actions THEN the RL model SHALL provide a confidence score for each action.
8. WHEN a trading action is executed THEN the system SHALL store the input data for future training.
### Requirement 4: Orchestrator Implementation
**User Story:** As a trader, I want the system to implement an orchestrator that can make final trading decisions based on inputs from both CNN and RL models, so that the system can make more balanced and informed trading decisions.
#### Acceptance Criteria
1. WHEN the orchestrator is initialized THEN it SHALL accept inputs from both CNN and RL models.
2. WHEN processing model inputs THEN the orchestrator SHALL output final trading actions (buy/sell).
3. WHEN making decisions THEN the orchestrator SHALL consider the confidence levels of both CNN and RL models.
4. WHEN uncertain about market direction THEN the orchestrator SHALL learn to avoid entering positions.
5. WHEN implementing the orchestrator THEN the system SHALL use a Mixture of Experts (MoE) approach to allow for future model integration.
6. WHEN outputting trading actions THEN the orchestrator SHALL provide a confidence score for each action.
7. WHEN a trading action is executed THEN the system SHALL store the input data for future training.
8. WHEN implementing the orchestrator THEN the system SHALL allow for configurable thresholds for entering and exiting positions.
### Requirement 5: Training Pipeline
**User Story:** As a developer, I want the system to implement a unified training pipeline for both CNN and RL models, so that the models can be trained efficiently and consistently.
#### Acceptance Criteria
1. WHEN training models THEN the system SHALL use a unified data provider to prepare data for all models.
2. WHEN a pivot point is detected THEN the system SHALL trigger a training round for the CNN model.
3. WHEN training the CNN model THEN the system SHALL use programmatically calculated pivot points from historical data as ground truth.
4. WHEN training the RL model THEN the system SHALL use a reward function that incentivizes high risk/reward setups.
5. WHEN training models THEN the system SHALL run the training process on the server without requiring the dashboard to be open.
6. WHEN training models THEN the system SHALL provide real-time feedback on training progress through the dashboard.
7. WHEN training models THEN the system SHALL store model checkpoints for future use.
8. WHEN training models THEN the system SHALL provide metrics on model performance.
### Requirement 6: Dashboard Implementation
**User Story:** As a trader, I want the system to implement a comprehensive dashboard that displays real-time data, model predictions, and trading actions, so that I can monitor the system's performance and make informed decisions.
#### Acceptance Criteria
1. WHEN the dashboard is initialized THEN it SHALL display real-time market data for all symbols and timeframes.
2. WHEN displaying market data THEN the dashboard SHALL show OHLCV charts for all timeframes.
3. WHEN displaying model predictions THEN the dashboard SHALL show CNN pivot point predictions and confidence levels.
4. WHEN displaying trading actions THEN the dashboard SHALL show RL and orchestrator trading actions and confidence levels.
5. WHEN displaying system status THEN the dashboard SHALL show training progress and model performance metrics.
6. WHEN implementing controls THEN the dashboard SHALL provide start/stop toggles for all system processes.
7. WHEN implementing controls THEN the dashboard SHALL provide sliders to adjust buy/sell thresholds for the orchestrator.
8. WHEN implementing the dashboard THEN the system SHALL ensure all processes run on the server without requiring the dashboard to be open.
### Requirement 7: Risk Management
**User Story:** As a trader, I want the system to implement risk management features, so that I can protect my capital from significant losses.
#### Acceptance Criteria
1. WHEN implementing risk management THEN the system SHALL provide configurable stop-loss functionality.
2. WHEN a stop-loss is triggered THEN the system SHALL automatically close the position.
3. WHEN implementing risk management THEN the system SHALL provide configurable position sizing based on risk parameters.
4. WHEN implementing risk management THEN the system SHALL provide configurable maximum drawdown limits.
5. WHEN maximum drawdown limits are reached THEN the system SHALL automatically stop trading.
6. WHEN implementing risk management THEN the system SHALL provide real-time risk metrics through the dashboard.
7. WHEN implementing risk management THEN the system SHALL allow for different risk parameters for different market conditions.
8. WHEN implementing risk management THEN the system SHALL provide alerts for high-risk situations.
### Requirement 8: System Architecture and Integration
**User Story:** As a developer, I want the system to implement a clean and modular architecture, so that the system is easy to maintain and extend.
#### Acceptance Criteria
1. WHEN implementing the system architecture THEN the system SHALL use a unified data provider to prepare data for all models.
2. WHEN implementing the system architecture THEN the system SHALL use a modular approach to allow for easy extension.
3. WHEN implementing the system architecture THEN the system SHALL use a clean separation of concerns between data collection, model training, and trading execution.
4. WHEN implementing the system architecture THEN the system SHALL use a unified interface for all models.
5. WHEN implementing the system architecture THEN the system SHALL use a unified interface for all data providers.
6. WHEN implementing the system architecture THEN the system SHALL use a unified interface for all trading executors.
7. WHEN implementing the system architecture THEN the system SHALL use a unified interface for all risk management components.
8. WHEN implementing the system architecture THEN the system SHALL use a unified interface for all dashboard components.
### Requirement 9: Model Inference Data Validation and Storage
**User Story:** As a trading system developer, I want to ensure that all model predictions include complete input data validation and persistent storage, so that I can verify models receive correct inputs and track their performance over time.
#### Acceptance Criteria
1. WHEN a model makes a prediction THEN the system SHALL validate that the input data contains complete OHLCV dataframes for all required timeframes
2. WHEN input data is incomplete THEN the system SHALL log the missing components and SHALL NOT proceed with prediction
3. WHEN input validation passes THEN the system SHALL store the complete input data package with the prediction in persistent storage
4. IF input data dimensions are incorrect THEN the system SHALL raise a validation error with specific details about expected vs actual dimensions
5. WHEN a model completes inference THEN the system SHALL store the complete input data, model outputs, confidence scores, and metadata in a persistent inference history
6. WHEN storing inference data THEN the system SHALL include timestamp, symbol, input features, prediction outputs, and model internal states
7. IF inference history storage fails THEN the system SHALL log the error and continue operation without breaking the prediction flow
### Requirement 10: Inference-Training Feedback Loop
**User Story:** As a machine learning engineer, I want the system to automatically train models using their previous inference data compared to actual market outcomes, so that models continuously improve their accuracy through real-world feedback.
#### Acceptance Criteria
1. WHEN sufficient time has passed after a prediction THEN the system SHALL evaluate the prediction accuracy against actual price movements
2. WHEN a prediction outcome is determined THEN the system SHALL create a training example using the stored inference data and actual outcome
3. WHEN training examples are created THEN the system SHALL feed them back to the respective models for learning
4. IF the prediction was accurate THEN the system SHALL reinforce the model's decision pathway through positive training signals
5. IF the prediction was inaccurate THEN the system SHALL provide corrective training signals to help the model learn from mistakes
6. WHEN the system needs training data THEN it SHALL retrieve the last inference data for each model to compare predictions against actual market outcomes
7. WHEN models are trained on inference feedback THEN the system SHALL track and report accuracy improvements or degradations over time
### Requirement 11: Inference History Management and Monitoring
**User Story:** As a system administrator, I want comprehensive logging and monitoring of the inference-training feedback loop with configurable retention policies, so that I can track model learning progress and manage storage efficiently.
#### Acceptance Criteria
1. WHEN inference data is stored THEN the system SHALL log the storage operation with data completeness metrics and validation results
2. WHEN training occurs based on previous inference THEN the system SHALL log the training outcome and model performance changes
3. WHEN the system detects data flow issues THEN it SHALL alert administrators with specific error details and suggested remediation
4. WHEN inference history reaches configured limits THEN the system SHALL archive or remove oldest entries based on retention policy
5. WHEN storing inference data THEN the system SHALL compress data to minimize storage footprint while maintaining accessibility
6. WHEN retrieving historical inference data THEN the system SHALL provide efficient query mechanisms by symbol, timeframe, and date range
7. IF storage space is critically low THEN the system SHALL prioritize keeping the most recent and most valuable training examples


@ -0,0 +1,382 @@
# Implementation Plan
## Enhanced Data Provider and COB Integration
- [ ] 1. Enhance the existing DataProvider class with standardized model inputs
- Extend the current implementation in core/data_provider.py
- Implement standardized COB+OHLCV data frame for all models
- Create unified input format: 300 frames OHLCV (1s, 1m, 1h, 1d) ETH + 300s of 1s BTC
- Integrate with existing multi_exchange_cob_provider.py for COB data
- _Requirements: 1.1, 1.2, 1.3, 1.6_
- [ ] 1.1. Implement standardized COB+OHLCV data frame for all models
- Create BaseDataInput class with standardized format for all models
- Implement OHLCV: 300 frames of (1s, 1m, 1h, 1d) ETH + 300s of 1s BTC
- Add COB: ±20 buckets of COB amounts in USD for each 1s OHLCV
- Include 1s, 5s, 15s, and 60s MA of COB imbalance counting ±5 COB buckets
- Ensure all models receive identical input format for consistency
- _Requirements: 1.2, 1.3, 8.1_
- [ ] 1.2. Implement extensible model output storage
- Create standardized ModelOutput data structure
- Support CNN, RL, LSTM, Transformer, and future model types
- Include model-specific predictions and cross-model hidden states
- Add metadata support for extensible model information
- _Requirements: 1.10, 8.2_
- [ ] 1.3. Enhance Williams Market Structure pivot point calculation
- Extend existing williams_market_structure.py implementation
- Improve recursive pivot point calculation accuracy
- Add unit tests to verify pivot point detection
- Integrate with COB data for enhanced pivot detection
- _Requirements: 1.5, 2.7_
- [-] 1.4. Optimize real-time data streaming with COB integration
- Enhance existing WebSocket connections in enhanced_cob_websocket.py
- Implement 10Hz COB data streaming alongside OHLCV data
- Add data synchronization across different refresh rates
- Ensure thread-safe access to multi-rate data streams
- _Requirements: 1.6, 8.5_
- [ ] 1.5. Fix WebSocket COB data processing errors
- Fix 'NoneType' object has no attribute 'append' errors in COB data processing
- Ensure proper initialization of data structures in MultiExchangeCOBProvider
- Add validation and defensive checks before accessing data structures
- Implement proper error handling for WebSocket data processing
- _Requirements: 1.1, 1.6, 8.5_
- [ ] 1.6. Enhance error handling in COB data processing
- Add validation for incoming WebSocket data
- Implement reconnection logic with exponential backoff
- Add detailed logging for debugging COB data issues
- Ensure system continues operation with last valid data during failures
- _Requirements: 1.6, 8.5_
## Enhanced CNN Model Implementation
- [ ] 2. Enhance the existing CNN model with standardized inputs/outputs
- Extend the current implementation in NN/models/enhanced_cnn.py
- Accept standardized COB+OHLCV data frame: 300 frames (1s,1m,1h,1d) ETH + 300s 1s BTC
- Include COB ±20 buckets and MA (1s,5s,15s,60s) of COB imbalance ±5 buckets
- Output BUY/SELL trading action with confidence scores
- _Requirements: 2.1, 2.2, 2.8, 1.10_
- [x] 2.1. Implement CNN inference with standardized input format
- Accept BaseDataInput with standardized COB+OHLCV format
- Process 300 frames of multi-timeframe data with COB buckets
- Output BUY/SELL recommendations with confidence scores
- Make hidden layer states available for cross-model feeding
- Optimize inference performance for real-time processing
- _Requirements: 2.2, 2.6, 2.8, 4.3_
- [x] 2.2. Enhance CNN training pipeline with checkpoint management
- Integrate with checkpoint manager for training progress persistence
- Store top 5-10 best checkpoints based on performance metrics
- Automatically load best checkpoint at startup
- Implement training triggers based on orchestrator feedback
- Store metadata with checkpoints for performance tracking
- _Requirements: 2.4, 2.5, 5.2, 5.3, 5.7_
- [ ] 2.3. Implement CNN model evaluation and checkpoint optimization
- Create evaluation methods using standardized input/output format
- Implement performance metrics for checkpoint ranking
- Add validation against historical trading outcomes
- Support automatic checkpoint cleanup (keep only top performers)
- Track model improvement over time through checkpoint metadata
- _Requirements: 2.5, 5.8, 4.4_
## Enhanced RL Model Implementation
- [ ] 3. Enhance the existing RL model with standardized inputs/outputs
- Extend the current implementation in NN/models/dqn_agent.py
- Accept standardized COB+OHLCV data frame: 300 frames (1s,1m,1h,1d) ETH + 300s 1s BTC
- Include COB ±20 buckets and MA (1s,5s,15s,60s) of COB imbalance ±5 buckets
- Output BUY/SELL trading action with confidence scores
- _Requirements: 3.1, 3.2, 3.7, 1.10_
- [ ] 3.1. Implement RL inference with standardized input format
- Accept BaseDataInput with standardized COB+OHLCV format
- Process CNN hidden states and predictions as part of state input
- Output BUY/SELL recommendations with confidence scores
- Include expected rewards and value estimates in output
- Optimize inference performance for real-time processing
- _Requirements: 3.2, 3.7, 4.3_
- [ ] 3.2. Enhance RL training pipeline with checkpoint management
- Integrate with checkpoint manager for training progress persistence
- Store top 5-10 best checkpoints based on trading performance metrics
- Automatically load best checkpoint at startup
- Implement experience replay with profitability-based prioritization
- Store metadata with checkpoints for performance tracking
- _Requirements: 3.3, 3.5, 5.4, 5.7, 4.4_
- [ ] 3.3. Implement RL model evaluation and checkpoint optimization
- Create evaluation methods using standardized input/output format
- Implement trading performance metrics for checkpoint ranking
- Add validation against historical trading opportunities
- Support automatic checkpoint cleanup (keep only top performers)
- Track model improvement over time through checkpoint metadata
- _Requirements: 3.3, 5.8, 4.4_
## Enhanced Orchestrator Implementation
- [ ] 4. Enhance the existing orchestrator with centralized coordination
- Extend the current implementation in core/orchestrator.py
- Implement DataSubscriptionManager for multi-rate data streams
- Add ModelInferenceCoordinator for cross-model coordination
- Create ModelOutputStore for extensible model output management
- Add TrainingPipelineManager for continuous learning coordination
- _Requirements: 4.1, 4.2, 4.5, 8.1_
- [ ] 4.1. Implement data subscription and management system
- Create DataSubscriptionManager class
- Subscribe to 10Hz COB data, OHLCV, market ticks, and technical indicators
- Implement intelligent caching for "last updated" data serving
- Maintain synchronized base dataframe across different refresh rates
- Add thread-safe access to multi-rate data streams
- _Requirements: 4.1, 1.6, 8.5_
- [ ] 4.2. Implement model inference coordination
- Create ModelInferenceCoordinator class
- Trigger model inference based on data availability and requirements
- Coordinate parallel inference execution for independent models
- Handle model dependencies (e.g., RL waiting for CNN hidden states)
- Assemble appropriate input data for each model type
- _Requirements: 4.2, 3.1, 2.1_
- [ ] 4.3. Implement model output storage and cross-feeding
- Create ModelOutputStore class using standardized ModelOutput format
- Store CNN predictions, confidence scores, and hidden layer states
- Store RL action recommendations and value estimates
- Support extensible storage for LSTM, Transformer, and future models
- Implement cross-model feeding of hidden states and predictions
- Include "last predictions" from all models in base data input
- _Requirements: 4.3, 1.10, 8.2_
- [ ] 4.4. Implement training pipeline management
- Create TrainingPipelineManager class
- Call each model's training pipeline with prediction-result pairs
- Manage training data collection and labeling
- Coordinate online learning updates based on real-time performance
- Track prediction accuracy and trigger retraining when needed
- _Requirements: 4.4, 5.2, 5.4, 5.7_
- [ ] 4.5. Implement enhanced decision-making with MoE
- Create enhanced DecisionMaker class
- Implement Mixture of Experts approach for model integration
- Apply confidence-based filtering to avoid uncertain trades
- Support configurable thresholds for buy/sell decisions
- Consider market conditions and risk parameters in decisions
- _Requirements: 4.5, 4.8, 6.7_
- [ ] 4.6. Implement extensible model integration architecture
- Create MoEGateway class supporting dynamic model addition
- Support CNN, RL, LSTM, Transformer model types without architecture changes
- Implement model versioning and rollback capabilities
- Handle model failures and fallback mechanisms
- Provide model performance monitoring and alerting
- _Requirements: 4.6, 8.2, 8.3_
## Model Inference Data Validation and Storage
- [x] 5. Implement comprehensive inference data validation system
- Create InferenceDataValidator class for input validation
- Validate complete OHLCV dataframes for all required timeframes
- Check input data dimensions against model requirements
- Log missing components and prevent prediction on incomplete data
- _Requirements: 9.1, 9.2, 9.3, 9.4_
- [ ] 5.1. Implement input data validation for all models
- Create validation methods for CNN, RL, and future model inputs
- Validate OHLCV data completeness (300 frames for 1s, 1m, 1h, 1d)
- Validate COB data structure (±20 buckets, MA calculations)
- Raise specific validation errors with expected vs actual dimensions
- Ensure validation occurs before any model inference
- _Requirements: 9.1, 9.4_
- [x] 5.2. Implement persistent inference history storage
- Create InferenceHistoryStore class for persistent storage
- Store complete input data packages with each prediction
- Include timestamp, symbol, input features, prediction outputs, confidence scores
- Store model internal states for cross-model feeding
- Implement compressed storage to minimize footprint
- _Requirements: 9.5, 9.6_
- [x] 5.3. Implement inference history query and retrieval system
- Create efficient query mechanisms by symbol, timeframe, and date range
- Implement data retrieval for training pipeline consumption
- Add data completeness metrics and validation results in storage
- Handle storage failures gracefully without breaking prediction flow
- _Requirements: 9.7, 11.6_
## Inference-Training Feedback Loop Implementation
- [ ] 6. Implement prediction outcome evaluation system
- Create PredictionOutcomeEvaluator class
- Evaluate prediction accuracy against actual price movements
- Create training examples using stored inference data and actual outcomes
- Feed prediction-result pairs back to respective models
- _Requirements: 10.1, 10.2, 10.3_
- [ ] 6.1. Implement adaptive learning signal generation
- Create positive reinforcement signals for accurate predictions
- Generate corrective training signals for inaccurate predictions
- Retrieve last inference data for each model for outcome comparison
- Implement model-specific learning signal formats
- _Requirements: 10.4, 10.5, 10.6_
- [ ] 6.2. Implement continuous improvement tracking
- Track and report accuracy improvements/degradations over time
- Monitor model learning progress through feedback loop
- Create performance metrics for inference-training effectiveness
- Generate alerts for learning regression or stagnation
- _Requirements: 10.7_
## Inference History Management and Monitoring
- [ ] 7. Implement comprehensive inference logging and monitoring
- Create InferenceMonitor class for logging and alerting
- Log inference data storage operations with completeness metrics
- Log training outcomes and model performance changes
- Alert administrators on data flow issues with specific error details
- _Requirements: 11.1, 11.2, 11.3_
- [ ] 7.1. Implement configurable retention policies
- Create RetentionPolicyManager class
- Archive or remove oldest entries when limits are reached
- Prioritize keeping most recent and valuable training examples
- Implement storage space monitoring and alerts
- _Requirements: 11.4, 11.7_
- [ ] 7.2. Implement efficient historical data management
- Compress inference data to minimize storage footprint
- Maintain accessibility for training and analysis
- Implement efficient query mechanisms for historical analysis
- Add data archival and restoration capabilities
- _Requirements: 11.5, 11.6_
## Trading Executor Implementation
- [ ] 8. Design and implement the trading executor
- Create a TradingExecutor class that accepts trading actions from the orchestrator
- Implement order execution through brokerage APIs
- Add order lifecycle management
- _Requirements: 7.1, 7.2, 8.6_
- [ ] 8.1. Implement brokerage API integrations
- Create a BrokerageAPI interface
- Implement concrete classes for MEXC and Binance
- Add error handling and retry mechanisms
- _Requirements: 7.1, 7.2, 8.6_
- [ ] 8.2. Implement order management
- Create an OrderManager class
- Implement methods for creating, updating, and canceling orders
- Add order tracking and status updates
- _Requirements: 7.1, 7.2, 8.6_
- [ ] 8.3. Implement error handling
- Add comprehensive error handling for API failures
- Implement circuit breakers for extreme market conditions
- Add logging and notification mechanisms
- _Requirements: 7.1, 7.2, 8.6_
## Risk Manager Implementation
- [ ] 9. Design and implement the risk manager
- Create a RiskManager class
- Implement risk parameter management
- Add risk metric calculation
- _Requirements: 7.1, 7.3, 7.4_
- [ ] 9.1. Implement stop-loss functionality
- Create a StopLossManager class
- Implement methods for creating and managing stop-loss orders
- Add mechanisms to automatically close positions when stop-loss is triggered
- _Requirements: 7.1, 7.2_
- [ ] 9.2. Implement position sizing
- Create a PositionSizer class
- Implement methods for calculating position sizes based on risk parameters
- Add validation to ensure position sizes are within limits
- _Requirements: 7.3, 7.7_
- [ ] 9.3. Implement risk metrics
- Add methods to calculate risk metrics (drawdown, VaR, etc.)
- Implement real-time risk monitoring
- Add alerts for high-risk situations
- _Requirements: 7.4, 7.5, 7.6, 7.8_
## Dashboard Implementation
- [ ] 10. Design and implement the dashboard UI
- Create a Dashboard class
- Implement the web-based UI using Flask/Dash
- Add real-time updates using WebSockets
- _Requirements: 6.1, 6.8_
- [ ] 10.1. Implement chart management
- Create a ChartManager class
- Implement methods for creating and updating charts
- Add interactive features (zoom, pan, etc.)
- _Requirements: 6.1, 6.2_
- [ ] 10.2. Implement control panel
- Create a ControlPanel class
- Implement start/stop toggles for system processes
- Add sliders for adjusting buy/sell thresholds
- _Requirements: 6.6, 6.7_
- [ ] 10.3. Implement system status display
- Add methods to display training progress
- Implement model performance metrics visualization
- Add real-time system status updates
- _Requirements: 6.5, 5.6_
- [ ] 10.4. Implement server-side processing
- Ensure all processes run on the server without requiring the dashboard to be open
- Implement background tasks for model training and inference
- Add mechanisms to persist system state
- _Requirements: 6.8, 5.5_
## Integration and Testing
- [ ] 11. Integrate all components
- Connect the data provider to the CNN and RL models
- Connect the CNN and RL models to the orchestrator
- Connect the orchestrator to the trading executor
- _Requirements: 8.1, 8.2, 8.3_
- [ ] 11.1. Implement comprehensive unit tests
- Create unit tests for each component
- Implement test fixtures and mocks
- Add test coverage reporting
- _Requirements: 8.1, 8.2, 8.3_
- [ ] 11.2. Implement integration tests
- Create tests for component interactions
- Implement end-to-end tests
- Add performance benchmarks
- _Requirements: 8.1, 8.2, 8.3_
- [ ] 11.3. Implement backtesting framework
- Create a backtesting environment
- Implement methods to replay historical data
- Add performance metrics calculation
- _Requirements: 5.8, 8.1_
- [ ] 11.4. Optimize performance
- Profile the system to identify bottlenecks
- Implement optimizations for critical paths
- Add caching and parallelization where appropriate
- _Requirements: 8.1, 8.2, 8.3_


@ -0,0 +1,350 @@
# Design Document
## Overview
The UI Stability Fix implements a comprehensive solution to resolve critical stability issues between the dashboard UI and training processes. The design focuses on complete process isolation, proper async/await handling, resource conflict resolution, and robust error handling. The solution ensures that the dashboard can operate independently without affecting training system stability.
## Architecture
### High-Level Architecture
```mermaid
graph TB
    subgraph "Training Process"
        TP[Training Process]
        TM[Training Models]
        TD[Training Data]
        TL[Training Logs]
    end
    subgraph "Dashboard Process"
        DP[Dashboard Process]
        DU[Dashboard UI]
        DC[Dashboard Cache]
        DL[Dashboard Logs]
    end
    subgraph "Shared Resources"
        SF[Shared Files]
        SC[Shared Config]
        SM[Shared Models]
        SD[Shared Data]
    end
    TP --> SF
    DP --> SF
    TP --> SC
    DP --> SC
    TP --> SM
    DP --> SM
    TP --> SD
    DP --> SD
    TP -.->|No Direct Connection| DP
```
### Process Isolation Design
The system will implement complete process isolation using:
1. **Separate Python Processes**: Dashboard and training run as independent processes
2. **Inter-Process Communication**: File-based communication for status and data sharing
3. **Resource Partitioning**: Separate resource allocation for each process
4. **Independent Lifecycle Management**: Each process can start, stop, and restart independently
### Async/Await Error Resolution
The design addresses async issues through:
1. **Proper Event Loop Management**: Single event loop per process with proper lifecycle
2. **Async Context Isolation**: Separate async contexts for different components
3. **Coroutine Handling**: Proper awaiting of all async operations
4. **Exception Propagation**: Proper async exception handling and propagation
## Components and Interfaces
### 1. Process Manager
**Purpose**: Manages the lifecycle of both dashboard and training processes
**Interface**:
```python
class ProcessManager:
    def start_training_process(self) -> bool
    def start_dashboard_process(self, port: int = 8050) -> bool
    def stop_training_process(self) -> bool
    def stop_dashboard_process(self) -> bool
    def get_process_status(self) -> Dict[str, str]
    def restart_process(self, process_name: str) -> bool
```
**Implementation Details**:
- Uses subprocess.Popen for process creation
- Monitors process health with periodic checks
- Handles process output logging and error capture
- Implements graceful shutdown with timeout handling
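A sketch of the core start/monitor/stop path using `subprocess.Popen`; script names and arguments are assumptions.
```python
import subprocess
import sys
from typing import Dict


class SimpleProcessManager:
    """Illustrative core of ProcessManager: start, poll, and stop child processes."""

    def __init__(self):
        self.processes: Dict[str, subprocess.Popen] = {}

    def start(self, name: str, script: str, *args: str) -> bool:
        if name in self.processes and self.processes[name].poll() is None:
            return False  # already running
        self.processes[name] = subprocess.Popen(
            [sys.executable, script, *args],
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        )
        return True

    def status(self) -> Dict[str, str]:
        return {name: ("running" if proc.poll() is None else f"exited({proc.returncode})")
                for name, proc in self.processes.items()}

    def stop(self, name: str, timeout: float = 10.0) -> bool:
        proc = self.processes.get(name)
        if proc is None or proc.poll() is not None:
            return True
        proc.terminate()                # graceful shutdown first
        try:
            proc.wait(timeout=timeout)
        except subprocess.TimeoutExpired:
            proc.kill()                 # force kill after the timeout
        return True


# Usage sketch (script paths are hypothetical):
# pm = SimpleProcessManager()
# pm.start("training", "run_training.py")
# pm.start("dashboard", "run_dashboard.py", "--port", "8050")
```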
### 2. Isolated Dashboard
**Purpose**: Provides a completely isolated dashboard that doesn't interfere with training
**Interface**:
```python
class IsolatedDashboard:
    def __init__(self, config: Dict[str, Any])
    def start_server(self, host: str, port: int) -> None
    def stop_server(self) -> None
    def update_data_from_files(self) -> None
    def get_training_status(self) -> Dict[str, Any]
```
**Implementation Details**:
- Runs in separate process with own event loop
- Reads data from shared files instead of direct memory access
- Uses file-based communication for training status
- Implements proper async/await patterns for all operations
### 3. Isolated Training Process
**Purpose**: Runs training completely isolated from UI components
**Interface**:
```python
class IsolatedTrainingProcess:
    def __init__(self, config: Dict[str, Any])
    def start_training(self) -> None
    def stop_training(self) -> None
    def get_training_metrics(self) -> Dict[str, Any]
    def save_status_to_file(self) -> None
```
**Implementation Details**:
- No UI dependencies or imports
- Writes status and metrics to shared files
- Implements proper resource cleanup
- Uses separate logging configuration
### 4. Shared Data Manager
**Purpose**: Manages data sharing between processes through files
**Interface**:
```python
class SharedDataManager:
    def write_training_status(self, status: Dict[str, Any]) -> None
    def read_training_status(self) -> Dict[str, Any]
    def write_market_data(self, data: Dict[str, Any]) -> None
    def read_market_data(self) -> Dict[str, Any]
    def write_model_metrics(self, metrics: Dict[str, Any]) -> None
    def read_model_metrics(self) -> Dict[str, Any]
```
**Implementation Details**:
- Uses JSON files for structured data
- Implements file locking to prevent corruption
- Provides atomic write operations
- Includes data validation and error handling
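A sketch of the atomic, locked JSON read/write pattern described above (POSIX `fcntl` locking; Windows would need a different locking primitive):
```python
import fcntl
import json
import os
import tempfile
from typing import Any, Dict


def write_json_atomic(path: str, data: Dict[str, Any]) -> None:
    """Write JSON to a temp file in the same directory, then atomically replace the target."""
    directory = os.path.dirname(os.path.abspath(path))
    fd, tmp_path = tempfile.mkstemp(dir=directory, suffix=".tmp")
    try:
        with os.fdopen(fd, "w") as tmp:
            json.dump(data, tmp)
            tmp.flush()
            os.fsync(tmp.fileno())
        os.replace(tmp_path, path)  # atomic on the same filesystem
    except BaseException:
        os.unlink(tmp_path)
        raise


def read_json_locked(path: str) -> Dict[str, Any]:
    """Read JSON under a shared lock so a concurrent writer cannot corrupt the read."""
    with open(path, "r") as f:
        fcntl.flock(f.fileno(), fcntl.LOCK_SH)
        try:
            return json.load(f)
        finally:
            fcntl.flock(f.fileno(), fcntl.LOCK_UN)
```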
### 5. Resource Manager
**Purpose**: Manages resource allocation and prevents conflicts
**Interface**:
```python
class ResourceManager:
    def allocate_gpu_resources(self, process_name: str) -> bool
    def release_gpu_resources(self, process_name: str) -> None
    def check_memory_usage(self) -> Dict[str, float]
    def enforce_resource_limits(self) -> None
```
**Implementation Details**:
- Monitors GPU memory usage per process
- Implements resource quotas and limits
- Provides resource conflict detection
- Includes automatic resource cleanup
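A sketch of the memory-check portion, assuming PyTorch and psutil are available in both processes:
```python
from typing import Dict

import psutil
import torch


def check_memory_usage() -> Dict[str, float]:
    """Return coarse memory usage figures (in MB) for the current process and GPU 0."""
    usage = {
        "process_rss_mb": psutil.Process().memory_info().rss / 1e6,
    }
    if torch.cuda.is_available():
        usage["gpu_allocated_mb"] = torch.cuda.memory_allocated(0) / 1e6
        usage["gpu_reserved_mb"] = torch.cuda.memory_reserved(0) / 1e6
        usage["gpu_total_mb"] = torch.cuda.get_device_properties(0).total_memory / 1e6
    return usage
```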
### 6. Async Handler
**Purpose**: Properly handles all async operations in the dashboard
**Interface**:
```python
class AsyncHandler:
    def __init__(self, loop: asyncio.AbstractEventLoop)
    async def handle_orchestrator_connection(self) -> None
    async def handle_cob_integration(self) -> None
    async def handle_trading_decisions(self, decision: Dict) -> None
    def run_async_safely(self, coro: Coroutine) -> Any
```
**Implementation Details**:
- Manages single event loop per process
- Provides proper exception handling for async operations
- Implements timeout handling for long-running operations
- Includes async context management
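A sketch of the `run_async_safely` idea: one long-lived event loop in a background thread, with synchronous callers submitting coroutines via `asyncio.run_coroutine_threadsafe`:
```python
import asyncio
import threading
from typing import Any, Coroutine


class SimpleAsyncHandler:
    """One event loop per process, running in a dedicated thread."""

    def __init__(self):
        self.loop = asyncio.new_event_loop()
        self._thread = threading.Thread(target=self._run_loop, daemon=True)
        self._thread.start()

    def _run_loop(self) -> None:
        asyncio.set_event_loop(self.loop)
        self.loop.run_forever()

    def run_async_safely(self, coro: Coroutine, timeout: float = 30.0) -> Any:
        """Submit a coroutine from synchronous code and wait for its result."""
        future = asyncio.run_coroutine_threadsafe(coro, self.loop)
        return future.result(timeout=timeout)

    def shutdown(self) -> None:
        self.loop.call_soon_threadsafe(self.loop.stop)
        self._thread.join(timeout=5.0)
```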
## Data Models
### Process Status Model
```python
@dataclass
class ProcessStatus:
    name: str
    pid: int
    status: str  # 'running', 'stopped', 'error'
    start_time: datetime
    last_heartbeat: datetime
    memory_usage: float
    cpu_usage: float
    error_message: Optional[str] = None
```
### Training Status Model
```python
@dataclass
class TrainingStatus:
    is_running: bool
    current_epoch: int
    total_epochs: int
    loss: float
    accuracy: float
    last_update: datetime
    model_path: str
    error_message: Optional[str] = None
```
### Dashboard State Model
```python
@dataclass
class DashboardState:
    is_connected: bool
    last_data_update: datetime
    active_connections: int
    error_count: int
    performance_metrics: Dict[str, float]
```
## Error Handling
### Exception Hierarchy
```python
class UIStabilityError(Exception):
    """Base exception for UI stability issues"""
    pass


class ProcessCommunicationError(UIStabilityError):
    """Error in inter-process communication"""
    pass


class AsyncOperationError(UIStabilityError):
    """Error in async operation handling"""
    pass


class ResourceConflictError(UIStabilityError):
    """Error due to resource conflicts"""
    pass
```
### Error Recovery Strategies
1. **Automatic Retry**: For transient network and file I/O errors
2. **Graceful Degradation**: Fallback to basic functionality when components fail
3. **Process Restart**: Automatic restart of failed processes
4. **Circuit Breaker**: Temporary disable of failing components
5. **Rollback**: Revert to last known good state
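As an example of the first strategy, a retry decorator with exponential backoff (parameter values are illustrative):
```python
import functools
import logging
import time

logger = logging.getLogger(__name__)


def retry_with_backoff(max_attempts: int = 5, base_delay: float = 0.5, max_delay: float = 30.0):
    """Retry transient failures, doubling the delay between attempts."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            delay = base_delay
            for attempt in range(1, max_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except (ConnectionError, TimeoutError, OSError) as exc:  # transient errors
                    if attempt == max_attempts:
                        raise
                    logger.warning("Attempt %d/%d failed: %s; retrying in %.1fs",
                                   attempt, max_attempts, exc, delay)
                    time.sleep(delay)
                    delay = min(delay * 2, max_delay)
        return wrapper
    return decorator


# Usage: decorate network or file I/O helpers, e.g. @retry_with_backoff(max_attempts=3)
```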
### Error Monitoring
- Centralized error logging with structured format
- Real-time error rate monitoring
- Automatic alerting for critical errors
- Error trend analysis and reporting
## Testing Strategy
### Unit Tests
- Test each component in isolation
- Mock external dependencies
- Verify error handling paths
- Test async operation handling
### Integration Tests
- Test inter-process communication
- Verify resource sharing mechanisms
- Test process lifecycle management
- Validate error recovery scenarios
### System Tests
- End-to-end stability testing
- Load testing with concurrent processes
- Failure injection testing
- Performance regression testing
### Monitoring Tests
- Health check endpoint testing
- Metrics collection validation
- Alert system testing
- Dashboard functionality testing
## Performance Considerations
### Resource Optimization
- Minimize memory footprint of each process
- Optimize file I/O operations for data sharing
- Implement efficient data serialization
- Use connection pooling for external services
### Scalability
- Support multiple dashboard instances
- Handle increased data volume gracefully
- Implement efficient caching strategies
- Optimize for high-frequency updates
### Monitoring
- Real-time performance metrics collection
- Resource usage tracking per process
- Response time monitoring
- Throughput measurement
## Security Considerations
### Process Isolation
- Separate user contexts for processes
- Limited file system access permissions
- Network access restrictions
- Resource usage limits
### Data Protection
- Secure file sharing mechanisms
- Data validation and sanitization
- Access control for shared resources
- Audit logging for sensitive operations
### Communication Security
- Encrypted inter-process communication
- Authentication for API endpoints
- Input validation for all interfaces
- Rate limiting for external requests
## Deployment Strategy
### Development Environment
- Local process management scripts
- Development-specific configuration
- Enhanced logging and debugging
- Hot-reload capabilities
### Production Environment
- Systemd service management
- Production configuration templates
- Log rotation and archiving
- Monitoring and alerting setup
### Migration Plan
1. Deploy new process management components
2. Update configuration files
3. Test process isolation functionality
4. Gradually migrate existing deployments
5. Monitor stability improvements
6. Remove legacy components


@ -0,0 +1,111 @@
# Requirements Document
## Introduction
The UI Stability Fix addresses critical issues where loading the dashboard UI crashes the training process and causes unhandled exceptions. The system currently suffers from async/await handling problems, threading conflicts, resource contention, and improper separation of concerns between the UI and training processes. This fix will ensure the dashboard can run independently without affecting the training system's stability.
## Requirements
### Requirement 1: Async/Await Error Resolution
**User Story:** As a developer, I want the dashboard to properly handle async operations, so that unhandled exceptions don't crash the entire system.
#### Acceptance Criteria
1. WHEN the dashboard initializes THEN it SHALL properly handle all async operations without throwing "An asyncio.Future, a coroutine or an awaitable is required" errors.
2. WHEN connecting to the orchestrator THEN the system SHALL use proper async/await patterns for all coroutine calls.
3. WHEN starting COB integration THEN the system SHALL properly manage event loops without conflicts.
4. WHEN handling trading decisions THEN async callbacks SHALL be properly awaited and handled.
5. WHEN the dashboard starts THEN it SHALL not create multiple conflicting event loops.
6. WHEN async operations fail THEN the system SHALL handle exceptions gracefully without crashing.
### Requirement 2: Process Isolation
**User Story:** As a user, I want the dashboard and training processes to run independently, so that UI issues don't affect training stability.
#### Acceptance Criteria
1. WHEN the dashboard starts THEN it SHALL run in a completely separate process from the training system.
2. WHEN the dashboard crashes THEN the training process SHALL continue running unaffected.
3. WHEN the training process encounters issues THEN the dashboard SHALL remain functional.
4. WHEN both processes are running THEN they SHALL communicate only through well-defined interfaces (files, APIs, or message queues).
5. WHEN either process restarts THEN the other process SHALL continue operating normally.
6. WHEN resources are accessed THEN there SHALL be no direct shared memory or threading conflicts between processes.
### Requirement 3: Resource Contention Resolution
**User Story:** As a system administrator, I want to eliminate resource conflicts between UI and training, so that both can operate efficiently without interference.
#### Acceptance Criteria
1. WHEN both dashboard and training are running THEN they SHALL not compete for the same GPU resources.
2. WHEN accessing data files THEN proper file locking SHALL prevent corruption or access conflicts.
3. WHEN using network resources THEN rate limiting SHALL prevent API conflicts between processes.
4. WHEN accessing model files THEN proper synchronization SHALL prevent read/write conflicts.
5. WHEN logging THEN separate log files SHALL be used to prevent write conflicts.
6. WHEN using temporary files THEN separate directories SHALL be used for each process.
### Requirement 4: Threading Safety
**User Story:** As a developer, I want all threading operations to be safe and properly managed, so that race conditions and deadlocks don't occur.
#### Acceptance Criteria
1. WHEN the dashboard uses threads THEN all shared data SHALL be properly synchronized.
2. WHEN background updates run THEN they SHALL not interfere with main UI thread operations.
3. WHEN stopping threads THEN proper cleanup SHALL occur without hanging or deadlocks.
4. WHEN accessing shared resources THEN proper locking mechanisms SHALL be used.
5. WHEN threads encounter exceptions THEN they SHALL be handled without crashing the main process.
6. WHEN the dashboard shuts down THEN all threads SHALL be properly terminated.
### Requirement 5: Error Handling and Recovery
**User Story:** As a user, I want the system to handle errors gracefully and recover automatically, so that temporary issues don't cause permanent failures.
#### Acceptance Criteria
1. WHEN unhandled exceptions occur THEN they SHALL be caught and logged without crashing the process.
2. WHEN network connections fail THEN the system SHALL retry with exponential backoff.
3. WHEN data sources are unavailable THEN fallback mechanisms SHALL provide basic functionality.
4. WHEN memory issues occur THEN the system SHALL free resources and continue operating.
5. WHEN critical errors happen THEN the system SHALL attempt automatic recovery.
6. WHEN recovery fails THEN the system SHALL provide clear error messages and graceful degradation.
### Requirement 6: Monitoring and Diagnostics
**User Story:** As a developer, I want comprehensive monitoring and diagnostics, so that I can quickly identify and resolve stability issues.
#### Acceptance Criteria
1. WHEN the system runs THEN it SHALL provide real-time health monitoring for all components.
2. WHEN errors occur THEN detailed diagnostic information SHALL be logged with timestamps and context.
3. WHEN performance issues arise THEN resource usage metrics SHALL be available.
4. WHEN processes communicate THEN message flow SHALL be traceable for debugging.
5. WHEN the system starts THEN startup diagnostics SHALL verify all components are working correctly.
6. WHEN stability issues occur THEN automated alerts SHALL notify administrators.
### Requirement 7: Configuration and Control
**User Story:** As a system administrator, I want flexible configuration options, so that I can optimize system behavior for different environments.
#### Acceptance Criteria
1. WHEN configuring the system THEN separate configuration files SHALL be used for dashboard and training processes.
2. WHEN adjusting resource limits THEN configuration SHALL allow tuning memory, CPU, and GPU usage.
3. WHEN setting update intervals THEN dashboard refresh rates SHALL be configurable.
4. WHEN enabling features THEN individual components SHALL be independently controllable.
5. WHEN debugging THEN log levels SHALL be adjustable without restarting processes.
6. WHEN deploying THEN environment-specific configurations SHALL be supported.
### Requirement 8: Backward Compatibility
**User Story:** As a user, I want the stability fixes to maintain existing functionality, so that current workflows continue to work.
#### Acceptance Criteria
1. WHEN the fixes are applied THEN all existing dashboard features SHALL continue to work.
2. WHEN training processes run THEN they SHALL maintain the same interfaces and outputs.
3. WHEN data is accessed THEN existing data formats SHALL remain compatible.
4. WHEN APIs are used THEN existing endpoints SHALL continue to function.
5. WHEN configurations are loaded THEN existing config files SHALL remain valid.
6. WHEN the system upgrades THEN migration paths SHALL preserve user settings and data.


@ -0,0 +1,79 @@
# Implementation Plan
- [x] 1. Create Shared Data Manager for inter-process communication
- Implement JSON-based file sharing with atomic writes and file locking
- Create data models for training status, dashboard state, and process status
- Add validation and error handling for all data operations
- _Requirements: 2.4, 3.4, 5.2_
- [ ] 2. Implement Async Handler for proper async/await management
- Create centralized async operation handler with single event loop management
- Fix all async/await patterns in dashboard code
- Add proper exception handling for async operations with timeout support
- _Requirements: 1.1, 1.2, 1.3, 1.6_
- [ ] 3. Create Isolated Training Process
- Extract training logic into standalone process without UI dependencies
- Implement file-based status reporting and metrics sharing
- Add proper resource cleanup and error handling
- _Requirements: 2.1, 2.2, 3.1, 4.5_
- [ ] 4. Create Isolated Dashboard Process
- Refactor dashboard to run independently with file-based data access
- Remove direct memory sharing and threading conflicts with training
- Implement proper process lifecycle management
- _Requirements: 2.1, 2.3, 4.1, 4.2_
- [ ] 5. Implement Process Manager
- Create process lifecycle management with subprocess handling
- Add process monitoring, health checks, and automatic restart capabilities
- Implement graceful shutdown with proper cleanup
- _Requirements: 2.5, 5.5, 6.1, 6.6_
- [ ] 6. Create Resource Manager
- Implement GPU resource allocation and conflict prevention
- Add memory usage monitoring and resource limits enforcement
- Create separate logging and temporary file management
- _Requirements: 3.1, 3.2, 3.5, 3.6_
- [ ] 7. Fix Threading Safety Issues
- Audit and fix all shared data access with proper synchronization
- Implement proper thread cleanup and exception handling
- Remove race conditions and deadlock potential
- _Requirements: 4.1, 4.2, 4.3, 4.6_
- [ ] 8. Implement Error Handling and Recovery
- Add comprehensive exception handling with proper logging
- Create automatic retry mechanisms with exponential backoff
- Implement fallback mechanisms and graceful degradation
- _Requirements: 5.1, 5.2, 5.3, 5.6_
- [ ] 9. Create System Launcher and Configuration
- Build unified launcher script for both processes
- Create separate configuration files for dashboard and training
- Add environment-specific configuration support
- _Requirements: 7.1, 7.2, 7.4, 7.6_
- [ ] 10. Add Monitoring and Diagnostics
- Implement real-time health monitoring for all components
- Create detailed diagnostic logging with structured format
- Add performance metrics collection and resource usage tracking
- _Requirements: 6.1, 6.2, 6.3, 6.5_
- [ ] 11. Create Integration Tests
- Write tests for inter-process communication and data sharing
- Test process lifecycle management and error recovery
- Validate resource conflict resolution and stability improvements
- _Requirements: 5.4, 5.5, 6.4, 8.1_
- [ ] 12. Update Documentation and Migration Guide
- Document new architecture and deployment procedures
- Create migration guide from existing system
- Add troubleshooting guide for common stability issues
- _Requirements: 8.2, 8.5, 8.6_
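As a point of reference for task 1, here is a minimal sketch of the atomic write/read pattern, assuming the third-party `filelock` package and a hypothetical `shared/training_status.json` location; the real Shared Data Manager adds the documented data models and validation on top of this.
```python
# Minimal sketch of the atomic JSON write/read pattern from task 1.
# The file path and the use of the `filelock` package are assumptions,
# not the project's actual implementation.
import json
import os
import tempfile
from pathlib import Path
from typing import Optional

from filelock import FileLock  # pip install filelock

STATE_FILE = Path("shared/training_status.json")  # hypothetical location
LOCK_FILE = str(STATE_FILE) + ".lock"


def write_status(status: dict) -> None:
    """Write atomically: dump to a temp file in the same directory, then os.replace() it in."""
    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
    with FileLock(LOCK_FILE):
        fd, tmp_path = tempfile.mkstemp(dir=STATE_FILE.parent, suffix=".tmp")
        try:
            with os.fdopen(fd, "w") as tmp:
                json.dump(status, tmp)
                tmp.flush()
                os.fsync(tmp.fileno())
            os.replace(tmp_path, STATE_FILE)  # atomic rename; readers never see a partial file
        except BaseException:
            os.unlink(tmp_path)
            raise


def read_status(default: Optional[dict] = None) -> dict:
    """Read the last complete snapshot, falling back to a default on missing or corrupt data."""
    with FileLock(LOCK_FILE):
        try:
            with STATE_FILE.open() as f:
                return json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            return {} if default is None else default
```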

View File

@ -0,0 +1,293 @@
# WebSocket COB Data Fix Design Document
## Overview
This design document outlines the approach to fixing the WebSocket COB (Consolidated Order Book) data processing issue in the trading system. The current implementation fails with `'NoneType' object has no attribute 'append'` errors for both BTC/USDT and ETH/USDT pairs, which indicates that a data structure expected to be a list is actually None. The issue prevents the dashboard from functioning properly and must be addressed to ensure reliable real-time market data processing.
## Architecture
The COB data processing pipeline involves several components:
1. **MultiExchangeCOBProvider**: Collects order book data from exchanges via WebSockets
2. **StandardizedDataProvider**: Extends DataProvider with standardized BaseDataInput functionality
3. **Dashboard Components**: Display COB data in the UI
The error occurs during WebSocket data processing, specifically when trying to append data to a collection that hasn't been properly initialized. The fix will focus on ensuring proper initialization of data structures and implementing robust error handling.
## Components and Interfaces
### 1. MultiExchangeCOBProvider
The `MultiExchangeCOBProvider` class is responsible for collecting order book data from exchanges and distributing it to subscribers. The issue appears to be in the WebSocket data processing logic, where data structures may not be properly initialized before use.
#### Key Issues to Address
1. **Data Structure Initialization**: Ensure all data structures (particularly collections that will have `append` called on them) are properly initialized during object creation.
2. **Subscriber Notification**: Fix the `_notify_cob_subscribers` method to handle edge cases and ensure data is properly formatted before notification.
3. **WebSocket Processing**: Enhance error handling in WebSocket processing methods to prevent cascading failures.
#### Implementation Details
```python
class MultiExchangeCOBProvider:
def __init__(self, symbols: List[str], exchange_configs: Dict[str, ExchangeConfig]):
# Existing initialization code...
# Ensure all data structures are properly initialized
self.cob_data_cache = {} # Cache for COB data
self.cob_subscribers = [] # List of callback functions
self.exchange_order_books = {}
self.session_trades = {}
self.svp_cache = {}
# Initialize data structures for each symbol
for symbol in symbols:
self.cob_data_cache[symbol] = {}
self.exchange_order_books[symbol] = {}
self.session_trades[symbol] = []
self.svp_cache[symbol] = {}
# Initialize exchange-specific data structures
for exchange_name in self.active_exchanges:
self.exchange_order_books[symbol][exchange_name] = {
'bids': {},
'asks': {},
'deep_bids': {},
'deep_asks': {},
'timestamp': datetime.now(),
'deep_timestamp': datetime.now(),
'connected': False,
'last_update_id': 0
}
logger.info(f"Multi-exchange COB provider initialized for symbols: {symbols}")
async def _notify_cob_subscribers(self, symbol: str, cob_snapshot: Dict):
"""Notify all subscribers of COB data updates with improved error handling"""
try:
if not cob_snapshot:
logger.warning(f"Attempted to notify subscribers with empty COB snapshot for {symbol}")
return
for callback in self.cob_subscribers:
try:
if asyncio.iscoroutinefunction(callback):
await callback(symbol, cob_snapshot)
else:
callback(symbol, cob_snapshot)
except Exception as e:
logger.error(f"Error in COB subscriber callback: {e}", exc_info=True)
except Exception as e:
logger.error(f"Error notifying COB subscribers: {e}", exc_info=True)
```
### 2. StandardizedDataProvider
The `StandardizedDataProvider` class extends the base `DataProvider` with standardized data input functionality. It needs to properly handle COB data and ensure all data structures are initialized.
#### Key Issues to Address
1. **COB Data Handling**: Ensure proper initialization and validation of COB data structures.
2. **Error Handling**: Improve error handling when processing COB data.
3. **Data Structure Consistency**: Maintain consistent data structures throughout the processing pipeline.
#### Implementation Details
```python
class StandardizedDataProvider(DataProvider):
def __init__(self, symbols: List[str] = None, timeframes: List[str] = None):
"""Initialize the standardized data provider with proper data structure initialization"""
super().__init__(symbols, timeframes)
# Standardized data storage
self.base_data_cache = {} # {symbol: BaseDataInput}
self.cob_data_cache = {} # {symbol: COBData}
# Model output management with extensible storage
self.model_output_manager = ModelOutputManager(
cache_dir=str(self.cache_dir / "model_outputs"),
max_history=1000
)
# COB moving averages calculation
self.cob_imbalance_history = {} # {symbol: deque of (timestamp, imbalance_data)}
self.ma_calculation_lock = Lock()
# Initialize caches for each symbol
for symbol in self.symbols:
self.base_data_cache[symbol] = None
self.cob_data_cache[symbol] = None
self.cob_imbalance_history[symbol] = deque(maxlen=300) # 5 minutes of 1s data
# COB provider integration
self.cob_provider = None
self._initialize_cob_provider()
logger.info("StandardizedDataProvider initialized with BaseDataInput support")
def _process_cob_data(self, symbol: str, cob_snapshot: Dict):
"""Process COB data with improved error handling"""
try:
if not cob_snapshot:
logger.warning(f"Received empty COB snapshot for {symbol}")
return
# Process COB data and update caches
# ...
except Exception as e:
logger.error(f"Error processing COB data for {symbol}: {e}", exc_info=True)
```
### 3. WebSocket COB Data Processing
The WebSocket COB data processing logic needs to be enhanced to handle edge cases and ensure proper data structure initialization.
#### Key Issues to Address
1. **WebSocket Connection Management**: Improve connection management to handle disconnections gracefully.
2. **Data Processing**: Ensure data is properly validated before processing.
3. **Error Recovery**: Implement recovery mechanisms for WebSocket failures.
#### Implementation Details
```python
async def _stream_binance_orderbook(self, symbol: str, config: ExchangeConfig):
"""Stream order book data from Binance with improved error handling"""
reconnect_delay = 1 # Start with 1 second delay
max_reconnect_delay = 60 # Maximum delay of 60 seconds
while self.is_streaming:
try:
ws_url = f"{config.websocket_url}{config.symbols_mapping[symbol].lower()}@depth20@100ms"
logger.info(f"Connecting to Binance WebSocket: {ws_url}")
if websockets is None or websockets_connect is None:
raise ImportError("websockets module not available")
async with websockets_connect(ws_url) as websocket:
# Ensure data structures are initialized
if symbol not in self.exchange_order_books:
self.exchange_order_books[symbol] = {}
if 'binance' not in self.exchange_order_books[symbol]:
self.exchange_order_books[symbol]['binance'] = {
'bids': {},
'asks': {},
'deep_bids': {},
'deep_asks': {},
'timestamp': datetime.now(),
'deep_timestamp': datetime.now(),
'connected': False,
'last_update_id': 0
}
self.exchange_order_books[symbol]['binance']['connected'] = True
logger.info(f"Connected to Binance order book stream for {symbol}")
# Reset reconnect delay on successful connection
reconnect_delay = 1
async for message in websocket:
if not self.is_streaming:
break
try:
data = json.loads(message)
await self._process_binance_orderbook(symbol, data)
except json.JSONDecodeError as e:
logger.error(f"Error parsing Binance message: {e}")
except Exception as e:
logger.error(f"Error processing Binance data: {e}", exc_info=True)
except Exception as e:
logger.error(f"Binance WebSocket error for {symbol}: {e}", exc_info=True)
# Mark as disconnected
if symbol in self.exchange_order_books and 'binance' in self.exchange_order_books[symbol]:
self.exchange_order_books[symbol]['binance']['connected'] = False
# Implement exponential backoff for reconnection
logger.info(f"Reconnecting to Binance WebSocket for {symbol} in {reconnect_delay}s")
await asyncio.sleep(reconnect_delay)
reconnect_delay = min(reconnect_delay * 2, max_reconnect_delay)
```
## Data Models
The data models remain unchanged, but we need to ensure they are properly initialized and validated throughout the system.
### COBSnapshot
```python
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, List

# ConsolidatedOrderBookLevel is defined alongside this model in the COB provider module
@dataclass
class COBSnapshot:
"""Complete Consolidated Order Book snapshot"""
symbol: str
timestamp: datetime
consolidated_bids: List[ConsolidatedOrderBookLevel]
consolidated_asks: List[ConsolidatedOrderBookLevel]
exchanges_active: List[str]
volume_weighted_mid: float
total_bid_liquidity: float
total_ask_liquidity: float
spread_bps: float
liquidity_imbalance: float
price_buckets: Dict[str, Dict[str, float]] # Fine-grain volume buckets
```
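Where snapshots are consumed, the validation mentioned above can be centralized in a small guard; the helper below is a sketch, and its name and exact checks are not part of the existing codebase.
```python
# Hedged sketch; the helper name and exact checks are illustrative only.
def is_valid_cob_snapshot(snapshot: "COBSnapshot") -> bool:
    """Return True only if the snapshot is safe to hand to subscribers."""
    if snapshot is None:
        return False
    if not snapshot.consolidated_bids or not snapshot.consolidated_asks:
        return False
    if snapshot.volume_weighted_mid <= 0 or not snapshot.exchanges_active:
        return False
    return True
```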
## Error Handling
### WebSocket Connection Errors
- Implement exponential backoff for reconnection attempts
- Log detailed error information
- Maintain system operation with last valid data
### Data Processing Errors
- Validate data before processing
- Handle edge cases gracefully
- Log detailed error information
- Continue operation with last valid data
### Subscriber Notification Errors
- Catch and log errors in subscriber callbacks
- Prevent errors in one subscriber from affecting others
- Ensure data is properly formatted before notification
## Testing Strategy
### Unit Testing
- Test data structure initialization
- Test error handling in WebSocket processing
- Test subscriber notification with various edge cases
### Integration Testing
- Test end-to-end COB data flow
- Test recovery from WebSocket disconnections
- Test handling of malformed data
### System Testing
- Test dashboard operation with COB data
- Test system stability under high load
- Test recovery from various failure scenarios
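As a concrete example of the first unit-testing bullet, a pytest-style check could assert that every per-symbol collection exists before any WebSocket data arrives; the constructor arguments and import path below are simplified assumptions about the real API.
```python
# Illustrative pytest sketch; import path and constructor arguments are assumptions.
from multi_exchange_cob_provider import MultiExchangeCOBProvider  # hypothetical import path


def test_data_structures_initialized():
    provider = MultiExchangeCOBProvider(
        symbols=["BTC/USDT", "ETH/USDT"],
        exchange_configs={},  # no live exchanges needed for this check
    )
    for symbol in ["BTC/USDT", "ETH/USDT"]:
        assert provider.session_trades[symbol] == []  # append() target exists
        assert isinstance(provider.exchange_order_books[symbol], dict)
        assert isinstance(provider.svp_cache[symbol], dict)
    assert provider.cob_subscribers == []
```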
## Implementation Plan
1. Fix data structure initialization in `MultiExchangeCOBProvider`
2. Enhance error handling in WebSocket processing
3. Improve subscriber notification logic
4. Update `StandardizedDataProvider` to properly handle COB data
5. Add comprehensive logging for debugging
6. Implement recovery mechanisms for WebSocket failures
7. Test all changes thoroughly
## Conclusion
This design addresses the WebSocket COB data processing issue by ensuring proper initialization of data structures, implementing robust error handling, and adding recovery mechanisms for WebSocket failures. These changes will improve the reliability and stability of the trading system, allowing traders to monitor market data in real-time without interruptions.

View File

@ -0,0 +1,43 @@
# Requirements Document
## Introduction
The WebSocket COB Data Fix addresses a critical issue in the trading system where WebSocket COB (Consolidated Order Book) data processing fails with the error `'NoneType' object has no attribute 'append'`. The error occurs for both BTC/USDT and ETH/USDT pairs and prevents the dashboard from functioning properly. The fix will ensure proper initialization and handling of data structures in the COB data processing pipeline.
## Requirements
### Requirement 1: Fix WebSocket COB Data Processing
**User Story:** As a trader, I want the WebSocket COB data processing to work reliably without errors, so that I can monitor market data in real-time and make informed trading decisions.
#### Acceptance Criteria
1. WHEN WebSocket COB data is received for any trading pair THEN the system SHALL process it without throwing 'NoneType' object has no attribute 'append' errors
2. WHEN the dashboard is started THEN all data structures for COB processing SHALL be properly initialized
3. WHEN COB data is processed THEN the system SHALL handle edge cases such as missing or incomplete data gracefully
4. WHEN a WebSocket connection is established THEN the system SHALL verify that all required data structures are initialized before processing data
5. WHEN COB data is being processed THEN the system SHALL log appropriate debug information to help diagnose any issues
### Requirement 2: Ensure Data Structure Consistency
**User Story:** As a system administrator, I want consistent data structures throughout the COB processing pipeline, so that data can flow smoothly between components without errors.
#### Acceptance Criteria
1. WHEN the multi_exchange_cob_provider initializes THEN it SHALL properly initialize all required data structures
2. WHEN the standardized_data_provider receives COB data THEN it SHALL validate the data structure before processing
3. WHEN COB data is passed between components THEN the system SHALL ensure type consistency
4. WHEN new COB data arrives THEN the system SHALL update the data structures atomically to prevent race conditions
5. WHEN a component subscribes to COB updates THEN the system SHALL verify the subscriber can handle the data format
### Requirement 3: Improve Error Handling and Recovery
**User Story:** As a system operator, I want robust error handling and recovery mechanisms in the COB data processing pipeline, so that temporary failures don't cause the entire system to crash.
#### Acceptance Criteria
1. WHEN an error occurs in COB data processing THEN the system SHALL log detailed error information
2. WHEN a WebSocket connection fails THEN the system SHALL attempt to reconnect automatically
3. WHEN data processing fails THEN the system SHALL continue operation with the last valid data
4. WHEN the system recovers from an error THEN it SHALL restore normal operation without manual intervention
5. WHEN multiple consecutive errors occur THEN the system SHALL implement exponential backoff to prevent overwhelming the system

View File

@ -0,0 +1,115 @@
# Implementation Plan
- [ ] 1. Fix data structure initialization in MultiExchangeCOBProvider
- Ensure all collections are properly initialized during object creation
- Add defensive checks before accessing data structures
- Implement proper initialization for symbol-specific data structures
- _Requirements: 1.1, 1.2, 2.1_
- [ ] 1.1. Update MultiExchangeCOBProvider constructor
- Modify __init__ method to properly initialize all data structures
- Ensure exchange_order_books is initialized for each symbol and exchange
- Initialize session_trades and svp_cache for each symbol
- Add defensive checks to prevent NoneType errors (see the initialization sketch after this plan)
- _Requirements: 1.2, 2.1_
- [ ] 1.2. Fix _notify_cob_subscribers method
- Add validation to ensure cob_snapshot is not None before processing
- Add defensive checks before accessing cob_snapshot attributes
- Improve error handling for subscriber callbacks
- Add detailed logging for debugging
- _Requirements: 1.1, 1.5, 2.3_
- [ ] 2. Enhance WebSocket data processing in MultiExchangeCOBProvider
- Improve error handling in WebSocket connection methods
- Add validation for incoming data
- Implement reconnection logic with exponential backoff
- _Requirements: 1.3, 1.4, 3.1, 3.2_
- [ ] 2.1. Update _stream_binance_orderbook method
- Add data structure initialization checks
- Implement exponential backoff for reconnection attempts
- Add detailed error logging
- Ensure proper cleanup on disconnection
- _Requirements: 1.4, 3.2, 3.4_
- [ ] 2.2. Fix _process_binance_orderbook method
- Add validation for incoming data
- Ensure data structures exist before updating
- Add defensive checks to prevent NoneType errors
- Improve error handling and logging
- _Requirements: 1.1, 1.3, 3.1_
- [ ] 3. Update StandardizedDataProvider to handle COB data properly
- Improve initialization of COB-related data structures
- Add validation for COB data
- Enhance error handling for COB data processing
- _Requirements: 1.3, 2.2, 2.3_
- [ ] 3.1. Fix _get_cob_data method
- Add validation for COB provider availability
- Ensure proper initialization of COB data structures
- Add defensive checks to prevent NoneType errors
- Improve error handling and logging
- _Requirements: 1.3, 2.2, 3.3_
- [ ] 3.2. Update _calculate_cob_moving_averages method
- Add validation for input data
- Ensure proper initialization of moving average data structures
- Add defensive checks to prevent NoneType errors
- Improve error handling for edge cases
- _Requirements: 1.3, 2.2, 3.3_
- [ ] 4. Implement recovery mechanisms for WebSocket failures
- Add state tracking for WebSocket connections
- Implement automatic reconnection with exponential backoff
- Add fallback mechanisms for temporary failures
- _Requirements: 3.2, 3.3, 3.4_
- [ ] 4.1. Add connection state management
- Track connection state for each WebSocket
- Implement health check mechanism
- Add reconnection logic based on connection state
- _Requirements: 3.2, 3.4_
- [ ] 4.2. Implement data recovery mechanisms
- Add caching for last valid data
- Implement fallback to cached data during connection issues
- Add mechanism to rebuild state after reconnection
- _Requirements: 3.3, 3.4_
- [ ] 5. Add comprehensive logging for debugging
- Add detailed logging throughout the COB processing pipeline
- Include context information in log messages
- Add performance metrics logging
- _Requirements: 1.5, 3.1_
- [ ] 5.1. Enhance logging in MultiExchangeCOBProvider
- Add detailed logging for WebSocket connections
- Log data processing steps and outcomes
- Add performance metrics for data processing
- _Requirements: 1.5, 3.1_
- [ ] 5.2. Add logging in StandardizedDataProvider
- Log COB data processing steps
- Add validation logging
- Include performance metrics for data processing
- _Requirements: 1.5, 3.1_
- [ ] 6. Test all changes thoroughly
- Write unit tests for fixed components
- Test integration between components
- Verify dashboard operation with COB data
- _Requirements: 1.1, 2.3, 3.4_
- [ ] 6.1. Write unit tests for MultiExchangeCOBProvider
- Test data structure initialization
- Test WebSocket processing with mock data
- Test error handling and recovery
- _Requirements: 1.1, 1.3, 3.1_
- [ ] 6.2. Test integration with dashboard
- Verify COB data display in dashboard
- Test system stability under load
- Verify recovery from failures
- _Requirements: 1.1, 3.3, 3.4_
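The defensive checks called out in tasks 1.1 and 2.2 can be captured in one helper that lazily creates the nested structures; the function below is a sketch against the provider described in the design document, not existing code.
```python
# Hypothetical helper mirroring the structure shown in the design document.
from datetime import datetime


def ensure_orderbook_slot(provider, symbol: str, exchange: str) -> dict:
    """Create the nested dicts/lists on first use so append/update never hits None."""
    book = provider.exchange_order_books.setdefault(symbol, {})
    slot = book.setdefault(exchange, {
        'bids': {}, 'asks': {},
        'deep_bids': {}, 'deep_asks': {},
        'timestamp': datetime.now(),
        'deep_timestamp': datetime.now(),
        'connected': False,
        'last_update_id': 0,
    })
    provider.session_trades.setdefault(symbol, [])
    provider.svp_cache.setdefault(symbol, {})
    return slot
```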

2
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,2 @@
{
}

8
.vscode/tasks.json vendored
View File

@ -6,8 +6,10 @@
"type": "shell",
"command": "powershell",
"args": [
"-Command",
"Get-Process python | Where-Object {$_.ProcessName -eq 'python' -and $_.MainWindowTitle -like '*dashboard*'} | Stop-Process -Force; Start-Sleep -Seconds 1"
"-ExecutionPolicy",
"Bypass",
"-File",
"scripts/kill_stale_processes.ps1"
],
"group": "build",
"presentation": {
@ -108,4 +110,4 @@
"problemMatcher": []
}
]
}
}

231
COBY/README.md Normal file
View File

@ -0,0 +1,231 @@
# COBY - Multi-Exchange Data Aggregation System
COBY (Cryptocurrency Order Book Yielder) is a comprehensive data collection and aggregation subsystem designed to serve as the foundational data layer for trading systems. It collects real-time order book and OHLCV data from multiple cryptocurrency exchanges, aggregates it into standardized formats, and provides both live data feeds and historical replay capabilities.
## 🏗️ Architecture
The system follows a modular architecture with clear separation of concerns:
```
COBY/
├── config.py # Configuration management
├── models/ # Data models and structures
│ ├── __init__.py
│ └── core.py # Core data models
├── interfaces/ # Abstract interfaces
│ ├── __init__.py
│ ├── exchange_connector.py
│ ├── data_processor.py
│ ├── aggregation_engine.py
│ ├── storage_manager.py
│ └── replay_manager.py
├── utils/ # Utility functions
│ ├── __init__.py
│ ├── exceptions.py
│ ├── logging.py
│ ├── validation.py
│ └── timing.py
└── README.md
```
## 🚀 Features
- **Multi-Exchange Support**: Connect to 10+ major cryptocurrency exchanges
- **Real-Time Data**: High-frequency order book and trade data collection
- **Price Bucket Aggregation**: Configurable price buckets ($10 for BTC, $1 for ETH)
- **Heatmap Visualization**: Real-time market depth heatmaps
- **Historical Replay**: Replay past market events for model training
- **TimescaleDB Storage**: Optimized time-series data storage
- **Redis Caching**: High-performance data caching layer
- **Orchestrator Integration**: Compatible with existing trading systems
## 📊 Data Models
### Core Models
- **OrderBookSnapshot**: Standardized order book data
- **TradeEvent**: Individual trade events
- **PriceBuckets**: Aggregated price bucket data
- **HeatmapData**: Visualization-ready heatmap data
- **ConnectionStatus**: Exchange connection monitoring
- **ReplaySession**: Historical data replay management
### Key Features
- Automatic data validation and normalization
- Configurable price bucket sizes per symbol
- Real-time metrics calculation
- Cross-exchange data consolidation
- Quality scoring and anomaly detection
## ⚙️ Configuration
The system uses environment variables for configuration:
```python
# Database settings
DB_HOST=192.168.0.10
DB_PORT=5432
DB_NAME=market_data
DB_USER=market_user
DB_PASSWORD=your_password
# Redis settings
REDIS_HOST=192.168.0.10
REDIS_PORT=6379
REDIS_PASSWORD=your_password
# Aggregation settings
BTC_BUCKET_SIZE=10.0
ETH_BUCKET_SIZE=1.0
HEATMAP_DEPTH=50
UPDATE_FREQUENCY=0.5
# Performance settings
DATA_BUFFER_SIZE=10000
BATCH_WRITE_SIZE=1000
MAX_MEMORY_USAGE=2048
```
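A rough sketch of how these variables might map onto the config object is shown below; the real `COBY/config.py` is authoritative and may differ in structure, although `get_database_url()` and `get_bucket_size()` mirror the usage shown later in this README.
```python
# Illustrative environment-driven configuration; mirrors the variables above but is
# not guaranteed to match the real COBY/config.py.
import os


class CobyConfigSketch:
    def __init__(self) -> None:
        self.db_host = os.getenv("DB_HOST", "localhost")
        self.db_port = int(os.getenv("DB_PORT", "5432"))
        self.db_name = os.getenv("DB_NAME", "market_data")
        self.db_user = os.getenv("DB_USER", "market_user")
        self.db_password = os.getenv("DB_PASSWORD", "")
        self.btc_bucket_size = float(os.getenv("BTC_BUCKET_SIZE", "10.0"))
        self.eth_bucket_size = float(os.getenv("ETH_BUCKET_SIZE", "1.0"))
        self.heatmap_depth = int(os.getenv("HEATMAP_DEPTH", "50"))
        self.update_frequency = float(os.getenv("UPDATE_FREQUENCY", "0.5"))

    def get_database_url(self) -> str:
        return (f"postgresql://{self.db_user}:{self.db_password}"
                f"@{self.db_host}:{self.db_port}/{self.db_name}")

    def get_bucket_size(self, symbol: str) -> float:
        return self.btc_bucket_size if symbol.upper().startswith("BTC") else self.eth_bucket_size
```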
## 🔌 Interfaces
### ExchangeConnector
Abstract base class for exchange WebSocket connectors with:
- Connection management with auto-reconnect
- Order book and trade subscriptions
- Data normalization callbacks
- Health monitoring
### DataProcessor
Interface for data processing and validation:
- Raw data normalization
- Quality validation
- Metrics calculation
- Anomaly detection
### AggregationEngine
Interface for data aggregation:
- Price bucket creation
- Heatmap generation
- Cross-exchange consolidation
- Imbalance calculations
### StorageManager
Interface for data persistence:
- TimescaleDB operations
- Batch processing
- Historical data retrieval
- Storage optimization
### ReplayManager
Interface for historical data replay:
- Session management
- Configurable playback speeds
- Time-based seeking
- Real-time compatibility
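To illustrate the contract style, a sketch of the ExchangeConnector interface is shown below; the authoritative signatures live in `interfaces/exchange_connector.py` and may differ.
```python
# Indicative sketch only; see COBY/interfaces/exchange_connector.py for the real interface.
from abc import ABC, abstractmethod
from typing import Callable


class ExchangeConnectorSketch(ABC):
    """WebSocket connector contract: connect, subscribe, hand off normalized data."""

    @abstractmethod
    async def connect(self) -> None:
        """Open the WebSocket connection; auto-reconnect is handled internally."""

    @abstractmethod
    async def subscribe_orderbook(self, symbol: str, callback: Callable) -> None:
        """Register a callback that receives normalized order book snapshots."""

    @abstractmethod
    async def subscribe_trades(self, symbol: str, callback: Callable) -> None:
        """Register a callback that receives normalized trade events."""

    @abstractmethod
    def is_healthy(self) -> bool:
        """Report connection health for monitoring."""
```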
## 🛠️ Utilities
### Logging
- Structured logging with correlation IDs
- Configurable log levels and outputs
- Rotating file handlers
- Context-aware logging
### Validation
- Symbol format validation
- Price and volume validation
- Configuration validation
- Data quality checks
### Timing
- UTC timestamp handling
- Performance measurement
- Time-based operations
- Interval calculations
### Exceptions
- Custom exception hierarchy
- Error code management
- Detailed error context
- Structured error responses
## 🔧 Usage
### Basic Configuration
```python
from COBY.config import config
# Access configuration
db_url = config.get_database_url()
bucket_size = config.get_bucket_size('BTCUSDT')
```
### Data Models
```python
from datetime import datetime, timezone

from COBY.models import OrderBookSnapshot, PriceLevel
# Create order book snapshot
orderbook = OrderBookSnapshot(
symbol='BTCUSDT',
exchange='binance',
timestamp=datetime.now(timezone.utc),
bids=[PriceLevel(50000.0, 1.5)],
asks=[PriceLevel(50100.0, 2.0)]
)
# Access calculated properties
mid_price = orderbook.mid_price
spread = orderbook.spread
```
### Logging
```python
from COBY.utils import setup_logging, get_logger, set_correlation_id
# Setup logging
setup_logging(level='INFO', log_file='logs/coby.log')
# Get logger
logger = get_logger(__name__)
# Use correlation ID
set_correlation_id('req-123')
logger.info("Processing order book data")
```
## 🏃 Next Steps
This is the foundational structure for the COBY system. The next implementation tasks will build upon these interfaces and models to create:
1. TimescaleDB integration
2. Exchange connector implementations
3. Data processing engines
4. Aggregation algorithms
5. Web dashboard
6. API endpoints
7. Replay functionality
Each component will implement the defined interfaces, ensuring consistency and maintainability across the entire system.
## 📝 Development Guidelines
- All components must implement the defined interfaces
- Use the provided data models for consistency
- Follow the logging and error handling patterns
- Validate all input data using the utility functions
- Maintain backward compatibility with the orchestrator interface
- Write comprehensive tests for all functionality
## 🔍 Monitoring
The system provides comprehensive monitoring through:
- Structured logging with correlation IDs
- Performance metrics collection
- Health check endpoints
- Connection status monitoring
- Data quality indicators
- System resource tracking

9
COBY/__init__.py Normal file
View File

@ -0,0 +1,9 @@
"""
Multi-Exchange Data Aggregation System (COBY)
A comprehensive data collection and aggregation subsystem for cryptocurrency exchanges.
Provides real-time order book data, heatmap visualization, and historical replay capabilities.
"""
__version__ = "1.0.0"
__author__ = "Trading System Team"

View File

@ -0,0 +1,15 @@
"""
Data aggregation components for the COBY system.
"""
from .aggregation_engine import StandardAggregationEngine
from .price_bucketer import PriceBucketer
from .heatmap_generator import HeatmapGenerator
from .cross_exchange_aggregator import CrossExchangeAggregator
__all__ = [
'StandardAggregationEngine',
'PriceBucketer',
'HeatmapGenerator',
'CrossExchangeAggregator'
]

View File

@ -0,0 +1,338 @@
"""
Main aggregation engine implementation.
"""
from typing import Any, Dict, List
from ..interfaces.aggregation_engine import AggregationEngine
from ..models.core import (
OrderBookSnapshot, PriceBuckets, HeatmapData,
ImbalanceMetrics, ConsolidatedOrderBook
)
from ..utils.logging import get_logger, set_correlation_id
from ..utils.exceptions import AggregationError
from .price_bucketer import PriceBucketer
from .heatmap_generator import HeatmapGenerator
from .cross_exchange_aggregator import CrossExchangeAggregator
from ..processing.metrics_calculator import MetricsCalculator
logger = get_logger(__name__)
class StandardAggregationEngine(AggregationEngine):
"""
Standard implementation of aggregation engine interface.
Provides:
- Price bucket creation with $1 USD buckets
- Heatmap generation
- Cross-exchange aggregation
- Imbalance calculations
- Support/resistance detection
"""
def __init__(self):
"""Initialize aggregation engine with components"""
self.price_bucketer = PriceBucketer()
self.heatmap_generator = HeatmapGenerator()
self.cross_exchange_aggregator = CrossExchangeAggregator()
self.metrics_calculator = MetricsCalculator()
# Processing statistics
self.buckets_created = 0
self.heatmaps_generated = 0
self.consolidations_performed = 0
logger.info("Standard aggregation engine initialized")
def create_price_buckets(self, orderbook: OrderBookSnapshot,
bucket_size: float = None) -> PriceBuckets:
"""
Convert order book data to price buckets.
Args:
orderbook: Order book snapshot
bucket_size: Size of each price bucket (uses $1 default)
Returns:
PriceBuckets: Aggregated price bucket data
"""
try:
set_correlation_id()
# Use provided bucket size or default $1
if bucket_size:
bucketer = PriceBucketer(bucket_size)
else:
bucketer = self.price_bucketer
buckets = bucketer.create_price_buckets(orderbook)
self.buckets_created += 1
logger.debug(f"Created price buckets for {orderbook.symbol}@{orderbook.exchange}")
return buckets
except Exception as e:
logger.error(f"Error creating price buckets: {e}")
raise AggregationError(f"Price bucket creation failed: {e}", "BUCKET_ERROR")
def update_heatmap(self, symbol: str, buckets: PriceBuckets) -> HeatmapData:
"""
Update heatmap data with new price buckets.
Args:
symbol: Trading symbol
buckets: Price bucket data
Returns:
HeatmapData: Updated heatmap visualization data
"""
try:
set_correlation_id()
heatmap = self.heatmap_generator.generate_heatmap(buckets)
self.heatmaps_generated += 1
logger.debug(f"Generated heatmap for {symbol}: {len(heatmap.data)} points")
return heatmap
except Exception as e:
logger.error(f"Error updating heatmap: {e}")
raise AggregationError(f"Heatmap update failed: {e}", "HEATMAP_ERROR")
def calculate_imbalances(self, orderbook: OrderBookSnapshot) -> ImbalanceMetrics:
"""
Calculate order book imbalance metrics.
Args:
orderbook: Order book snapshot
Returns:
ImbalanceMetrics: Calculated imbalance metrics
"""
try:
set_correlation_id()
return self.metrics_calculator.calculate_imbalance_metrics(orderbook)
except Exception as e:
logger.error(f"Error calculating imbalances: {e}")
raise AggregationError(f"Imbalance calculation failed: {e}", "IMBALANCE_ERROR")
def aggregate_across_exchanges(self, symbol: str,
orderbooks: List[OrderBookSnapshot]) -> ConsolidatedOrderBook:
"""
Aggregate order book data from multiple exchanges.
Args:
symbol: Trading symbol
orderbooks: List of order book snapshots from different exchanges
Returns:
ConsolidatedOrderBook: Consolidated order book data
"""
try:
set_correlation_id()
consolidated = self.cross_exchange_aggregator.aggregate_across_exchanges(
symbol, orderbooks
)
self.consolidations_performed += 1
logger.debug(f"Consolidated {len(orderbooks)} order books for {symbol}")
return consolidated
except Exception as e:
logger.error(f"Error aggregating across exchanges: {e}")
raise AggregationError(f"Cross-exchange aggregation failed: {e}", "CONSOLIDATION_ERROR")
def calculate_volume_weighted_price(self, orderbooks: List[OrderBookSnapshot]) -> float:
"""
Calculate volume-weighted average price across exchanges.
Args:
orderbooks: List of order book snapshots
Returns:
float: Volume-weighted average price
"""
try:
set_correlation_id()
return self.cross_exchange_aggregator._calculate_weighted_mid_price(orderbooks)
except Exception as e:
logger.error(f"Error calculating volume weighted price: {e}")
raise AggregationError(f"VWAP calculation failed: {e}", "VWAP_ERROR")
def get_market_depth(self, orderbook: OrderBookSnapshot,
depth_levels: List[float]) -> Dict[float, Dict[str, float]]:
"""
Calculate market depth at different price levels.
Args:
orderbook: Order book snapshot
depth_levels: List of depth percentages (e.g., [0.1, 0.5, 1.0])
Returns:
Dict: Market depth data {level: {'bid_volume': x, 'ask_volume': y}}
"""
try:
set_correlation_id()
depth_data = {}
if not orderbook.mid_price:
return depth_data
for level_pct in depth_levels:
# Calculate price range for this depth level
price_range = orderbook.mid_price * (level_pct / 100.0)
min_bid_price = orderbook.mid_price - price_range
max_ask_price = orderbook.mid_price + price_range
# Calculate volumes within this range
bid_volume = sum(
bid.size for bid in orderbook.bids
if bid.price >= min_bid_price
)
ask_volume = sum(
ask.size for ask in orderbook.asks
if ask.price <= max_ask_price
)
depth_data[level_pct] = {
'bid_volume': bid_volume,
'ask_volume': ask_volume,
'total_volume': bid_volume + ask_volume
}
logger.debug(f"Calculated market depth for {len(depth_levels)} levels")
return depth_data
except Exception as e:
logger.error(f"Error calculating market depth: {e}")
return {}
def smooth_heatmap(self, heatmap: HeatmapData, smoothing_factor: float) -> HeatmapData:
"""
Apply smoothing to heatmap data to reduce noise.
Args:
heatmap: Raw heatmap data
smoothing_factor: Smoothing factor (0.0 to 1.0)
Returns:
HeatmapData: Smoothed heatmap data
"""
try:
set_correlation_id()
return self.heatmap_generator.apply_smoothing(heatmap, smoothing_factor)
except Exception as e:
logger.error(f"Error smoothing heatmap: {e}")
return heatmap # Return original on error
def calculate_liquidity_score(self, orderbook: OrderBookSnapshot) -> float:
"""
Calculate liquidity score for an order book.
Args:
orderbook: Order book snapshot
Returns:
float: Liquidity score (0.0 to 1.0)
"""
try:
set_correlation_id()
return self.metrics_calculator.calculate_liquidity_score(orderbook)
except Exception as e:
logger.error(f"Error calculating liquidity score: {e}")
return 0.0
def detect_support_resistance(self, heatmap: HeatmapData) -> Dict[str, List[float]]:
"""
Detect support and resistance levels from heatmap data.
Args:
heatmap: Heatmap data
Returns:
Dict: {'support': [prices], 'resistance': [prices]}
"""
try:
set_correlation_id()
return self.heatmap_generator.calculate_support_resistance(heatmap)
except Exception as e:
logger.error(f"Error detecting support/resistance: {e}")
return {'support': [], 'resistance': []}
def create_consolidated_heatmap(self, symbol: str,
orderbooks: List[OrderBookSnapshot]) -> HeatmapData:
"""
Create consolidated heatmap from multiple exchanges.
Args:
symbol: Trading symbol
orderbooks: List of order book snapshots
Returns:
HeatmapData: Consolidated heatmap data
"""
try:
set_correlation_id()
return self.cross_exchange_aggregator.create_consolidated_heatmap(
symbol, orderbooks
)
except Exception as e:
logger.error(f"Error creating consolidated heatmap: {e}")
raise AggregationError(f"Consolidated heatmap creation failed: {e}", "CONSOLIDATED_HEATMAP_ERROR")
def detect_arbitrage_opportunities(self, orderbooks: List[OrderBookSnapshot]) -> List[Dict]:
"""
Detect arbitrage opportunities between exchanges.
Args:
orderbooks: List of order book snapshots
Returns:
List[Dict]: Arbitrage opportunities
"""
try:
set_correlation_id()
return self.cross_exchange_aggregator.detect_arbitrage_opportunities(orderbooks)
except Exception as e:
logger.error(f"Error detecting arbitrage opportunities: {e}")
return []
def get_processing_stats(self) -> Dict[str, Any]:
"""Get processing statistics"""
return {
'buckets_created': self.buckets_created,
'heatmaps_generated': self.heatmaps_generated,
'consolidations_performed': self.consolidations_performed,
'price_bucketer_stats': self.price_bucketer.get_processing_stats(),
'heatmap_generator_stats': self.heatmap_generator.get_processing_stats(),
'cross_exchange_stats': self.cross_exchange_aggregator.get_processing_stats()
}
def reset_stats(self) -> None:
"""Reset processing statistics"""
self.buckets_created = 0
self.heatmaps_generated = 0
self.consolidations_performed = 0
self.price_bucketer.reset_stats()
self.heatmap_generator.reset_stats()
self.cross_exchange_aggregator.reset_stats()
logger.info("Aggregation engine statistics reset")

View File

@ -0,0 +1,390 @@
"""
Cross-exchange data aggregation and consolidation.
"""
from typing import List, Dict, Optional
from collections import defaultdict
from datetime import datetime
from ..models.core import (
OrderBookSnapshot, ConsolidatedOrderBook, PriceLevel,
PriceBuckets, HeatmapData, HeatmapPoint
)
from ..utils.logging import get_logger
from ..utils.timing import get_current_timestamp
from .price_bucketer import PriceBucketer
from .heatmap_generator import HeatmapGenerator
logger = get_logger(__name__)
class CrossExchangeAggregator:
"""
Aggregates data across multiple exchanges.
Provides consolidated order books and cross-exchange heatmaps.
"""
def __init__(self):
"""Initialize cross-exchange aggregator"""
self.price_bucketer = PriceBucketer()
self.heatmap_generator = HeatmapGenerator()
# Exchange weights for aggregation
self.exchange_weights = {
'binance': 1.0,
'coinbase': 0.9,
'kraken': 0.8,
'bybit': 0.7,
'okx': 0.7,
'huobi': 0.6,
'kucoin': 0.6,
'gateio': 0.5,
'bitfinex': 0.5,
'mexc': 0.4
}
# Statistics
self.consolidations_performed = 0
self.exchanges_processed = set()
logger.info("Cross-exchange aggregator initialized")
def aggregate_across_exchanges(self, symbol: str,
orderbooks: List[OrderBookSnapshot]) -> ConsolidatedOrderBook:
"""
Aggregate order book data from multiple exchanges.
Args:
symbol: Trading symbol
orderbooks: List of order book snapshots from different exchanges
Returns:
ConsolidatedOrderBook: Consolidated order book data
"""
if not orderbooks:
raise ValueError("Cannot aggregate empty orderbook list")
try:
# Track exchanges
exchanges = [ob.exchange for ob in orderbooks]
self.exchanges_processed.update(exchanges)
# Calculate weighted mid price
weighted_mid_price = self._calculate_weighted_mid_price(orderbooks)
# Consolidate bids and asks
consolidated_bids = self._consolidate_price_levels(
[ob.bids for ob in orderbooks],
[ob.exchange for ob in orderbooks],
'bid'
)
consolidated_asks = self._consolidate_price_levels(
[ob.asks for ob in orderbooks],
[ob.exchange for ob in orderbooks],
'ask'
)
# Calculate total volumes
total_bid_volume = sum(level.size for level in consolidated_bids)
total_ask_volume = sum(level.size for level in consolidated_asks)
# Create consolidated order book
consolidated = ConsolidatedOrderBook(
symbol=symbol,
timestamp=get_current_timestamp(),
exchanges=exchanges,
bids=consolidated_bids,
asks=consolidated_asks,
weighted_mid_price=weighted_mid_price,
total_bid_volume=total_bid_volume,
total_ask_volume=total_ask_volume,
exchange_weights={ex: self.exchange_weights.get(ex, 0.5) for ex in exchanges}
)
self.consolidations_performed += 1
logger.debug(
f"Consolidated {len(orderbooks)} order books for {symbol}: "
f"{len(consolidated_bids)} bids, {len(consolidated_asks)} asks"
)
return consolidated
except Exception as e:
logger.error(f"Error aggregating across exchanges: {e}")
raise
def create_consolidated_heatmap(self, symbol: str,
orderbooks: List[OrderBookSnapshot]) -> HeatmapData:
"""
Create consolidated heatmap from multiple exchanges.
Args:
symbol: Trading symbol
orderbooks: List of order book snapshots
Returns:
HeatmapData: Consolidated heatmap data
"""
try:
# Create price buckets for each exchange
all_buckets = []
for orderbook in orderbooks:
buckets = self.price_bucketer.create_price_buckets(orderbook)
all_buckets.append(buckets)
# Aggregate all buckets
if len(all_buckets) == 1:
consolidated_buckets = all_buckets[0]
else:
consolidated_buckets = self.price_bucketer.aggregate_buckets(all_buckets)
# Generate heatmap from consolidated buckets
heatmap = self.heatmap_generator.generate_heatmap(consolidated_buckets)
# Add exchange metadata to heatmap points
self._add_exchange_metadata(heatmap, orderbooks)
logger.debug(f"Created consolidated heatmap for {symbol} from {len(orderbooks)} exchanges")
return heatmap
except Exception as e:
logger.error(f"Error creating consolidated heatmap: {e}")
raise
def _calculate_weighted_mid_price(self, orderbooks: List[OrderBookSnapshot]) -> float:
"""Calculate volume-weighted mid price across exchanges"""
total_weight = 0.0
weighted_sum = 0.0
for orderbook in orderbooks:
if orderbook.mid_price:
# Use total volume as weight
volume_weight = orderbook.bid_volume + orderbook.ask_volume
exchange_weight = self.exchange_weights.get(orderbook.exchange, 0.5)
# Combined weight
weight = volume_weight * exchange_weight
weighted_sum += orderbook.mid_price * weight
total_weight += weight
return weighted_sum / total_weight if total_weight > 0 else 0.0
def _consolidate_price_levels(self, level_lists: List[List[PriceLevel]],
exchanges: List[str], side: str) -> List[PriceLevel]:
"""Consolidate price levels from multiple exchanges"""
# Group levels by price bucket
price_groups = defaultdict(lambda: {'size': 0.0, 'count': 0, 'exchanges': set()})
for levels, exchange in zip(level_lists, exchanges):
exchange_weight = self.exchange_weights.get(exchange, 0.5)
for level in levels:
# Round price to bucket
bucket_price = self.price_bucketer.get_bucket_price(level.price)
# Add weighted volume
weighted_size = level.size * exchange_weight
price_groups[bucket_price]['size'] += weighted_size
price_groups[bucket_price]['count'] += level.count or 1
price_groups[bucket_price]['exchanges'].add(exchange)
# Create consolidated price levels
consolidated_levels = []
for price, data in price_groups.items():
if data['size'] > 0: # Only include non-zero volumes
level = PriceLevel(
price=price,
size=data['size'],
count=data['count']
)
consolidated_levels.append(level)
# Sort levels appropriately
if side == 'bid':
consolidated_levels.sort(key=lambda x: x.price, reverse=True)
else:
consolidated_levels.sort(key=lambda x: x.price)
return consolidated_levels
def _add_exchange_metadata(self, heatmap: HeatmapData,
orderbooks: List[OrderBookSnapshot]) -> None:
"""Add exchange metadata to heatmap points"""
# Create exchange mapping by price bucket
exchange_map = defaultdict(set)
for orderbook in orderbooks:
# Map bid prices to exchanges
for bid in orderbook.bids:
bucket_price = self.price_bucketer.get_bucket_price(bid.price)
exchange_map[bucket_price].add(orderbook.exchange)
# Map ask prices to exchanges
for ask in orderbook.asks:
bucket_price = self.price_bucketer.get_bucket_price(ask.price)
exchange_map[bucket_price].add(orderbook.exchange)
# Add exchange information to heatmap points
for point in heatmap.data:
bucket_price = self.price_bucketer.get_bucket_price(point.price)
# Store exchange info in a custom attribute (would need to extend HeatmapPoint)
# For now, we'll log it
exchanges_at_price = exchange_map.get(bucket_price, set())
if len(exchanges_at_price) > 1:
logger.debug(f"Price {point.price} has data from {len(exchanges_at_price)} exchanges")
def calculate_exchange_dominance(self, orderbooks: List[OrderBookSnapshot]) -> Dict[str, float]:
"""
Calculate which exchanges dominate at different price levels.
Args:
orderbooks: List of order book snapshots
Returns:
Dict[str, float]: Exchange dominance scores
"""
exchange_volumes = defaultdict(float)
total_volume = 0.0
for orderbook in orderbooks:
volume = orderbook.bid_volume + orderbook.ask_volume
exchange_volumes[orderbook.exchange] += volume
total_volume += volume
# Calculate dominance percentages
dominance = {}
for exchange, volume in exchange_volumes.items():
dominance[exchange] = (volume / total_volume * 100) if total_volume > 0 else 0.0
return dominance
def detect_arbitrage_opportunities(self, orderbooks: List[OrderBookSnapshot],
min_spread_pct: float = 0.1) -> List[Dict]:
"""
Detect potential arbitrage opportunities between exchanges.
Args:
orderbooks: List of order book snapshots
min_spread_pct: Minimum spread percentage to consider
Returns:
List[Dict]: Arbitrage opportunities
"""
opportunities = []
if len(orderbooks) < 2:
return opportunities
try:
# Find best bid and ask across exchanges
best_bids = []
best_asks = []
for orderbook in orderbooks:
if orderbook.bids and orderbook.asks:
best_bids.append({
'exchange': orderbook.exchange,
'price': orderbook.bids[0].price,
'size': orderbook.bids[0].size
})
best_asks.append({
'exchange': orderbook.exchange,
'price': orderbook.asks[0].price,
'size': orderbook.asks[0].size
})
# Sort to find best opportunities
best_bids.sort(key=lambda x: x['price'], reverse=True)
best_asks.sort(key=lambda x: x['price'])
# Check for arbitrage opportunities
for bid in best_bids:
for ask in best_asks:
if bid['exchange'] != ask['exchange'] and bid['price'] > ask['price']:
spread = bid['price'] - ask['price']
spread_pct = (spread / ask['price']) * 100
if spread_pct >= min_spread_pct:
opportunities.append({
'buy_exchange': ask['exchange'],
'sell_exchange': bid['exchange'],
'buy_price': ask['price'],
'sell_price': bid['price'],
'spread': spread,
'spread_percentage': spread_pct,
'max_size': min(bid['size'], ask['size'])
})
# Sort by spread percentage
opportunities.sort(key=lambda x: x['spread_percentage'], reverse=True)
if opportunities:
logger.info(f"Found {len(opportunities)} arbitrage opportunities")
return opportunities
except Exception as e:
logger.error(f"Error detecting arbitrage opportunities: {e}")
return []
def get_exchange_correlation(self, orderbooks: List[OrderBookSnapshot]) -> Dict[str, Dict[str, float]]:
"""
Calculate price correlation between exchanges.
Args:
orderbooks: List of order book snapshots
Returns:
Dict: Correlation matrix between exchanges
"""
correlations = {}
# Extract mid prices by exchange
exchange_prices = {}
for orderbook in orderbooks:
if orderbook.mid_price:
exchange_prices[orderbook.exchange] = orderbook.mid_price
# Calculate simple correlation (would need historical data for proper correlation)
exchanges = list(exchange_prices.keys())
for i, exchange1 in enumerate(exchanges):
correlations[exchange1] = {}
for j, exchange2 in enumerate(exchanges):
if i == j:
correlations[exchange1][exchange2] = 1.0
else:
# Simple price difference as correlation proxy
price1 = exchange_prices[exchange1]
price2 = exchange_prices[exchange2]
diff_pct = abs(price1 - price2) / max(price1, price2) * 100
# Convert to correlation-like score (lower difference = higher correlation)
correlation = max(0.0, 1.0 - (diff_pct / 10.0))
correlations[exchange1][exchange2] = correlation
return correlations
def get_processing_stats(self) -> Dict[str, int]:
"""Get processing statistics"""
return {
'consolidations_performed': self.consolidations_performed,
'unique_exchanges_processed': len(self.exchanges_processed),
'exchanges_processed': list(self.exchanges_processed),
'bucketer_stats': self.price_bucketer.get_processing_stats(),
'heatmap_stats': self.heatmap_generator.get_processing_stats()
}
def update_exchange_weights(self, new_weights: Dict[str, float]) -> None:
"""Update exchange weights for aggregation"""
self.exchange_weights.update(new_weights)
logger.info(f"Updated exchange weights: {new_weights}")
def reset_stats(self) -> None:
"""Reset processing statistics"""
self.consolidations_performed = 0
self.exchanges_processed.clear()
self.price_bucketer.reset_stats()
self.heatmap_generator.reset_stats()
logger.info("Cross-exchange aggregator statistics reset")

View File

@ -0,0 +1,376 @@
"""
Heatmap data generation from price buckets.
"""
from typing import List, Dict, Optional, Tuple
from ..models.core import PriceBuckets, HeatmapData, HeatmapPoint
from ..config import config
from ..utils.logging import get_logger
logger = get_logger(__name__)
class HeatmapGenerator:
"""
Generates heatmap visualization data from price buckets.
Creates intensity-based heatmap points for visualization.
"""
def __init__(self):
"""Initialize heatmap generator"""
self.heatmaps_generated = 0
self.total_points_created = 0
logger.info("Heatmap generator initialized")
def generate_heatmap(self, buckets: PriceBuckets,
max_points: Optional[int] = None) -> HeatmapData:
"""
Generate heatmap data from price buckets.
Args:
buckets: Price buckets to convert
max_points: Maximum number of points to include (None = all)
Returns:
HeatmapData: Heatmap visualization data
"""
try:
heatmap = HeatmapData(
symbol=buckets.symbol,
timestamp=buckets.timestamp,
bucket_size=buckets.bucket_size
)
# Calculate maximum volume for intensity normalization
all_volumes = list(buckets.bid_buckets.values()) + list(buckets.ask_buckets.values())
max_volume = max(all_volumes) if all_volumes else 1.0
# Generate bid points
bid_points = self._create_heatmap_points(
buckets.bid_buckets, 'bid', max_volume
)
# Generate ask points
ask_points = self._create_heatmap_points(
buckets.ask_buckets, 'ask', max_volume
)
# Combine all points
all_points = bid_points + ask_points
# Limit points if requested
if max_points and len(all_points) > max_points:
# Sort by volume and take top points
all_points.sort(key=lambda p: p.volume, reverse=True)
all_points = all_points[:max_points]
heatmap.data = all_points
self.heatmaps_generated += 1
self.total_points_created += len(all_points)
logger.debug(
f"Generated heatmap for {buckets.symbol}: {len(all_points)} points "
f"(max_volume: {max_volume:.6f})"
)
return heatmap
except Exception as e:
logger.error(f"Error generating heatmap: {e}")
raise
def _create_heatmap_points(self, bucket_dict: Dict[float, float],
side: str, max_volume: float) -> List[HeatmapPoint]:
"""
Create heatmap points from bucket dictionary.
Args:
bucket_dict: Dictionary of price -> volume
side: 'bid' or 'ask'
max_volume: Maximum volume for intensity calculation
Returns:
List[HeatmapPoint]: List of heatmap points
"""
points = []
for price, volume in bucket_dict.items():
if volume > 0: # Only include non-zero volumes
intensity = min(volume / max_volume, 1.0) if max_volume > 0 else 0.0
point = HeatmapPoint(
price=price,
volume=volume,
intensity=intensity,
side=side
)
points.append(point)
return points
def apply_smoothing(self, heatmap: HeatmapData,
smoothing_factor: float = 0.3) -> HeatmapData:
"""
Apply smoothing to heatmap data to reduce noise.
Args:
heatmap: Original heatmap data
smoothing_factor: Smoothing factor (0.0 = no smoothing, 1.0 = maximum)
Returns:
HeatmapData: Smoothed heatmap data
"""
if smoothing_factor <= 0:
return heatmap
try:
smoothed = HeatmapData(
symbol=heatmap.symbol,
timestamp=heatmap.timestamp,
bucket_size=heatmap.bucket_size
)
# Separate bids and asks
bids = [p for p in heatmap.data if p.side == 'bid']
asks = [p for p in heatmap.data if p.side == 'ask']
# Apply smoothing to each side
smoothed_bids = self._smooth_points(bids, smoothing_factor)
smoothed_asks = self._smooth_points(asks, smoothing_factor)
smoothed.data = smoothed_bids + smoothed_asks
logger.debug(f"Applied smoothing with factor {smoothing_factor}")
return smoothed
except Exception as e:
logger.error(f"Error applying smoothing: {e}")
return heatmap # Return original on error
def _smooth_points(self, points: List[HeatmapPoint],
smoothing_factor: float) -> List[HeatmapPoint]:
"""
Apply smoothing to a list of heatmap points.
Args:
points: Points to smooth
smoothing_factor: Smoothing factor
Returns:
List[HeatmapPoint]: Smoothed points
"""
if len(points) < 3:
return points
# Sort points by price
sorted_points = sorted(points, key=lambda p: p.price)
smoothed_points = []
for i, point in enumerate(sorted_points):
# Calculate weighted average with neighbors
total_weight = 1.0
weighted_volume = point.volume
weighted_intensity = point.intensity
# Add left neighbor
if i > 0:
left_point = sorted_points[i - 1]
weight = smoothing_factor
total_weight += weight
weighted_volume += left_point.volume * weight
weighted_intensity += left_point.intensity * weight
# Add right neighbor
if i < len(sorted_points) - 1:
right_point = sorted_points[i + 1]
weight = smoothing_factor
total_weight += weight
weighted_volume += right_point.volume * weight
weighted_intensity += right_point.intensity * weight
# Create smoothed point
smoothed_point = HeatmapPoint(
price=point.price,
volume=weighted_volume / total_weight,
intensity=min(weighted_intensity / total_weight, 1.0),
side=point.side
)
smoothed_points.append(smoothed_point)
return smoothed_points
def filter_by_intensity(self, heatmap: HeatmapData,
min_intensity: float = 0.1) -> HeatmapData:
"""
Filter heatmap points by minimum intensity.
Args:
heatmap: Original heatmap data
min_intensity: Minimum intensity threshold
Returns:
HeatmapData: Filtered heatmap data
"""
filtered = HeatmapData(
symbol=heatmap.symbol,
timestamp=heatmap.timestamp,
bucket_size=heatmap.bucket_size
)
# Filter points by intensity
filtered.data = [
point for point in heatmap.data
if point.intensity >= min_intensity
]
logger.debug(
f"Filtered heatmap: {len(heatmap.data)} -> {len(filtered.data)} points "
f"(min_intensity: {min_intensity})"
)
return filtered
def get_price_levels(self, heatmap: HeatmapData,
side: str = None) -> List[float]:
"""
Get sorted list of price levels from heatmap.
Args:
heatmap: Heatmap data
side: 'bid', 'ask', or None for both
Returns:
List[float]: Sorted price levels
"""
if side:
points = [p for p in heatmap.data if p.side == side]
else:
points = heatmap.data
prices = [p.price for p in points]
return sorted(prices)
def get_volume_profile(self, heatmap: HeatmapData) -> Dict[str, List[Tuple[float, float]]]:
"""
Get volume profile from heatmap data.
Args:
heatmap: Heatmap data
Returns:
Dict: Volume profile with 'bids' and 'asks' as (price, volume) tuples
"""
profile = {'bids': [], 'asks': []}
# Extract bid profile
bid_points = [p for p in heatmap.data if p.side == 'bid']
profile['bids'] = [(p.price, p.volume) for p in bid_points]
profile['bids'].sort(key=lambda x: x[0], reverse=True) # Highest price first
# Extract ask profile
ask_points = [p for p in heatmap.data if p.side == 'ask']
profile['asks'] = [(p.price, p.volume) for p in ask_points]
profile['asks'].sort(key=lambda x: x[0]) # Lowest price first
return profile
def calculate_support_resistance(self, heatmap: HeatmapData,
threshold: float = 0.7) -> Dict[str, List[float]]:
"""
Identify potential support and resistance levels from heatmap.
Args:
heatmap: Heatmap data
threshold: Intensity threshold for significant levels
Returns:
Dict: Support and resistance levels
"""
levels = {'support': [], 'resistance': []}
# Find high-intensity bid levels (potential support)
bid_points = [p for p in heatmap.data if p.side == 'bid' and p.intensity >= threshold]
levels['support'] = sorted([p.price for p in bid_points], reverse=True)
# Find high-intensity ask levels (potential resistance)
ask_points = [p for p in heatmap.data if p.side == 'ask' and p.intensity >= threshold]
levels['resistance'] = sorted([p.price for p in ask_points])
logger.debug(
f"Identified {len(levels['support'])} support and "
f"{len(levels['resistance'])} resistance levels"
)
return levels
def get_heatmap_summary(self, heatmap: HeatmapData) -> Dict[str, float]:
"""
Get summary statistics for heatmap data.
Args:
heatmap: Heatmap data
Returns:
Dict: Summary statistics
"""
if not heatmap.data:
return {}
# Separate bids and asks
bids = [p for p in heatmap.data if p.side == 'bid']
asks = [p for p in heatmap.data if p.side == 'ask']
summary = {
'total_points': len(heatmap.data),
'bid_points': len(bids),
'ask_points': len(asks),
'total_volume': sum(p.volume for p in heatmap.data),
'bid_volume': sum(p.volume for p in bids),
'ask_volume': sum(p.volume for p in asks),
'max_intensity': max(p.intensity for p in heatmap.data),
'avg_intensity': sum(p.intensity for p in heatmap.data) / len(heatmap.data),
'price_range': 0.0,
'best_bid': 0.0,
'best_ask': 0.0
}
# Calculate price range
all_prices = [p.price for p in heatmap.data]
if all_prices:
summary['price_range'] = max(all_prices) - min(all_prices)
# Calculate best bid and ask
if bids:
summary['best_bid'] = max(p.price for p in bids)
if asks:
summary['best_ask'] = min(p.price for p in asks)
# Calculate volume imbalance
total_volume = summary['total_volume']
if total_volume > 0:
summary['volume_imbalance'] = (
(summary['bid_volume'] - summary['ask_volume']) / total_volume
)
else:
summary['volume_imbalance'] = 0.0
return summary
def get_processing_stats(self) -> Dict[str, int]:
"""Get processing statistics"""
return {
'heatmaps_generated': self.heatmaps_generated,
'total_points_created': self.total_points_created,
'avg_points_per_heatmap': (
self.total_points_created // max(self.heatmaps_generated, 1)
)
}
def reset_stats(self) -> None:
"""Reset processing statistics"""
self.heatmaps_generated = 0
self.total_points_created = 0
logger.info("Heatmap generator statistics reset")

View File

@ -0,0 +1,341 @@
"""
Price bucketing system for order book aggregation.
"""
import math
from typing import Dict, List, Tuple, Optional
from collections import defaultdict
from ..models.core import OrderBookSnapshot, PriceBuckets, PriceLevel
from ..config import config
from ..utils.logging import get_logger
from ..utils.validation import validate_price, validate_volume
logger = get_logger(__name__)
class PriceBucketer:
"""
Converts order book data into price buckets for heatmap visualization.
Uses universal $1 USD buckets for all symbols to simplify logic.
"""
def __init__(self, bucket_size: float = None):
"""
Initialize price bucketer.
Args:
bucket_size: Size of price buckets in USD (defaults to config value)
"""
self.bucket_size = bucket_size or config.get_bucket_size()
# Statistics
self.buckets_created = 0
self.total_volume_processed = 0.0
logger.info(f"Price bucketer initialized with ${self.bucket_size} buckets")
def create_price_buckets(self, orderbook: OrderBookSnapshot) -> PriceBuckets:
"""
Convert order book data to price buckets.
Args:
orderbook: Order book snapshot
Returns:
PriceBuckets: Aggregated price bucket data
"""
try:
# Create price buckets object
buckets = PriceBuckets(
symbol=orderbook.symbol,
timestamp=orderbook.timestamp,
bucket_size=self.bucket_size
)
# Process bids (aggregate into buckets)
for bid in orderbook.bids:
if validate_price(bid.price) and validate_volume(bid.size):
buckets.add_bid(bid.price, bid.size)
self.total_volume_processed += bid.size
# Process asks (aggregate into buckets)
for ask in orderbook.asks:
if validate_price(ask.price) and validate_volume(ask.size):
buckets.add_ask(ask.price, ask.size)
self.total_volume_processed += ask.size
self.buckets_created += 1
logger.debug(
f"Created price buckets for {orderbook.symbol}: "
f"{len(buckets.bid_buckets)} bid buckets, {len(buckets.ask_buckets)} ask buckets"
)
return buckets
except Exception as e:
logger.error(f"Error creating price buckets: {e}")
raise
def aggregate_buckets(self, bucket_list: List[PriceBuckets]) -> PriceBuckets:
"""
Aggregate multiple price buckets into a single bucket set.
Args:
bucket_list: List of price buckets to aggregate
Returns:
PriceBuckets: Aggregated buckets
"""
if not bucket_list:
raise ValueError("Cannot aggregate empty bucket list")
# Use first bucket as template
first_bucket = bucket_list[0]
aggregated = PriceBuckets(
symbol=first_bucket.symbol,
timestamp=first_bucket.timestamp,
bucket_size=self.bucket_size
)
# Aggregate all bid buckets
for buckets in bucket_list:
for price, volume in buckets.bid_buckets.items():
bucket_price = aggregated.get_bucket_price(price)
aggregated.bid_buckets[bucket_price] = (
aggregated.bid_buckets.get(bucket_price, 0) + volume
)
# Aggregate all ask buckets
for buckets in bucket_list:
for price, volume in buckets.ask_buckets.items():
bucket_price = aggregated.get_bucket_price(price)
aggregated.ask_buckets[bucket_price] = (
aggregated.ask_buckets.get(bucket_price, 0) + volume
)
logger.debug(f"Aggregated {len(bucket_list)} bucket sets")
return aggregated
def get_bucket_range(self, center_price: float, depth: int) -> Tuple[float, float]:
"""
Get price range for buckets around a center price.
Args:
center_price: Center price for the range
depth: Number of buckets on each side
Returns:
Tuple[float, float]: (min_price, max_price)
"""
half_range = depth * self.bucket_size
min_price = center_price - half_range
max_price = center_price + half_range
return (max(0, min_price), max_price)
def filter_buckets_by_range(self, buckets: PriceBuckets,
min_price: float, max_price: float) -> PriceBuckets:
"""
Filter buckets to only include those within a price range.
Args:
buckets: Original price buckets
min_price: Minimum price to include
max_price: Maximum price to include
Returns:
PriceBuckets: Filtered buckets
"""
filtered = PriceBuckets(
symbol=buckets.symbol,
timestamp=buckets.timestamp,
bucket_size=buckets.bucket_size
)
# Filter bid buckets
for price, volume in buckets.bid_buckets.items():
if min_price <= price <= max_price:
filtered.bid_buckets[price] = volume
# Filter ask buckets
for price, volume in buckets.ask_buckets.items():
if min_price <= price <= max_price:
filtered.ask_buckets[price] = volume
return filtered
def get_top_buckets(self, buckets: PriceBuckets, count: int) -> PriceBuckets:
"""
Get top N buckets by volume.
Args:
buckets: Original price buckets
count: Number of top buckets to return
Returns:
PriceBuckets: Top buckets by volume
"""
top_buckets = PriceBuckets(
symbol=buckets.symbol,
timestamp=buckets.timestamp,
bucket_size=buckets.bucket_size
)
# Get top bid buckets
top_bids = sorted(
buckets.bid_buckets.items(),
key=lambda x: x[1], # Sort by volume
reverse=True
)[:count]
for price, volume in top_bids:
top_buckets.bid_buckets[price] = volume
# Get top ask buckets
top_asks = sorted(
buckets.ask_buckets.items(),
key=lambda x: x[1], # Sort by volume
reverse=True
)[:count]
for price, volume in top_asks:
top_buckets.ask_buckets[price] = volume
return top_buckets
def calculate_bucket_statistics(self, buckets: PriceBuckets) -> Dict[str, float]:
"""
Calculate statistics for price buckets.
Args:
buckets: Price buckets to analyze
Returns:
Dict[str, float]: Bucket statistics
"""
stats = {
'total_bid_buckets': len(buckets.bid_buckets),
'total_ask_buckets': len(buckets.ask_buckets),
'total_bid_volume': sum(buckets.bid_buckets.values()),
'total_ask_volume': sum(buckets.ask_buckets.values()),
'bid_price_range': 0.0,
'ask_price_range': 0.0,
'max_bid_volume': 0.0,
'max_ask_volume': 0.0,
'avg_bid_volume': 0.0,
'avg_ask_volume': 0.0
}
# Calculate bid statistics
if buckets.bid_buckets:
bid_prices = list(buckets.bid_buckets.keys())
bid_volumes = list(buckets.bid_buckets.values())
stats['bid_price_range'] = max(bid_prices) - min(bid_prices)
stats['max_bid_volume'] = max(bid_volumes)
stats['avg_bid_volume'] = sum(bid_volumes) / len(bid_volumes)
# Calculate ask statistics
if buckets.ask_buckets:
ask_prices = list(buckets.ask_buckets.keys())
ask_volumes = list(buckets.ask_buckets.values())
stats['ask_price_range'] = max(ask_prices) - min(ask_prices)
stats['max_ask_volume'] = max(ask_volumes)
stats['avg_ask_volume'] = sum(ask_volumes) / len(ask_volumes)
# Calculate combined statistics
stats['total_volume'] = stats['total_bid_volume'] + stats['total_ask_volume']
stats['volume_imbalance'] = (
(stats['total_bid_volume'] - stats['total_ask_volume']) /
max(stats['total_volume'], 1e-10)
)
return stats
def merge_adjacent_buckets(self, buckets: PriceBuckets, merge_factor: int = 2) -> PriceBuckets:
"""
Merge adjacent buckets to create larger bucket sizes.
Args:
buckets: Original price buckets
merge_factor: Number of adjacent buckets to merge
Returns:
PriceBuckets: Merged buckets with larger bucket size
"""
merged = PriceBuckets(
symbol=buckets.symbol,
timestamp=buckets.timestamp,
bucket_size=buckets.bucket_size * merge_factor
)
# Merge bid buckets
bid_groups = defaultdict(float)
for price, volume in buckets.bid_buckets.items():
# Calculate new bucket price
new_bucket_price = merged.get_bucket_price(price)
bid_groups[new_bucket_price] += volume
merged.bid_buckets = dict(bid_groups)
# Merge ask buckets
ask_groups = defaultdict(float)
for price, volume in buckets.ask_buckets.items():
# Calculate new bucket price
new_bucket_price = merged.get_bucket_price(price)
ask_groups[new_bucket_price] += volume
merged.ask_buckets = dict(ask_groups)
logger.debug(f"Merged buckets with factor {merge_factor}")
return merged
def get_bucket_depth_profile(self, buckets: PriceBuckets,
center_price: float) -> Dict[str, List[Tuple[float, float]]]:
"""
Get depth profile showing volume at different distances from center price.
Args:
buckets: Price buckets
center_price: Center price for depth calculation
Returns:
Dict: Depth profile with 'bids' and 'asks' lists of (distance, volume) tuples
"""
profile = {'bids': [], 'asks': []}
# Calculate bid depth profile
for price, volume in buckets.bid_buckets.items():
distance = abs(center_price - price)
profile['bids'].append((distance, volume))
# Calculate ask depth profile
for price, volume in buckets.ask_buckets.items():
distance = abs(price - center_price)
profile['asks'].append((distance, volume))
# Sort by distance
profile['bids'].sort(key=lambda x: x[0])
profile['asks'].sort(key=lambda x: x[0])
return profile
def get_processing_stats(self) -> Dict[str, float]:
"""Get processing statistics"""
return {
'bucket_size': self.bucket_size,
'buckets_created': self.buckets_created,
'total_volume_processed': self.total_volume_processed,
'avg_volume_per_bucket': (
self.total_volume_processed / max(self.buckets_created, 1)
)
}
def reset_stats(self) -> None:
"""Reset processing statistics"""
self.buckets_created = 0
self.total_volume_processed = 0.0
logger.info("Price bucketer statistics reset")

COBY/api/__init__.py

@ -0,0 +1,15 @@
"""
API layer for the COBY system.
"""
from .rest_api import create_app
from .websocket_server import WebSocketServer
from .rate_limiter import RateLimiter
from .response_formatter import ResponseFormatter
__all__ = [
'create_app',
'WebSocketServer',
'RateLimiter',
'ResponseFormatter'
]
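A minimal sketch of consuming these package-level exports; the import path assumes the repository root is on PYTHONPATH so the package is importable as COBY:

# Assumes the repository root is on PYTHONPATH.
from COBY.api import create_app, RateLimiter, ResponseFormatter

app = create_app()
limiter = RateLimiter(requests_per_minute=100, burst_size=20)
formatter = ResponseFormatter()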

COBY/api/rate_limiter.py

@ -0,0 +1,183 @@
"""
Rate limiting for API endpoints.
"""
import time
from typing import Dict, Optional
from collections import defaultdict, deque
from ..utils.logging import get_logger
logger = get_logger(__name__)
class RateLimiter:
"""
Token bucket rate limiter for API endpoints.
Provides per-client rate limiting with configurable limits.
"""
def __init__(self, requests_per_minute: int = 100, burst_size: int = 20):
"""
Initialize rate limiter.
Args:
requests_per_minute: Maximum requests per minute
burst_size: Maximum burst requests
"""
self.requests_per_minute = requests_per_minute
self.burst_size = burst_size
self.refill_rate = requests_per_minute / 60.0 # tokens per second
# Client buckets: client_id -> {'tokens': float, 'last_refill': float}
self.buckets: Dict[str, Dict] = defaultdict(lambda: {
'tokens': float(burst_size),
'last_refill': time.time()
})
# Request history for monitoring
self.request_history: Dict[str, deque] = defaultdict(lambda: deque(maxlen=1000))
logger.info(f"Rate limiter initialized: {requests_per_minute} req/min, burst: {burst_size}")
def is_allowed(self, client_id: str, tokens_requested: int = 1) -> bool:
"""
Check if request is allowed for client.
Args:
client_id: Client identifier (IP, user ID, etc.)
tokens_requested: Number of tokens requested
Returns:
bool: True if request is allowed, False otherwise
"""
current_time = time.time()
bucket = self.buckets[client_id]
# Refill tokens based on time elapsed
time_elapsed = current_time - bucket['last_refill']
tokens_to_add = time_elapsed * self.refill_rate
# Update bucket
bucket['tokens'] = min(self.burst_size, bucket['tokens'] + tokens_to_add)
bucket['last_refill'] = current_time
# Check if enough tokens available
if bucket['tokens'] >= tokens_requested:
bucket['tokens'] -= tokens_requested
# Record successful request
self.request_history[client_id].append(current_time)
return True
else:
logger.debug(f"Rate limit exceeded for client {client_id}")
return False
def get_remaining_tokens(self, client_id: str) -> float:
"""
Get remaining tokens for client.
Args:
client_id: Client identifier
Returns:
float: Number of remaining tokens
"""
current_time = time.time()
bucket = self.buckets[client_id]
# Calculate current tokens (with refill)
time_elapsed = current_time - bucket['last_refill']
tokens_to_add = time_elapsed * self.refill_rate
current_tokens = min(self.burst_size, bucket['tokens'] + tokens_to_add)
return current_tokens
def get_reset_time(self, client_id: str) -> float:
"""
Get time until bucket is fully refilled.
Args:
client_id: Client identifier
Returns:
float: Seconds until full refill
"""
remaining_tokens = self.get_remaining_tokens(client_id)
tokens_needed = self.burst_size - remaining_tokens
if tokens_needed <= 0:
return 0.0
return tokens_needed / self.refill_rate
def get_client_stats(self, client_id: str) -> Dict[str, float]:
"""
Get statistics for a client.
Args:
client_id: Client identifier
Returns:
Dict: Client statistics
"""
current_time = time.time()
history = self.request_history[client_id]
# Count requests in last minute
minute_ago = current_time - 60
recent_requests = sum(1 for req_time in history if req_time > minute_ago)
return {
'remaining_tokens': self.get_remaining_tokens(client_id),
'reset_time': self.get_reset_time(client_id),
'requests_last_minute': recent_requests,
'total_requests': len(history)
}
def cleanup_old_data(self, max_age_hours: int = 24) -> None:
"""
Clean up old client data.
Args:
max_age_hours: Maximum age of data to keep
"""
current_time = time.time()
cutoff_time = current_time - (max_age_hours * 3600)
# Clean up buckets for inactive clients
inactive_clients = []
for client_id, bucket in self.buckets.items():
if bucket['last_refill'] < cutoff_time:
inactive_clients.append(client_id)
for client_id in inactive_clients:
del self.buckets[client_id]
if client_id in self.request_history:
del self.request_history[client_id]
logger.debug(f"Cleaned up {len(inactive_clients)} inactive clients")
def get_global_stats(self) -> Dict[str, int]:
"""Get global rate limiter statistics"""
current_time = time.time()
minute_ago = current_time - 60
total_clients = len(self.buckets)
active_clients = 0
total_requests_last_minute = 0
for history in self.request_history.values():
recent_requests = sum(1 for req_time in history if req_time > minute_ago)
if recent_requests > 0:
active_clients += 1
total_requests_last_minute += recent_requests
return {
'total_clients': total_clients,
'active_clients': active_clients,
'requests_per_minute_limit': self.requests_per_minute,
'burst_size': self.burst_size,
'total_requests_last_minute': total_requests_last_minute
}
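A minimal usage sketch of the token bucket above; the client identifier and limits are illustrative:

# Illustrative RateLimiter usage (import path assumes the package layout shown in this commit).
from COBY.api.rate_limiter import RateLimiter

limiter = RateLimiter(requests_per_minute=60, burst_size=5)  # refills at 1 token/second

client = "203.0.113.7"
allowed = [limiter.is_allowed(client) for _ in range(7)]
print(allowed)  # typically 5x True (the burst), then False until tokens refill
print(limiter.get_client_stats(client))
print(limiter.get_global_stats())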


@ -0,0 +1,326 @@
"""
Response formatting for API endpoints.
"""
import json
from typing import Any, Dict, Optional, List
from datetime import datetime
from ..utils.logging import get_logger
from ..utils.timing import get_current_timestamp
logger = get_logger(__name__)
class ResponseFormatter:
"""
Formats API responses with consistent structure and metadata.
"""
def __init__(self):
"""Initialize response formatter"""
self.responses_formatted = 0
logger.info("Response formatter initialized")
def success(self, data: Any, message: str = "Success",
metadata: Optional[Dict] = None) -> Dict[str, Any]:
"""
Format successful response.
Args:
data: Response data
message: Success message
metadata: Additional metadata
Returns:
Dict: Formatted response
"""
response = {
'success': True,
'message': message,
'data': data,
'timestamp': get_current_timestamp().isoformat(),
'metadata': metadata or {}
}
self.responses_formatted += 1
return response
def error(self, message: str, error_code: str = "UNKNOWN_ERROR",
details: Optional[Dict] = None, status_code: int = 400) -> Dict[str, Any]:
"""
Format error response.
Args:
message: Error message
error_code: Error code
details: Error details
status_code: HTTP status code
Returns:
Dict: Formatted error response
"""
response = {
'success': False,
'error': {
'message': message,
'code': error_code,
'details': details or {},
'status_code': status_code
},
'timestamp': get_current_timestamp().isoformat()
}
self.responses_formatted += 1
return response
def paginated(self, data: List[Any], page: int, page_size: int,
total_count: int, message: str = "Success") -> Dict[str, Any]:
"""
Format paginated response.
Args:
data: Page data
page: Current page number
page_size: Items per page
total_count: Total number of items
message: Success message
Returns:
Dict: Formatted paginated response
"""
total_pages = (total_count + page_size - 1) // page_size
pagination = {
'page': page,
'page_size': page_size,
'total_count': total_count,
'total_pages': total_pages,
'has_next': page < total_pages,
'has_previous': page > 1
}
return self.success(
data=data,
message=message,
metadata={'pagination': pagination}
)
def heatmap_response(self, heatmap_data, symbol: str,
exchange: Optional[str] = None) -> Dict[str, Any]:
"""
Format heatmap data response.
Args:
heatmap_data: Heatmap data
symbol: Trading symbol
exchange: Exchange name (None for consolidated)
Returns:
Dict: Formatted heatmap response
"""
if not heatmap_data:
return self.error("Heatmap data not found", "HEATMAP_NOT_FOUND", status_code=404)
# Convert heatmap to API format
formatted_data = {
'symbol': heatmap_data.symbol,
'timestamp': heatmap_data.timestamp.isoformat(),
'bucket_size': heatmap_data.bucket_size,
'exchange': exchange,
'points': [
{
'price': point.price,
'volume': point.volume,
'intensity': point.intensity,
'side': point.side
}
for point in heatmap_data.data
]
}
metadata = {
'total_points': len(heatmap_data.data),
'bid_points': len([p for p in heatmap_data.data if p.side == 'bid']),
'ask_points': len([p for p in heatmap_data.data if p.side == 'ask']),
'data_type': 'consolidated' if not exchange else 'exchange_specific'
}
return self.success(
data=formatted_data,
message=f"Heatmap data for {symbol}",
metadata=metadata
)
def orderbook_response(self, orderbook_data, symbol: str, exchange: str) -> Dict[str, Any]:
"""
Format order book response.
Args:
orderbook_data: Order book data
symbol: Trading symbol
exchange: Exchange name
Returns:
Dict: Formatted order book response
"""
if not orderbook_data:
return self.error("Order book not found", "ORDERBOOK_NOT_FOUND", status_code=404)
# Convert order book to API format
formatted_data = {
'symbol': orderbook_data.symbol,
'exchange': orderbook_data.exchange,
'timestamp': orderbook_data.timestamp.isoformat(),
'sequence_id': orderbook_data.sequence_id,
'bids': [
{
'price': bid.price,
'size': bid.size,
'count': bid.count
}
for bid in orderbook_data.bids
],
'asks': [
{
'price': ask.price,
'size': ask.size,
'count': ask.count
}
for ask in orderbook_data.asks
],
'mid_price': orderbook_data.mid_price,
'spread': orderbook_data.spread,
'bid_volume': orderbook_data.bid_volume,
'ask_volume': orderbook_data.ask_volume
}
metadata = {
'bid_levels': len(orderbook_data.bids),
'ask_levels': len(orderbook_data.asks),
'total_bid_volume': orderbook_data.bid_volume,
'total_ask_volume': orderbook_data.ask_volume
}
return self.success(
data=formatted_data,
message=f"Order book for {symbol}@{exchange}",
metadata=metadata
)
def metrics_response(self, metrics_data, symbol: str, exchange: str) -> Dict[str, Any]:
"""
Format metrics response.
Args:
metrics_data: Metrics data
symbol: Trading symbol
exchange: Exchange name
Returns:
Dict: Formatted metrics response
"""
if not metrics_data:
return self.error("Metrics not found", "METRICS_NOT_FOUND", status_code=404)
# Convert metrics to API format
formatted_data = {
'symbol': metrics_data.symbol,
'exchange': metrics_data.exchange,
'timestamp': metrics_data.timestamp.isoformat(),
'mid_price': metrics_data.mid_price,
'spread': metrics_data.spread,
'spread_percentage': metrics_data.spread_percentage,
'bid_volume': metrics_data.bid_volume,
'ask_volume': metrics_data.ask_volume,
'volume_imbalance': metrics_data.volume_imbalance,
'depth_10': metrics_data.depth_10,
'depth_50': metrics_data.depth_50
}
return self.success(
data=formatted_data,
message=f"Metrics for {symbol}@{exchange}"
)
def status_response(self, status_data: Dict[str, Any]) -> Dict[str, Any]:
"""
Format system status response.
Args:
status_data: System status data
Returns:
Dict: Formatted status response
"""
return self.success(
data=status_data,
message="System status",
metadata={'response_count': self.responses_formatted}
)
def rate_limit_error(self, client_stats: Dict[str, float]) -> Dict[str, Any]:
"""
Format rate limit error response.
Args:
client_stats: Client rate limit statistics
Returns:
Dict: Formatted rate limit error
"""
return self.error(
message="Rate limit exceeded",
error_code="RATE_LIMIT_EXCEEDED",
details={
'remaining_tokens': client_stats['remaining_tokens'],
'reset_time': client_stats['reset_time'],
'requests_last_minute': client_stats['requests_last_minute']
},
status_code=429
)
def validation_error(self, field: str, message: str) -> Dict[str, Any]:
"""
Format validation error response.
Args:
field: Field that failed validation
message: Validation error message
Returns:
Dict: Formatted validation error
"""
return self.error(
message=f"Validation error: {message}",
error_code="VALIDATION_ERROR",
details={'field': field, 'message': message},
status_code=400
)
def to_json(self, response: Dict[str, Any], indent: Optional[int] = None) -> str:
"""
Convert response to JSON string.
Args:
response: Response dictionary
indent: JSON indentation (None for compact)
Returns:
str: JSON string
"""
try:
return json.dumps(response, indent=indent, ensure_ascii=False, default=str)
except Exception as e:
logger.error(f"Error converting response to JSON: {e}")
return json.dumps(self.error("JSON serialization failed", "JSON_ERROR"))
def get_stats(self) -> Dict[str, int]:
"""Get formatter statistics"""
return {
'responses_formatted': self.responses_formatted
}
def reset_stats(self) -> None:
"""Reset formatter statistics"""
self.responses_formatted = 0
logger.info("Response formatter statistics reset")

COBY/api/rest_api.py

@ -0,0 +1,391 @@
"""
REST API server for COBY system.
"""
from fastapi import FastAPI, HTTPException, Request, Query, Path
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from typing import Optional, List
import asyncio
from ..config import config
from ..caching.redis_manager import redis_manager
from ..utils.logging import get_logger, set_correlation_id
from ..utils.validation import validate_symbol
from .rate_limiter import RateLimiter
from .response_formatter import ResponseFormatter
logger = get_logger(__name__)
def create_app() -> FastAPI:
"""Create and configure FastAPI application"""
app = FastAPI(
title="COBY Market Data API",
description="Real-time cryptocurrency market data aggregation API",
version="1.0.0",
docs_url="/docs",
redoc_url="/redoc"
)
# Add CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=config.api.cors_origins,
allow_credentials=True,
allow_methods=["GET", "POST", "PUT", "DELETE"],
allow_headers=["*"],
)
# Initialize components
rate_limiter = RateLimiter(
requests_per_minute=config.api.rate_limit,
burst_size=20
)
response_formatter = ResponseFormatter()
@app.middleware("http")
async def rate_limit_middleware(request: Request, call_next):
"""Rate limiting middleware"""
client_ip = request.client.host
if not rate_limiter.is_allowed(client_ip):
client_stats = rate_limiter.get_client_stats(client_ip)
error_response = response_formatter.rate_limit_error(client_stats)
return JSONResponse(
status_code=429,
content=error_response,
headers={
"X-RateLimit-Remaining": str(int(client_stats['remaining_tokens'])),
"X-RateLimit-Reset": str(int(client_stats['reset_time']))
}
)
response = await call_next(request)
# Add rate limit headers
client_stats = rate_limiter.get_client_stats(client_ip)
response.headers["X-RateLimit-Remaining"] = str(int(client_stats['remaining_tokens']))
response.headers["X-RateLimit-Reset"] = str(int(client_stats['reset_time']))
return response
@app.middleware("http")
async def correlation_middleware(request: Request, call_next):
"""Add correlation ID to requests"""
set_correlation_id()
response = await call_next(request)
return response
@app.on_event("startup")
async def startup_event():
"""Initialize services on startup"""
try:
await redis_manager.initialize()
logger.info("API server startup completed")
except Exception as e:
logger.error(f"API server startup failed: {e}")
raise
@app.on_event("shutdown")
async def shutdown_event():
"""Cleanup on shutdown"""
try:
await redis_manager.close()
logger.info("API server shutdown completed")
except Exception as e:
logger.error(f"API server shutdown error: {e}")
# Health check endpoint
@app.get("/health")
async def health_check():
"""Health check endpoint"""
try:
# Check Redis connection
redis_healthy = await redis_manager.ping()
health_data = {
'status': 'healthy' if redis_healthy else 'degraded',
'redis': 'connected' if redis_healthy else 'disconnected',
'version': '1.0.0'
}
return response_formatter.status_response(health_data)
except Exception as e:
logger.error(f"Health check failed: {e}")
return JSONResponse(
status_code=503,
content=response_formatter.error("Service unavailable", "HEALTH_CHECK_FAILED")
)
# Heatmap endpoints
@app.get("/api/v1/heatmap/{symbol}")
async def get_heatmap(
symbol: str = Path(..., description="Trading symbol (e.g., BTCUSDT)"),
exchange: Optional[str] = Query(None, description="Exchange name (None for consolidated)")
):
"""Get heatmap data for a symbol"""
try:
# Validate symbol
if not validate_symbol(symbol):
return JSONResponse(
status_code=400,
content=response_formatter.validation_error("symbol", "Invalid symbol format")
)
# Get heatmap from cache
heatmap_data = await redis_manager.get_heatmap(symbol.upper(), exchange)
return response_formatter.heatmap_response(heatmap_data, symbol.upper(), exchange)
except Exception as e:
logger.error(f"Error getting heatmap for {symbol}: {e}")
return JSONResponse(
status_code=500,
content=response_formatter.error("Internal server error", "HEATMAP_ERROR")
)
# Order book endpoints
@app.get("/api/v1/orderbook/{symbol}/{exchange}")
async def get_orderbook(
symbol: str = Path(..., description="Trading symbol"),
exchange: str = Path(..., description="Exchange name")
):
"""Get order book data for a symbol on an exchange"""
try:
# Validate symbol
if not validate_symbol(symbol):
return JSONResponse(
status_code=400,
content=response_formatter.validation_error("symbol", "Invalid symbol format")
)
# Get order book from cache
orderbook_data = await redis_manager.get_orderbook(symbol.upper(), exchange.lower())
return response_formatter.orderbook_response(orderbook_data, symbol.upper(), exchange.lower())
except Exception as e:
logger.error(f"Error getting order book for {symbol}@{exchange}: {e}")
return JSONResponse(
status_code=500,
content=response_formatter.error("Internal server error", "ORDERBOOK_ERROR")
)
# Metrics endpoints
@app.get("/api/v1/metrics/{symbol}/{exchange}")
async def get_metrics(
symbol: str = Path(..., description="Trading symbol"),
exchange: str = Path(..., description="Exchange name")
):
"""Get metrics data for a symbol on an exchange"""
try:
# Validate symbol
if not validate_symbol(symbol):
return JSONResponse(
status_code=400,
content=response_formatter.validation_error("symbol", "Invalid symbol format")
)
# Get metrics from cache
metrics_data = await redis_manager.get_metrics(symbol.upper(), exchange.lower())
return response_formatter.metrics_response(metrics_data, symbol.upper(), exchange.lower())
except Exception as e:
logger.error(f"Error getting metrics for {symbol}@{exchange}: {e}")
return JSONResponse(
status_code=500,
content=response_formatter.error("Internal server error", "METRICS_ERROR")
)
# Exchange status endpoints
@app.get("/api/v1/status/{exchange}")
async def get_exchange_status(
exchange: str = Path(..., description="Exchange name")
):
"""Get status for an exchange"""
try:
# Get status from cache
status_data = await redis_manager.get_exchange_status(exchange.lower())
if not status_data:
return JSONResponse(
status_code=404,
content=response_formatter.error("Exchange status not found", "STATUS_NOT_FOUND")
)
return response_formatter.success(
data=status_data,
message=f"Status for {exchange}"
)
except Exception as e:
logger.error(f"Error getting status for {exchange}: {e}")
return JSONResponse(
status_code=500,
content=response_formatter.error("Internal server error", "STATUS_ERROR")
)
# List endpoints
@app.get("/api/v1/symbols")
async def list_symbols():
"""List available trading symbols"""
try:
# Get symbols from cache (this would be populated by exchange connectors)
symbols_pattern = "symbols:*"
symbol_keys = await redis_manager.keys(symbols_pattern)
all_symbols = set()
for key in symbol_keys:
symbols_data = await redis_manager.get(key)
if symbols_data and isinstance(symbols_data, list):
all_symbols.update(symbols_data)
return response_formatter.success(
data=sorted(list(all_symbols)),
message="Available trading symbols",
metadata={'total_symbols': len(all_symbols)}
)
except Exception as e:
logger.error(f"Error listing symbols: {e}")
return JSONResponse(
status_code=500,
content=response_formatter.error("Internal server error", "SYMBOLS_ERROR")
)
@app.get("/api/v1/exchanges")
async def list_exchanges():
"""List available exchanges"""
try:
# Get exchange status keys
status_pattern = "st:*"
status_keys = await redis_manager.keys(status_pattern)
exchanges = []
for key in status_keys:
# Extract exchange name from key (st:exchange_name)
exchange_name = key.split(':', 1)[1] if ':' in key else key
exchanges.append(exchange_name)
return response_formatter.success(
data=sorted(exchanges),
message="Available exchanges",
metadata={'total_exchanges': len(exchanges)}
)
except Exception as e:
logger.error(f"Error listing exchanges: {e}")
return JSONResponse(
status_code=500,
content=response_formatter.error("Internal server error", "EXCHANGES_ERROR")
)
# Statistics endpoints
@app.get("/api/v1/stats/cache")
async def get_cache_stats():
"""Get cache statistics"""
try:
cache_stats = redis_manager.get_stats()
redis_health = await redis_manager.health_check()
stats_data = {
'cache_performance': cache_stats,
'redis_health': redis_health
}
return response_formatter.success(
data=stats_data,
message="Cache statistics"
)
except Exception as e:
logger.error(f"Error getting cache stats: {e}")
return JSONResponse(
status_code=500,
content=response_formatter.error("Internal server error", "STATS_ERROR")
)
@app.get("/api/v1/stats/api")
async def get_api_stats():
"""Get API statistics"""
try:
api_stats = {
'rate_limiter': rate_limiter.get_global_stats(),
'response_formatter': response_formatter.get_stats()
}
return response_formatter.success(
data=api_stats,
message="API statistics"
)
except Exception as e:
logger.error(f"Error getting API stats: {e}")
return JSONResponse(
status_code=500,
content=response_formatter.error("Internal server error", "API_STATS_ERROR")
)
# Batch endpoints for efficiency
@app.get("/api/v1/batch/heatmaps")
async def get_batch_heatmaps(
symbols: str = Query(..., description="Comma-separated list of symbols"),
exchange: Optional[str] = Query(None, description="Exchange name (None for consolidated)")
):
"""Get heatmaps for multiple symbols"""
try:
symbol_list = [s.strip().upper() for s in symbols.split(',')]
# Validate all symbols
for symbol in symbol_list:
if not validate_symbol(symbol):
return JSONResponse(
status_code=400,
content=response_formatter.validation_error("symbols", f"Invalid symbol: {symbol}")
)
# Get heatmaps in batch
heatmaps = {}
for symbol in symbol_list:
heatmap_data = await redis_manager.get_heatmap(symbol, exchange)
if heatmap_data:
heatmaps[symbol] = {
'symbol': heatmap_data.symbol,
'timestamp': heatmap_data.timestamp.isoformat(),
'bucket_size': heatmap_data.bucket_size,
'points': [
{
'price': point.price,
'volume': point.volume,
'intensity': point.intensity,
'side': point.side
}
for point in heatmap_data.data
]
}
return response_formatter.success(
data=heatmaps,
message=f"Batch heatmaps for {len(symbol_list)} symbols",
metadata={
'requested_symbols': len(symbol_list),
'found_heatmaps': len(heatmaps),
'exchange': exchange or 'consolidated'
}
)
except Exception as e:
logger.error(f"Error getting batch heatmaps: {e}")
return JSONResponse(
status_code=500,
content=response_formatter.error("Internal server error", "BATCH_HEATMAPS_ERROR")
)
return app
# Create the FastAPI app instance
app = create_app()
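A minimal sketch of serving and querying this API. The routes are the ones defined above; uvicorn as the ASGI server, the localhost URL and the symbols are assumptions, and the sketch presumes Redis is running with cached data:

# Serve the app, e.g. with uvicorn (an assumed ASGI server choice):
#   uvicorn COBY.api.rest_api:app --port 8000
# Then query the routes defined above; host, port and symbols are illustrative.
import json
from urllib.request import urlopen

base = "http://localhost:8000"
health = json.load(urlopen(f"{base}/health"))
heatmap = json.load(urlopen(f"{base}/api/v1/heatmap/BTCUSDT"))
batch = json.load(urlopen(f"{base}/api/v1/batch/heatmaps?symbols=BTCUSDT,ETHUSDT"))

print(health['data']['status'])   # 'healthy' when Redis responds to ping
print(heatmap['success'])         # False (404 payload) if no heatmap is cached yet
print(batch.get('metadata'))      # includes requested_symbols / found_heatmaps on success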


@ -0,0 +1,400 @@
"""
WebSocket server for real-time data streaming.
"""
import asyncio
import json
from typing import Dict, Set, Optional, Any
from fastapi import WebSocket, WebSocketDisconnect
from ..utils.logging import get_logger, set_correlation_id
from ..utils.validation import validate_symbol
from ..caching.redis_manager import redis_manager
from .response_formatter import ResponseFormatter
logger = get_logger(__name__)
class WebSocketManager:
"""
Manages WebSocket connections and real-time data streaming.
"""
def __init__(self):
"""Initialize WebSocket manager"""
# Active connections: connection_id -> WebSocket
self.connections: Dict[str, WebSocket] = {}
# Subscriptions: symbol -> set of connection_ids
self.subscriptions: Dict[str, Set[str]] = {}
# Connection metadata: connection_id -> metadata
self.connection_metadata: Dict[str, Dict[str, Any]] = {}
self.response_formatter = ResponseFormatter()
self.connection_counter = 0
logger.info("WebSocket manager initialized")
async def connect(self, websocket: WebSocket, client_ip: str) -> str:
"""
Accept new WebSocket connection.
Args:
websocket: WebSocket connection
client_ip: Client IP address
Returns:
str: Connection ID
"""
await websocket.accept()
# Generate connection ID
self.connection_counter += 1
connection_id = f"ws_{self.connection_counter}_{client_ip}"
# Store connection
self.connections[connection_id] = websocket
self.connection_metadata[connection_id] = {
'client_ip': client_ip,
'connected_at': asyncio.get_event_loop().time(),
'subscriptions': set(),
'messages_sent': 0
}
logger.info(f"WebSocket connected: {connection_id}")
# Send welcome message
welcome_msg = self.response_formatter.success(
data={'connection_id': connection_id},
message="WebSocket connected successfully"
)
await self._send_to_connection(connection_id, welcome_msg)
return connection_id
async def disconnect(self, connection_id: str) -> None:
"""
Handle WebSocket disconnection.
Args:
connection_id: Connection ID to disconnect
"""
if connection_id in self.connections:
# Remove from all subscriptions (iterate over a copy; _unsubscribe_connection mutates this set)
metadata = self.connection_metadata.get(connection_id, {})
for subscription_key in list(metadata.get('subscriptions', set())):
await self._unsubscribe_connection(connection_id, subscription_key)
# Remove connection
del self.connections[connection_id]
del self.connection_metadata[connection_id]
logger.info(f"WebSocket disconnected: {connection_id}")
async def subscribe(self, connection_id: str, symbol: str,
data_type: str = "heatmap") -> bool:
"""
Subscribe connection to symbol updates.
Args:
connection_id: Connection ID
symbol: Trading symbol
data_type: Type of data to subscribe to
Returns:
bool: True if subscribed successfully
"""
try:
# Validate symbol
if not validate_symbol(symbol):
error_msg = self.response_formatter.validation_error("symbol", "Invalid symbol format")
await self._send_to_connection(connection_id, error_msg)
return False
symbol = symbol.upper()
subscription_key = f"{symbol}:{data_type}"
# Add to subscriptions
if subscription_key not in self.subscriptions:
self.subscriptions[subscription_key] = set()
self.subscriptions[subscription_key].add(connection_id)
# Update connection metadata
if connection_id in self.connection_metadata:
self.connection_metadata[connection_id]['subscriptions'].add(subscription_key)
logger.info(f"WebSocket {connection_id} subscribed to {subscription_key}")
# Send confirmation
confirm_msg = self.response_formatter.success(
data={'symbol': symbol, 'data_type': data_type},
message=f"Subscribed to {symbol} {data_type} updates"
)
await self._send_to_connection(connection_id, confirm_msg)
# Send initial data if available
await self._send_initial_data(connection_id, symbol, data_type)
return True
except Exception as e:
logger.error(f"Error subscribing {connection_id} to {symbol}: {e}")
error_msg = self.response_formatter.error("Subscription failed", "SUBSCRIBE_ERROR")
await self._send_to_connection(connection_id, error_msg)
return False
async def unsubscribe(self, connection_id: str, symbol: str,
data_type: str = "heatmap") -> bool:
"""
Unsubscribe connection from symbol updates.
Args:
connection_id: Connection ID
symbol: Trading symbol
data_type: Type of data to unsubscribe from
Returns:
bool: True if unsubscribed successfully
"""
try:
symbol = symbol.upper()
subscription_key = f"{symbol}:{data_type}"
await self._unsubscribe_connection(connection_id, subscription_key)
# Send confirmation
confirm_msg = self.response_formatter.success(
data={'symbol': symbol, 'data_type': data_type},
message=f"Unsubscribed from {symbol} {data_type} updates"
)
await self._send_to_connection(connection_id, confirm_msg)
return True
except Exception as e:
logger.error(f"Error unsubscribing {connection_id} from {symbol}: {e}")
return False
async def broadcast_update(self, symbol: str, data_type: str, data: Any) -> int:
"""
Broadcast data update to all subscribers.
Args:
symbol: Trading symbol
data_type: Type of data
data: Data to broadcast
Returns:
int: Number of connections notified
"""
try:
set_correlation_id()
subscription_key = f"{symbol.upper()}:{data_type}"
subscribers = self.subscriptions.get(subscription_key, set())
if not subscribers:
return 0
# Format message based on data type
if data_type == "heatmap":
message = self.response_formatter.heatmap_response(data, symbol)
elif data_type == "orderbook":
message = self.response_formatter.orderbook_response(data, symbol, "consolidated")
else:
message = self.response_formatter.success(data, f"{data_type} update for {symbol}")
# Add update type to message
message['update_type'] = data_type
message['symbol'] = symbol
# Send to all subscribers
sent_count = 0
for connection_id in subscribers.copy(): # Copy to avoid modification during iteration
if await self._send_to_connection(connection_id, message):
sent_count += 1
logger.debug(f"Broadcasted {data_type} update for {symbol} to {sent_count} connections")
return sent_count
except Exception as e:
logger.error(f"Error broadcasting update for {symbol}: {e}")
return 0
async def _send_to_connection(self, connection_id: str, message: Dict[str, Any]) -> bool:
"""
Send message to specific connection.
Args:
connection_id: Connection ID
message: Message to send
Returns:
bool: True if sent successfully
"""
try:
if connection_id not in self.connections:
return False
websocket = self.connections[connection_id]
message_json = json.dumps(message, default=str)
await websocket.send_text(message_json)
# Update statistics
if connection_id in self.connection_metadata:
self.connection_metadata[connection_id]['messages_sent'] += 1
return True
except Exception as e:
logger.warning(f"Error sending message to {connection_id}: {e}")
# Remove broken connection
await self.disconnect(connection_id)
return False
async def _unsubscribe_connection(self, connection_id: str, subscription_key: str) -> None:
"""Remove connection from subscription"""
if subscription_key in self.subscriptions:
self.subscriptions[subscription_key].discard(connection_id)
# Clean up empty subscriptions
if not self.subscriptions[subscription_key]:
del self.subscriptions[subscription_key]
# Update connection metadata
if connection_id in self.connection_metadata:
self.connection_metadata[connection_id]['subscriptions'].discard(subscription_key)
async def _send_initial_data(self, connection_id: str, symbol: str, data_type: str) -> None:
"""Send initial data to newly subscribed connection"""
try:
if data_type == "heatmap":
# Get latest heatmap from cache
heatmap_data = await redis_manager.get_heatmap(symbol)
if heatmap_data:
message = self.response_formatter.heatmap_response(heatmap_data, symbol)
message['update_type'] = 'initial_data'
await self._send_to_connection(connection_id, message)
elif data_type == "orderbook":
# Could get latest order book from cache
# This would require knowing which exchange to get data from
pass
except Exception as e:
logger.warning(f"Error sending initial data to {connection_id}: {e}")
def get_stats(self) -> Dict[str, Any]:
"""Get WebSocket manager statistics"""
total_subscriptions = sum(len(subs) for subs in self.subscriptions.values())
return {
'active_connections': len(self.connections),
'total_subscriptions': total_subscriptions,
'unique_symbols': len(set(key.split(':')[0] for key in self.subscriptions.keys())),
'connection_counter': self.connection_counter
}
# Global WebSocket manager instance
websocket_manager = WebSocketManager()
class WebSocketServer:
"""
WebSocket server for real-time data streaming.
"""
def __init__(self):
"""Initialize WebSocket server"""
self.manager = websocket_manager
logger.info("WebSocket server initialized")
async def handle_connection(self, websocket: WebSocket, client_ip: str) -> None:
"""
Handle WebSocket connection lifecycle.
Args:
websocket: WebSocket connection
client_ip: Client IP address
"""
connection_id = None
try:
# Accept connection
connection_id = await self.manager.connect(websocket, client_ip)
# Handle messages
while True:
try:
# Receive message
message = await websocket.receive_text()
await self._handle_message(connection_id, message)
except WebSocketDisconnect:
logger.info(f"WebSocket client disconnected: {connection_id}")
break
except Exception as e:
logger.error(f"WebSocket connection error: {e}")
finally:
# Clean up connection
if connection_id:
await self.manager.disconnect(connection_id)
async def _handle_message(self, connection_id: str, message: str) -> None:
"""
Handle incoming WebSocket message.
Args:
connection_id: Connection ID
message: Received message
"""
try:
# Parse message
data = json.loads(message)
action = data.get('action')
if action == 'subscribe':
symbol = data.get('symbol')
data_type = data.get('data_type', 'heatmap')
await self.manager.subscribe(connection_id, symbol, data_type)
elif action == 'unsubscribe':
symbol = data.get('symbol')
data_type = data.get('data_type', 'heatmap')
await self.manager.unsubscribe(connection_id, symbol, data_type)
elif action == 'ping':
# Send pong response
pong_msg = self.manager.response_formatter.success(
data={'action': 'pong'},
message="Pong"
)
await self.manager._send_to_connection(connection_id, pong_msg)
else:
# Unknown action
error_msg = self.manager.response_formatter.error(
f"Unknown action: {action}",
"UNKNOWN_ACTION"
)
await self.manager._send_to_connection(connection_id, error_msg)
except json.JSONDecodeError:
error_msg = self.manager.response_formatter.error(
"Invalid JSON message",
"INVALID_JSON"
)
await self.manager._send_to_connection(connection_id, error_msg)
except Exception as e:
logger.error(f"Error handling WebSocket message: {e}")
error_msg = self.manager.response_formatter.error(
"Message processing failed",
"MESSAGE_ERROR"
)
await self.manager._send_to_connection(connection_id, error_msg)
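This commit does not show how WebSocketServer is mounted onto the HTTP app, so the /ws route below is an assumption about the intended integration; the client message shapes, however, come directly from _handle_message above:

# Hedged wiring sketch: the /ws route and its FastAPI mounting are assumptions.
from fastapi import WebSocket
from COBY.api import WebSocketServer
from COBY.api.rest_api import app

ws_server = WebSocketServer()

@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    client_ip = websocket.client.host if websocket.client else "unknown"
    await ws_server.handle_connection(websocket, client_ip)

# Client messages accepted by _handle_message (taken from the handler above):
#   {"action": "subscribe",   "symbol": "BTCUSDT", "data_type": "heatmap"}
#   {"action": "unsubscribe", "symbol": "BTCUSDT", "data_type": "heatmap"}
#   {"action": "ping"}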

COBY/caching/__init__.py

@ -0,0 +1,13 @@
"""
Caching layer for the COBY system.
"""
from .redis_manager import RedisManager
from .cache_keys import CacheKeys
from .data_serializer import DataSerializer
__all__ = [
'RedisManager',
'CacheKeys',
'DataSerializer'
]

COBY/caching/cache_keys.py

@ -0,0 +1,278 @@
"""
Cache key management for Redis operations.
"""
from typing import Optional
from ..utils.logging import get_logger
logger = get_logger(__name__)
class CacheKeys:
"""
Centralized cache key management for consistent Redis operations.
Provides standardized key patterns for different data types.
"""
# Key prefixes
ORDERBOOK_PREFIX = "ob"
HEATMAP_PREFIX = "hm"
TRADE_PREFIX = "tr"
METRICS_PREFIX = "mt"
STATUS_PREFIX = "st"
STATS_PREFIX = "stats"
# TTL values (seconds)
ORDERBOOK_TTL = 60 # 1 minute
HEATMAP_TTL = 30 # 30 seconds
TRADE_TTL = 300 # 5 minutes
METRICS_TTL = 120 # 2 minutes
STATUS_TTL = 60 # 1 minute
STATS_TTL = 300 # 5 minutes
@classmethod
def orderbook_key(cls, symbol: str, exchange: str) -> str:
"""
Generate cache key for order book data.
Args:
symbol: Trading symbol
exchange: Exchange name
Returns:
str: Cache key
"""
return f"{cls.ORDERBOOK_PREFIX}:{exchange}:{symbol}"
@classmethod
def heatmap_key(cls, symbol: str, bucket_size: float = 1.0,
exchange: Optional[str] = None) -> str:
"""
Generate cache key for heatmap data.
Args:
symbol: Trading symbol
bucket_size: Price bucket size
exchange: Exchange name (None for consolidated)
Returns:
str: Cache key
"""
if exchange:
return f"{cls.HEATMAP_PREFIX}:{exchange}:{symbol}:{bucket_size}"
else:
return f"{cls.HEATMAP_PREFIX}:consolidated:{symbol}:{bucket_size}"
@classmethod
def trade_key(cls, symbol: str, exchange: str, trade_id: str) -> str:
"""
Generate cache key for trade data.
Args:
symbol: Trading symbol
exchange: Exchange name
trade_id: Trade identifier
Returns:
str: Cache key
"""
return f"{cls.TRADE_PREFIX}:{exchange}:{symbol}:{trade_id}"
@classmethod
def metrics_key(cls, symbol: str, exchange: str) -> str:
"""
Generate cache key for metrics data.
Args:
symbol: Trading symbol
exchange: Exchange name
Returns:
str: Cache key
"""
return f"{cls.METRICS_PREFIX}:{exchange}:{symbol}"
@classmethod
def status_key(cls, exchange: str) -> str:
"""
Generate cache key for exchange status.
Args:
exchange: Exchange name
Returns:
str: Cache key
"""
return f"{cls.STATUS_PREFIX}:{exchange}"
@classmethod
def stats_key(cls, component: str) -> str:
"""
Generate cache key for component statistics.
Args:
component: Component name
Returns:
str: Cache key
"""
return f"{cls.STATS_PREFIX}:{component}"
@classmethod
def latest_heatmaps_key(cls, symbol: str) -> str:
"""
Generate cache key for latest heatmaps list.
Args:
symbol: Trading symbol
Returns:
str: Cache key
"""
return f"{cls.HEATMAP_PREFIX}:latest:{symbol}"
@classmethod
def symbol_list_key(cls, exchange: str) -> str:
"""
Generate cache key for symbol list.
Args:
exchange: Exchange name
Returns:
str: Cache key
"""
return f"symbols:{exchange}"
@classmethod
def price_bucket_key(cls, symbol: str, exchange: str) -> str:
"""
Generate cache key for price buckets.
Args:
symbol: Trading symbol
exchange: Exchange name
Returns:
str: Cache key
"""
return f"buckets:{exchange}:{symbol}"
@classmethod
def arbitrage_key(cls, symbol: str) -> str:
"""
Generate cache key for arbitrage opportunities.
Args:
symbol: Trading symbol
Returns:
str: Cache key
"""
return f"arbitrage:{symbol}"
@classmethod
def get_ttl(cls, key: str) -> int:
"""
Get appropriate TTL for a cache key.
Args:
key: Cache key
Returns:
int: TTL in seconds
"""
if key.startswith(cls.ORDERBOOK_PREFIX):
return cls.ORDERBOOK_TTL
elif key.startswith(cls.HEATMAP_PREFIX):
return cls.HEATMAP_TTL
elif key.startswith(cls.TRADE_PREFIX):
return cls.TRADE_TTL
elif key.startswith(cls.METRICS_PREFIX):
return cls.METRICS_TTL
elif key.startswith(cls.STATUS_PREFIX):
return cls.STATUS_TTL
elif key.startswith(cls.STATS_PREFIX):
return cls.STATS_TTL
else:
return 300 # Default 5 minutes
@classmethod
def parse_key(cls, key: str) -> dict:
"""
Parse cache key to extract components.
Args:
key: Cache key to parse
Returns:
dict: Parsed key components
"""
parts = key.split(':')
if len(parts) < 2:
return {'type': 'unknown', 'key': key}
key_type = parts[0]
if key_type == cls.ORDERBOOK_PREFIX and len(parts) >= 3:
return {
'type': 'orderbook',
'exchange': parts[1],
'symbol': parts[2]
}
elif key_type == cls.HEATMAP_PREFIX and len(parts) >= 4:
return {
'type': 'heatmap',
'exchange': parts[1] if parts[1] != 'consolidated' else None,
'symbol': parts[2],
'bucket_size': float(parts[3]) if len(parts) > 3 else 1.0
}
elif key_type == cls.TRADE_PREFIX and len(parts) >= 4:
return {
'type': 'trade',
'exchange': parts[1],
'symbol': parts[2],
'trade_id': parts[3]
}
elif key_type == cls.METRICS_PREFIX and len(parts) >= 3:
return {
'type': 'metrics',
'exchange': parts[1],
'symbol': parts[2]
}
elif key_type == cls.STATUS_PREFIX and len(parts) >= 2:
return {
'type': 'status',
'exchange': parts[1]
}
elif key_type == cls.STATS_PREFIX and len(parts) >= 2:
return {
'type': 'stats',
'component': parts[1]
}
else:
return {'type': 'unknown', 'key': key}
@classmethod
def get_pattern(cls, key_type: str) -> str:
"""
Get Redis pattern for key type.
Args:
key_type: Type of key
Returns:
str: Redis pattern
"""
patterns = {
'orderbook': f"{cls.ORDERBOOK_PREFIX}:*",
'heatmap': f"{cls.HEATMAP_PREFIX}:*",
'trade': f"{cls.TRADE_PREFIX}:*",
'metrics': f"{cls.METRICS_PREFIX}:*",
'status': f"{cls.STATUS_PREFIX}:*",
'stats': f"{cls.STATS_PREFIX}:*"
}
return patterns.get(key_type, "*")
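A quick sketch of the key scheme in practice; the symbol and exchange names are illustrative:

# Illustrative CacheKeys usage (import path assumes the package layout shown in this commit).
from COBY.caching.cache_keys import CacheKeys

key = CacheKeys.heatmap_key("BTCUSDT", bucket_size=1.0, exchange="binance")
print(key)                                 # hm:binance:BTCUSDT:1.0
print(CacheKeys.heatmap_key("BTCUSDT"))    # hm:consolidated:BTCUSDT:1.0
print(CacheKeys.get_ttl(key))              # 30 (HEATMAP_TTL)
print(CacheKeys.parse_key(key))            # {'type': 'heatmap', 'exchange': 'binance', 'symbol': 'BTCUSDT', 'bucket_size': 1.0}
print(CacheKeys.get_pattern("orderbook"))  # ob:*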


@ -0,0 +1,355 @@
"""
Data serialization for Redis caching.
"""
import json
import pickle
import gzip
from typing import Any, Union, Dict, List
from datetime import datetime
from ..models.core import (
OrderBookSnapshot, TradeEvent, HeatmapData, PriceBuckets,
OrderBookMetrics, ImbalanceMetrics, ConsolidatedOrderBook
)
from ..utils.logging import get_logger
from ..utils.exceptions import ProcessingError
logger = get_logger(__name__)
class DataSerializer:
"""
Handles serialization and deserialization of data for Redis storage.
Supports multiple serialization formats:
- JSON for simple data
- Pickle for complex objects
- Compressed formats for large data
"""
def __init__(self, use_compression: bool = True):
"""
Initialize data serializer.
Args:
use_compression: Whether to use gzip compression
"""
self.use_compression = use_compression
self.serialization_stats = {
'serialized': 0,
'deserialized': 0,
'compression_ratio': 0.0,
'errors': 0
}
logger.info(f"Data serializer initialized (compression: {use_compression})")
def serialize(self, data: Any, format_type: str = 'auto') -> bytes:
"""
Serialize data for Redis storage.
Args:
data: Data to serialize
format_type: Serialization format ('json', 'pickle', 'auto')
Returns:
bytes: Serialized data
"""
try:
# Determine format
if format_type == 'auto':
format_type = self._determine_format(data)
# Serialize based on format
if format_type == 'json':
serialized = self._serialize_json(data)
elif format_type == 'pickle':
serialized = self._serialize_pickle(data)
else:
raise ValueError(f"Unsupported format: {format_type}")
# Apply compression if enabled
if self.use_compression:
original_size = len(serialized)
serialized = gzip.compress(serialized)
compressed_size = len(serialized)
# Update compression ratio
if original_size > 0:
ratio = compressed_size / original_size
self.serialization_stats['compression_ratio'] = (
(self.serialization_stats['compression_ratio'] *
self.serialization_stats['serialized'] + ratio) /
(self.serialization_stats['serialized'] + 1)
)
self.serialization_stats['serialized'] += 1
return serialized
except Exception as e:
self.serialization_stats['errors'] += 1
logger.error(f"Serialization error: {e}")
raise ProcessingError(f"Serialization failed: {e}", "SERIALIZE_ERROR")
def deserialize(self, data: bytes, format_type: str = 'auto') -> Any:
"""
Deserialize data from Redis storage.
Args:
data: Serialized data
format_type: Expected format ('json', 'pickle', 'auto')
Returns:
Any: Deserialized data
"""
try:
# Decompress if needed
if self.use_compression:
try:
data = gzip.decompress(data)
except gzip.BadGzipFile:
# Data might not be compressed
pass
# Determine format if auto
if format_type == 'auto':
format_type = self._detect_format(data)
# Deserialize based on format
if format_type == 'json':
result = self._deserialize_json(data)
elif format_type == 'pickle':
result = self._deserialize_pickle(data)
else:
raise ValueError(f"Unsupported format: {format_type}")
self.serialization_stats['deserialized'] += 1
return result
except Exception as e:
self.serialization_stats['errors'] += 1
logger.error(f"Deserialization error: {e}")
raise ProcessingError(f"Deserialization failed: {e}", "DESERIALIZE_ERROR")
def _determine_format(self, data: Any) -> str:
"""Determine best serialization format for data"""
# Use JSON for simple data types
if isinstance(data, (dict, list, str, int, float, bool)) or data is None:
return 'json'
# Use pickle for complex objects
return 'pickle'
def _detect_format(self, data: bytes) -> str:
"""Detect serialization format from data"""
try:
# Try JSON first
json.loads(data.decode('utf-8'))
return 'json'
except (json.JSONDecodeError, UnicodeDecodeError):
# Assume pickle
return 'pickle'
def _serialize_json(self, data: Any) -> bytes:
"""Serialize data as JSON"""
# Convert complex objects to dictionaries
if hasattr(data, '__dict__'):
data = self._object_to_dict(data)
elif isinstance(data, list):
data = [self._object_to_dict(item) if hasattr(item, '__dict__') else item
for item in data]
json_str = json.dumps(data, default=self._json_serializer, ensure_ascii=False)
return json_str.encode('utf-8')
def _deserialize_json(self, data: bytes) -> Any:
"""Deserialize JSON data"""
json_str = data.decode('utf-8')
return json.loads(json_str, object_hook=self._json_deserializer)
def _serialize_pickle(self, data: Any) -> bytes:
"""Serialize data as pickle"""
return pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)
def _deserialize_pickle(self, data: bytes) -> Any:
"""Deserialize pickle data"""
return pickle.loads(data)
def _object_to_dict(self, obj: Any) -> Dict:
"""Convert object to dictionary for JSON serialization"""
if isinstance(obj, (OrderBookSnapshot, TradeEvent, HeatmapData,
PriceBuckets, OrderBookMetrics, ImbalanceMetrics,
ConsolidatedOrderBook)):
result = {
'__type__': obj.__class__.__name__,
'__data__': {}
}
# Convert object attributes
for key, value in obj.__dict__.items():
if isinstance(value, datetime):
result['__data__'][key] = {
'__datetime__': value.isoformat()
}
elif isinstance(value, list):
result['__data__'][key] = [
self._object_to_dict(item) if hasattr(item, '__dict__') else item
for item in value
]
elif hasattr(value, '__dict__'):
result['__data__'][key] = self._object_to_dict(value)
else:
result['__data__'][key] = value
return result
else:
return obj.__dict__ if hasattr(obj, '__dict__') else obj
def _json_serializer(self, obj: Any) -> Any:
"""Custom JSON serializer for special types"""
if isinstance(obj, datetime):
return {'__datetime__': obj.isoformat()}
elif hasattr(obj, '__dict__'):
return self._object_to_dict(obj)
else:
return str(obj)
def _json_deserializer(self, obj: Dict) -> Any:
"""Custom JSON deserializer for special types"""
if '__datetime__' in obj:
return datetime.fromisoformat(obj['__datetime__'])
elif '__type__' in obj and '__data__' in obj:
return self._reconstruct_object(obj['__type__'], obj['__data__'])
else:
return obj
def _reconstruct_object(self, type_name: str, data: Dict) -> Any:
"""Reconstruct object from serialized data"""
# Import required classes
from ..models.core import (
OrderBookSnapshot, TradeEvent, HeatmapData, PriceBuckets,
OrderBookMetrics, ImbalanceMetrics, ConsolidatedOrderBook,
PriceLevel, HeatmapPoint
)
# Map type names to classes
type_map = {
'OrderBookSnapshot': OrderBookSnapshot,
'TradeEvent': TradeEvent,
'HeatmapData': HeatmapData,
'PriceBuckets': PriceBuckets,
'OrderBookMetrics': OrderBookMetrics,
'ImbalanceMetrics': ImbalanceMetrics,
'ConsolidatedOrderBook': ConsolidatedOrderBook,
'PriceLevel': PriceLevel,
'HeatmapPoint': HeatmapPoint
}
if type_name in type_map:
cls = type_map[type_name]
# Recursively deserialize nested objects
processed_data = {}
for key, value in data.items():
if isinstance(value, dict) and '__datetime__' in value:
processed_data[key] = datetime.fromisoformat(value['__datetime__'])
elif isinstance(value, dict) and '__type__' in value:
processed_data[key] = self._reconstruct_object(
value['__type__'], value['__data__']
)
elif isinstance(value, list):
processed_data[key] = [
self._reconstruct_object(item['__type__'], item['__data__'])
if isinstance(item, dict) and '__type__' in item
else item
for item in value
]
else:
processed_data[key] = value
try:
return cls(**processed_data)
except Exception as e:
logger.warning(f"Failed to reconstruct {type_name}: {e}")
return processed_data
else:
logger.warning(f"Unknown type for reconstruction: {type_name}")
return data
def serialize_heatmap(self, heatmap: HeatmapData) -> bytes:
"""Specialized serialization for heatmap data"""
try:
# Create optimized representation
heatmap_dict = {
'symbol': heatmap.symbol,
'timestamp': heatmap.timestamp.isoformat(),
'bucket_size': heatmap.bucket_size,
'points': [
{
'p': point.price, # price
'v': point.volume, # volume
'i': point.intensity, # intensity
's': point.side # side
}
for point in heatmap.data
]
}
return self.serialize(heatmap_dict, 'json')
except Exception as e:
logger.error(f"Heatmap serialization error: {e}")
# Fallback to standard serialization
return self.serialize(heatmap, 'pickle')
def deserialize_heatmap(self, data: bytes) -> HeatmapData:
"""Specialized deserialization for heatmap data"""
try:
# Try optimized format first
heatmap_dict = self.deserialize(data, 'json')
if isinstance(heatmap_dict, dict) and 'points' in heatmap_dict:
from ..models.core import HeatmapData, HeatmapPoint
# Reconstruct heatmap points
points = []
for point_data in heatmap_dict['points']:
point = HeatmapPoint(
price=point_data['p'],
volume=point_data['v'],
intensity=point_data['i'],
side=point_data['s']
)
points.append(point)
# Create heatmap
heatmap = HeatmapData(
symbol=heatmap_dict['symbol'],
timestamp=datetime.fromisoformat(heatmap_dict['timestamp']),
bucket_size=heatmap_dict['bucket_size']
)
heatmap.data = points
return heatmap
else:
# Fallback to standard deserialization
return self.deserialize(data, 'pickle')
except Exception as e:
logger.error(f"Heatmap deserialization error: {e}")
# Final fallback
return self.deserialize(data, 'pickle')
def get_stats(self) -> Dict[str, Any]:
"""Get serialization statistics"""
return self.serialization_stats.copy()
def reset_stats(self) -> None:
"""Reset serialization statistics"""
self.serialization_stats = {
'serialized': 0,
'deserialized': 0,
'compression_ratio': 0.0,
'errors': 0
}
logger.info("Serialization statistics reset")


@ -0,0 +1,691 @@
"""
Redis cache manager for high-performance data access.
"""
import asyncio
import redis.asyncio as redis
from typing import Any, Optional, List, Dict, Union
from datetime import datetime, timedelta
from ..config import config
from ..utils.logging import get_logger, set_correlation_id
from ..utils.exceptions import StorageError
from ..utils.timing import get_current_timestamp
from .cache_keys import CacheKeys
from .data_serializer import DataSerializer
logger = get_logger(__name__)
class RedisManager:
"""
High-performance Redis cache manager for market data.
Provides:
- Connection pooling and management
- Data serialization and compression
- TTL management
- Batch operations
- Performance monitoring
"""
def __init__(self):
"""Initialize Redis manager"""
self.redis_pool: Optional[redis.ConnectionPool] = None
self.redis_client: Optional[redis.Redis] = None
self.serializer = DataSerializer(use_compression=True)
self.cache_keys = CacheKeys()
# Performance statistics
self.stats = {
'gets': 0,
'sets': 0,
'deletes': 0,
'hits': 0,
'misses': 0,
'errors': 0,
'total_data_size': 0,
'avg_response_time': 0.0
}
logger.info("Redis manager initialized")
async def initialize(self) -> None:
"""Initialize Redis connection pool"""
try:
# Create connection pool
self.redis_pool = redis.ConnectionPool(
host=config.redis.host,
port=config.redis.port,
password=config.redis.password,
db=config.redis.db,
max_connections=config.redis.max_connections,
socket_timeout=config.redis.socket_timeout,
socket_connect_timeout=config.redis.socket_connect_timeout,
decode_responses=False, # We handle bytes directly
retry_on_timeout=True,
health_check_interval=30
)
# Create Redis client
self.redis_client = redis.Redis(connection_pool=self.redis_pool)
# Test connection
await self.redis_client.ping()
logger.info(f"Redis connection established: {config.redis.host}:{config.redis.port}")
except Exception as e:
logger.error(f"Failed to initialize Redis connection: {e}")
raise StorageError(f"Redis initialization failed: {e}", "REDIS_INIT_ERROR")
async def close(self) -> None:
"""Close Redis connections"""
try:
if self.redis_client:
await self.redis_client.close()
if self.redis_pool:
await self.redis_pool.disconnect()
logger.info("Redis connections closed")
except Exception as e:
logger.warning(f"Error closing Redis connections: {e}")
async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> bool:
"""
Set value in cache with optional TTL.
Args:
key: Cache key
value: Value to cache
ttl: Time to live in seconds (None = use default)
Returns:
bool: True if successful, False otherwise
"""
try:
set_correlation_id()
start_time = asyncio.get_event_loop().time()
# Serialize value
serialized_value = self.serializer.serialize(value)
# Determine TTL
if ttl is None:
ttl = self.cache_keys.get_ttl(key)
# Set in Redis
result = await self.redis_client.setex(key, ttl, serialized_value)
# Update statistics
self.stats['sets'] += 1
self.stats['total_data_size'] += len(serialized_value)
# Update response time
response_time = asyncio.get_event_loop().time() - start_time
self._update_avg_response_time(response_time)
logger.debug(f"Cached data: {key} (size: {len(serialized_value)} bytes, ttl: {ttl}s)")
return bool(result)
except Exception as e:
self.stats['errors'] += 1
logger.error(f"Error setting cache key {key}: {e}")
return False
async def get(self, key: str) -> Optional[Any]:
"""
Get value from cache.
Args:
key: Cache key
Returns:
Any: Cached value or None if not found
"""
try:
set_correlation_id()
start_time = asyncio.get_event_loop().time()
# Get from Redis
serialized_value = await self.redis_client.get(key)
# Update statistics
self.stats['gets'] += 1
if serialized_value is None:
self.stats['misses'] += 1
logger.debug(f"Cache miss: {key}")
return None
# Deserialize value
value = self.serializer.deserialize(serialized_value)
# Update statistics
self.stats['hits'] += 1
# Update response time
response_time = asyncio.get_event_loop().time() - start_time
self._update_avg_response_time(response_time)
logger.debug(f"Cache hit: {key} (size: {len(serialized_value)} bytes)")
return value
except Exception as e:
self.stats['errors'] += 1
logger.error(f"Error getting cache key {key}: {e}")
return None
async def delete(self, key: str) -> bool:
"""
Delete key from cache.
Args:
key: Cache key to delete
Returns:
bool: True if deleted, False otherwise
"""
try:
set_correlation_id()
result = await self.redis_client.delete(key)
self.stats['deletes'] += 1
logger.debug(f"Deleted cache key: {key}")
return bool(result)
except Exception as e:
self.stats['errors'] += 1
logger.error(f"Error deleting cache key {key}: {e}")
return False
async def exists(self, key: str) -> bool:
"""
Check if key exists in cache.
Args:
key: Cache key to check
Returns:
bool: True if exists, False otherwise
"""
try:
result = await self.redis_client.exists(key)
return bool(result)
except Exception as e:
logger.error(f"Error checking cache key existence {key}: {e}")
return False
async def expire(self, key: str, ttl: int) -> bool:
"""
Set expiration time for key.
Args:
key: Cache key
ttl: Time to live in seconds
Returns:
bool: True if successful, False otherwise
"""
try:
result = await self.redis_client.expire(key, ttl)
return bool(result)
except Exception as e:
logger.error(f"Error setting expiration for key {key}: {e}")
return False
async def mget(self, keys: List[str]) -> List[Optional[Any]]:
"""
Get multiple values from cache.
Args:
keys: List of cache keys
Returns:
List[Optional[Any]]: List of values (None for missing keys)
"""
try:
set_correlation_id()
start_time = asyncio.get_event_loop().time()
# Get from Redis
serialized_values = await self.redis_client.mget(keys)
# Deserialize values
values = []
for serialized_value in serialized_values:
if serialized_value is None:
values.append(None)
self.stats['misses'] += 1
else:
try:
value = self.serializer.deserialize(serialized_value)
values.append(value)
self.stats['hits'] += 1
except Exception as e:
logger.warning(f"Error deserializing value: {e}")
values.append(None)
self.stats['errors'] += 1
# Update statistics
self.stats['gets'] += len(keys)
# Update response time
response_time = asyncio.get_event_loop().time() - start_time
self._update_avg_response_time(response_time)
logger.debug(f"Multi-get: {len(keys)} keys, {sum(1 for v in values if v is not None)} hits")
return values
except Exception as e:
self.stats['errors'] += 1
logger.error(f"Error in multi-get: {e}")
return [None] * len(keys)
async def mset(self, key_value_pairs: Dict[str, Any], ttl: Optional[int] = None) -> bool:
"""
Set multiple key-value pairs.
Args:
key_value_pairs: Dictionary of key-value pairs
ttl: Time to live in seconds (None = use default per key)
Returns:
bool: True if successful, False otherwise
"""
try:
set_correlation_id()
# Serialize all values
serialized_pairs = {}
for key, value in key_value_pairs.items():
serialized_value = self.serializer.serialize(value)
serialized_pairs[key] = serialized_value
self.stats['total_data_size'] += len(serialized_value)
# Set in Redis
result = await self.redis_client.mset(serialized_pairs)
# Set TTL for each key if specified
if ttl is not None:
for key in key_value_pairs.keys():
await self.redis_client.expire(key, ttl)
else:
# Use individual TTLs
for key in key_value_pairs.keys():
key_ttl = self.cache_keys.get_ttl(key)
await self.redis_client.expire(key, key_ttl)
self.stats['sets'] += len(key_value_pairs)
logger.debug(f"Multi-set: {len(key_value_pairs)} keys")
return bool(result)
except Exception as e:
self.stats['errors'] += 1
logger.error(f"Error in multi-set: {e}")
return False
async def keys(self, pattern: str) -> List[str]:
"""
Get keys matching pattern.
Args:
pattern: Redis pattern (e.g., "hm:*")
Returns:
List[str]: List of matching keys
"""
try:
keys = await self.redis_client.keys(pattern)
return [key.decode('utf-8') if isinstance(key, bytes) else key for key in keys]
except Exception as e:
logger.error(f"Error getting keys with pattern {pattern}: {e}")
return []
async def flushdb(self) -> bool:
"""
Clear all keys in current database.
Returns:
bool: True if successful, False otherwise
"""
try:
result = await self.redis_client.flushdb()
logger.info("Redis database flushed")
return bool(result)
except Exception as e:
logger.error(f"Error flushing Redis database: {e}")
return False
async def info(self) -> Dict[str, Any]:
"""
Get Redis server information.
Returns:
Dict: Redis server info
"""
try:
info = await self.redis_client.info()
return info
except Exception as e:
logger.error(f"Error getting Redis info: {e}")
return {}
async def ping(self) -> bool:
"""
Ping Redis server.
Returns:
bool: True if server responds, False otherwise
"""
try:
result = await self.redis_client.ping()
return bool(result)
except Exception as e:
logger.error(f"Redis ping failed: {e}")
return False
async def set_heatmap(self, symbol: str, heatmap_data,
exchange: Optional[str] = None, ttl: Optional[int] = None) -> bool:
"""
Cache heatmap data with optimized serialization.
Args:
symbol: Trading symbol
heatmap_data: Heatmap data to cache
exchange: Exchange name (None for consolidated)
ttl: Time to live in seconds
Returns:
bool: True if successful, False otherwise
"""
try:
key = self.cache_keys.heatmap_key(symbol, 1.0, exchange)
# Use specialized heatmap serialization
serialized_value = self.serializer.serialize_heatmap(heatmap_data)
# Determine TTL
if ttl is None:
ttl = self.cache_keys.HEATMAP_TTL
# Set in Redis
result = await self.redis_client.setex(key, ttl, serialized_value)
# Update statistics
self.stats['sets'] += 1
self.stats['total_data_size'] += len(serialized_value)
logger.debug(f"Cached heatmap: {key} (size: {len(serialized_value)} bytes)")
return bool(result)
except Exception as e:
self.stats['errors'] += 1
logger.error(f"Error caching heatmap for {symbol}: {e}")
return False
async def get_heatmap(self, symbol: str, exchange: Optional[str] = None):
"""
Get cached heatmap data with optimized deserialization.
Args:
symbol: Trading symbol
exchange: Exchange name (None for consolidated)
Returns:
HeatmapData: Cached heatmap or None if not found
"""
try:
key = self.cache_keys.heatmap_key(symbol, 1.0, exchange)
# Get from Redis
serialized_value = await self.redis_client.get(key)
self.stats['gets'] += 1
if serialized_value is None:
self.stats['misses'] += 1
return None
# Use specialized heatmap deserialization
heatmap_data = self.serializer.deserialize_heatmap(serialized_value)
self.stats['hits'] += 1
logger.debug(f"Retrieved heatmap: {key}")
return heatmap_data
except Exception as e:
self.stats['errors'] += 1
logger.error(f"Error retrieving heatmap for {symbol}: {e}")
return None
async def cache_orderbook(self, orderbook) -> bool:
"""
Cache order book data.
Args:
orderbook: OrderBookSnapshot to cache
Returns:
bool: True if successful, False otherwise
"""
try:
key = self.cache_keys.orderbook_key(orderbook.symbol, orderbook.exchange)
return await self.set(key, orderbook)
except Exception as e:
logger.error(f"Error caching order book: {e}")
return False
async def get_orderbook(self, symbol: str, exchange: str):
"""
Get cached order book data.
Args:
symbol: Trading symbol
exchange: Exchange name
Returns:
OrderBookSnapshot: Cached order book or None if not found
"""
try:
key = self.cache_keys.orderbook_key(symbol, exchange)
return await self.get(key)
except Exception as e:
logger.error(f"Error retrieving order book: {e}")
return None
async def cache_metrics(self, metrics, symbol: str, exchange: str) -> bool:
"""
Cache metrics data.
Args:
metrics: Metrics data to cache
symbol: Trading symbol
exchange: Exchange name
Returns:
bool: True if successful, False otherwise
"""
try:
key = self.cache_keys.metrics_key(symbol, exchange)
return await self.set(key, metrics)
except Exception as e:
logger.error(f"Error caching metrics: {e}")
return False
async def get_metrics(self, symbol: str, exchange: str):
"""
Get cached metrics data.
Args:
symbol: Trading symbol
exchange: Exchange name
Returns:
Metrics data or None if not found
"""
try:
key = self.cache_keys.metrics_key(symbol, exchange)
return await self.get(key)
except Exception as e:
logger.error(f"Error retrieving metrics: {e}")
return None
async def cache_exchange_status(self, exchange: str, status_data) -> bool:
"""
Cache exchange status.
Args:
exchange: Exchange name
status_data: Status data to cache
Returns:
bool: True if successful, False otherwise
"""
try:
key = self.cache_keys.status_key(exchange)
return await self.set(key, status_data)
except Exception as e:
logger.error(f"Error caching exchange status: {e}")
return False
async def get_exchange_status(self, exchange: str):
"""
Get cached exchange status.
Args:
exchange: Exchange name
Returns:
Status data or None if not found
"""
try:
key = self.cache_keys.status_key(exchange)
return await self.get(key)
except Exception as e:
logger.error(f"Error retrieving exchange status: {e}")
return None
async def cleanup_expired_keys(self) -> int:
"""
Count keys that have already expired (Redis evicts expired keys automatically;
checking their TTL here simply surfaces how many were removed lazily).
Returns:
int: Number of expired keys found
"""
try:
# Get all keys
all_keys = await self.keys("*")
# Check which ones are expired
expired_count = 0
for key in all_keys:
ttl = await self.redis_client.ttl(key)
if ttl == -2: # Key doesn't exist (expired)
expired_count += 1
logger.debug(f"Found {expired_count} expired keys")
return expired_count
except Exception as e:
logger.error(f"Error cleaning up expired keys: {e}")
return 0
def _update_avg_response_time(self, response_time: float) -> None:
"""Update average response time"""
total_operations = self.stats['gets'] + self.stats['sets']
if total_operations > 0:
self.stats['avg_response_time'] = (
(self.stats['avg_response_time'] * (total_operations - 1) + response_time) /
total_operations
)
def get_stats(self) -> Dict[str, Any]:
"""Get cache statistics"""
total_operations = self.stats['gets'] + self.stats['sets']
hit_rate = (self.stats['hits'] / max(self.stats['gets'], 1)) * 100
return {
**self.stats,
'total_operations': total_operations,
'hit_rate_percentage': hit_rate,
'serializer_stats': self.serializer.get_stats()
}
def reset_stats(self) -> None:
"""Reset cache statistics"""
self.stats = {
'gets': 0,
'sets': 0,
'deletes': 0,
'hits': 0,
'misses': 0,
'errors': 0,
'total_data_size': 0,
'avg_response_time': 0.0
}
self.serializer.reset_stats()
logger.info("Redis manager statistics reset")
async def health_check(self) -> Dict[str, Any]:
"""
Perform comprehensive health check.
Returns:
Dict: Health check results
"""
health = {
'redis_ping': False,
'connection_pool_size': 0,
'memory_usage': 0,
'connected_clients': 0,
'total_keys': 0,
'hit_rate': 0.0,
'avg_response_time': self.stats['avg_response_time']
}
try:
# Test ping
health['redis_ping'] = await self.ping()
# Get Redis info
info = await self.info()
if info:
health['memory_usage'] = info.get('used_memory', 0)
health['connected_clients'] = info.get('connected_clients', 0)
# Get key count
all_keys = await self.keys("*")
health['total_keys'] = len(all_keys)
# Calculate hit rate
if self.stats['gets'] > 0:
health['hit_rate'] = (self.stats['hits'] / self.stats['gets']) * 100
# Connection pool info
if self.redis_pool:
health['connection_pool_size'] = self.redis_pool.max_connections
except Exception as e:
logger.error(f"Health check error: {e}")
return health
# Global Redis manager instance
redis_manager = RedisManager()
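A minimal usage sketch of the global instance, assuming a reachable Redis server matching `config.redis`:

```python
# Minimal sketch; requires a running Redis instance that matches config.redis.
import asyncio

async def demo_cache():
    await redis_manager.initialize()                      # build pool + ping
    await redis_manager.set('demo:key', {'hello': 'world'}, ttl=60)
    print(await redis_manager.get('demo:key'))            # {'hello': 'world'}
    print(await redis_manager.health_check())             # ping, memory, hit rate
    await redis_manager.close()

asyncio.run(demo_cache())
```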

COBY/config.py
@@ -0,0 +1,167 @@
"""
Configuration management for the multi-exchange data aggregation system.
"""
import os
from dataclasses import dataclass, field
from typing import List, Dict, Any
from pathlib import Path
@dataclass
class DatabaseConfig:
"""Database configuration settings"""
host: str = os.getenv('DB_HOST', '192.168.0.10')
port: int = int(os.getenv('DB_PORT', '5432'))
name: str = os.getenv('DB_NAME', 'market_data')
user: str = os.getenv('DB_USER', 'market_user')
password: str = os.getenv('DB_PASSWORD', 'market_data_secure_pass_2024')
schema: str = os.getenv('DB_SCHEMA', 'market_data')
pool_size: int = int(os.getenv('DB_POOL_SIZE', '10'))
max_overflow: int = int(os.getenv('DB_MAX_OVERFLOW', '20'))
pool_timeout: int = int(os.getenv('DB_POOL_TIMEOUT', '30'))
@dataclass
class RedisConfig:
"""Redis configuration settings"""
host: str = os.getenv('REDIS_HOST', '192.168.0.10')
port: int = int(os.getenv('REDIS_PORT', '6379'))
password: str = os.getenv('REDIS_PASSWORD', 'market_data_redis_2024')
db: int = int(os.getenv('REDIS_DB', '0'))
max_connections: int = int(os.getenv('REDIS_MAX_CONNECTIONS', '50'))
socket_timeout: int = int(os.getenv('REDIS_SOCKET_TIMEOUT', '5'))
socket_connect_timeout: int = int(os.getenv('REDIS_CONNECT_TIMEOUT', '5'))
@dataclass
class ExchangeConfig:
"""Exchange configuration settings"""
exchanges: List[str] = field(default_factory=lambda: [
'binance', 'coinbase', 'kraken', 'bybit', 'okx',
'huobi', 'kucoin', 'gateio', 'bitfinex', 'mexc'
])
symbols: List[str] = field(default_factory=lambda: ['BTCUSDT', 'ETHUSDT'])
max_connections_per_exchange: int = int(os.getenv('MAX_CONNECTIONS_PER_EXCHANGE', '5'))
reconnect_delay: int = int(os.getenv('RECONNECT_DELAY', '5'))
max_reconnect_attempts: int = int(os.getenv('MAX_RECONNECT_ATTEMPTS', '10'))
heartbeat_interval: int = int(os.getenv('HEARTBEAT_INTERVAL', '30'))
@dataclass
class AggregationConfig:
"""Data aggregation configuration"""
bucket_size: float = float(os.getenv('BUCKET_SIZE', '1.0')) # $1 USD buckets for all symbols
heatmap_depth: int = int(os.getenv('HEATMAP_DEPTH', '50')) # Number of price levels
update_frequency: float = float(os.getenv('UPDATE_FREQUENCY', '0.5')) # Seconds
volume_threshold: float = float(os.getenv('VOLUME_THRESHOLD', '0.01')) # Minimum volume
@dataclass
class PerformanceConfig:
"""Performance and optimization settings"""
data_buffer_size: int = int(os.getenv('DATA_BUFFER_SIZE', '10000'))
batch_write_size: int = int(os.getenv('BATCH_WRITE_SIZE', '1000'))
max_memory_usage: int = int(os.getenv('MAX_MEMORY_USAGE', '2048')) # MB
gc_threshold: float = float(os.getenv('GC_THRESHOLD', '0.8')) # 80% of max memory
processing_timeout: int = int(os.getenv('PROCESSING_TIMEOUT', '10')) # Seconds
max_queue_size: int = int(os.getenv('MAX_QUEUE_SIZE', '50000'))
@dataclass
class APIConfig:
"""API server configuration"""
host: str = os.getenv('API_HOST', '0.0.0.0')
port: int = int(os.getenv('API_PORT', '8080'))
websocket_port: int = int(os.getenv('WS_PORT', '8081'))
cors_origins: List[str] = field(default_factory=lambda: ['*'])
rate_limit: int = int(os.getenv('RATE_LIMIT', '100')) # Requests per minute
max_connections: int = int(os.getenv('MAX_WS_CONNECTIONS', '1000'))
@dataclass
class LoggingConfig:
"""Logging configuration"""
level: str = os.getenv('LOG_LEVEL', 'INFO')
format: str = os.getenv('LOG_FORMAT', '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_path: str = os.getenv('LOG_FILE', 'logs/coby.log')
max_file_size: int = int(os.getenv('LOG_MAX_SIZE', '100')) # MB
backup_count: int = int(os.getenv('LOG_BACKUP_COUNT', '5'))
enable_correlation_id: bool = os.getenv('ENABLE_CORRELATION_ID', 'true').lower() == 'true'
@dataclass
class Config:
"""Main configuration class"""
database: DatabaseConfig = field(default_factory=DatabaseConfig)
redis: RedisConfig = field(default_factory=RedisConfig)
exchanges: ExchangeConfig = field(default_factory=ExchangeConfig)
aggregation: AggregationConfig = field(default_factory=AggregationConfig)
performance: PerformanceConfig = field(default_factory=PerformanceConfig)
api: APIConfig = field(default_factory=APIConfig)
logging: LoggingConfig = field(default_factory=LoggingConfig)
# Environment
environment: str = os.getenv('ENVIRONMENT', 'development')
debug: bool = os.getenv('DEBUG', 'false').lower() == 'true'
def __post_init__(self):
"""Post-initialization validation and setup"""
# Create logs directory if it doesn't exist
log_dir = Path(self.logging.file_path).parent
log_dir.mkdir(parents=True, exist_ok=True)
# Validate bucket size (AggregationConfig defines a single universal bucket)
if self.aggregation.bucket_size <= 0:
raise ValueError("Bucket size must be positive")
def get_bucket_size(self, symbol: str = None) -> float:
"""Get bucket size (now universal $1 for all symbols)"""
return self.aggregation.bucket_size
def get_database_url(self) -> str:
"""Get database connection URL"""
return (f"postgresql://{self.database.user}:{self.database.password}"
f"@{self.database.host}:{self.database.port}/{self.database.name}")
def get_redis_url(self) -> str:
"""Get Redis connection URL"""
auth = f":{self.redis.password}@" if self.redis.password else ""
return f"redis://{auth}{self.redis.host}:{self.redis.port}/{self.redis.db}"
def to_dict(self) -> Dict[str, Any]:
"""Convert configuration to dictionary"""
return {
'database': {
'host': self.database.host,
'port': self.database.port,
'name': self.database.name,
'schema': self.database.schema,
},
'redis': {
'host': self.redis.host,
'port': self.redis.port,
'db': self.redis.db,
},
'exchanges': {
'count': len(self.exchanges.exchanges),
'symbols': self.exchanges.symbols,
},
'aggregation': {
'bucket_size': self.aggregation.bucket_size,
'heatmap_depth': self.aggregation.heatmap_depth,
},
'api': {
'host': self.api.host,
'port': self.api.port,
'websocket_port': self.api.websocket_port,
},
'environment': self.environment,
'debug': self.debug,
}
# Global configuration instance
config = Config()
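The derived helpers can be exercised directly; every value falls back to the defaults above unless overridden via environment variables:

```python
# Sketch of consuming the global config object (passwords elided).
print(config.get_database_url())          # postgresql://market_user:...@192.168.0.10:5432/market_data
print(config.get_redis_url())             # redis://:...@192.168.0.10:6379/0
print(config.get_bucket_size('ETHUSDT'))  # 1.0 -- universal $1 bucket
print(config.to_dict()['aggregation'])    # {'bucket_size': 1.0, 'heatmap_depth': 50}
```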

@@ -0,0 +1,13 @@
"""
Exchange connector implementations for the COBY system.
"""
from .base_connector import BaseExchangeConnector
from .connection_manager import ConnectionManager
from .circuit_breaker import CircuitBreaker
__all__ = [
'BaseExchangeConnector',
'ConnectionManager',
'CircuitBreaker'
]

@@ -0,0 +1,383 @@
"""
Base exchange connector implementation with connection management and error handling.
"""
import asyncio
import json
import websockets
from typing import Dict, List, Optional, Callable, Any
from datetime import datetime, timezone
from ..interfaces.exchange_connector import ExchangeConnector
from ..models.core import ConnectionStatus, OrderBookSnapshot, TradeEvent
from ..utils.logging import get_logger, set_correlation_id
from ..utils.exceptions import ConnectionError, ValidationError
from ..utils.timing import get_current_timestamp
from .connection_manager import ConnectionManager
from .circuit_breaker import CircuitBreaker, CircuitBreakerOpenError
logger = get_logger(__name__)
class BaseExchangeConnector(ExchangeConnector):
"""
Base implementation of exchange connector with common functionality.
Provides:
- WebSocket connection management
- Exponential backoff retry logic
- Circuit breaker pattern
- Health monitoring
- Message handling framework
- Subscription management
"""
def __init__(self, exchange_name: str, websocket_url: str):
"""
Initialize base exchange connector.
Args:
exchange_name: Name of the exchange
websocket_url: WebSocket URL for the exchange
"""
super().__init__(exchange_name)
self.websocket_url = websocket_url
self.websocket: Optional[websockets.WebSocketClientProtocol] = None  # client-side connection
self.subscriptions: Dict[str, List[str]] = {} # symbol -> [subscription_types]
self.message_handlers: Dict[str, Callable] = {}
# Connection management
self.connection_manager = ConnectionManager(
name=f"{exchange_name}_connector",
max_retries=10,
initial_delay=1.0,
max_delay=300.0,
health_check_interval=30
)
# Circuit breaker
self.circuit_breaker = CircuitBreaker(
failure_threshold=5,
recovery_timeout=60,
expected_exception=Exception,
name=f"{exchange_name}_circuit"
)
# Statistics
self.message_count = 0
self.error_count = 0
self.last_message_time: Optional[datetime] = None
# Setup callbacks
self.connection_manager.on_connect = self._on_connect
self.connection_manager.on_disconnect = self._on_disconnect
self.connection_manager.on_error = self._on_error
self.connection_manager.on_health_check = self._health_check
# Message processing
self._message_queue = asyncio.Queue(maxsize=10000)
self._message_processor_task: Optional[asyncio.Task] = None
logger.info(f"Base connector initialized for {exchange_name}")
async def connect(self) -> bool:
"""Establish connection to the exchange WebSocket"""
try:
set_correlation_id()
logger.info(f"Connecting to {self.exchange_name} at {self.websocket_url}")
return await self.connection_manager.connect(self._establish_websocket_connection)
except Exception as e:
logger.error(f"Failed to connect to {self.exchange_name}: {e}")
self._notify_status_callbacks(ConnectionStatus.ERROR)
return False
async def disconnect(self) -> None:
"""Disconnect from the exchange WebSocket"""
try:
set_correlation_id()
logger.info(f"Disconnecting from {self.exchange_name}")
await self.connection_manager.disconnect(self._close_websocket_connection)
except Exception as e:
logger.error(f"Error during disconnect from {self.exchange_name}: {e}")
async def _establish_websocket_connection(self) -> None:
"""Establish WebSocket connection"""
try:
# Use circuit breaker for connection
self.websocket = await self.circuit_breaker.call_async(
websockets.connect,
self.websocket_url,
ping_interval=20,
ping_timeout=10,
close_timeout=10
)
logger.info(f"WebSocket connected to {self.exchange_name}")
# Start message processing
await self._start_message_processing()
except CircuitBreakerOpenError as e:
logger.error(f"Circuit breaker open for {self.exchange_name}: {e}")
raise ConnectionError(f"Circuit breaker open: {e}", "CIRCUIT_BREAKER_OPEN")
except Exception as e:
logger.error(f"WebSocket connection failed for {self.exchange_name}: {e}")
raise ConnectionError(f"WebSocket connection failed: {e}", "WEBSOCKET_CONNECT_FAILED")
async def _close_websocket_connection(self) -> None:
"""Close WebSocket connection"""
try:
# Stop message processing
await self._stop_message_processing()
# Close WebSocket
if self.websocket:
await self.websocket.close()
self.websocket = None
logger.info(f"WebSocket disconnected from {self.exchange_name}")
except Exception as e:
logger.warning(f"Error closing WebSocket for {self.exchange_name}: {e}")
async def _start_message_processing(self) -> None:
"""Start message processing tasks"""
if self._message_processor_task:
return
# Start message processor
self._message_processor_task = asyncio.create_task(self._message_processor())
# Start message receiver
asyncio.create_task(self._message_receiver())
logger.debug(f"Message processing started for {self.exchange_name}")
async def _stop_message_processing(self) -> None:
"""Stop message processing tasks"""
if self._message_processor_task:
self._message_processor_task.cancel()
try:
await self._message_processor_task
except asyncio.CancelledError:
pass
self._message_processor_task = None
logger.debug(f"Message processing stopped for {self.exchange_name}")
async def _message_receiver(self) -> None:
"""Receive messages from WebSocket"""
try:
while self.websocket and not self.websocket.closed:
try:
message = await asyncio.wait_for(self.websocket.recv(), timeout=30.0)
# Queue message for processing
try:
self._message_queue.put_nowait(message)
except asyncio.QueueFull:
logger.warning(f"Message queue full for {self.exchange_name}, dropping message")
except asyncio.TimeoutError:
# Send ping to keep connection alive
if self.websocket:
await self.websocket.ping()
except websockets.exceptions.ConnectionClosed:
logger.warning(f"WebSocket connection closed for {self.exchange_name}")
break
except Exception as e:
logger.error(f"Error receiving message from {self.exchange_name}: {e}")
self.error_count += 1
break
except Exception as e:
logger.error(f"Message receiver error for {self.exchange_name}: {e}")
finally:
# Mark as disconnected
self.connection_manager.is_connected = False
async def _message_processor(self) -> None:
"""Process messages from the queue"""
while True:
try:
# Get message from queue
message = await self._message_queue.get()
# Process message
await self._process_message(message)
# Update statistics
self.message_count += 1
self.last_message_time = get_current_timestamp()
# Mark task as done
self._message_queue.task_done()
except asyncio.CancelledError:
break
except Exception as e:
logger.error(f"Error processing message for {self.exchange_name}: {e}")
self.error_count += 1
async def _process_message(self, message: str) -> None:
"""
Process incoming WebSocket message.
Args:
message: Raw message string
"""
try:
# Parse JSON message
data = json.loads(message)
# Determine message type and route to appropriate handler
message_type = self._get_message_type(data)
if message_type in self.message_handlers:
await self.message_handlers[message_type](data)
else:
logger.debug(f"Unhandled message type '{message_type}' from {self.exchange_name}")
except json.JSONDecodeError as e:
logger.warning(f"Invalid JSON message from {self.exchange_name}: {e}")
except Exception as e:
logger.error(f"Error processing message from {self.exchange_name}: {e}")
def _get_message_type(self, data: Dict) -> str:
"""
Determine message type from message data.
Override in subclasses for exchange-specific logic.
Args:
data: Parsed message data
Returns:
str: Message type identifier
"""
# Default implementation - override in subclasses
return data.get('type', 'unknown')
async def _send_message(self, message: Dict) -> bool:
"""
Send message to WebSocket.
Args:
message: Message to send
Returns:
bool: True if sent successfully, False otherwise
"""
try:
if not self.websocket or self.websocket.closed:
logger.warning(f"Cannot send message to {self.exchange_name}: not connected")
return False
message_str = json.dumps(message)
await self.websocket.send(message_str)
logger.debug(f"Sent message to {self.exchange_name}: {message_str[:100]}...")
return True
except Exception as e:
logger.error(f"Error sending message to {self.exchange_name}: {e}")
return False
# Callback handlers
async def _on_connect(self) -> None:
"""Handle successful connection"""
self._notify_status_callbacks(ConnectionStatus.CONNECTED)
# Resubscribe to all previous subscriptions
await self._resubscribe_all()
async def _on_disconnect(self) -> None:
"""Handle disconnection"""
self._notify_status_callbacks(ConnectionStatus.DISCONNECTED)
async def _on_error(self, error: Exception) -> None:
"""Handle connection error"""
logger.error(f"Connection error for {self.exchange_name}: {error}")
self._notify_status_callbacks(ConnectionStatus.ERROR)
async def _health_check(self) -> bool:
"""Perform health check"""
try:
if not self.websocket or self.websocket.closed:
return False
# Check if we've received messages recently
if self.last_message_time:
time_since_last_message = (get_current_timestamp() - self.last_message_time).total_seconds()
if time_since_last_message > 60: # No messages for 60 seconds
logger.warning(f"No messages received from {self.exchange_name} for {time_since_last_message}s")
return False
# Send ping
await self.websocket.ping()
return True
except Exception as e:
logger.error(f"Health check failed for {self.exchange_name}: {e}")
return False
async def _resubscribe_all(self) -> None:
"""Resubscribe to all previous subscriptions after reconnection"""
for symbol, subscription_types in self.subscriptions.items():
for sub_type in subscription_types:
try:
if sub_type == 'orderbook':
await self.subscribe_orderbook(symbol)
elif sub_type == 'trades':
await self.subscribe_trades(symbol)
except Exception as e:
logger.error(f"Failed to resubscribe to {sub_type} for {symbol}: {e}")
# Abstract methods that must be implemented by subclasses
async def subscribe_orderbook(self, symbol: str) -> None:
"""Subscribe to order book updates - must be implemented by subclasses"""
raise NotImplementedError("Subclasses must implement subscribe_orderbook")
async def subscribe_trades(self, symbol: str) -> None:
"""Subscribe to trade updates - must be implemented by subclasses"""
raise NotImplementedError("Subclasses must implement subscribe_trades")
async def unsubscribe_orderbook(self, symbol: str) -> None:
"""Unsubscribe from order book updates - must be implemented by subclasses"""
raise NotImplementedError("Subclasses must implement unsubscribe_orderbook")
async def unsubscribe_trades(self, symbol: str) -> None:
"""Unsubscribe from trade updates - must be implemented by subclasses"""
raise NotImplementedError("Subclasses must implement unsubscribe_trades")
async def get_symbols(self) -> List[str]:
"""Get available symbols - must be implemented by subclasses"""
raise NotImplementedError("Subclasses must implement get_symbols")
def normalize_symbol(self, symbol: str) -> str:
"""Normalize symbol format - must be implemented by subclasses"""
raise NotImplementedError("Subclasses must implement normalize_symbol")
async def get_orderbook_snapshot(self, symbol: str, depth: int = 20) -> Optional[OrderBookSnapshot]:
"""Get order book snapshot - must be implemented by subclasses"""
raise NotImplementedError("Subclasses must implement get_orderbook_snapshot")
# Utility methods
def get_stats(self) -> Dict[str, Any]:
"""Get connector statistics"""
return {
'exchange': self.exchange_name,
'connection_status': self.get_connection_status().value,
'is_connected': self.is_connected,
'message_count': self.message_count,
'error_count': self.error_count,
'last_message_time': self.last_message_time.isoformat() if self.last_message_time else None,
'subscriptions': dict(self.subscriptions),
'connection_manager': self.connection_manager.get_stats(),
'circuit_breaker': self.circuit_breaker.get_stats(),
'queue_size': self._message_queue.qsize()
}

@@ -0,0 +1,489 @@
"""
Binance exchange connector implementation.
"""
import json
from typing import Dict, List, Optional, Any
from datetime import datetime, timezone
from ..models.core import OrderBookSnapshot, TradeEvent, PriceLevel
from ..utils.logging import get_logger, set_correlation_id
from ..utils.exceptions import ValidationError
from ..utils.validation import validate_symbol, validate_price, validate_volume
from .base_connector import BaseExchangeConnector
logger = get_logger(__name__)
class BinanceConnector(BaseExchangeConnector):
"""
Binance WebSocket connector implementation.
Supports:
- Order book depth streams
- Trade streams
- Symbol normalization
- Real-time data processing
"""
# Binance WebSocket URLs
WEBSOCKET_URL = "wss://stream.binance.com:9443/ws"
API_URL = "https://api.binance.com/api/v3"
def __init__(self):
"""Initialize Binance connector"""
super().__init__("binance", self.WEBSOCKET_URL)
# Binance-specific message handlers
self.message_handlers.update({
'depthUpdate': self._handle_orderbook_update,
'trade': self._handle_trade_update,
'error': self._handle_error_message
})
# Stream management
self.active_streams: List[str] = []
self.stream_id = 1
logger.info("Binance connector initialized")
def _get_message_type(self, data: Dict) -> str:
"""
Determine message type from Binance message data.
Args:
data: Parsed message data
Returns:
str: Message type identifier
"""
# Binance uses 'e' field for event type
if 'e' in data:
return data['e']
# Handle error messages
if 'error' in data:
return 'error'
# Handle subscription confirmations
if 'result' in data and 'id' in data:
return 'subscription_response'
return 'unknown'
def normalize_symbol(self, symbol: str) -> str:
"""
Normalize symbol to Binance format.
Args:
symbol: Standard symbol format (e.g., 'BTCUSDT')
Returns:
str: Binance symbol format (e.g., 'BTCUSDT')
"""
# Binance uses uppercase symbols without separators
normalized = symbol.upper().replace('-', '').replace('/', '')
# Validate symbol format
if not validate_symbol(normalized):
raise ValidationError(f"Invalid symbol format: {symbol}", "INVALID_SYMBOL")
return normalized
async def subscribe_orderbook(self, symbol: str) -> None:
"""
Subscribe to order book depth updates for a symbol.
Args:
symbol: Trading symbol (e.g., 'BTCUSDT')
"""
try:
set_correlation_id()
normalized_symbol = self.normalize_symbol(symbol)
stream_name = f"{normalized_symbol.lower()}@depth@100ms"
# Create subscription message
subscription_msg = {
"method": "SUBSCRIBE",
"params": [stream_name],
"id": self.stream_id
}
# Send subscription
success = await self._send_message(subscription_msg)
if success:
# Track subscription
if symbol not in self.subscriptions:
self.subscriptions[symbol] = []
if 'orderbook' not in self.subscriptions[symbol]:
self.subscriptions[symbol].append('orderbook')
self.active_streams.append(stream_name)
self.stream_id += 1
logger.info(f"Subscribed to order book for {symbol} on Binance")
else:
logger.error(f"Failed to subscribe to order book for {symbol} on Binance")
except Exception as e:
logger.error(f"Error subscribing to order book for {symbol}: {e}")
raise
async def subscribe_trades(self, symbol: str) -> None:
"""
Subscribe to trade updates for a symbol.
Args:
symbol: Trading symbol (e.g., 'BTCUSDT')
"""
try:
set_correlation_id()
normalized_symbol = self.normalize_symbol(symbol)
stream_name = f"{normalized_symbol.lower()}@trade"
# Create subscription message
subscription_msg = {
"method": "SUBSCRIBE",
"params": [stream_name],
"id": self.stream_id
}
# Send subscription
success = await self._send_message(subscription_msg)
if success:
# Track subscription
if symbol not in self.subscriptions:
self.subscriptions[symbol] = []
if 'trades' not in self.subscriptions[symbol]:
self.subscriptions[symbol].append('trades')
self.active_streams.append(stream_name)
self.stream_id += 1
logger.info(f"Subscribed to trades for {symbol} on Binance")
else:
logger.error(f"Failed to subscribe to trades for {symbol} on Binance")
except Exception as e:
logger.error(f"Error subscribing to trades for {symbol}: {e}")
raise
async def unsubscribe_orderbook(self, symbol: str) -> None:
"""
Unsubscribe from order book updates for a symbol.
Args:
symbol: Trading symbol (e.g., 'BTCUSDT')
"""
try:
normalized_symbol = self.normalize_symbol(symbol)
stream_name = f"{normalized_symbol.lower()}@depth@100ms"
# Create unsubscription message
unsubscription_msg = {
"method": "UNSUBSCRIBE",
"params": [stream_name],
"id": self.stream_id
}
# Send unsubscription
success = await self._send_message(unsubscription_msg)
if success:
# Remove from tracking
if symbol in self.subscriptions and 'orderbook' in self.subscriptions[symbol]:
self.subscriptions[symbol].remove('orderbook')
if not self.subscriptions[symbol]:
del self.subscriptions[symbol]
if stream_name in self.active_streams:
self.active_streams.remove(stream_name)
self.stream_id += 1
logger.info(f"Unsubscribed from order book for {symbol} on Binance")
else:
logger.error(f"Failed to unsubscribe from order book for {symbol} on Binance")
except Exception as e:
logger.error(f"Error unsubscribing from order book for {symbol}: {e}")
raise
async def unsubscribe_trades(self, symbol: str) -> None:
"""
Unsubscribe from trade updates for a symbol.
Args:
symbol: Trading symbol (e.g., 'BTCUSDT')
"""
try:
normalized_symbol = self.normalize_symbol(symbol)
stream_name = f"{normalized_symbol.lower()}@trade"
# Create unsubscription message
unsubscription_msg = {
"method": "UNSUBSCRIBE",
"params": [stream_name],
"id": self.stream_id
}
# Send unsubscription
success = await self._send_message(unsubscription_msg)
if success:
# Remove from tracking
if symbol in self.subscriptions and 'trades' in self.subscriptions[symbol]:
self.subscriptions[symbol].remove('trades')
if not self.subscriptions[symbol]:
del self.subscriptions[symbol]
if stream_name in self.active_streams:
self.active_streams.remove(stream_name)
self.stream_id += 1
logger.info(f"Unsubscribed from trades for {symbol} on Binance")
else:
logger.error(f"Failed to unsubscribe from trades for {symbol} on Binance")
except Exception as e:
logger.error(f"Error unsubscribing from trades for {symbol}: {e}")
raise
async def get_symbols(self) -> List[str]:
"""
Get list of available trading symbols from Binance.
Returns:
List[str]: List of available symbols
"""
try:
import aiohttp
async with aiohttp.ClientSession() as session:
async with session.get(f"{self.API_URL}/exchangeInfo") as response:
if response.status == 200:
data = await response.json()
symbols = [
symbol_info['symbol']
for symbol_info in data.get('symbols', [])
if symbol_info.get('status') == 'TRADING'
]
logger.info(f"Retrieved {len(symbols)} symbols from Binance")
return symbols
else:
logger.error(f"Failed to get symbols from Binance: HTTP {response.status}")
return []
except Exception as e:
logger.error(f"Error getting symbols from Binance: {e}")
return []
async def get_orderbook_snapshot(self, symbol: str, depth: int = 20) -> Optional[OrderBookSnapshot]:
"""
Get current order book snapshot from Binance REST API.
Args:
symbol: Trading symbol
depth: Number of price levels to retrieve
Returns:
OrderBookSnapshot: Current order book or None if unavailable
"""
try:
import aiohttp
normalized_symbol = self.normalize_symbol(symbol)
# Binance supports depths: 5, 10, 20, 50, 100, 500, 1000, 5000
valid_depths = [5, 10, 20, 50, 100, 500, 1000, 5000]
api_depth = min(valid_depths, key=lambda x: abs(x - depth))
url = f"{self.API_URL}/depth"
params = {
'symbol': normalized_symbol,
'limit': api_depth
}
async with aiohttp.ClientSession() as session:
async with session.get(url, params=params) as response:
if response.status == 200:
data = await response.json()
return self._parse_orderbook_snapshot(data, symbol)
else:
logger.error(f"Failed to get order book for {symbol}: HTTP {response.status}")
return None
except Exception as e:
logger.error(f"Error getting order book snapshot for {symbol}: {e}")
return None
def _parse_orderbook_snapshot(self, data: Dict, symbol: str) -> OrderBookSnapshot:
"""
Parse Binance order book data into OrderBookSnapshot.
Args:
data: Raw Binance order book data
symbol: Trading symbol
Returns:
OrderBookSnapshot: Parsed order book
"""
try:
# Parse bids and asks
bids = []
for bid_data in data.get('bids', []):
price = float(bid_data[0])
size = float(bid_data[1])
if validate_price(price) and validate_volume(size):
bids.append(PriceLevel(price=price, size=size))
asks = []
for ask_data in data.get('asks', []):
price = float(ask_data[0])
size = float(ask_data[1])
if validate_price(price) and validate_volume(size):
asks.append(PriceLevel(price=price, size=size))
# Create order book snapshot
orderbook = OrderBookSnapshot(
symbol=symbol,
exchange=self.exchange_name,
timestamp=datetime.now(timezone.utc),
bids=bids,
asks=asks,
sequence_id=data.get('lastUpdateId')
)
return orderbook
except Exception as e:
logger.error(f"Error parsing order book snapshot: {e}")
raise ValidationError(f"Invalid order book data: {e}", "PARSE_ERROR")
async def _handle_orderbook_update(self, data: Dict) -> None:
"""
Handle order book depth update from Binance.
Args:
data: Order book update data
"""
try:
set_correlation_id()
# Extract symbol from the event payload ('s' field)
symbol = data.get('s', '').upper()
if not symbol:
logger.warning("Order book update missing symbol")
return
# Parse bids and asks
bids = []
for bid_data in data.get('b', []):
price = float(bid_data[0])
size = float(bid_data[1])
if validate_price(price) and validate_volume(size):
bids.append(PriceLevel(price=price, size=size))
asks = []
for ask_data in data.get('a', []):
price = float(ask_data[0])
size = float(ask_data[1])
if validate_price(price) and validate_volume(size):
asks.append(PriceLevel(price=price, size=size))
# Create order book snapshot
orderbook = OrderBookSnapshot(
symbol=symbol,
exchange=self.exchange_name,
timestamp=datetime.fromtimestamp(data.get('E', 0) / 1000, tz=timezone.utc),
bids=bids,
asks=asks,
sequence_id=data.get('u') # Final update ID
)
# Notify callbacks
self._notify_data_callbacks(orderbook)
logger.debug(f"Processed order book update for {stream}")
except Exception as e:
logger.error(f"Error handling order book update: {e}")
async def _handle_trade_update(self, data: Dict) -> None:
"""
Handle trade update from Binance.
Args:
data: Trade update data
"""
try:
set_correlation_id()
# Extract trade data
symbol = data.get('s', '').upper()
if not symbol:
logger.warning("Trade update missing symbol")
return
price = float(data.get('p', 0))
size = float(data.get('q', 0))
# Validate data
if not validate_price(price) or not validate_volume(size):
logger.warning(f"Invalid trade data: price={price}, size={size}")
return
# Determine side (Binance uses 'm' field - true if buyer is market maker)
is_buyer_maker = data.get('m', False)
side = 'sell' if is_buyer_maker else 'buy'
# Create trade event
trade = TradeEvent(
symbol=symbol,
exchange=self.exchange_name,
timestamp=datetime.fromtimestamp(data.get('T', 0) / 1000, tz=timezone.utc),
price=price,
size=size,
side=side,
trade_id=str(data.get('t', ''))
)
# Notify callbacks
self._notify_data_callbacks(trade)
logger.debug(f"Processed trade for {symbol}: {side} {size} @ {price}")
except Exception as e:
logger.error(f"Error handling trade update: {e}")
async def _handle_error_message(self, data: Dict) -> None:
"""
Handle error message from Binance.
Args:
data: Error message data
"""
error_code = data.get('code', 'unknown')
error_msg = data.get('msg', 'Unknown error')
logger.error(f"Binance error {error_code}: {error_msg}")
# Handle specific error codes
if error_code == -1121: # Invalid symbol
logger.error("Invalid symbol error - check symbol format")
elif error_code == -1130: # Invalid listen key
logger.error("Invalid listen key - may need to reconnect")
def get_binance_stats(self) -> Dict[str, Any]:
"""Get Binance-specific statistics"""
base_stats = self.get_stats()
binance_stats = {
'active_streams': len(self.active_streams),
'stream_list': self.active_streams.copy(),
'next_stream_id': self.stream_id
}
base_stats.update(binance_stats)
return base_stats
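A hedged wiring sketch for the connector; the data-callback registration hook is assumed to exist on the `ExchangeConnector` interface as the counterpart of `_notify_data_callbacks` (its exact name is not shown in this diff):

```python
# Assumes an add_data_callback registration hook on the ExchangeConnector
# interface (name assumed; only _notify_data_callbacks is visible above).
import asyncio

async def run_binance():
    connector = BinanceConnector()

    def on_market_data(event):
        # OrderBookSnapshot or TradeEvent instances arrive here
        print(type(event).__name__, getattr(event, 'symbol', '?'))

    connector.add_data_callback(on_market_data)   # assumed registration hook
    if await connector.connect():
        await connector.subscribe_orderbook('BTCUSDT')
        await connector.subscribe_trades('BTCUSDT')
        await asyncio.sleep(10)
        print(connector.get_binance_stats())
        await connector.disconnect()

asyncio.run(run_binance())
```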

@@ -0,0 +1,206 @@
"""
Circuit breaker pattern implementation for exchange connections.
"""
import time
from enum import Enum
from typing import Optional, Callable, Any
from ..utils.logging import get_logger
logger = get_logger(__name__)
class CircuitState(Enum):
"""Circuit breaker states"""
CLOSED = "closed" # Normal operation
OPEN = "open" # Circuit is open, calls fail fast
HALF_OPEN = "half_open" # Testing if service is back
class CircuitBreaker:
"""
Circuit breaker to prevent cascading failures in exchange connections.
States:
- CLOSED: Normal operation, requests pass through
- OPEN: Circuit is open, requests fail immediately
- HALF_OPEN: Testing if service is back, limited requests allowed
"""
def __init__(
self,
failure_threshold: int = 5,
recovery_timeout: int = 60,
expected_exception: type = Exception,
name: str = "CircuitBreaker"
):
"""
Initialize circuit breaker.
Args:
failure_threshold: Number of failures before opening circuit
recovery_timeout: Time in seconds before attempting recovery
expected_exception: Exception type that triggers circuit breaker
name: Name for logging purposes
"""
self.failure_threshold = failure_threshold
self.recovery_timeout = recovery_timeout
self.expected_exception = expected_exception
self.name = name
# State tracking
self._state = CircuitState.CLOSED
self._failure_count = 0
self._last_failure_time: Optional[float] = None
self._next_attempt_time: Optional[float] = None
logger.info(f"Circuit breaker '{name}' initialized with threshold={failure_threshold}")
@property
def state(self) -> CircuitState:
"""Get current circuit state"""
return self._state
@property
def failure_count(self) -> int:
"""Get current failure count"""
return self._failure_count
def _should_attempt_reset(self) -> bool:
"""Check if we should attempt to reset the circuit"""
if self._state != CircuitState.OPEN:
return False
if self._next_attempt_time is None:
return False
return time.time() >= self._next_attempt_time
def _on_success(self) -> None:
"""Handle successful operation"""
if self._state == CircuitState.HALF_OPEN:
logger.info(f"Circuit breaker '{self.name}' reset to CLOSED after successful test")
self._state = CircuitState.CLOSED
self._failure_count = 0
self._last_failure_time = None
self._next_attempt_time = None
def _on_failure(self) -> None:
"""Handle failed operation"""
self._failure_count += 1
self._last_failure_time = time.time()
if self._state == CircuitState.HALF_OPEN:
# Failed during test, go back to OPEN
logger.warning(f"Circuit breaker '{self.name}' failed during test, returning to OPEN")
self._state = CircuitState.OPEN
self._next_attempt_time = time.time() + self.recovery_timeout
elif self._failure_count >= self.failure_threshold:
# Too many failures, open the circuit
logger.error(
f"Circuit breaker '{self.name}' OPENED after {self._failure_count} failures"
)
self._state = CircuitState.OPEN
self._next_attempt_time = time.time() + self.recovery_timeout
def call(self, func: Callable, *args, **kwargs) -> Any:
"""
Execute function with circuit breaker protection.
Args:
func: Function to execute
*args: Function arguments
**kwargs: Function keyword arguments
Returns:
Function result
Raises:
CircuitBreakerOpenError: When circuit is open
Original exception: When function fails
"""
# Check if we should attempt reset
if self._should_attempt_reset():
logger.info(f"Circuit breaker '{self.name}' attempting reset to HALF_OPEN")
self._state = CircuitState.HALF_OPEN
# Fail fast if circuit is open
if self._state == CircuitState.OPEN:
raise CircuitBreakerOpenError(
f"Circuit breaker '{self.name}' is OPEN. "
f"Next attempt in {self._next_attempt_time - time.time():.1f}s"
)
try:
# Execute the function
result = func(*args, **kwargs)
self._on_success()
return result
except self.expected_exception as e:
self._on_failure()
raise e
async def call_async(self, func: Callable, *args, **kwargs) -> Any:
"""
Execute async function with circuit breaker protection.
Args:
func: Async function to execute
*args: Function arguments
**kwargs: Function keyword arguments
Returns:
Function result
Raises:
CircuitBreakerOpenError: When circuit is open
Original exception: When function fails
"""
# Check if we should attempt reset
if self._should_attempt_reset():
logger.info(f"Circuit breaker '{self.name}' attempting reset to HALF_OPEN")
self._state = CircuitState.HALF_OPEN
# Fail fast if circuit is open
if self._state == CircuitState.OPEN:
raise CircuitBreakerOpenError(
f"Circuit breaker '{self.name}' is OPEN. "
f"Next attempt in {self._next_attempt_time - time.time():.1f}s"
)
try:
# Execute the async function
result = await func(*args, **kwargs)
self._on_success()
return result
except self.expected_exception as e:
self._on_failure()
raise e
def reset(self) -> None:
"""Manually reset the circuit breaker"""
logger.info(f"Circuit breaker '{self.name}' manually reset")
self._state = CircuitState.CLOSED
self._failure_count = 0
self._last_failure_time = None
self._next_attempt_time = None
def get_stats(self) -> dict:
"""Get circuit breaker statistics"""
return {
'name': self.name,
'state': self._state.value,
'failure_count': self._failure_count,
'failure_threshold': self.failure_threshold,
'last_failure_time': self._last_failure_time,
'next_attempt_time': self._next_attempt_time,
'recovery_timeout': self.recovery_timeout
}
class CircuitBreakerOpenError(Exception):
"""Exception raised when circuit breaker is open"""
pass
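A self-contained sketch of the breaker around a flaky synchronous call; after three consecutive failures it fails fast until the recovery timeout elapses:

```python
# Standalone demonstration of the state machine above.
import random

breaker = CircuitBreaker(failure_threshold=3, recovery_timeout=5, name="demo")

def flaky_call():
    if random.random() < 0.7:
        raise RuntimeError("transient failure")
    return "ok"

for _ in range(10):
    try:
        print(breaker.call(flaky_call))
    except CircuitBreakerOpenError as exc:
        print(f"fast-fail: {exc}")
    except RuntimeError:
        print(f"failure recorded (count={breaker.failure_count})")
```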

@@ -0,0 +1,271 @@
"""
Connection management with exponential backoff and retry logic.
"""
import asyncio
import random
from typing import Optional, Callable, Any
from ..utils.logging import get_logger
from ..utils.exceptions import ConnectionError
logger = get_logger(__name__)
class ExponentialBackoff:
"""Exponential backoff strategy for connection retries"""
def __init__(
self,
initial_delay: float = 1.0,
max_delay: float = 300.0,
multiplier: float = 2.0,
jitter: bool = True
):
"""
Initialize exponential backoff.
Args:
initial_delay: Initial delay in seconds
max_delay: Maximum delay in seconds
multiplier: Backoff multiplier
jitter: Whether to add random jitter
"""
self.initial_delay = initial_delay
self.max_delay = max_delay
self.multiplier = multiplier
self.jitter = jitter
self.current_delay = initial_delay
self.attempt_count = 0
def get_delay(self) -> float:
"""Get next delay value"""
delay = min(self.current_delay, self.max_delay)
# Add jitter to prevent thundering herd
if self.jitter:
delay = delay * (0.5 + random.random() * 0.5)
# Update for next attempt
self.current_delay *= self.multiplier
self.attempt_count += 1
return delay
def reset(self) -> None:
"""Reset backoff to initial state"""
self.current_delay = self.initial_delay
self.attempt_count = 0
class ConnectionManager:
"""
Manages connection lifecycle with retry logic and health monitoring.
"""
def __init__(
self,
name: str,
max_retries: int = 10,
initial_delay: float = 1.0,
max_delay: float = 300.0,
health_check_interval: int = 30
):
"""
Initialize connection manager.
Args:
name: Connection name for logging
max_retries: Maximum number of retry attempts
initial_delay: Initial retry delay in seconds
max_delay: Maximum retry delay in seconds
health_check_interval: Health check interval in seconds
"""
self.name = name
self.max_retries = max_retries
self.health_check_interval = health_check_interval
self.backoff = ExponentialBackoff(initial_delay, max_delay)
self.is_connected = False
self.connection_attempts = 0
self.last_error: Optional[Exception] = None
self.health_check_task: Optional[asyncio.Task] = None
# Callbacks
self.on_connect: Optional[Callable] = None
self.on_disconnect: Optional[Callable] = None
self.on_error: Optional[Callable] = None
self.on_health_check: Optional[Callable] = None
logger.info(f"Connection manager '{name}' initialized")
async def connect(self, connect_func: Callable) -> bool:
"""
Attempt to establish connection with retry logic.
Args:
connect_func: Async function that establishes the connection
Returns:
bool: True if connection successful, False otherwise
"""
self.connection_attempts = 0
self.backoff.reset()
while self.connection_attempts < self.max_retries:
try:
logger.info(f"Attempting to connect '{self.name}' (attempt {self.connection_attempts + 1})")
# Attempt connection
await connect_func()
# Connection successful
self.is_connected = True
self.connection_attempts = 0
self.last_error = None
self.backoff.reset()
logger.info(f"Connection '{self.name}' established successfully")
# Start health check
await self._start_health_check()
# Notify success
if self.on_connect:
try:
await self.on_connect()
except Exception as e:
logger.warning(f"Error in connect callback: {e}")
return True
except Exception as e:
self.connection_attempts += 1
self.last_error = e
logger.warning(
f"Connection '{self.name}' failed (attempt {self.connection_attempts}): {e}"
)
# Notify error
if self.on_error:
try:
await self.on_error(e)
except Exception as callback_error:
logger.warning(f"Error in error callback: {callback_error}")
# Check if we should retry
if self.connection_attempts >= self.max_retries:
logger.error(f"Connection '{self.name}' failed after {self.max_retries} attempts")
break
# Wait before retry
delay = self.backoff.get_delay()
logger.info(f"Retrying connection '{self.name}' in {delay:.1f} seconds")
await asyncio.sleep(delay)
self.is_connected = False
return False
async def disconnect(self, disconnect_func: Optional[Callable] = None) -> None:
"""
Disconnect and cleanup.
Args:
disconnect_func: Optional async function to handle disconnection
"""
logger.info(f"Disconnecting '{self.name}'")
# Stop health check
await self._stop_health_check()
# Execute disconnect function
if disconnect_func:
try:
await disconnect_func()
except Exception as e:
logger.warning(f"Error during disconnect: {e}")
self.is_connected = False
# Notify disconnect
if self.on_disconnect:
try:
await self.on_disconnect()
except Exception as e:
logger.warning(f"Error in disconnect callback: {e}")
logger.info(f"Connection '{self.name}' disconnected")
async def reconnect(self, connect_func: Callable, disconnect_func: Optional[Callable] = None) -> bool:
"""
Reconnect by disconnecting first then connecting.
Args:
connect_func: Async function that establishes the connection
disconnect_func: Optional async function to handle disconnection
Returns:
bool: True if reconnection successful, False otherwise
"""
logger.info(f"Reconnecting '{self.name}'")
# Disconnect first
await self.disconnect(disconnect_func)
# Wait a bit before reconnecting
await asyncio.sleep(1.0)
# Attempt to connect
return await self.connect(connect_func)
async def _start_health_check(self) -> None:
"""Start periodic health check"""
if self.health_check_task:
return
self.health_check_task = asyncio.create_task(self._health_check_loop())
logger.debug(f"Health check started for '{self.name}'")
async def _stop_health_check(self) -> None:
"""Stop health check"""
if self.health_check_task:
self.health_check_task.cancel()
try:
await self.health_check_task
except asyncio.CancelledError:
pass
self.health_check_task = None
logger.debug(f"Health check stopped for '{self.name}'")
async def _health_check_loop(self) -> None:
"""Health check loop"""
while self.is_connected:
try:
await asyncio.sleep(self.health_check_interval)
if self.on_health_check:
is_healthy = await self.on_health_check()
if not is_healthy:
logger.warning(f"Health check failed for '{self.name}'")
self.is_connected = False
break
except asyncio.CancelledError:
break
except Exception as e:
logger.error(f"Health check error for '{self.name}': {e}")
self.is_connected = False
break
def get_stats(self) -> dict:
"""Get connection statistics"""
return {
'name': self.name,
'is_connected': self.is_connected,
'connection_attempts': self.connection_attempts,
'max_retries': self.max_retries,
'current_delay': self.backoff.current_delay,
'backoff_attempts': self.backoff.attempt_count,
'last_error': str(self.last_error) if self.last_error else None,
'health_check_active': self.health_check_task is not None
}
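A short sketch of driving the manager with caller-supplied connect and health-check coroutines:

```python
# Sketch only; fake_connect would normally open a real socket and may raise
# to exercise the exponential backoff.
import asyncio

async def demo_connection_manager():
    manager = ConnectionManager(name="demo", max_retries=3, initial_delay=0.5)

    async def fake_connect():
        print("establishing connection...")

    async def health_check():
        return True

    manager.on_health_check = health_check
    connected = await manager.connect(fake_connect)
    print("connected:", connected, manager.get_stats())
    await manager.disconnect()

asyncio.run(demo_connection_manager())
```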

COBY/docker/README.md
@@ -0,0 +1,273 @@
# Market Data Infrastructure Docker Setup
This directory contains Docker Compose configurations and scripts for deploying TimescaleDB and Redis infrastructure for the multi-exchange data aggregation system.
## 🏗️ Architecture
- **TimescaleDB**: Time-series database optimized for high-frequency market data
- **Redis**: High-performance caching layer for real-time data
- **Network**: Isolated Docker network for secure communication
## 📋 Prerequisites
- Docker Engine 20.10+
- Docker Compose 2.0+
- At least 4GB RAM available for containers
- 50GB+ disk space for data storage
## 🚀 Quick Start
1. **Copy environment file**:
```bash
cp .env.example .env
```
2. **Edit configuration** (update passwords and settings):
```bash
nano .env
```
3. **Deploy infrastructure**:
```bash
chmod +x deploy.sh
./deploy.sh
```
4. **Verify deployment**:
```bash
docker-compose -f timescaledb-compose.yml ps
```
## 📁 File Structure
```
docker/
├── timescaledb-compose.yml      # Main Docker Compose configuration
├── init-scripts/                # Database initialization scripts
│   └── 01-init-timescaledb.sql
├── redis.conf                   # Redis configuration
├── .env                         # Environment variables
├── deploy.sh                    # Deployment script
├── backup.sh                    # Backup script
├── restore.sh                   # Restore script
└── README.md                    # This file
```
## ⚙️ Configuration
### Environment Variables
Key variables in `.env`:
```bash
# Database credentials
POSTGRES_PASSWORD=your_secure_password
POSTGRES_USER=market_user
POSTGRES_DB=market_data
# Redis settings
REDIS_PASSWORD=your_redis_password
# Performance tuning
POSTGRES_SHARED_BUFFERS=256MB
POSTGRES_EFFECTIVE_CACHE_SIZE=1GB
REDIS_MAXMEMORY=2gb
```
### TimescaleDB Configuration
The database is pre-configured with:
- Optimized PostgreSQL settings for time-series data
- TimescaleDB extension enabled
- Hypertables for automatic partitioning
- Retention policies (90 days for raw data)
- Continuous aggregates for common queries
- Proper indexes for query performance
### Redis Configuration
Redis is configured for:
- High-frequency data caching
- Memory optimization (2GB limit)
- Persistence with AOF and RDB
- Optimized for order book data structures
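As a rough illustration of how the application side uses this cache, the sketch below writes a serialized order book snapshot into Redis with a short TTL using the `redis` Python client. The key layout (`orderbook:{exchange}:{symbol}`) and the 5-second TTL are assumptions for the example, not documented conventions.
```python
import json
import redis

r = redis.Redis(host="192.168.0.10", port=6379, password="your_redis_password")

snapshot = {
    "symbol": "BTCUSDT",
    "exchange": "binance",
    "bids": [[50000.0, 1.2], [49999.5, 0.8]],
    "asks": [[50000.5, 0.9], [50001.0, 2.1]],
}

# Keep only the freshest snapshot per exchange/symbol and let it expire quickly,
# so stale data is never served if the feed stalls.
r.setex("orderbook:binance:BTCUSDT", 5, json.dumps(snapshot))

cached = r.get("orderbook:binance:BTCUSDT")
if cached:
    print(json.loads(cached)["bids"][0])
```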
## 🔌 Connection Details
After deployment, connect using:
### TimescaleDB
```
Host: 192.168.0.10
Port: 5432
Database: market_data
Username: market_user
Password: (from .env file)
```
### Redis
```
Host: 192.168.0.10
Port: 6379
Password: (from .env file)
```
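A minimal connectivity check from Python (a sketch, assuming the `asyncpg` and `redis` packages are installed; the passwords are placeholders for the values in `.env`):
```python
import asyncio
import asyncpg
import redis.asyncio as aioredis

async def main():
    # TimescaleDB
    conn = await asyncpg.connect(
        host="192.168.0.10", port=5432,
        user="market_user", password="your_secure_password",
        database="market_data",
    )
    print(await conn.fetchval("SELECT version();"))
    await conn.close()

    # Redis
    r = aioredis.Redis(host="192.168.0.10", port=6379, password="your_redis_password")
    print(await r.ping())
    await r.close()

asyncio.run(main())
```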
## 🗄️ Database Schema
The system creates the following tables:
- `order_book_snapshots`: Real-time order book data
- `trade_events`: Individual trade events
- `heatmap_data`: Aggregated price bucket data
- `ohlcv_data`: OHLCV candlestick data
- `exchange_status`: Exchange connection monitoring
- `system_metrics`: System performance metrics
## 💾 Backup & Restore
### Create Backup
```bash
chmod +x backup.sh
./backup.sh
```
Backups are stored in `./backups/` with timestamp.
### Restore from Backup
```bash
chmod +x restore.sh
./restore.sh market_data_backup_YYYYMMDD_HHMMSS.tar.gz
```
### Automated Backups
Set up a cron job for regular backups:
```bash
# Daily backup at 2 AM
0 2 * * * /path/to/docker/backup.sh
```
## 📊 Monitoring
### Health Checks
Check service health:
```bash
# TimescaleDB
docker exec market_data_timescaledb pg_isready -U market_user -d market_data
# Redis
docker exec market_data_redis redis-cli -a your_password ping
```
### View Logs
```bash
# All services
docker-compose -f timescaledb-compose.yml logs -f
# Specific service
docker-compose -f timescaledb-compose.yml logs -f timescaledb
```
### Database Queries
Connect to TimescaleDB:
```bash
docker exec -it market_data_timescaledb psql -U market_user -d market_data
```
Example queries:
```sql
-- Check table sizes
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size
FROM pg_tables
WHERE schemaname = 'market_data';
-- Recent order book data
SELECT * FROM market_data.order_book_snapshots
ORDER BY timestamp DESC LIMIT 10;
-- Exchange status
SELECT * FROM market_data.exchange_status
ORDER BY timestamp DESC LIMIT 10;
```
## 🔧 Maintenance
### Update Images
```bash
docker-compose -f timescaledb-compose.yml pull
docker-compose -f timescaledb-compose.yml up -d
```
### Clean Up Old Data
```bash
# TimescaleDB has automatic retention policies
# Manual cleanup if needed:
docker exec market_data_timescaledb psql -U market_user -d market_data -c "
SELECT drop_chunks('market_data.order_book_snapshots', INTERVAL '30 days');
"
```
### Scale Resources
Edit `timescaledb-compose.yml` to adjust:
- Memory limits
- CPU limits
- Shared buffers
- Connection limits
## 🚨 Troubleshooting
### Common Issues
1. **Port conflicts**: Change ports in compose file if 5432/6379 are in use
2. **Memory issues**: Reduce shared_buffers and Redis maxmemory
3. **Disk space**: Monitor `/var/lib/docker/volumes/` usage
4. **Connection refused**: Check firewall settings and container status
### Performance Tuning
1. **TimescaleDB**:
- Adjust `shared_buffers` based on available RAM
- Tune `effective_cache_size` to 75% of system RAM
- Monitor query performance with `pg_stat_statements`
2. **Redis**:
- Adjust `maxmemory` based on data volume
- Monitor memory usage with `INFO memory`
- Use appropriate eviction policy
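Both checks can be scripted from Python. The sketch below is illustrative only; it assumes `pg_stat_statements` has been added to `shared_preload_libraries` and created as an extension, which the default compose command does not do.
```python
import asyncio
import asyncpg
import redis.asyncio as aioredis

async def check_performance():
    # Top 5 slowest statements (requires the pg_stat_statements extension)
    conn = await asyncpg.connect(host="192.168.0.10", user="market_user",
                                 password="your_secure_password", database="market_data")
    rows = await conn.fetch(
        "SELECT query, mean_exec_time FROM pg_stat_statements "
        "ORDER BY mean_exec_time DESC LIMIT 5"
    )
    for row in rows:
        print(f"{row['mean_exec_time']:.1f} ms  {row['query'][:80]}")
    await conn.close()

    # Redis memory usage
    r = aioredis.Redis(host="192.168.0.10", port=6379, password="your_redis_password")
    info = await r.info("memory")
    print("used_memory_human:", info["used_memory_human"])
    await r.close()

asyncio.run(check_performance())
```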
### Recovery Procedures
1. **Container failure**: `docker-compose restart <service>`
2. **Data corruption**: Restore from latest backup
3. **Network issues**: Check Docker network configuration
4. **Performance degradation**: Review logs and system metrics
## 🔐 Security
- Change default passwords in `.env`
- Use strong passwords (20+ characters)
- Restrict network access to trusted IPs
- Regular security updates
- Monitor access logs
- Enable SSL/TLS for production
## 📞 Support
For issues related to:
- TimescaleDB: Check [TimescaleDB docs](https://docs.timescale.com/)
- Redis: Check [Redis docs](https://redis.io/documentation)
- Docker: Check [Docker docs](https://docs.docker.com/)
## 🔄 Updates
This infrastructure supports:
- Rolling updates with zero downtime
- Blue-green deployments
- Automated failover
- Data migration scripts

108
COBY/docker/backup.sh Normal file

@ -0,0 +1,108 @@
#!/bin/bash
# Backup script for market data infrastructure
# Run this script regularly to backup your data
set -e
# Configuration
BACKUP_DIR="./backups"
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
RETENTION_DAYS=30
# Load environment variables
if [ -f .env ]; then
source .env
fi
echo "🗄️ Starting backup process..."
# Create backup directory if it doesn't exist
mkdir -p "$BACKUP_DIR"
# Backup TimescaleDB
echo "📊 Backing up TimescaleDB..."
docker exec market_data_timescaledb pg_dump \
-U market_user \
-d market_data \
--verbose \
--no-password \
--format=custom \
--compress=9 \
> "$BACKUP_DIR/timescaledb_backup_$TIMESTAMP.dump"
if [ $? -eq 0 ]; then
echo "✅ TimescaleDB backup completed: timescaledb_backup_$TIMESTAMP.dump"
else
echo "❌ TimescaleDB backup failed"
exit 1
fi
# Backup Redis
echo "📦 Backing up Redis..."
docker exec market_data_redis redis-cli \
    -a "$REDIS_PASSWORD" \
    --rdb "/data/redis_backup_$TIMESTAMP.rdb"
# --rdb performs a synchronous dump, so the file is complete once the command returns
# Copy Redis backup from container
docker cp market_data_redis:/data/redis_backup_$TIMESTAMP.rdb "$BACKUP_DIR/"
if [ $? -eq 0 ]; then
echo "✅ Redis backup completed: redis_backup_$TIMESTAMP.rdb"
else
echo "❌ Redis backup failed"
exit 1
fi
# Create backup metadata
cat > "$BACKUP_DIR/backup_$TIMESTAMP.info" << EOF
Backup Information
==================
Timestamp: $TIMESTAMP
Date: $(date)
TimescaleDB Backup: timescaledb_backup_$TIMESTAMP.dump
Redis Backup: redis_backup_$TIMESTAMP.rdb
Container Versions:
TimescaleDB: $(docker exec market_data_timescaledb psql -U market_user -d market_data -t -c "SELECT version();")
Redis: $(docker exec market_data_redis redis-cli -a "$REDIS_PASSWORD" INFO server | grep redis_version)
Database Size:
$(docker exec market_data_timescaledb psql -U market_user -d market_data -c "\l+")
EOF
# Compress backups
echo "🗜️ Compressing backups..."
tar -czf "$BACKUP_DIR/market_data_backup_$TIMESTAMP.tar.gz" \
-C "$BACKUP_DIR" \
"timescaledb_backup_$TIMESTAMP.dump" \
"redis_backup_$TIMESTAMP.rdb" \
"backup_$TIMESTAMP.info"
# Remove individual files after compression
rm "$BACKUP_DIR/timescaledb_backup_$TIMESTAMP.dump"
rm "$BACKUP_DIR/redis_backup_$TIMESTAMP.rdb"
rm "$BACKUP_DIR/backup_$TIMESTAMP.info"
echo "✅ Compressed backup created: market_data_backup_$TIMESTAMP.tar.gz"
# Clean up old backups
echo "🧹 Cleaning up old backups (older than $RETENTION_DAYS days)..."
find "$BACKUP_DIR" -name "market_data_backup_*.tar.gz" -mtime +$RETENTION_DAYS -delete
# Display backup information
BACKUP_SIZE=$(du -h "$BACKUP_DIR/market_data_backup_$TIMESTAMP.tar.gz" | cut -f1)
echo ""
echo "📋 Backup Summary:"
echo " File: market_data_backup_$TIMESTAMP.tar.gz"
echo " Size: $BACKUP_SIZE"
echo " Location: $BACKUP_DIR"
echo ""
echo "🔄 To restore from this backup:"
echo " ./restore.sh market_data_backup_$TIMESTAMP.tar.gz"
echo ""
echo "✅ Backup process completed successfully!"

112
COBY/docker/deploy.sh Normal file

@ -0,0 +1,112 @@
#!/bin/bash
# Deployment script for market data infrastructure
# Run this on your Docker host at 192.168.0.10
set -e
echo "🚀 Deploying Market Data Infrastructure..."
# Check if Docker and Docker Compose are available
if ! command -v docker &> /dev/null; then
echo "❌ Docker is not installed or not in PATH"
exit 1
fi
if ! command -v docker-compose &> /dev/null && ! docker compose version &> /dev/null; then
echo "❌ Docker Compose is not installed or not in PATH"
exit 1
fi
# Set Docker Compose command
if docker compose version &> /dev/null; then
DOCKER_COMPOSE="docker compose"
else
DOCKER_COMPOSE="docker-compose"
fi
# Create necessary directories
echo "📁 Creating directories..."
mkdir -p ./data/timescale
mkdir -p ./data/redis
mkdir -p ./logs
mkdir -p ./backups
# Set proper permissions
echo "🔐 Setting permissions..."
chmod 755 ./data/timescale
chmod 755 ./data/redis
chmod 755 ./logs
chmod 755 ./backups
# Copy environment file if it doesn't exist
if [ ! -f .env ]; then
echo "📋 Creating .env file..."
cp .env.example .env
echo "⚠️ Please edit .env file with your specific configuration"
echo "⚠️ Default passwords are set - change them for production!"
fi
# Pull latest images
echo "📥 Pulling Docker images..."
$DOCKER_COMPOSE -f timescaledb-compose.yml pull
# Stop existing containers if running
echo "🛑 Stopping existing containers..."
$DOCKER_COMPOSE -f timescaledb-compose.yml down
# Start the services
echo "🏃 Starting services..."
$DOCKER_COMPOSE -f timescaledb-compose.yml up -d
# Wait for services to be ready
echo "⏳ Waiting for services to be ready..."
sleep 30
# Check service health
echo "🏥 Checking service health..."
# Check TimescaleDB
if docker exec market_data_timescaledb pg_isready -U market_user -d market_data; then
echo "✅ TimescaleDB is ready"
else
echo "❌ TimescaleDB is not ready"
exit 1
fi
# Check Redis
if docker exec market_data_redis redis-cli -a market_data_redis_2024 ping | grep -q PONG; then
echo "✅ Redis is ready"
else
echo "❌ Redis is not ready"
exit 1
fi
# Display connection information
echo ""
echo "🎉 Deployment completed successfully!"
echo ""
echo "📊 Connection Information:"
echo " TimescaleDB:"
echo " Host: 192.168.0.10"
echo " Port: 5432"
echo " Database: market_data"
echo " Username: market_user"
echo " Password: (check .env file)"
echo ""
echo " Redis:"
echo " Host: 192.168.0.10"
echo " Port: 6379"
echo " Password: (check .env file)"
echo ""
echo "📝 Next steps:"
echo " 1. Update your application configuration to use these connection details"
echo " 2. Test the connection from your application"
echo " 3. Set up monitoring and alerting"
echo " 4. Configure backup schedules"
echo ""
echo "🔍 To view logs:"
echo " docker-compose -f timescaledb-compose.yml logs -f"
echo ""
echo "🛑 To stop services:"
echo " docker-compose -f timescaledb-compose.yml down"


@ -0,0 +1,214 @@
-- Initialize TimescaleDB extension and create market data schema
CREATE EXTENSION IF NOT EXISTS timescaledb;
-- Create database schema for market data
CREATE SCHEMA IF NOT EXISTS market_data;
-- Set search path
SET search_path TO market_data, public;
-- Order book snapshots table
CREATE TABLE IF NOT EXISTS order_book_snapshots (
id BIGSERIAL,
symbol VARCHAR(20) NOT NULL,
exchange VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
bids JSONB NOT NULL,
asks JSONB NOT NULL,
sequence_id BIGINT,
mid_price DECIMAL(20,8),
spread DECIMAL(20,8),
bid_volume DECIMAL(30,8),
ask_volume DECIMAL(30,8),
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, symbol, exchange)
);
-- Convert to hypertable
SELECT create_hypertable('order_book_snapshots', 'timestamp', if_not_exists => TRUE);
-- Create indexes for better query performance
CREATE INDEX IF NOT EXISTS idx_order_book_symbol_exchange ON order_book_snapshots (symbol, exchange, timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_order_book_timestamp ON order_book_snapshots (timestamp DESC);
-- Trade events table
CREATE TABLE IF NOT EXISTS trade_events (
id BIGSERIAL,
symbol VARCHAR(20) NOT NULL,
exchange VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
price DECIMAL(20,8) NOT NULL,
size DECIMAL(30,8) NOT NULL,
side VARCHAR(4) NOT NULL,
trade_id VARCHAR(100) NOT NULL,
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, symbol, exchange, trade_id)
);
-- Convert to hypertable
SELECT create_hypertable('trade_events', 'timestamp', if_not_exists => TRUE);
-- Create indexes for trade events
CREATE INDEX IF NOT EXISTS idx_trade_events_symbol_exchange ON trade_events (symbol, exchange, timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_trade_events_timestamp ON trade_events (timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_trade_events_price ON trade_events (symbol, price, timestamp DESC);
-- Aggregated heatmap data table
CREATE TABLE IF NOT EXISTS heatmap_data (
symbol VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
bucket_size DECIMAL(10,2) NOT NULL,
price_bucket DECIMAL(20,8) NOT NULL,
volume DECIMAL(30,8) NOT NULL,
side VARCHAR(3) NOT NULL,
exchange_count INTEGER NOT NULL,
exchanges JSONB,
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, symbol, bucket_size, price_bucket, side)
);
-- Convert to hypertable
SELECT create_hypertable('heatmap_data', 'timestamp', if_not_exists => TRUE);
-- Create indexes for heatmap data
CREATE INDEX IF NOT EXISTS idx_heatmap_symbol_bucket ON heatmap_data (symbol, bucket_size, timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_heatmap_timestamp ON heatmap_data (timestamp DESC);
-- OHLCV data table
CREATE TABLE IF NOT EXISTS ohlcv_data (
symbol VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
timeframe VARCHAR(10) NOT NULL,
open_price DECIMAL(20,8) NOT NULL,
high_price DECIMAL(20,8) NOT NULL,
low_price DECIMAL(20,8) NOT NULL,
close_price DECIMAL(20,8) NOT NULL,
volume DECIMAL(30,8) NOT NULL,
trade_count INTEGER,
vwap DECIMAL(20,8),
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, symbol, timeframe)
);
-- Convert to hypertable
SELECT create_hypertable('ohlcv_data', 'timestamp', if_not_exists => TRUE);
-- Create indexes for OHLCV data
CREATE INDEX IF NOT EXISTS idx_ohlcv_symbol_timeframe ON ohlcv_data (symbol, timeframe, timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_ohlcv_timestamp ON ohlcv_data (timestamp DESC);
-- Exchange status tracking table
CREATE TABLE IF NOT EXISTS exchange_status (
exchange VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
status VARCHAR(20) NOT NULL, -- 'connected', 'disconnected', 'error'
last_message_time TIMESTAMPTZ,
error_message TEXT,
connection_count INTEGER DEFAULT 0,
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, exchange)
);
-- Convert to hypertable
SELECT create_hypertable('exchange_status', 'timestamp', if_not_exists => TRUE);
-- Create indexes for exchange status
CREATE INDEX IF NOT EXISTS idx_exchange_status_exchange ON exchange_status (exchange, timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_exchange_status_timestamp ON exchange_status (timestamp DESC);
-- System metrics table for monitoring
CREATE TABLE IF NOT EXISTS system_metrics (
metric_name VARCHAR(50) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
value DECIMAL(20,8) NOT NULL,
labels JSONB,
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, metric_name)
);
-- Convert to hypertable
SELECT create_hypertable('system_metrics', 'timestamp', if_not_exists => TRUE);
-- Create indexes for system metrics
CREATE INDEX IF NOT EXISTS idx_system_metrics_name ON system_metrics (metric_name, timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_system_metrics_timestamp ON system_metrics (timestamp DESC);
-- Create retention policies (keep data for 90 days by default)
SELECT add_retention_policy('order_book_snapshots', INTERVAL '90 days', if_not_exists => TRUE);
SELECT add_retention_policy('trade_events', INTERVAL '90 days', if_not_exists => TRUE);
SELECT add_retention_policy('heatmap_data', INTERVAL '90 days', if_not_exists => TRUE);
SELECT add_retention_policy('ohlcv_data', INTERVAL '365 days', if_not_exists => TRUE);
SELECT add_retention_policy('exchange_status', INTERVAL '30 days', if_not_exists => TRUE);
SELECT add_retention_policy('system_metrics', INTERVAL '30 days', if_not_exists => TRUE);
-- Create continuous aggregates for common queries
CREATE MATERIALIZED VIEW IF NOT EXISTS hourly_ohlcv
WITH (timescaledb.continuous) AS
SELECT
symbol,
exchange,
time_bucket('1 hour', timestamp) AS hour,
first(price, timestamp) AS open_price,
max(price) AS high_price,
min(price) AS low_price,
last(price, timestamp) AS close_price,
sum(size) AS volume,
count(*) AS trade_count,
sum(price * size) / NULLIF(sum(size), 0) AS vwap
FROM trade_events
GROUP BY symbol, exchange, hour
WITH NO DATA;
-- Add refresh policy for continuous aggregate
SELECT add_continuous_aggregate_policy('hourly_ohlcv',
start_offset => INTERVAL '3 hours',
end_offset => INTERVAL '1 hour',
schedule_interval => INTERVAL '1 hour',
if_not_exists => TRUE);
-- Create view for latest order book data
CREATE OR REPLACE VIEW latest_order_books AS
SELECT DISTINCT ON (symbol, exchange)
symbol,
exchange,
timestamp,
bids,
asks,
mid_price,
spread,
bid_volume,
ask_volume
FROM order_book_snapshots
ORDER BY symbol, exchange, timestamp DESC;
-- Create view for latest heatmap data
CREATE OR REPLACE VIEW latest_heatmaps AS
SELECT DISTINCT ON (symbol, bucket_size, price_bucket, side)
symbol,
bucket_size,
price_bucket,
side,
timestamp,
volume,
exchange_count,
exchanges
FROM heatmap_data
ORDER BY symbol, bucket_size, price_bucket, side, timestamp DESC;
-- Grant permissions to market_user
GRANT ALL PRIVILEGES ON SCHEMA market_data TO market_user;
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA market_data TO market_user;
GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA market_data TO market_user;
GRANT ALL PRIVILEGES ON ALL FUNCTIONS IN SCHEMA market_data TO market_user;
-- Set default privileges for future objects
ALTER DEFAULT PRIVILEGES IN SCHEMA market_data GRANT ALL ON TABLES TO market_user;
ALTER DEFAULT PRIVILEGES IN SCHEMA market_data GRANT ALL ON SEQUENCES TO market_user;
ALTER DEFAULT PRIVILEGES IN SCHEMA market_data GRANT ALL ON FUNCTIONS TO market_user;
-- Create database user for read-only access (for dashboards)
DO $$
BEGIN
    IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'dashboard_user') THEN
        CREATE USER dashboard_user WITH PASSWORD 'dashboard_read_2024';
    END IF;
END
$$;
GRANT CONNECT ON DATABASE market_data TO dashboard_user;
GRANT USAGE ON SCHEMA market_data TO dashboard_user;
GRANT SELECT ON ALL TABLES IN SCHEMA market_data TO dashboard_user;
ALTER DEFAULT PRIVILEGES IN SCHEMA market_data GRANT SELECT ON TABLES TO dashboard_user;


@ -0,0 +1,37 @@
#!/bin/bash
# Manual database initialization script
# Run this to initialize the TimescaleDB schema
echo "🔧 Initializing TimescaleDB schema..."
# Check if we can connect to the database
echo "📡 Testing connection to TimescaleDB..."
# You can run this command on your Docker host (192.168.0.10)
# Replace with your actual password from the .env file
PGPASSWORD="market_data_secure_pass_2024" psql -h 192.168.0.10 -p 5432 -U market_user -d market_data -c "SELECT version();"
if [ $? -eq 0 ]; then
echo "✅ Connection successful!"
echo "🏗️ Creating database schema..."
# Execute the initialization script
PGPASSWORD="market_data_secure_pass_2024" psql -h 192.168.0.10 -p 5432 -U market_user -d market_data -f ../docker/init-scripts/01-init-timescaledb.sql
if [ $? -eq 0 ]; then
echo "✅ Database schema initialized successfully!"
echo "📊 Verifying tables..."
PGPASSWORD="market_data_secure_pass_2024" psql -h 192.168.0.10 -p 5432 -U market_user -d market_data -c "\dt market_data.*"
else
echo "❌ Schema initialization failed"
exit 1
fi
else
echo "❌ Cannot connect to database"
exit 1
fi

131
COBY/docker/redis.conf Normal file

@ -0,0 +1,131 @@
# Redis configuration for market data caching
# Optimized for high-frequency trading data
# Network settings
bind 0.0.0.0
port 6379
tcp-backlog 511
timeout 0
tcp-keepalive 300
# General settings
daemonize no
supervised no
pidfile /var/run/redis_6379.pid
loglevel notice
logfile ""
databases 16
# Snapshotting (persistence)
save 900 1
save 300 10
save 60 10000
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes
dbfilename dump.rdb
dir /data
# Replication
replica-serve-stale-data yes
replica-read-only yes
repl-diskless-sync no
repl-diskless-sync-delay 5
repl-ping-replica-period 10
repl-timeout 60
repl-disable-tcp-nodelay no
repl-backlog-size 1mb
repl-backlog-ttl 3600
# Security
requirepass market_data_redis_2024
# Memory management
maxmemory 2gb
maxmemory-policy allkeys-lru
maxmemory-samples 5
# Lazy freeing
lazyfree-lazy-eviction no
lazyfree-lazy-expire no
lazyfree-lazy-server-del no
replica-lazy-flush no
# Threaded I/O
io-threads 4
io-threads-do-reads yes
# Append only file (AOF)
appendonly yes
appendfilename "appendonly.aof"
appendfsync everysec
no-appendfsync-on-rewrite no
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
aof-load-truncated yes
aof-use-rdb-preamble yes
# Lua scripting
lua-time-limit 5000
# Slow log
slowlog-log-slower-than 10000
slowlog-max-len 128
# Latency monitor
latency-monitor-threshold 100
# Event notification
notify-keyspace-events ""
# Hash settings (optimized for order book data)
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
# List settings
list-max-ziplist-size -2
list-compress-depth 0
# Set settings
set-max-intset-entries 512
# Sorted set settings
zset-max-ziplist-entries 128
zset-max-ziplist-value 64
# HyperLogLog settings
hll-sparse-max-bytes 3000
# Streams settings
stream-node-max-bytes 4096
stream-node-max-entries 100
# Active rehashing
activerehashing yes
# Client settings
client-output-buffer-limit normal 0 0 0
client-output-buffer-limit replica 256mb 64mb 60
client-output-buffer-limit pubsub 32mb 8mb 60
client-query-buffer-limit 1gb
# Protocol settings
proto-max-bulk-len 512mb
# Frequency settings
hz 10
# Dynamic HZ
dynamic-hz yes
# AOF rewrite settings
aof-rewrite-incremental-fsync yes
# RDB settings
rdb-save-incremental-fsync yes
# Jemalloc settings
jemalloc-bg-thread yes
# TLS settings (disabled for internal network)
tls-port 0

188
COBY/docker/restore.sh Normal file

@ -0,0 +1,188 @@
#!/bin/bash
# Restore script for market data infrastructure
# Usage: ./restore.sh <backup_file.tar.gz>
set -e
# Check if backup file is provided
if [ $# -eq 0 ]; then
echo "❌ Usage: $0 <backup_file.tar.gz>"
echo "Available backups:"
ls -la ./backups/market_data_backup_*.tar.gz 2>/dev/null || echo "No backups found"
exit 1
fi
BACKUP_FILE="$1"
RESTORE_DIR="./restore_temp"
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
# Load environment variables
if [ -f .env ]; then
source .env
fi
echo "🔄 Starting restore process..."
echo "📁 Backup file: $BACKUP_FILE"
# Check if backup file exists
if [ ! -f "$BACKUP_FILE" ]; then
echo "❌ Backup file not found: $BACKUP_FILE"
exit 1
fi
# Create temporary restore directory
mkdir -p "$RESTORE_DIR"
# Extract backup
echo "📦 Extracting backup..."
tar -xzf "$BACKUP_FILE" -C "$RESTORE_DIR"
# Find extracted files
TIMESCALE_BACKUP=$(find "$RESTORE_DIR" -name "timescaledb_backup_*.dump" | head -1)
REDIS_BACKUP=$(find "$RESTORE_DIR" -name "redis_backup_*.rdb" | head -1)
BACKUP_INFO=$(find "$RESTORE_DIR" -name "backup_*.info" | head -1)
if [ -z "$TIMESCALE_BACKUP" ] || [ -z "$REDIS_BACKUP" ]; then
echo "❌ Invalid backup file structure"
rm -rf "$RESTORE_DIR"
exit 1
fi
# Display backup information
if [ -f "$BACKUP_INFO" ]; then
echo "📋 Backup Information:"
cat "$BACKUP_INFO"
echo ""
fi
# Confirm restore
read -p "⚠️ This will replace all existing data. Continue? (y/N): " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
echo "❌ Restore cancelled"
rm -rf "$RESTORE_DIR"
exit 1
fi
# Stop services
echo "🛑 Stopping services..."
docker-compose -f timescaledb-compose.yml down
# Backup current data (just in case)
echo "💾 Creating safety backup of current data..."
mkdir -p "./backups/pre_restore_$TIMESTAMP"
docker run --rm -v market_data_timescale_data:/data -v "$(pwd)/backups/pre_restore_$TIMESTAMP":/backup alpine tar czf /backup/current_timescale.tar.gz -C /data .
docker run --rm -v market_data_redis_data:/data -v "$(pwd)/backups/pre_restore_$TIMESTAMP":/backup alpine tar czf /backup/current_redis.tar.gz -C /data .
# Start only TimescaleDB for restore
echo "🏃 Starting TimescaleDB for restore..."
docker-compose -f timescaledb-compose.yml up -d timescaledb
# Wait for TimescaleDB to be ready
echo "⏳ Waiting for TimescaleDB to be ready..."
sleep 30
# Check if TimescaleDB is ready
if ! docker exec market_data_timescaledb pg_isready -U market_user -d market_data; then
echo "❌ TimescaleDB is not ready"
exit 1
fi
# Drop existing database and recreate
echo "🗑️ Dropping existing database..."
# market_user is the image superuser (POSTGRES_USER); connect to the postgres maintenance DB to drop/recreate market_data
docker exec market_data_timescaledb psql -U market_user -d postgres -c "DROP DATABASE IF EXISTS market_data;"
docker exec market_data_timescaledb psql -U market_user -d postgres -c "CREATE DATABASE market_data OWNER market_user;"
# Restore TimescaleDB
echo "📊 Restoring TimescaleDB..."
docker cp "$TIMESCALE_BACKUP" market_data_timescaledb:/tmp/restore.dump
docker exec market_data_timescaledb pg_restore \
-U market_user \
-d market_data \
--verbose \
--no-password \
/tmp/restore.dump
if [ $? -eq 0 ]; then
echo "✅ TimescaleDB restore completed"
else
echo "❌ TimescaleDB restore failed"
exit 1
fi
# Stop TimescaleDB
docker-compose -f timescaledb-compose.yml stop timescaledb
# Restore Redis data
echo "📦 Restoring Redis data..."
# Remove existing Redis data
docker volume rm market_data_redis_data 2>/dev/null || true
docker volume create market_data_redis_data
# Copy Redis backup to volume
docker run --rm -v market_data_redis_data:/data -v "$(pwd)/$RESTORE_DIR":/backup alpine cp "/backup/$(basename "$REDIS_BACKUP")" /data/dump.rdb
# Start all services
echo "🏃 Starting all services..."
docker-compose -f timescaledb-compose.yml up -d
# Wait for services to be ready
echo "⏳ Waiting for services to be ready..."
sleep 30
# Verify restore
echo "🔍 Verifying restore..."
# Check TimescaleDB
if docker exec market_data_timescaledb pg_isready -U market_user -d market_data; then
echo "✅ TimescaleDB is ready"
# Show table counts
echo "📊 Database table counts:"
docker exec market_data_timescaledb psql -U market_user -d market_data -c "
SELECT
schemaname,
tablename,
n_tup_ins as row_count
FROM pg_stat_user_tables
WHERE schemaname = 'market_data'
ORDER BY tablename;
"
else
echo "❌ TimescaleDB verification failed"
exit 1
fi
# Check Redis
if docker exec market_data_redis redis-cli -a "$REDIS_PASSWORD" ping | grep -q PONG; then
echo "✅ Redis is ready"
# Show Redis info
echo "📦 Redis database info:"
docker exec market_data_redis redis-cli -a "$REDIS_PASSWORD" INFO keyspace
else
echo "❌ Redis verification failed"
exit 1
fi
# Clean up
echo "🧹 Cleaning up temporary files..."
rm -rf "$RESTORE_DIR"
echo ""
echo "🎉 Restore completed successfully!"
echo ""
echo "📋 Restore Summary:"
echo " Source: $BACKUP_FILE"
echo " Timestamp: $TIMESTAMP"
echo " Safety backup: ./backups/pre_restore_$TIMESTAMP/"
echo ""
echo "⚠️ If you encounter any issues, you can restore the safety backup:"
echo " docker-compose -f timescaledb-compose.yml down"
echo " docker volume rm market_data_timescale_data market_data_redis_data"
echo " docker volume create market_data_timescale_data"
echo " docker volume create market_data_redis_data"
echo " docker run --rm -v market_data_timescale_data:/data -v $(pwd)/backups/pre_restore_$TIMESTAMP:/backup alpine tar xzf /backup/current_timescale.tar.gz -C /data"
echo " docker run --rm -v market_data_redis_data:/data -v $(pwd)/backups/pre_restore_$TIMESTAMP:/backup alpine tar xzf /backup/current_redis.tar.gz -C /data"
echo " docker-compose -f timescaledb-compose.yml up -d"


@ -0,0 +1,78 @@
version: '3.8'

services:
  timescaledb:
    image: timescale/timescaledb:latest-pg15
    container_name: market_data_timescaledb
    restart: unless-stopped
    environment:
      POSTGRES_DB: market_data
      POSTGRES_USER: market_user
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-market_data_secure_pass_2024}
      POSTGRES_INITDB_ARGS: "--encoding=UTF-8 --lc-collate=C --lc-ctype=C"
      # TimescaleDB specific settings
      TIMESCALEDB_TELEMETRY: 'off'
    ports:
      - "5432:5432"
    volumes:
      - timescale_data:/var/lib/postgresql/data
      - ./init-scripts:/docker-entrypoint-initdb.d
    command: >
      postgres
      -c shared_preload_libraries=timescaledb
      -c max_connections=200
      -c shared_buffers=256MB
      -c effective_cache_size=1GB
      -c maintenance_work_mem=64MB
      -c checkpoint_completion_target=0.9
      -c wal_buffers=16MB
      -c default_statistics_target=100
      -c random_page_cost=1.1
      -c effective_io_concurrency=200
      -c work_mem=4MB
      -c min_wal_size=1GB
      -c max_wal_size=4GB
      -c max_worker_processes=8
      -c max_parallel_workers_per_gather=4
      -c max_parallel_workers=8
      -c max_parallel_maintenance_workers=4
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U market_user -d market_data"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    networks:
      - market_data_network

  redis:
    image: redis:7-alpine
    container_name: market_data_redis
    restart: unless-stopped
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
      - ./redis.conf:/usr/local/etc/redis/redis.conf
    command: redis-server /usr/local/etc/redis/redis.conf
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    networks:
      - market_data_network

volumes:
  timescale_data:
    driver: local
  redis_data:
    driver: local

networks:
  market_data_network:
    driver: bridge
    ipam:
      config:
        - subnet: 172.20.0.0/16


@ -0,0 +1,168 @@
#!/usr/bin/env python3
"""
Example usage of Binance connector.
"""
import asyncio
import sys
from pathlib import Path
# Add COBY to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from connectors.binance_connector import BinanceConnector
from utils.logging import setup_logging, get_logger
from models.core import OrderBookSnapshot, TradeEvent
# Setup logging
setup_logging(level='INFO', console_output=True)
logger = get_logger(__name__)
class BinanceExample:
"""Example Binance connector usage"""
def __init__(self):
self.connector = BinanceConnector()
self.orderbook_count = 0
self.trade_count = 0
# Add data callbacks
self.connector.add_data_callback(self.on_data_received)
self.connector.add_status_callback(self.on_status_changed)
def on_data_received(self, data):
"""Handle received data"""
if isinstance(data, OrderBookSnapshot):
self.orderbook_count += 1
logger.info(
f"📊 Order Book {self.orderbook_count}: {data.symbol} - "
f"Mid: ${data.mid_price:.2f}, Spread: ${data.spread:.2f}, "
f"Bids: {len(data.bids)}, Asks: {len(data.asks)}"
)
elif isinstance(data, TradeEvent):
self.trade_count += 1
logger.info(
f"💰 Trade {self.trade_count}: {data.symbol} - "
f"{data.side.upper()} {data.size} @ ${data.price:.2f}"
)
def on_status_changed(self, exchange, status):
"""Handle status changes"""
logger.info(f"🔄 {exchange} status changed to: {status.value}")
async def run_example(self):
"""Run the example"""
try:
logger.info("🚀 Starting Binance connector example")
# Connect to Binance
logger.info("🔌 Connecting to Binance...")
connected = await self.connector.connect()
if not connected:
logger.error("❌ Failed to connect to Binance")
return
logger.info("✅ Connected to Binance successfully")
# Get available symbols
logger.info("📋 Getting available symbols...")
symbols = await self.connector.get_symbols()
logger.info(f"📋 Found {len(symbols)} trading symbols")
# Show some popular symbols
popular_symbols = ['BTCUSDT', 'ETHUSDT', 'ADAUSDT', 'BNBUSDT']
available_popular = [s for s in popular_symbols if s in symbols]
logger.info(f"📋 Popular symbols available: {available_popular}")
# Get order book snapshot
if 'BTCUSDT' in symbols:
logger.info("📊 Getting BTC order book snapshot...")
orderbook = await self.connector.get_orderbook_snapshot('BTCUSDT', depth=10)
if orderbook:
logger.info(
f"📊 BTC Order Book: Mid=${orderbook.mid_price:.2f}, "
f"Spread=${orderbook.spread:.2f}"
)
# Subscribe to real-time data
logger.info("🔔 Subscribing to real-time data...")
# Subscribe to BTC order book and trades
if 'BTCUSDT' in symbols:
await self.connector.subscribe_orderbook('BTCUSDT')
await self.connector.subscribe_trades('BTCUSDT')
logger.info("✅ Subscribed to BTCUSDT order book and trades")
# Subscribe to ETH order book
if 'ETHUSDT' in symbols:
await self.connector.subscribe_orderbook('ETHUSDT')
logger.info("✅ Subscribed to ETHUSDT order book")
# Let it run for a while
logger.info("⏳ Collecting data for 30 seconds...")
await asyncio.sleep(30)
# Show statistics
stats = self.connector.get_binance_stats()
logger.info("📈 Final Statistics:")
logger.info(f" 📊 Order books received: {self.orderbook_count}")
logger.info(f" 💰 Trades received: {self.trade_count}")
logger.info(f" 📡 Total messages: {stats['message_count']}")
logger.info(f" ❌ Errors: {stats['error_count']}")
logger.info(f" 🔗 Active streams: {stats['active_streams']}")
logger.info(f" 📋 Subscriptions: {list(stats['subscriptions'].keys())}")
# Unsubscribe and disconnect
logger.info("🔌 Cleaning up...")
if 'BTCUSDT' in self.connector.subscriptions:
await self.connector.unsubscribe_orderbook('BTCUSDT')
await self.connector.unsubscribe_trades('BTCUSDT')
if 'ETHUSDT' in self.connector.subscriptions:
await self.connector.unsubscribe_orderbook('ETHUSDT')
await self.connector.disconnect()
logger.info("✅ Disconnected successfully")
except KeyboardInterrupt:
logger.info("⏹️ Interrupted by user")
except Exception as e:
logger.error(f"❌ Example failed: {e}")
finally:
# Ensure cleanup
try:
await self.connector.disconnect()
except Exception:
pass
async def main():
"""Main function"""
example = BinanceExample()
await example.run_example()
if __name__ == "__main__":
print("Binance Connector Example")
print("=" * 25)
print("This example will:")
print("1. Connect to Binance WebSocket")
print("2. Get available trading symbols")
print("3. Subscribe to real-time order book and trade data")
print("4. Display received data for 30 seconds")
print("5. Show statistics and disconnect")
print()
print("Press Ctrl+C to stop early")
print("=" * 25)
try:
asyncio.run(main())
except KeyboardInterrupt:
print("\n👋 Example stopped by user")
except Exception as e:
print(f"\n❌ Example failed: {e}")
sys.exit(1)


@ -0,0 +1,17 @@
"""
Interface definitions for the multi-exchange data aggregation system.
"""
from .exchange_connector import ExchangeConnector
from .data_processor import DataProcessor
from .aggregation_engine import AggregationEngine
from .storage_manager import StorageManager
from .replay_manager import ReplayManager
__all__ = [
'ExchangeConnector',
'DataProcessor',
'AggregationEngine',
'StorageManager',
'ReplayManager'
]


@ -0,0 +1,139 @@
"""
Interface for data aggregation and heatmap generation.
"""
from abc import ABC, abstractmethod
from typing import Dict, List
from ..models.core import (
OrderBookSnapshot, PriceBuckets, HeatmapData,
ImbalanceMetrics, ConsolidatedOrderBook
)
class AggregationEngine(ABC):
"""Aggregates data into price buckets and heatmaps"""
@abstractmethod
def create_price_buckets(self, orderbook: OrderBookSnapshot,
bucket_size: float) -> PriceBuckets:
"""
Convert order book data to price buckets.
Args:
orderbook: Order book snapshot
bucket_size: Size of each price bucket
Returns:
PriceBuckets: Aggregated price bucket data
"""
pass
@abstractmethod
def update_heatmap(self, symbol: str, buckets: PriceBuckets) -> HeatmapData:
"""
Update heatmap data with new price buckets.
Args:
symbol: Trading symbol
buckets: Price bucket data
Returns:
HeatmapData: Updated heatmap visualization data
"""
pass
@abstractmethod
def calculate_imbalances(self, orderbook: OrderBookSnapshot) -> ImbalanceMetrics:
"""
Calculate order book imbalance metrics.
Args:
orderbook: Order book snapshot
Returns:
ImbalanceMetrics: Calculated imbalance metrics
"""
pass
@abstractmethod
def aggregate_across_exchanges(self, symbol: str,
orderbooks: List[OrderBookSnapshot]) -> ConsolidatedOrderBook:
"""
Aggregate order book data from multiple exchanges.
Args:
symbol: Trading symbol
orderbooks: List of order book snapshots from different exchanges
Returns:
ConsolidatedOrderBook: Consolidated order book data
"""
pass
@abstractmethod
def calculate_volume_weighted_price(self, orderbooks: List[OrderBookSnapshot]) -> float:
"""
Calculate volume-weighted average price across exchanges.
Args:
orderbooks: List of order book snapshots
Returns:
float: Volume-weighted average price
"""
pass
@abstractmethod
def get_market_depth(self, orderbook: OrderBookSnapshot,
depth_levels: List[float]) -> Dict[float, Dict[str, float]]:
"""
Calculate market depth at different price levels.
Args:
orderbook: Order book snapshot
depth_levels: List of depth percentages (e.g., [0.1, 0.5, 1.0])
Returns:
Dict: Market depth data {level: {'bid_volume': x, 'ask_volume': y}}
"""
pass
@abstractmethod
def smooth_heatmap(self, heatmap: HeatmapData, smoothing_factor: float) -> HeatmapData:
"""
Apply smoothing to heatmap data to reduce noise.
Args:
heatmap: Raw heatmap data
smoothing_factor: Smoothing factor (0.0 to 1.0)
Returns:
HeatmapData: Smoothed heatmap data
"""
pass
@abstractmethod
def calculate_liquidity_score(self, orderbook: OrderBookSnapshot) -> float:
"""
Calculate liquidity score for an order book.
Args:
orderbook: Order book snapshot
Returns:
float: Liquidity score (0.0 to 1.0)
"""
pass
@abstractmethod
def detect_support_resistance(self, heatmap: HeatmapData) -> Dict[str, List[float]]:
"""
Detect support and resistance levels from heatmap data.
Args:
heatmap: Heatmap data
Returns:
Dict: {'support': [prices], 'resistance': [prices]}
"""
pass
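
# Illustrative sketch (not part of this interface file): one straightforward way a concrete
# engine could implement create_price_buckets() on top of the PriceBuckets helpers defined
# in COBY/models/core.py (reusing the imports at the top of this file). This is an example,
# not the project's actual implementation.
def example_create_price_buckets(orderbook: OrderBookSnapshot,
                                 bucket_size: float) -> PriceBuckets:
    buckets = PriceBuckets(
        symbol=orderbook.symbol,
        timestamp=orderbook.timestamp,
        bucket_size=bucket_size,
    )
    # Fold every order book level into its rounded price bucket.
    for level in orderbook.bids:
        buckets.add_bid(level.price, level.size)
    for level in orderbook.asks:
        buckets.add_ask(level.price, level.size)
    return buckets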


@ -0,0 +1,119 @@
"""
Interface for data processing and normalization.
"""
from abc import ABC, abstractmethod
from typing import Dict, Union, List, Optional
from ..models.core import OrderBookSnapshot, TradeEvent, OrderBookMetrics
class DataProcessor(ABC):
"""Processes and normalizes raw exchange data"""
@abstractmethod
def normalize_orderbook(self, raw_data: Dict, exchange: str) -> OrderBookSnapshot:
"""
Normalize raw order book data to standard format.
Args:
raw_data: Raw order book data from exchange
exchange: Exchange name
Returns:
OrderBookSnapshot: Normalized order book data
"""
pass
@abstractmethod
def normalize_trade(self, raw_data: Dict, exchange: str) -> TradeEvent:
"""
Normalize raw trade data to standard format.
Args:
raw_data: Raw trade data from exchange
exchange: Exchange name
Returns:
TradeEvent: Normalized trade data
"""
pass
@abstractmethod
def validate_data(self, data: Union[OrderBookSnapshot, TradeEvent]) -> bool:
"""
Validate normalized data for quality and consistency.
Args:
data: Normalized data to validate
Returns:
bool: True if data is valid, False otherwise
"""
pass
@abstractmethod
def calculate_metrics(self, orderbook: OrderBookSnapshot) -> OrderBookMetrics:
"""
Calculate metrics from order book data.
Args:
orderbook: Order book snapshot
Returns:
OrderBookMetrics: Calculated metrics
"""
pass
@abstractmethod
def detect_anomalies(self, data: Union[OrderBookSnapshot, TradeEvent]) -> List[str]:
"""
Detect anomalies in the data.
Args:
data: Data to analyze for anomalies
Returns:
List[str]: List of detected anomaly descriptions
"""
pass
@abstractmethod
def filter_data(self, data: Union[OrderBookSnapshot, TradeEvent],
criteria: Dict) -> bool:
"""
Filter data based on criteria.
Args:
data: Data to filter
criteria: Filtering criteria
Returns:
bool: True if data passes filter, False otherwise
"""
pass
@abstractmethod
def enrich_data(self, data: Union[OrderBookSnapshot, TradeEvent]) -> Dict:
"""
Enrich data with additional metadata.
Args:
data: Data to enrich
Returns:
Dict: Enriched data with metadata
"""
pass
@abstractmethod
def get_data_quality_score(self, data: Union[OrderBookSnapshot, TradeEvent]) -> float:
"""
Calculate data quality score.
Args:
data: Data to score
Returns:
float: Quality score between 0.0 and 1.0
"""
pass
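
# Illustrative sketch (not part of this interface file): a minimal normalize_orderbook()
# for a Binance-style depth payload ('bids'/'asks' as [price, quantity] string pairs),
# mapped onto the models in COBY/models/core.py. Example only, not the project's
# implementation; the symbol is passed in because Binance depth responses omit it.
from datetime import datetime, timezone
from ..models.core import PriceLevel

def normalize_binance_orderbook(raw_data: Dict, symbol: str,
                                exchange: str = "binance") -> OrderBookSnapshot:
    bids = [PriceLevel(price=float(p), size=float(q)) for p, q in raw_data["bids"]]
    asks = [PriceLevel(price=float(p), size=float(q)) for p, q in raw_data["asks"]]
    return OrderBookSnapshot(
        symbol=symbol,
        exchange=exchange,
        timestamp=datetime.now(timezone.utc),
        bids=bids,
        asks=asks,
        sequence_id=raw_data.get("lastUpdateId"),
    )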


@ -0,0 +1,189 @@
"""
Base interface for exchange WebSocket connectors.
"""
from abc import ABC, abstractmethod
from typing import Callable, List, Optional
from ..models.core import ConnectionStatus, OrderBookSnapshot, TradeEvent
class ExchangeConnector(ABC):
"""Base interface for exchange WebSocket connectors"""
def __init__(self, exchange_name: str):
self.exchange_name = exchange_name
self._data_callbacks: List[Callable] = []
self._status_callbacks: List[Callable] = []
self._connection_status = ConnectionStatus.DISCONNECTED
@abstractmethod
async def connect(self) -> bool:
"""
Establish connection to the exchange WebSocket.
Returns:
bool: True if connection successful, False otherwise
"""
pass
@abstractmethod
async def disconnect(self) -> None:
"""Disconnect from the exchange WebSocket."""
pass
@abstractmethod
async def subscribe_orderbook(self, symbol: str) -> None:
"""
Subscribe to order book updates for a symbol.
Args:
symbol: Trading symbol (e.g., 'BTCUSDT')
"""
pass
@abstractmethod
async def subscribe_trades(self, symbol: str) -> None:
"""
Subscribe to trade updates for a symbol.
Args:
symbol: Trading symbol (e.g., 'BTCUSDT')
"""
pass
@abstractmethod
async def unsubscribe_orderbook(self, symbol: str) -> None:
"""
Unsubscribe from order book updates for a symbol.
Args:
symbol: Trading symbol (e.g., 'BTCUSDT')
"""
pass
@abstractmethod
async def unsubscribe_trades(self, symbol: str) -> None:
"""
Unsubscribe from trade updates for a symbol.
Args:
symbol: Trading symbol (e.g., 'BTCUSDT')
"""
pass
def get_connection_status(self) -> ConnectionStatus:
"""
Get current connection status.
Returns:
ConnectionStatus: Current connection status
"""
return self._connection_status
def add_data_callback(self, callback: Callable) -> None:
"""
Add callback for data updates.
Args:
callback: Function to call when data is received
Signature: callback(data: Union[OrderBookSnapshot, TradeEvent])
"""
if callback not in self._data_callbacks:
self._data_callbacks.append(callback)
def remove_data_callback(self, callback: Callable) -> None:
"""
Remove data callback.
Args:
callback: Callback function to remove
"""
if callback in self._data_callbacks:
self._data_callbacks.remove(callback)
def add_status_callback(self, callback: Callable) -> None:
"""
Add callback for status updates.
Args:
callback: Function to call when status changes
Signature: callback(exchange: str, status: ConnectionStatus)
"""
if callback not in self._status_callbacks:
self._status_callbacks.append(callback)
def remove_status_callback(self, callback: Callable) -> None:
"""
Remove status callback.
Args:
callback: Callback function to remove
"""
if callback in self._status_callbacks:
self._status_callbacks.remove(callback)
def _notify_data_callbacks(self, data):
"""Notify all data callbacks of new data."""
for callback in self._data_callbacks:
try:
callback(data)
except Exception as e:
# Log error but don't stop other callbacks
print(f"Error in data callback: {e}")
def _notify_status_callbacks(self, status: ConnectionStatus):
"""Notify all status callbacks of status change."""
self._connection_status = status
for callback in self._status_callbacks:
try:
callback(self.exchange_name, status)
except Exception as e:
# Log error but don't stop other callbacks
print(f"Error in status callback: {e}")
@abstractmethod
async def get_symbols(self) -> List[str]:
"""
Get list of available trading symbols.
Returns:
List[str]: List of available symbols
"""
pass
@abstractmethod
def normalize_symbol(self, symbol: str) -> str:
"""
Normalize symbol to exchange format.
Args:
symbol: Standard symbol format (e.g., 'BTCUSDT')
Returns:
str: Exchange-specific symbol format
"""
pass
@abstractmethod
async def get_orderbook_snapshot(self, symbol: str, depth: int = 20) -> Optional[OrderBookSnapshot]:
"""
Get current order book snapshot.
Args:
symbol: Trading symbol
depth: Number of price levels to retrieve
Returns:
OrderBookSnapshot: Current order book or None if unavailable
"""
pass
@property
def name(self) -> str:
"""Get exchange name."""
return self.exchange_name
@property
def is_connected(self) -> bool:
"""Check if connector is connected."""
return self._connection_status == ConnectionStatus.CONNECTED


@ -0,0 +1,212 @@
"""
Interface for historical data replay functionality.
"""
from abc import ABC, abstractmethod
from datetime import datetime
from typing import List, Optional, Callable, Dict, Any
from ..models.core import ReplaySession, ReplayStatus
class ReplayManager(ABC):
"""Provides historical data replay functionality"""
@abstractmethod
def create_replay_session(self, start_time: datetime, end_time: datetime,
speed: float = 1.0, symbols: Optional[List[str]] = None,
exchanges: Optional[List[str]] = None) -> str:
"""
Create a new replay session.
Args:
start_time: Replay start time
end_time: Replay end time
speed: Playback speed multiplier (1.0 = real-time)
symbols: List of symbols to replay (None = all)
exchanges: List of exchanges to replay (None = all)
Returns:
str: Session ID
"""
pass
@abstractmethod
async def start_replay(self, session_id: str) -> None:
"""
Start replay session.
Args:
session_id: Session ID to start
"""
pass
@abstractmethod
async def pause_replay(self, session_id: str) -> None:
"""
Pause replay session.
Args:
session_id: Session ID to pause
"""
pass
@abstractmethod
async def resume_replay(self, session_id: str) -> None:
"""
Resume paused replay session.
Args:
session_id: Session ID to resume
"""
pass
@abstractmethod
async def stop_replay(self, session_id: str) -> None:
"""
Stop replay session.
Args:
session_id: Session ID to stop
"""
pass
@abstractmethod
def get_replay_status(self, session_id: str) -> Optional[ReplaySession]:
"""
Get replay session status.
Args:
session_id: Session ID
Returns:
ReplaySession: Session status or None if not found
"""
pass
@abstractmethod
def list_replay_sessions(self) -> List[ReplaySession]:
"""
List all replay sessions.
Returns:
List[ReplaySession]: List of all sessions
"""
pass
@abstractmethod
def delete_replay_session(self, session_id: str) -> bool:
"""
Delete replay session.
Args:
session_id: Session ID to delete
Returns:
bool: True if deleted successfully, False otherwise
"""
pass
@abstractmethod
def set_replay_speed(self, session_id: str, speed: float) -> bool:
"""
Change replay speed for active session.
Args:
session_id: Session ID
speed: New playback speed multiplier
Returns:
bool: True if speed changed successfully, False otherwise
"""
pass
@abstractmethod
def seek_replay(self, session_id: str, timestamp: datetime) -> bool:
"""
Seek to specific timestamp in replay.
Args:
session_id: Session ID
timestamp: Target timestamp
Returns:
bool: True if seek successful, False otherwise
"""
pass
@abstractmethod
def add_data_callback(self, session_id: str, callback: Callable) -> bool:
"""
Add callback for replay data.
Args:
session_id: Session ID
callback: Function to call with replay data
Signature: callback(data: Union[OrderBookSnapshot, TradeEvent])
Returns:
bool: True if callback added successfully, False otherwise
"""
pass
@abstractmethod
def remove_data_callback(self, session_id: str, callback: Callable) -> bool:
"""
Remove data callback from replay session.
Args:
session_id: Session ID
callback: Callback function to remove
Returns:
bool: True if callback removed successfully, False otherwise
"""
pass
@abstractmethod
def add_status_callback(self, session_id: str, callback: Callable) -> bool:
"""
Add callback for replay status changes.
Args:
session_id: Session ID
callback: Function to call on status change
Signature: callback(session_id: str, status: ReplayStatus)
Returns:
bool: True if callback added successfully, False otherwise
"""
pass
@abstractmethod
async def get_available_data_range(self, symbol: str,
exchange: Optional[str] = None) -> Optional[Dict[str, datetime]]:
"""
Get available data time range for replay.
Args:
symbol: Trading symbol
exchange: Exchange name (None = all exchanges)
Returns:
Dict: {'start': datetime, 'end': datetime} or None if no data
"""
pass
@abstractmethod
def validate_replay_request(self, start_time: datetime, end_time: datetime,
symbols: Optional[List[str]] = None,
exchanges: Optional[List[str]] = None) -> List[str]:
"""
Validate replay request parameters.
Args:
start_time: Requested start time
end_time: Requested end time
symbols: Requested symbols
exchanges: Requested exchanges
Returns:
List[str]: List of validation errors (empty if valid)
"""
pass
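
# Illustrative usage sketch (not part of this interface file): how a caller might drive a
# concrete ReplayManager. `replay_manager` stands in for any concrete implementation; the
# speed, symbol, and sleep values are arbitrary example choices.
import asyncio
from datetime import timedelta

async def replay_last_hour(replay_manager: "ReplayManager") -> None:
    end = datetime.utcnow()
    session_id = replay_manager.create_replay_session(
        start_time=end - timedelta(hours=1),
        end_time=end,
        speed=10.0,                      # play back 10x faster than real time
        symbols=["BTCUSDT"],
    )
    replay_manager.add_data_callback(session_id,
                                     lambda data: print(type(data).__name__))
    await replay_manager.start_replay(session_id)
    await asyncio.sleep(5.0)
    print(replay_manager.get_replay_status(session_id))   # inspect progress
    await replay_manager.stop_replay(session_id)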


@ -0,0 +1,215 @@
"""
Interface for data storage and retrieval.
"""
from abc import ABC, abstractmethod
from datetime import datetime
from typing import List, Dict, Optional, Any
from ..models.core import OrderBookSnapshot, TradeEvent, HeatmapData, SystemMetrics
class StorageManager(ABC):
"""Manages data persistence and retrieval"""
@abstractmethod
async def store_orderbook(self, data: OrderBookSnapshot) -> bool:
"""
Store order book snapshot to database.
Args:
data: Order book snapshot to store
Returns:
bool: True if stored successfully, False otherwise
"""
pass
@abstractmethod
async def store_trade(self, data: TradeEvent) -> bool:
"""
Store trade event to database.
Args:
data: Trade event to store
Returns:
bool: True if stored successfully, False otherwise
"""
pass
@abstractmethod
async def store_heatmap(self, data: HeatmapData) -> bool:
"""
Store heatmap data to database.
Args:
data: Heatmap data to store
Returns:
bool: True if stored successfully, False otherwise
"""
pass
@abstractmethod
async def store_metrics(self, data: SystemMetrics) -> bool:
"""
Store system metrics to database.
Args:
data: System metrics to store
Returns:
bool: True if stored successfully, False otherwise
"""
pass
@abstractmethod
async def get_historical_orderbooks(self, symbol: str, exchange: str,
start: datetime, end: datetime,
limit: Optional[int] = None) -> List[OrderBookSnapshot]:
"""
Retrieve historical order book data.
Args:
symbol: Trading symbol
exchange: Exchange name
start: Start timestamp
end: End timestamp
limit: Maximum number of records to return
Returns:
List[OrderBookSnapshot]: Historical order book data
"""
pass
@abstractmethod
async def get_historical_trades(self, symbol: str, exchange: str,
start: datetime, end: datetime,
limit: Optional[int] = None) -> List[TradeEvent]:
"""
Retrieve historical trade data.
Args:
symbol: Trading symbol
exchange: Exchange name
start: Start timestamp
end: End timestamp
limit: Maximum number of records to return
Returns:
List[TradeEvent]: Historical trade data
"""
pass
@abstractmethod
async def get_latest_orderbook(self, symbol: str, exchange: str) -> Optional[OrderBookSnapshot]:
"""
Get latest order book snapshot.
Args:
symbol: Trading symbol
exchange: Exchange name
Returns:
OrderBookSnapshot: Latest order book or None if not found
"""
pass
@abstractmethod
async def get_latest_heatmap(self, symbol: str, bucket_size: float) -> Optional[HeatmapData]:
"""
Get latest heatmap data.
Args:
symbol: Trading symbol
bucket_size: Price bucket size
Returns:
HeatmapData: Latest heatmap or None if not found
"""
pass
@abstractmethod
async def get_ohlcv_data(self, symbol: str, exchange: str, timeframe: str,
start: datetime, end: datetime) -> List[Dict[str, Any]]:
"""
Get OHLCV candlestick data.
Args:
symbol: Trading symbol
exchange: Exchange name
timeframe: Timeframe (e.g., '1m', '5m', '1h')
start: Start timestamp
end: End timestamp
Returns:
List[Dict]: OHLCV data
"""
pass
@abstractmethod
async def batch_store_orderbooks(self, data: List[OrderBookSnapshot]) -> int:
"""
Store multiple order book snapshots in batch.
Args:
data: List of order book snapshots
Returns:
int: Number of records stored successfully
"""
pass
@abstractmethod
async def batch_store_trades(self, data: List[TradeEvent]) -> int:
"""
Store multiple trade events in batch.
Args:
data: List of trade events
Returns:
int: Number of records stored successfully
"""
pass
@abstractmethod
def setup_database_schema(self) -> None:
"""
Set up database schema and tables.
Should be idempotent - safe to call multiple times.
"""
pass
@abstractmethod
async def cleanup_old_data(self, retention_days: int) -> int:
"""
Clean up old data based on retention policy.
Args:
retention_days: Number of days to retain data
Returns:
int: Number of records deleted
"""
pass
@abstractmethod
async def get_storage_stats(self) -> Dict[str, Any]:
"""
Get storage statistics.
Returns:
Dict: Storage statistics (table sizes, record counts, etc.)
"""
pass
@abstractmethod
async def health_check(self) -> bool:
"""
Check storage system health.
Returns:
bool: True if healthy, False otherwise
"""
pass
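
# Illustrative usage sketch (not part of this interface file, reusing its imports):
# buffering snapshots in memory and flushing them through batch_store_orderbooks().
# `storage` stands in for any concrete StorageManager implementation.
async def flush_orderbook_buffer(storage: "StorageManager",
                                 buffer: List[OrderBookSnapshot]) -> None:
    if not buffer:
        return
    stored = await storage.batch_store_orderbooks(buffer)
    if stored < len(buffer):
        # Partial failure: log it; a real implementation would retry the unstored remainder.
        print(f"Only {stored}/{len(buffer)} snapshots stored")
    buffer.clear()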

31
COBY/models/__init__.py Normal file

@ -0,0 +1,31 @@
"""
Data models for the multi-exchange data aggregation system.
"""
from .core import (
OrderBookSnapshot,
PriceLevel,
TradeEvent,
PriceBuckets,
HeatmapData,
HeatmapPoint,
ConnectionStatus,
OrderBookMetrics,
ImbalanceMetrics,
ConsolidatedOrderBook,
ReplayStatus
)
__all__ = [
'OrderBookSnapshot',
'PriceLevel',
'TradeEvent',
'PriceBuckets',
'HeatmapData',
'HeatmapPoint',
'ConnectionStatus',
'OrderBookMetrics',
'ImbalanceMetrics',
'ConsolidatedOrderBook',
'ReplayStatus'
]

324
COBY/models/core.py Normal file

@ -0,0 +1,324 @@
"""
Core data models for the multi-exchange data aggregation system.
"""
from dataclasses import dataclass, field
from datetime import datetime
from typing import List, Dict, Optional, Any
from enum import Enum
class ConnectionStatus(Enum):
"""Exchange connection status"""
DISCONNECTED = "disconnected"
CONNECTING = "connecting"
CONNECTED = "connected"
RECONNECTING = "reconnecting"
ERROR = "error"
class ReplayStatus(Enum):
"""Replay session status"""
CREATED = "created"
RUNNING = "running"
PAUSED = "paused"
STOPPED = "stopped"
COMPLETED = "completed"
ERROR = "error"
@dataclass
class PriceLevel:
"""Individual price level in order book"""
price: float
size: float
count: Optional[int] = None
def __post_init__(self):
"""Validate price level data"""
if self.price <= 0:
raise ValueError("Price must be positive")
if self.size < 0:
raise ValueError("Size cannot be negative")
@dataclass
class OrderBookSnapshot:
"""Standardized order book snapshot"""
symbol: str
exchange: str
timestamp: datetime
bids: List[PriceLevel]
asks: List[PriceLevel]
sequence_id: Optional[int] = None
def __post_init__(self):
"""Validate and sort order book data"""
if not self.symbol:
raise ValueError("Symbol cannot be empty")
if not self.exchange:
raise ValueError("Exchange cannot be empty")
# Sort bids descending (highest price first)
self.bids.sort(key=lambda x: x.price, reverse=True)
# Sort asks ascending (lowest price first)
self.asks.sort(key=lambda x: x.price)
@property
def mid_price(self) -> Optional[float]:
"""Calculate mid price"""
if self.bids and self.asks:
return (self.bids[0].price + self.asks[0].price) / 2
return None
@property
def spread(self) -> Optional[float]:
"""Calculate bid-ask spread"""
if self.bids and self.asks:
return self.asks[0].price - self.bids[0].price
return None
@property
def bid_volume(self) -> float:
"""Total bid volume"""
return sum(level.size for level in self.bids)
@property
def ask_volume(self) -> float:
"""Total ask volume"""
return sum(level.size for level in self.asks)
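
# Example (illustrative) of the derived properties above:
#
#     book = OrderBookSnapshot(
#         symbol="BTCUSDT", exchange="binance", timestamp=datetime.utcnow(),
#         bids=[PriceLevel(50000.0, 1.5), PriceLevel(49999.0, 2.0)],
#         asks=[PriceLevel(50001.0, 1.0)],
#     )
#     book.mid_price   # (50000.0 + 50001.0) / 2 == 50000.5
#     book.spread      # 50001.0 - 50000.0 == 1.0
#     book.bid_volume  # 1.5 + 2.0 == 3.5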
@dataclass
class TradeEvent:
"""Standardized trade event"""
symbol: str
exchange: str
timestamp: datetime
price: float
size: float
side: str # 'buy' or 'sell'
trade_id: str
def __post_init__(self):
"""Validate trade event data"""
if not self.symbol:
raise ValueError("Symbol cannot be empty")
if not self.exchange:
raise ValueError("Exchange cannot be empty")
if self.price <= 0:
raise ValueError("Price must be positive")
if self.size <= 0:
raise ValueError("Size must be positive")
if self.side not in ['buy', 'sell']:
raise ValueError("Side must be 'buy' or 'sell'")
if not self.trade_id:
raise ValueError("Trade ID cannot be empty")
@dataclass
class PriceBuckets:
"""Aggregated price buckets for heatmap"""
symbol: str
timestamp: datetime
bucket_size: float
bid_buckets: Dict[float, float] = field(default_factory=dict) # price -> volume
ask_buckets: Dict[float, float] = field(default_factory=dict) # price -> volume
def __post_init__(self):
"""Validate price buckets"""
if self.bucket_size <= 0:
raise ValueError("Bucket size must be positive")
def get_bucket_price(self, price: float) -> float:
"""Get bucket price for a given price"""
return round(price / self.bucket_size) * self.bucket_size
def add_bid(self, price: float, volume: float):
"""Add bid volume to appropriate bucket"""
bucket_price = self.get_bucket_price(price)
self.bid_buckets[bucket_price] = self.bid_buckets.get(bucket_price, 0) + volume
def add_ask(self, price: float, volume: float):
"""Add ask volume to appropriate bucket"""
bucket_price = self.get_bucket_price(price)
self.ask_buckets[bucket_price] = self.ask_buckets.get(bucket_price, 0) + volume
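
# Example (illustrative): with bucket_size=10.0, get_bucket_price(50012.3) returns
# round(50012.3 / 10.0) * 10.0 == 50010.0, so add_bid(50012.3, 0.5) and
# add_bid(50013.9, 0.25) accumulate into the same bucket: bid_buckets[50010.0] == 0.75.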
@dataclass
class HeatmapPoint:
"""Individual heatmap data point"""
price: float
volume: float
intensity: float # 0.0 to 1.0
side: str # 'bid' or 'ask'
def __post_init__(self):
"""Validate heatmap point"""
if self.price <= 0:
raise ValueError("Price must be positive")
if self.volume < 0:
raise ValueError("Volume cannot be negative")
if not 0 <= self.intensity <= 1:
raise ValueError("Intensity must be between 0 and 1")
if self.side not in ['bid', 'ask']:
raise ValueError("Side must be 'bid' or 'ask'")
@dataclass
class HeatmapData:
"""Heatmap visualization data"""
symbol: str
timestamp: datetime
bucket_size: float
data: List[HeatmapPoint] = field(default_factory=list)
def __post_init__(self):
"""Validate heatmap data"""
if self.bucket_size <= 0:
raise ValueError("Bucket size must be positive")
def add_point(self, price: float, volume: float, side: str, max_volume: Optional[float] = None):
"""Add a heatmap point with calculated intensity"""
if max_volume is None:
max_volume = max((point.volume for point in self.data), default=volume)
intensity = min(volume / max_volume, 1.0) if max_volume > 0 else 0.0
point = HeatmapPoint(price=price, volume=volume, intensity=intensity, side=side)
self.data.append(point)
def get_bids(self) -> List[HeatmapPoint]:
"""Get bid points sorted by price descending"""
bids = [point for point in self.data if point.side == 'bid']
return sorted(bids, key=lambda x: x.price, reverse=True)
def get_asks(self) -> List[HeatmapPoint]:
"""Get ask points sorted by price ascending"""
asks = [point for point in self.data if point.side == 'ask']
return sorted(asks, key=lambda x: x.price)
@dataclass
class OrderBookMetrics:
"""Order book analysis metrics"""
symbol: str
exchange: str
timestamp: datetime
mid_price: float
spread: float
spread_percentage: float
bid_volume: float
ask_volume: float
volume_imbalance: float # (bid_volume - ask_volume) / (bid_volume + ask_volume)
depth_10: float # Volume within 10 price levels
depth_50: float # Volume within 50 price levels
def __post_init__(self):
"""Validate metrics"""
if self.mid_price <= 0:
raise ValueError("Mid price must be positive")
if self.spread < 0:
raise ValueError("Spread cannot be negative")
@dataclass
class ImbalanceMetrics:
"""Order book imbalance metrics"""
symbol: str
timestamp: datetime
volume_imbalance: float
price_imbalance: float
depth_imbalance: float
momentum_score: float # Derived from recent imbalance changes
def __post_init__(self):
"""Validate imbalance metrics"""
if not -1 <= self.volume_imbalance <= 1:
raise ValueError("Volume imbalance must be between -1 and 1")
@dataclass
class ConsolidatedOrderBook:
"""Consolidated order book from multiple exchanges"""
symbol: str
timestamp: datetime
exchanges: List[str]
bids: List[PriceLevel]
asks: List[PriceLevel]
weighted_mid_price: float
total_bid_volume: float
total_ask_volume: float
exchange_weights: Dict[str, float] = field(default_factory=dict)
def __post_init__(self):
"""Validate consolidated order book"""
if not self.exchanges:
raise ValueError("At least one exchange must be specified")
if self.weighted_mid_price <= 0:
raise ValueError("Weighted mid price must be positive")
@dataclass
class ExchangeStatus:
"""Exchange connection and health status"""
exchange: str
status: ConnectionStatus
last_message_time: Optional[datetime] = None
error_message: Optional[str] = None
connection_count: int = 0
uptime_percentage: float = 0.0
message_rate: float = 0.0 # Messages per second
def __post_init__(self):
"""Validate exchange status"""
if not self.exchange:
raise ValueError("Exchange name cannot be empty")
if not 0 <= self.uptime_percentage <= 100:
raise ValueError("Uptime percentage must be between 0 and 100")
@dataclass
class SystemMetrics:
"""System performance metrics"""
timestamp: datetime
cpu_usage: float
memory_usage: float
disk_usage: float
network_io: Dict[str, float] = field(default_factory=dict)
database_connections: int = 0
redis_connections: int = 0
active_websockets: int = 0
messages_per_second: float = 0.0
processing_latency: float = 0.0 # Milliseconds
def __post_init__(self):
"""Validate system metrics"""
if not 0 <= self.cpu_usage <= 100:
raise ValueError("CPU usage must be between 0 and 100")
if not 0 <= self.memory_usage <= 100:
raise ValueError("Memory usage must be between 0 and 100")
@dataclass
class ReplaySession:
"""Historical data replay session"""
session_id: str
start_time: datetime
end_time: datetime
speed: float # Playback speed multiplier
status: ReplayStatus
current_time: Optional[datetime] = None
progress: float = 0.0 # 0.0 to 1.0
symbols: List[str] = field(default_factory=list)
exchanges: List[str] = field(default_factory=list)
def __post_init__(self):
"""Validate replay session"""
if not self.session_id:
raise ValueError("Session ID cannot be empty")
if self.start_time >= self.end_time:
raise ValueError("Start time must be before end time")
if self.speed <= 0:
raise ValueError("Speed must be positive")
if not 0 <= self.progress <= 1:
raise ValueError("Progress must be between 0 and 1")

View File

@ -0,0 +1,15 @@
"""
Data processing and normalization components for the COBY system.
"""
from .data_processor import StandardDataProcessor
from .quality_checker import DataQualityChecker
from .anomaly_detector import AnomalyDetector
from .metrics_calculator import MetricsCalculator
__all__ = [
'StandardDataProcessor',
'DataQualityChecker',
'AnomalyDetector',
'MetricsCalculator'
]

View File

@ -0,0 +1,329 @@
"""
Anomaly detection for market data.
"""
import statistics
from typing import Dict, List, Union, Optional, Deque
from collections import deque
from datetime import datetime, timedelta
from ..models.core import OrderBookSnapshot, TradeEvent
from ..utils.logging import get_logger
from ..utils.timing import get_current_timestamp
logger = get_logger(__name__)
class AnomalyDetector:
"""
Detects anomalies in market data using statistical methods.
Detects:
- Price spikes and drops
- Volume anomalies
- Spread anomalies
- Frequency anomalies
"""
def __init__(self, window_size: int = 100, z_score_threshold: float = 3.0):
"""
Initialize anomaly detector.
Args:
window_size: Size of rolling window for statistics
z_score_threshold: Z-score threshold for anomaly detection
"""
self.window_size = window_size
self.z_score_threshold = z_score_threshold
# Rolling windows for statistics
self.price_windows: Dict[str, Deque[float]] = {}
self.volume_windows: Dict[str, Deque[float]] = {}
self.spread_windows: Dict[str, Deque[float]] = {}
self.timestamp_windows: Dict[str, Deque[datetime]] = {}
logger.info(f"Anomaly detector initialized with window_size={window_size}, threshold={z_score_threshold}")
def detect_orderbook_anomalies(self, orderbook: OrderBookSnapshot) -> List[str]:
"""
Detect anomalies in order book data.
Args:
orderbook: Order book snapshot to analyze
Returns:
List[str]: List of detected anomalies
"""
anomalies = []
key = f"{orderbook.symbol}_{orderbook.exchange}"
try:
# Price anomalies
if orderbook.mid_price:
price_anomalies = self._detect_price_anomalies(key, orderbook.mid_price)
anomalies.extend(price_anomalies)
# Volume anomalies
total_volume = orderbook.bid_volume + orderbook.ask_volume
volume_anomalies = self._detect_volume_anomalies(key, total_volume)
anomalies.extend(volume_anomalies)
# Spread anomalies
if orderbook.spread and orderbook.mid_price:
spread_pct = (orderbook.spread / orderbook.mid_price) * 100
spread_anomalies = self._detect_spread_anomalies(key, spread_pct)
anomalies.extend(spread_anomalies)
# Frequency anomalies
frequency_anomalies = self._detect_frequency_anomalies(key, orderbook.timestamp)
anomalies.extend(frequency_anomalies)
# Update windows
self._update_windows(key, orderbook)
except Exception as e:
logger.error(f"Error detecting order book anomalies: {e}")
anomalies.append(f"Anomaly detection error: {e}")
if anomalies:
logger.warning(f"Anomalies detected in {orderbook.symbol}@{orderbook.exchange}: {anomalies}")
return anomalies
def detect_trade_anomalies(self, trade: TradeEvent) -> List[str]:
"""
Detect anomalies in trade data.
Args:
trade: Trade event to analyze
Returns:
List[str]: List of detected anomalies
"""
anomalies = []
key = f"{trade.symbol}_{trade.exchange}_trade"
try:
# Price anomalies
price_anomalies = self._detect_price_anomalies(key, trade.price)
anomalies.extend(price_anomalies)
# Volume anomalies
volume_anomalies = self._detect_volume_anomalies(key, trade.size)
anomalies.extend(volume_anomalies)
# Update windows
self._update_trade_windows(key, trade)
except Exception as e:
logger.error(f"Error detecting trade anomalies: {e}")
anomalies.append(f"Anomaly detection error: {e}")
if anomalies:
logger.warning(f"Trade anomalies detected in {trade.symbol}@{trade.exchange}: {anomalies}")
return anomalies
def _detect_price_anomalies(self, key: str, price: float) -> List[str]:
"""Detect price anomalies using z-score"""
anomalies = []
if key not in self.price_windows:
self.price_windows[key] = deque(maxlen=self.window_size)
return anomalies
window = self.price_windows[key]
if len(window) < 10: # Need minimum data points
return anomalies
try:
mean_price = statistics.mean(window)
std_price = statistics.stdev(window)
if std_price > 0:
z_score = abs(price - mean_price) / std_price
if z_score > self.z_score_threshold:
direction = "spike" if price > mean_price else "drop"
anomalies.append(f"Price {direction}: {price:.6f} (z-score: {z_score:.2f})")
except statistics.StatisticsError:
pass # Not enough data or all values are the same
return anomalies
def _detect_volume_anomalies(self, key: str, volume: float) -> List[str]:
"""Detect volume anomalies using z-score"""
anomalies = []
volume_key = f"{key}_volume"
if volume_key not in self.volume_windows:
self.volume_windows[volume_key] = deque(maxlen=self.window_size)
return anomalies
window = self.volume_windows[volume_key]
if len(window) < 10:
return anomalies
try:
mean_volume = statistics.mean(window)
std_volume = statistics.stdev(window)
if std_volume > 0:
z_score = abs(volume - mean_volume) / std_volume
if z_score > self.z_score_threshold:
direction = "spike" if volume > mean_volume else "drop"
anomalies.append(f"Volume {direction}: {volume:.6f} (z-score: {z_score:.2f})")
except statistics.StatisticsError:
pass
return anomalies
def _detect_spread_anomalies(self, key: str, spread_pct: float) -> List[str]:
"""Detect spread anomalies using z-score"""
anomalies = []
spread_key = f"{key}_spread"
if spread_key not in self.spread_windows:
self.spread_windows[spread_key] = deque(maxlen=self.window_size)
return anomalies
window = self.spread_windows[spread_key]
if len(window) < 10:
return anomalies
try:
mean_spread = statistics.mean(window)
std_spread = statistics.stdev(window)
if std_spread > 0:
z_score = abs(spread_pct - mean_spread) / std_spread
if z_score > self.z_score_threshold:
direction = "widening" if spread_pct > mean_spread else "tightening"
anomalies.append(f"Spread {direction}: {spread_pct:.4f}% (z-score: {z_score:.2f})")
except statistics.StatisticsError:
pass
return anomalies
def _detect_frequency_anomalies(self, key: str, timestamp: datetime) -> List[str]:
"""Detect frequency anomalies in data updates"""
anomalies = []
timestamp_key = f"{key}_timestamp"
if timestamp_key not in self.timestamp_windows:
self.timestamp_windows[timestamp_key] = deque(maxlen=self.window_size)
return anomalies
window = self.timestamp_windows[timestamp_key]
if len(window) < 5:
return anomalies
try:
# Calculate intervals between updates
intervals = []
for i in range(1, len(window)):
interval = (window[i] - window[i-1]).total_seconds()
intervals.append(interval)
if len(intervals) >= 5:
mean_interval = statistics.mean(intervals)
std_interval = statistics.stdev(intervals)
# Check current interval
current_interval = (timestamp - window[-1]).total_seconds()
if std_interval > 0:
z_score = abs(current_interval - mean_interval) / std_interval
if z_score > self.z_score_threshold:
if current_interval > mean_interval:
anomalies.append(f"Update delay: {current_interval:.1f}s (expected: {mean_interval:.1f}s)")
else:
anomalies.append(f"Update burst: {current_interval:.1f}s (expected: {mean_interval:.1f}s)")
except (statistics.StatisticsError, IndexError):
pass
return anomalies
def _update_windows(self, key: str, orderbook: OrderBookSnapshot) -> None:
"""Update rolling windows with new data"""
# Update price window
if orderbook.mid_price:
if key not in self.price_windows:
self.price_windows[key] = deque(maxlen=self.window_size)
self.price_windows[key].append(orderbook.mid_price)
# Update volume window
total_volume = orderbook.bid_volume + orderbook.ask_volume
volume_key = f"{key}_volume"
if volume_key not in self.volume_windows:
self.volume_windows[volume_key] = deque(maxlen=self.window_size)
self.volume_windows[volume_key].append(total_volume)
# Update spread window
if orderbook.spread and orderbook.mid_price:
spread_pct = (orderbook.spread / orderbook.mid_price) * 100
spread_key = f"{key}_spread"
if spread_key not in self.spread_windows:
self.spread_windows[spread_key] = deque(maxlen=self.window_size)
self.spread_windows[spread_key].append(spread_pct)
# Update timestamp window
timestamp_key = f"{key}_timestamp"
if timestamp_key not in self.timestamp_windows:
self.timestamp_windows[timestamp_key] = deque(maxlen=self.window_size)
self.timestamp_windows[timestamp_key].append(orderbook.timestamp)
def _update_trade_windows(self, key: str, trade: TradeEvent) -> None:
"""Update rolling windows with trade data"""
# Update price window
if key not in self.price_windows:
self.price_windows[key] = deque(maxlen=self.window_size)
self.price_windows[key].append(trade.price)
# Update volume window
volume_key = f"{key}_volume"
if volume_key not in self.volume_windows:
self.volume_windows[volume_key] = deque(maxlen=self.window_size)
self.volume_windows[volume_key].append(trade.size)
def get_statistics(self) -> Dict[str, Dict[str, float]]:
"""Get current statistics for all tracked symbols"""
stats = {}
for key, window in self.price_windows.items():
if len(window) >= 2:
try:
stats[key] = {
'price_mean': statistics.mean(window),
'price_std': statistics.stdev(window),
'price_min': min(window),
'price_max': max(window),
'data_points': len(window)
}
except statistics.StatisticsError:
stats[key] = {'error': 'insufficient_data'}
return stats
def reset_windows(self, key: Optional[str] = None) -> None:
"""Reset rolling windows for a specific key or all keys"""
if key:
# Reset specific key
self.price_windows.pop(key, None)
self.volume_windows.pop(f"{key}_volume", None)
self.spread_windows.pop(f"{key}_spread", None)
self.timestamp_windows.pop(f"{key}_timestamp", None)
else:
# Reset all windows
self.price_windows.clear()
self.volume_windows.clear()
self.spread_windows.clear()
self.timestamp_windows.clear()
logger.info(f"Reset anomaly detection windows for {key or 'all keys'}")

View File

@ -0,0 +1,378 @@
"""
Main data processor implementation.
"""
from typing import Dict, Union, List, Optional, Any
from ..interfaces.data_processor import DataProcessor
from ..models.core import OrderBookSnapshot, TradeEvent, OrderBookMetrics
from ..utils.logging import get_logger, set_correlation_id
from ..utils.exceptions import ValidationError, ProcessingError
from ..utils.timing import get_current_timestamp
from .quality_checker import DataQualityChecker
from .anomaly_detector import AnomalyDetector
from .metrics_calculator import MetricsCalculator
logger = get_logger(__name__)
class StandardDataProcessor(DataProcessor):
"""
Standard implementation of data processor interface.
Provides:
- Data normalization and validation
- Quality checking
- Anomaly detection
- Metrics calculation
- Data enrichment
"""
def __init__(self):
"""Initialize data processor with components"""
self.quality_checker = DataQualityChecker()
self.anomaly_detector = AnomalyDetector()
self.metrics_calculator = MetricsCalculator()
# Processing statistics
self.processed_orderbooks = 0
self.processed_trades = 0
self.quality_failures = 0
self.anomalies_detected = 0
logger.info("Standard data processor initialized")
def normalize_orderbook(self, raw_data: Dict, exchange: str) -> OrderBookSnapshot:
"""
Normalize raw order book data to standard format.
Args:
raw_data: Raw order book data from exchange
exchange: Exchange name
Returns:
OrderBookSnapshot: Normalized order book data
"""
try:
set_correlation_id()
# This is a generic implementation - specific exchanges would override
# For now, assume data is already in correct format
if isinstance(raw_data, OrderBookSnapshot):
return raw_data
# If raw_data is a dict, try to construct OrderBookSnapshot
# This would be customized per exchange
raise NotImplementedError(
"normalize_orderbook should be implemented by exchange-specific processors"
)
except Exception as e:
logger.error(f"Error normalizing order book data: {e}")
raise ProcessingError(f"Normalization failed: {e}", "NORMALIZE_ERROR")
def normalize_trade(self, raw_data: Dict, exchange: str) -> TradeEvent:
"""
Normalize raw trade data to standard format.
Args:
raw_data: Raw trade data from exchange
exchange: Exchange name
Returns:
TradeEvent: Normalized trade data
"""
try:
set_correlation_id()
# This is a generic implementation - specific exchanges would override
if isinstance(raw_data, TradeEvent):
return raw_data
# If raw_data is a dict, try to construct TradeEvent
# This would be customized per exchange
raise NotImplementedError(
"normalize_trade should be implemented by exchange-specific processors"
)
except Exception as e:
logger.error(f"Error normalizing trade data: {e}")
raise ProcessingError(f"Normalization failed: {e}", "NORMALIZE_ERROR")
def validate_data(self, data: Union[OrderBookSnapshot, TradeEvent]) -> bool:
"""
Validate normalized data for quality and consistency.
Args:
data: Normalized data to validate
Returns:
bool: True if data is valid, False otherwise
"""
try:
set_correlation_id()
if isinstance(data, OrderBookSnapshot):
quality_score, issues = self.quality_checker.check_orderbook_quality(data)
self.processed_orderbooks += 1
if quality_score < 0.5: # Threshold for acceptable quality
self.quality_failures += 1
logger.warning(f"Low quality order book data: score={quality_score:.2f}, issues={issues}")
return False
return True
elif isinstance(data, TradeEvent):
quality_score, issues = self.quality_checker.check_trade_quality(data)
self.processed_trades += 1
if quality_score < 0.5:
self.quality_failures += 1
logger.warning(f"Low quality trade data: score={quality_score:.2f}, issues={issues}")
return False
return True
else:
logger.error(f"Unknown data type for validation: {type(data)}")
return False
except Exception as e:
logger.error(f"Error validating data: {e}")
return False
def calculate_metrics(self, orderbook: OrderBookSnapshot) -> OrderBookMetrics:
"""
Calculate metrics from order book data.
Args:
orderbook: Order book snapshot
Returns:
OrderBookMetrics: Calculated metrics
"""
try:
set_correlation_id()
return self.metrics_calculator.calculate_orderbook_metrics(orderbook)
except Exception as e:
logger.error(f"Error calculating metrics: {e}")
raise ProcessingError(f"Metrics calculation failed: {e}", "METRICS_ERROR")
def detect_anomalies(self, data: Union[OrderBookSnapshot, TradeEvent]) -> List[str]:
"""
Detect anomalies in the data.
Args:
data: Data to analyze for anomalies
Returns:
List[str]: List of detected anomaly descriptions
"""
try:
set_correlation_id()
if isinstance(data, OrderBookSnapshot):
anomalies = self.anomaly_detector.detect_orderbook_anomalies(data)
elif isinstance(data, TradeEvent):
anomalies = self.anomaly_detector.detect_trade_anomalies(data)
else:
logger.error(f"Unknown data type for anomaly detection: {type(data)}")
return ["Unknown data type"]
if anomalies:
self.anomalies_detected += len(anomalies)
return anomalies
except Exception as e:
logger.error(f"Error detecting anomalies: {e}")
return [f"Anomaly detection error: {e}"]
def filter_data(self, data: Union[OrderBookSnapshot, TradeEvent], criteria: Dict) -> bool:
"""
Filter data based on criteria.
Args:
data: Data to filter
criteria: Filtering criteria
Returns:
bool: True if data passes filter, False otherwise
"""
try:
set_correlation_id()
# Symbol filter
if 'symbols' in criteria:
allowed_symbols = criteria['symbols']
if data.symbol not in allowed_symbols:
return False
# Exchange filter
if 'exchanges' in criteria:
allowed_exchanges = criteria['exchanges']
if data.exchange not in allowed_exchanges:
return False
# Quality filter
if 'min_quality' in criteria:
min_quality = criteria['min_quality']
if isinstance(data, OrderBookSnapshot):
quality_score, _ = self.quality_checker.check_orderbook_quality(data)
elif isinstance(data, TradeEvent):
quality_score, _ = self.quality_checker.check_trade_quality(data)
else:
quality_score = 0.0
if quality_score < min_quality:
return False
# Price range filter
if 'price_range' in criteria:
price_range = criteria['price_range']
min_price, max_price = price_range
if isinstance(data, OrderBookSnapshot):
price = data.mid_price
elif isinstance(data, TradeEvent):
price = data.price
else:
return False
if price and (price < min_price or price > max_price):
return False
# Volume filter for trades
if 'min_volume' in criteria and isinstance(data, TradeEvent):
min_volume = criteria['min_volume']
if data.size < min_volume:
return False
return True
except Exception as e:
logger.error(f"Error filtering data: {e}")
return False
def enrich_data(self, data: Union[OrderBookSnapshot, TradeEvent]) -> Dict:
"""
Enrich data with additional metadata.
Args:
data: Data to enrich
Returns:
Dict: Enriched data with metadata
"""
try:
set_correlation_id()
enriched = {
'original_data': data,
'processing_timestamp': get_current_timestamp(),
'processor_version': '1.0.0'
}
# Add quality metrics
if isinstance(data, OrderBookSnapshot):
quality_score, quality_issues = self.quality_checker.check_orderbook_quality(data)
enriched['quality_score'] = quality_score
enriched['quality_issues'] = quality_issues
# Add calculated metrics
try:
metrics = self.calculate_metrics(data)
enriched['metrics'] = {
'mid_price': metrics.mid_price,
'spread': metrics.spread,
'spread_percentage': metrics.spread_percentage,
'volume_imbalance': metrics.volume_imbalance,
'depth_10': metrics.depth_10,
'depth_50': metrics.depth_50
}
except Exception as e:
enriched['metrics_error'] = str(e)
# Add liquidity score
try:
liquidity_score = self.metrics_calculator.calculate_liquidity_score(data)
enriched['liquidity_score'] = liquidity_score
except Exception as e:
enriched['liquidity_error'] = str(e)
elif isinstance(data, TradeEvent):
quality_score, quality_issues = self.quality_checker.check_trade_quality(data)
enriched['quality_score'] = quality_score
enriched['quality_issues'] = quality_issues
# Add trade-specific enrichments
enriched['trade_value'] = data.price * data.size
enriched['side_numeric'] = 1 if data.side == 'buy' else -1
# Add anomaly detection results
anomalies = self.detect_anomalies(data)
enriched['anomalies'] = anomalies
enriched['anomaly_count'] = len(anomalies)
return enriched
except Exception as e:
logger.error(f"Error enriching data: {e}")
return {
'original_data': data,
'enrichment_error': str(e)
}
def get_data_quality_score(self, data: Union[OrderBookSnapshot, TradeEvent]) -> float:
"""
Calculate data quality score.
Args:
data: Data to score
Returns:
float: Quality score between 0.0 and 1.0
"""
try:
set_correlation_id()
if isinstance(data, OrderBookSnapshot):
quality_score, _ = self.quality_checker.check_orderbook_quality(data)
elif isinstance(data, TradeEvent):
quality_score, _ = self.quality_checker.check_trade_quality(data)
else:
logger.error(f"Unknown data type for quality scoring: {type(data)}")
return 0.0
return quality_score
except Exception as e:
logger.error(f"Error calculating quality score: {e}")
return 0.0
def get_processing_stats(self) -> Dict[str, Any]:
"""Get processing statistics"""
return {
'processed_orderbooks': self.processed_orderbooks,
'processed_trades': self.processed_trades,
'quality_failures': self.quality_failures,
'anomalies_detected': self.anomalies_detected,
'quality_failure_rate': (
self.quality_failures / max(1, self.processed_orderbooks + self.processed_trades)
),
'anomaly_rate': (
self.anomalies_detected / max(1, self.processed_orderbooks + self.processed_trades)
),
'quality_checker_summary': self.quality_checker.get_quality_summary(),
'anomaly_detector_stats': self.anomaly_detector.get_statistics()
}
def reset_stats(self) -> None:
"""Reset processing statistics"""
self.processed_orderbooks = 0
self.processed_trades = 0
self.quality_failures = 0
self.anomalies_detected = 0
logger.info("Processing statistics reset")

View File

@ -0,0 +1,275 @@
"""
Metrics calculation for order book analysis.
"""
from typing import Dict, List, Optional
from ..models.core import OrderBookSnapshot, OrderBookMetrics, ImbalanceMetrics
from ..utils.logging import get_logger
logger = get_logger(__name__)
class MetricsCalculator:
"""
Calculates various metrics from order book data.
Metrics include:
- Basic metrics (mid price, spread, volumes)
- Imbalance metrics
- Depth metrics
- Liquidity metrics
"""
def __init__(self):
"""Initialize metrics calculator"""
logger.info("Metrics calculator initialized")
def calculate_orderbook_metrics(self, orderbook: OrderBookSnapshot) -> OrderBookMetrics:
"""
Calculate comprehensive order book metrics.
Args:
orderbook: Order book snapshot
Returns:
OrderBookMetrics: Calculated metrics
"""
try:
# Basic calculations
mid_price = self._calculate_mid_price(orderbook)
spread = self._calculate_spread(orderbook)
spread_percentage = (spread / mid_price * 100) if mid_price > 0 else 0.0
# Volume calculations
bid_volume = sum(level.size for level in orderbook.bids)
ask_volume = sum(level.size for level in orderbook.asks)
# Imbalance calculation
total_volume = bid_volume + ask_volume
volume_imbalance = ((bid_volume - ask_volume) / total_volume) if total_volume > 0 else 0.0
# Depth calculations
depth_10 = self._calculate_depth(orderbook, 10)
depth_50 = self._calculate_depth(orderbook, 50)
return OrderBookMetrics(
symbol=orderbook.symbol,
exchange=orderbook.exchange,
timestamp=orderbook.timestamp,
mid_price=mid_price,
spread=spread,
spread_percentage=spread_percentage,
bid_volume=bid_volume,
ask_volume=ask_volume,
volume_imbalance=volume_imbalance,
depth_10=depth_10,
depth_50=depth_50
)
except Exception as e:
logger.error(f"Error calculating order book metrics: {e}")
raise
def calculate_imbalance_metrics(self, orderbook: OrderBookSnapshot) -> ImbalanceMetrics:
"""
Calculate order book imbalance metrics.
Args:
orderbook: Order book snapshot
Returns:
ImbalanceMetrics: Calculated imbalance metrics
"""
try:
# Volume imbalance
bid_volume = sum(level.size for level in orderbook.bids)
ask_volume = sum(level.size for level in orderbook.asks)
total_volume = bid_volume + ask_volume
volume_imbalance = ((bid_volume - ask_volume) / total_volume) if total_volume > 0 else 0.0
# Price imbalance (weighted by volume)
price_imbalance = self._calculate_price_imbalance(orderbook)
# Depth imbalance
depth_imbalance = self._calculate_depth_imbalance(orderbook)
# Momentum score (simplified - would need historical data for full implementation)
momentum_score = volume_imbalance * 0.5 + price_imbalance * 0.3 + depth_imbalance * 0.2
return ImbalanceMetrics(
symbol=orderbook.symbol,
timestamp=orderbook.timestamp,
volume_imbalance=volume_imbalance,
price_imbalance=price_imbalance,
depth_imbalance=depth_imbalance,
momentum_score=momentum_score
)
except Exception as e:
logger.error(f"Error calculating imbalance metrics: {e}")
raise
def _calculate_mid_price(self, orderbook: OrderBookSnapshot) -> float:
"""Calculate mid price"""
if not orderbook.bids or not orderbook.asks:
return 0.0
best_bid = orderbook.bids[0].price
best_ask = orderbook.asks[0].price
return (best_bid + best_ask) / 2.0
def _calculate_spread(self, orderbook: OrderBookSnapshot) -> float:
"""Calculate bid-ask spread"""
if not orderbook.bids or not orderbook.asks:
return 0.0
best_bid = orderbook.bids[0].price
best_ask = orderbook.asks[0].price
return best_ask - best_bid
def _calculate_depth(self, orderbook: OrderBookSnapshot, levels: int) -> float:
"""Calculate market depth for specified number of levels"""
bid_depth = sum(
level.size for level in orderbook.bids[:levels]
)
ask_depth = sum(
level.size for level in orderbook.asks[:levels]
)
return bid_depth + ask_depth
def _calculate_price_imbalance(self, orderbook: OrderBookSnapshot) -> float:
"""Calculate price-weighted imbalance"""
if not orderbook.bids or not orderbook.asks:
return 0.0
# Calculate volume-weighted average prices for top levels
bid_vwap = self._calculate_vwap(orderbook.bids[:5])
ask_vwap = self._calculate_vwap(orderbook.asks[:5])
if bid_vwap == 0 or ask_vwap == 0:
return 0.0
mid_price = (bid_vwap + ask_vwap) / 2.0
# Normalize imbalance
price_imbalance = (bid_vwap - ask_vwap) / mid_price if mid_price > 0 else 0.0
return max(-1.0, min(1.0, price_imbalance))
def _calculate_depth_imbalance(self, orderbook: OrderBookSnapshot) -> float:
"""Calculate depth imbalance across multiple levels"""
levels_to_check = [5, 10, 20]
imbalances = []
for levels in levels_to_check:
bid_depth = sum(level.size for level in orderbook.bids[:levels])
ask_depth = sum(level.size for level in orderbook.asks[:levels])
total_depth = bid_depth + ask_depth
if total_depth > 0:
imbalance = (bid_depth - ask_depth) / total_depth
imbalances.append(imbalance)
# Return weighted average of imbalances
if imbalances:
return sum(imbalances) / len(imbalances)
return 0.0
def _calculate_vwap(self, levels: List) -> float:
"""Calculate volume-weighted average price for price levels"""
if not levels:
return 0.0
total_volume = sum(level.size for level in levels)
if total_volume == 0:
return 0.0
weighted_sum = sum(level.price * level.size for level in levels)
return weighted_sum / total_volume
def calculate_liquidity_score(self, orderbook: OrderBookSnapshot) -> float:
"""
Calculate liquidity score based on depth and spread.
Args:
orderbook: Order book snapshot
Returns:
float: Liquidity score (0.0 to 1.0)
"""
try:
if not orderbook.bids or not orderbook.asks:
return 0.0
# Spread component (lower spread = higher liquidity)
spread = self._calculate_spread(orderbook)
mid_price = self._calculate_mid_price(orderbook)
if mid_price == 0:
return 0.0
spread_pct = (spread / mid_price) * 100
spread_score = max(0.0, 1.0 - (spread_pct / 5.0)) # Normalize to 5% max spread
# Depth component (higher depth = higher liquidity)
total_depth = self._calculate_depth(orderbook, 10)
depth_score = min(1.0, total_depth / 100.0) # Normalize to 100 units max depth
# Volume balance component (more balanced = higher liquidity)
bid_volume = sum(level.size for level in orderbook.bids[:10])
ask_volume = sum(level.size for level in orderbook.asks[:10])
total_volume = bid_volume + ask_volume
if total_volume > 0:
imbalance = abs(bid_volume - ask_volume) / total_volume
balance_score = 1.0 - imbalance
else:
balance_score = 0.0
# Weighted combination
liquidity_score = (spread_score * 0.4 + depth_score * 0.4 + balance_score * 0.2)
return max(0.0, min(1.0, liquidity_score))
except Exception as e:
logger.error(f"Error calculating liquidity score: {e}")
return 0.0
def get_market_summary(self, orderbook: OrderBookSnapshot) -> Dict[str, float]:
"""
Get comprehensive market summary.
Args:
orderbook: Order book snapshot
Returns:
Dict[str, float]: Market summary metrics
"""
try:
metrics = self.calculate_orderbook_metrics(orderbook)
imbalance = self.calculate_imbalance_metrics(orderbook)
liquidity = self.calculate_liquidity_score(orderbook)
return {
'mid_price': metrics.mid_price,
'spread': metrics.spread,
'spread_percentage': metrics.spread_percentage,
'bid_volume': metrics.bid_volume,
'ask_volume': metrics.ask_volume,
'volume_imbalance': metrics.volume_imbalance,
'depth_10': metrics.depth_10,
'depth_50': metrics.depth_50,
'price_imbalance': imbalance.price_imbalance,
'depth_imbalance': imbalance.depth_imbalance,
'momentum_score': imbalance.momentum_score,
'liquidity_score': liquidity
}
except Exception as e:
logger.error(f"Error generating market summary: {e}")
return {}
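The metric formulas above are simple enough to check by hand: with 60 units of bid volume and 40 units of ask volume, volume_imbalance = (60 - 40) / (60 + 40) = 0.2, and the liquidity score blends spread, depth and balance components at 40/40/20 weights. A small numeric sketch with made-up inputs:

# Worked example of the weighting used in calculate_liquidity_score (illustrative numbers).
bid_volume, ask_volume = 60.0, 40.0
mid_price, spread = 50_000.0, 25.0
total_depth = 80.0                      # summed size over the top 10 bid+ask levels

volume_imbalance = (bid_volume - ask_volume) / (bid_volume + ask_volume)        # 0.2

spread_pct = spread / mid_price * 100                                           # 0.05 %
spread_score = max(0.0, 1.0 - spread_pct / 5.0)                                 # 0.99
depth_score = min(1.0, total_depth / 100.0)                                     # 0.8
balance_score = 1.0 - abs(bid_volume - ask_volume) / (bid_volume + ask_volume)  # 0.8

liquidity_score = 0.4 * spread_score + 0.4 * depth_score + 0.2 * balance_score
print(round(volume_imbalance, 3), round(liquidity_score, 3))                    # 0.2 0.876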

View File

@ -0,0 +1,288 @@
"""
Data quality checking and validation for market data.
"""
from typing import Dict, List, Union, Optional, Tuple
from datetime import datetime, timezone
from ..models.core import OrderBookSnapshot, TradeEvent
from ..utils.logging import get_logger
from ..utils.validation import validate_price, validate_volume, validate_symbol
from ..utils.timing import get_current_timestamp
logger = get_logger(__name__)
class DataQualityChecker:
"""
Comprehensive data quality checker for market data.
Validates:
- Data structure integrity
- Price and volume ranges
- Timestamp consistency
- Cross-validation between related data points
"""
def __init__(self):
"""Initialize quality checker with default thresholds"""
# Quality thresholds
self.max_spread_percentage = 10.0 # Maximum spread as % of mid price
self.max_price_change_percentage = 50.0 # Maximum price change between updates
self.min_volume_threshold = 0.000001 # Minimum meaningful volume
self.max_timestamp_drift = 300 # Maximum seconds drift from current time
# Price history for validation
self.price_history: Dict[str, float] = {} # "symbol_exchange" -> last mid price
logger.info("Data quality checker initialized")
def check_orderbook_quality(self, orderbook: OrderBookSnapshot) -> Tuple[float, List[str]]:
"""
Check order book data quality.
Args:
orderbook: Order book snapshot to validate
Returns:
Tuple[float, List[str]]: Quality score (0.0-1.0) and list of issues
"""
issues = []
quality_score = 1.0
try:
# Basic structure validation
structure_issues = self._check_orderbook_structure(orderbook)
issues.extend(structure_issues)
quality_score -= len(structure_issues) * 0.1
# Price validation
price_issues = self._check_orderbook_prices(orderbook)
issues.extend(price_issues)
quality_score -= len(price_issues) * 0.15
# Volume validation
volume_issues = self._check_orderbook_volumes(orderbook)
issues.extend(volume_issues)
quality_score -= len(volume_issues) * 0.1
# Spread validation
spread_issues = self._check_orderbook_spread(orderbook)
issues.extend(spread_issues)
quality_score -= len(spread_issues) * 0.2
# Timestamp validation
timestamp_issues = self._check_timestamp(orderbook.timestamp)
issues.extend(timestamp_issues)
quality_score -= len(timestamp_issues) * 0.1
# Cross-validation with history
history_issues = self._check_price_history(orderbook)
issues.extend(history_issues)
quality_score -= len(history_issues) * 0.15
# Update price history
self._update_price_history(orderbook)
except Exception as e:
logger.error(f"Error checking order book quality: {e}")
issues.append(f"Quality check error: {e}")
quality_score = 0.0
# Ensure score is within bounds
quality_score = max(0.0, min(1.0, quality_score))
if issues:
logger.debug(f"Order book quality issues for {orderbook.symbol}@{orderbook.exchange}: {issues}")
return quality_score, issues
def check_trade_quality(self, trade: TradeEvent) -> Tuple[float, List[str]]:
"""
Check trade data quality.
Args:
trade: Trade event to validate
Returns:
Tuple[float, List[str]]: Quality score (0.0-1.0) and list of issues
"""
issues = []
quality_score = 1.0
try:
# Basic structure validation
if not validate_symbol(trade.symbol):
issues.append("Invalid symbol format")
if not trade.exchange:
issues.append("Missing exchange")
if not trade.trade_id:
issues.append("Missing trade ID")
# Price validation
if not validate_price(trade.price):
issues.append(f"Invalid price: {trade.price}")
# Volume validation
if not validate_volume(trade.size):
issues.append(f"Invalid size: {trade.size}")
if trade.size < self.min_volume_threshold:
issues.append(f"Size below threshold: {trade.size}")
# Side validation
if trade.side not in ['buy', 'sell']:
issues.append(f"Invalid side: {trade.side}")
# Timestamp validation
timestamp_issues = self._check_timestamp(trade.timestamp)
issues.extend(timestamp_issues)
# Calculate quality score
quality_score -= len(issues) * 0.2
except Exception as e:
logger.error(f"Error checking trade quality: {e}")
issues.append(f"Quality check error: {e}")
quality_score = 0.0
# Ensure score is within bounds
quality_score = max(0.0, min(1.0, quality_score))
if issues:
logger.debug(f"Trade quality issues for {trade.symbol}@{trade.exchange}: {issues}")
return quality_score, issues
def _check_orderbook_structure(self, orderbook: OrderBookSnapshot) -> List[str]:
"""Check basic order book structure"""
issues = []
if not validate_symbol(orderbook.symbol):
issues.append("Invalid symbol format")
if not orderbook.exchange:
issues.append("Missing exchange")
if not orderbook.bids:
issues.append("No bid levels")
if not orderbook.asks:
issues.append("No ask levels")
return issues
def _check_orderbook_prices(self, orderbook: OrderBookSnapshot) -> List[str]:
"""Check order book price validity"""
issues = []
# Check bid prices (should be descending)
for i, bid in enumerate(orderbook.bids):
if not validate_price(bid.price):
issues.append(f"Invalid bid price at level {i}: {bid.price}")
if i > 0 and bid.price >= orderbook.bids[i-1].price:
issues.append(f"Bid prices not descending at level {i}")
# Check ask prices (should be ascending)
for i, ask in enumerate(orderbook.asks):
if not validate_price(ask.price):
issues.append(f"Invalid ask price at level {i}: {ask.price}")
if i > 0 and ask.price <= orderbook.asks[i-1].price:
issues.append(f"Ask prices not ascending at level {i}")
# Check bid-ask ordering
if orderbook.bids and orderbook.asks:
if orderbook.bids[0].price >= orderbook.asks[0].price:
issues.append("Best bid >= best ask (crossed book)")
return issues
def _check_orderbook_volumes(self, orderbook: OrderBookSnapshot) -> List[str]:
"""Check order book volume validity"""
issues = []
# Check bid volumes
for i, bid in enumerate(orderbook.bids):
if not validate_volume(bid.size):
issues.append(f"Invalid bid volume at level {i}: {bid.size}")
if bid.size < self.min_volume_threshold:
issues.append(f"Bid volume below threshold at level {i}: {bid.size}")
# Check ask volumes
for i, ask in enumerate(orderbook.asks):
if not validate_volume(ask.size):
issues.append(f"Invalid ask volume at level {i}: {ask.size}")
if ask.size < self.min_volume_threshold:
issues.append(f"Ask volume below threshold at level {i}: {ask.size}")
return issues
def _check_orderbook_spread(self, orderbook: OrderBookSnapshot) -> List[str]:
"""Check order book spread validity"""
issues = []
if orderbook.mid_price and orderbook.spread:
spread_percentage = (orderbook.spread / orderbook.mid_price) * 100
if spread_percentage > self.max_spread_percentage:
issues.append(f"Spread too wide: {spread_percentage:.2f}%")
if spread_percentage < 0:
issues.append(f"Negative spread: {spread_percentage:.2f}%")
return issues
def _check_timestamp(self, timestamp: datetime) -> List[str]:
"""Check timestamp validity"""
issues = []
if not timestamp:
issues.append("Missing timestamp")
return issues
# Check if timestamp is timezone-aware
if timestamp.tzinfo is None:
issues.append("Timestamp missing timezone info")
# The drift check below assumes timezone-aware timestamps; skip it for naive ones
return issues
# Check timestamp drift
current_time = get_current_timestamp()
time_diff = abs((timestamp - current_time).total_seconds())
if time_diff > self.max_timestamp_drift:
issues.append(f"Timestamp drift too large: {time_diff:.1f}s")
return issues
def _check_price_history(self, orderbook: OrderBookSnapshot) -> List[str]:
"""Check price consistency with history"""
issues = []
key = f"{orderbook.symbol}_{orderbook.exchange}"
if key in self.price_history and orderbook.mid_price:
last_price = self.price_history[key]
price_change = abs(orderbook.mid_price - last_price) / last_price * 100
if price_change > self.max_price_change_percentage:
issues.append(f"Large price change: {price_change:.2f}%")
return issues
def _update_price_history(self, orderbook: OrderBookSnapshot) -> None:
"""Update price history for future validation"""
if orderbook.mid_price:
key = f"{orderbook.symbol}_{orderbook.exchange}"
self.price_history[key] = orderbook.mid_price
def get_quality_summary(self) -> Dict[str, float]:
"""Get summary of quality checks performed"""
return {
'symbols_tracked': len(self.price_history),
'max_spread_percentage': self.max_spread_percentage,
'max_price_change_percentage': self.max_price_change_percentage,
'min_volume_threshold': self.min_volume_threshold,
'max_timestamp_drift': self.max_timestamp_drift
}
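check_orderbook_quality starts each snapshot at a score of 1.0 and subtracts a fixed penalty per issue (0.10 for structure, volume and timestamp issues, 0.15 for price and history issues, 0.20 for spread issues), while check_trade_quality deducts a flat 0.20 per issue; both clamp the result to [0.0, 1.0]. A hedged sketch of acting on that score (import paths and the sample book are assumptions, and whether "BTCUSDT" passes validate_symbol depends on validation utilities not shown here):

from datetime import datetime, timezone

# Hypothetical import paths; adjust to the actual package layout.
from COBY.processing.quality_checker import DataQualityChecker
from COBY.models.core import OrderBookSnapshot, PriceLevel

checker = DataQualityChecker()
book = OrderBookSnapshot(
    symbol="BTCUSDT",
    exchange="binance",
    timestamp=datetime.now(timezone.utc),
    bids=[PriceLevel(price=50000.0, size=1.0)],
    asks=[PriceLevel(price=50000.5, size=1.0)],
)

score, issues = checker.check_orderbook_quality(book)
if score < 0.5:                 # same acceptance threshold used in StandardDataProcessor
    print(f"rejecting snapshot: {issues}")
else:
    print(f"accepted with score {score:.2f}, issues: {issues}")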

34
COBY/requirements.txt Normal file
View File

@ -0,0 +1,34 @@
# Core dependencies for COBY system
asyncpg>=0.29.0 # PostgreSQL/TimescaleDB async driver
redis>=5.0.0 # Redis client
websockets>=12.0 # WebSocket client library
aiohttp>=3.9.0 # Async HTTP client/server
fastapi>=0.104.0 # API framework
uvicorn>=0.24.0 # ASGI server
pydantic>=2.5.0 # Data validation
python-multipart>=0.0.6 # Form data parsing
# Data processing
pandas>=2.1.0 # Data manipulation
numpy>=1.24.0 # Numerical computing
scipy>=1.11.0 # Scientific computing
# Utilities
python-dotenv>=1.0.0 # Environment variable loading
structlog>=23.2.0 # Structured logging
click>=8.1.0 # CLI framework
rich>=13.7.0 # Rich text and beautiful formatting
# Development dependencies
pytest>=7.4.0 # Testing framework
pytest-asyncio>=0.21.0 # Async testing
pytest-cov>=4.1.0 # Coverage reporting
black>=23.11.0 # Code formatting
isort>=5.12.0 # Import sorting
flake8>=6.1.0 # Linting
mypy>=1.7.0 # Type checking
# Optional dependencies for enhanced features
prometheus-client>=0.19.0 # Metrics collection
grafana-api>=1.0.3 # Grafana integration
psutil>=5.9.0 # System monitoring

11
COBY/storage/__init__.py Normal file
View File

@ -0,0 +1,11 @@
"""
Storage layer for the COBY system.
"""
from .timescale_manager import TimescaleManager
from .connection_pool import DatabaseConnectionPool
__all__ = [
'TimescaleManager',
'DatabaseConnectionPool'
]

View File

@ -0,0 +1,140 @@
"""
Database connection pool management for TimescaleDB.
"""
import asyncio
import asyncpg
from typing import Optional, Dict, Any
from contextlib import asynccontextmanager
from ..config import config
from ..utils.logging import get_logger
from ..utils.exceptions import StorageError
logger = get_logger(__name__)
class DatabaseConnectionPool:
"""Manages database connection pool for TimescaleDB"""
def __init__(self):
self._pool: Optional[asyncpg.Pool] = None
self._is_initialized = False
async def initialize(self) -> None:
"""Initialize the connection pool"""
if self._is_initialized:
return
try:
# Build connection string
dsn = (
f"postgresql://{config.database.user}:{config.database.password}"
f"@{config.database.host}:{config.database.port}/{config.database.name}"
)
# Create connection pool
self._pool = await asyncpg.create_pool(
dsn,
min_size=5,
max_size=config.database.pool_size,
max_queries=50000,
max_inactive_connection_lifetime=300,
command_timeout=config.database.pool_timeout,
server_settings={
'search_path': config.database.schema,
'timezone': 'UTC'
}
)
self._is_initialized = True
logger.info(f"Database connection pool initialized with {config.database.pool_size} connections")
# Test connection
await self.health_check()
except Exception as e:
logger.error(f"Failed to initialize database connection pool: {e}")
raise StorageError(f"Database connection failed: {e}", "DB_INIT_ERROR")
async def close(self) -> None:
"""Close the connection pool"""
if self._pool:
await self._pool.close()
self._pool = None
self._is_initialized = False
logger.info("Database connection pool closed")
@asynccontextmanager
async def get_connection(self):
"""Get a database connection from the pool"""
if not self._is_initialized:
await self.initialize()
if not self._pool:
raise StorageError("Connection pool not initialized", "POOL_NOT_READY")
async with self._pool.acquire() as connection:
try:
yield connection
except Exception as e:
logger.error(f"Database operation failed: {e}")
raise
@asynccontextmanager
async def get_transaction(self):
"""Get a database transaction"""
async with self.get_connection() as conn:
async with conn.transaction():
yield conn
async def execute_query(self, query: str, *args) -> Any:
"""Execute a query and return results"""
async with self.get_connection() as conn:
return await conn.fetch(query, *args)
async def execute_command(self, command: str, *args) -> str:
"""Execute a command and return status"""
async with self.get_connection() as conn:
return await conn.execute(command, *args)
async def execute_many(self, command: str, args_list) -> None:
"""Execute a command multiple times with different arguments"""
async with self.get_connection() as conn:
await conn.executemany(command, args_list)
async def health_check(self) -> bool:
"""Check database health"""
try:
async with self.get_connection() as conn:
result = await conn.fetchval("SELECT 1")
if result == 1:
logger.debug("Database health check passed")
return True
else:
logger.warning("Database health check returned unexpected result")
return False
except Exception as e:
logger.error(f"Database health check failed: {e}")
return False
async def get_pool_stats(self) -> Dict[str, Any]:
"""Get connection pool statistics"""
if not self._pool:
return {}
return {
'size': self._pool.get_size(),
'min_size': self._pool.get_min_size(),
'max_size': self._pool.get_max_size(),
'idle_size': self._pool.get_idle_size(),
'is_closing': self._pool.is_closing()
}
@property
def is_initialized(self) -> bool:
"""Check if pool is initialized"""
return self._is_initialized
# Global connection pool instance
db_pool = DatabaseConnectionPool()
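Because db_pool is a module-level singleton, callers across the process share one asyncpg pool. A minimal async usage sketch, assuming the config points at a reachable TimescaleDB instance and the package imports as COBY.storage:

import asyncio

# Hypothetical import path; adjust to the actual package layout.
from COBY.storage.connection_pool import db_pool

async def main() -> None:
    await db_pool.initialize()                      # builds the asyncpg pool
    try:
        ok = await db_pool.health_check()           # SELECT 1 round trip
        print("database healthy:", ok)
        rows = await db_pool.execute_query(
            "SELECT COUNT(*) AS n FROM market_data.order_book_snapshots"
        )
        print("snapshots stored:", rows[0]["n"])
        print("pool stats:", await db_pool.get_pool_stats())
    finally:
        await db_pool.close()

asyncio.run(main())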

271
COBY/storage/migrations.py Normal file
View File

@ -0,0 +1,271 @@
"""
Database migration system for schema updates.
"""
from typing import List, Dict, Any, Optional
from datetime import datetime
from ..utils.logging import get_logger
from ..utils.exceptions import StorageError
from .connection_pool import db_pool
logger = get_logger(__name__)
class Migration:
"""Base class for database migrations"""
def __init__(self, version: str, description: str):
self.version = version
self.description = description
async def up(self) -> None:
"""Apply the migration"""
raise NotImplementedError
async def down(self) -> None:
"""Rollback the migration"""
raise NotImplementedError
class MigrationManager:
"""Manages database schema migrations"""
def __init__(self):
self.migrations: List[Migration] = []
def register_migration(self, migration: Migration) -> None:
"""Register a migration"""
self.migrations.append(migration)
# Sort by version
self.migrations.sort(key=lambda m: m.version)
async def initialize_migration_table(self) -> None:
"""Create migration tracking table"""
query = """
CREATE TABLE IF NOT EXISTS market_data.schema_migrations (
version VARCHAR(50) PRIMARY KEY,
description TEXT NOT NULL,
applied_at TIMESTAMPTZ DEFAULT NOW()
);
"""
await db_pool.execute_command(query)
logger.debug("Migration table initialized")
async def get_applied_migrations(self) -> List[str]:
"""Get list of applied migration versions"""
try:
query = "SELECT version FROM market_data.schema_migrations ORDER BY version"
rows = await db_pool.execute_query(query)
return [row['version'] for row in rows]
except Exception:
# Table might not exist yet
return []
async def apply_migration(self, migration: Migration) -> bool:
"""Apply a single migration"""
try:
logger.info(f"Applying migration {migration.version}: {migration.description}")
async with db_pool.get_transaction() as conn:
# Apply the migration
await migration.up()
# Record the migration
await conn.execute(
"INSERT INTO market_data.schema_migrations (version, description) VALUES ($1, $2)",
migration.version,
migration.description
)
logger.info(f"Migration {migration.version} applied successfully")
return True
except Exception as e:
logger.error(f"Failed to apply migration {migration.version}: {e}")
return False
async def rollback_migration(self, migration: Migration) -> bool:
"""Rollback a single migration"""
try:
logger.info(f"Rolling back migration {migration.version}: {migration.description}")
async with db_pool.get_transaction() as conn:
# Rollback the migration
await migration.down()
# Remove the migration record
await conn.execute(
"DELETE FROM market_data.schema_migrations WHERE version = $1",
migration.version
)
logger.info(f"Migration {migration.version} rolled back successfully")
return True
except Exception as e:
logger.error(f"Failed to rollback migration {migration.version}: {e}")
return False
async def migrate_up(self, target_version: Optional[str] = None) -> bool:
"""Apply all pending migrations up to target version"""
try:
await self.initialize_migration_table()
applied_migrations = await self.get_applied_migrations()
pending_migrations = [
m for m in self.migrations
if m.version not in applied_migrations
]
if target_version:
pending_migrations = [
m for m in pending_migrations
if m.version <= target_version
]
if not pending_migrations:
logger.info("No pending migrations to apply")
return True
logger.info(f"Applying {len(pending_migrations)} pending migrations")
for migration in pending_migrations:
if not await self.apply_migration(migration):
return False
logger.info("All migrations applied successfully")
return True
except Exception as e:
logger.error(f"Migration failed: {e}")
return False
async def migrate_down(self, target_version: str) -> bool:
"""Rollback migrations down to target version"""
try:
applied_migrations = await self.get_applied_migrations()
migrations_to_rollback = [
m for m in reversed(self.migrations)
if m.version in applied_migrations and m.version > target_version
]
if not migrations_to_rollback:
logger.info("No migrations to rollback")
return True
logger.info(f"Rolling back {len(migrations_to_rollback)} migrations")
for migration in migrations_to_rollback:
if not await self.rollback_migration(migration):
return False
logger.info("All migrations rolled back successfully")
return True
except Exception as e:
logger.error(f"Migration rollback failed: {e}")
return False
async def get_migration_status(self) -> Dict[str, Any]:
"""Get current migration status"""
try:
applied_migrations = await self.get_applied_migrations()
status = {
'total_migrations': len(self.migrations),
'applied_migrations': len(applied_migrations),
'pending_migrations': len(self.migrations) - len(applied_migrations),
'current_version': applied_migrations[-1] if applied_migrations else None,
'latest_version': self.migrations[-1].version if self.migrations else None,
'migrations': []
}
for migration in self.migrations:
status['migrations'].append({
'version': migration.version,
'description': migration.description,
'applied': migration.version in applied_migrations
})
return status
except Exception as e:
logger.error(f"Failed to get migration status: {e}")
return {}
# Example migrations
class InitialSchemaMigration(Migration):
"""Initial schema creation migration"""
def __init__(self):
super().__init__("001", "Create initial schema and tables")
async def up(self) -> None:
"""Create initial schema"""
from .schema import DatabaseSchema
queries = DatabaseSchema.get_all_creation_queries()
for query in queries:
await db_pool.execute_command(query)
async def down(self) -> None:
"""Drop initial schema"""
# Drop tables in reverse order
tables = [
'system_metrics',
'exchange_status',
'ohlcv_data',
'heatmap_data',
'trade_events',
'order_book_snapshots'
]
for table in tables:
await db_pool.execute_command(f"DROP TABLE IF EXISTS market_data.{table} CASCADE")
class AddIndexesMigration(Migration):
"""Add performance indexes migration"""
def __init__(self):
super().__init__("002", "Add performance indexes")
async def up(self) -> None:
"""Add indexes"""
from .schema import DatabaseSchema
queries = DatabaseSchema.get_index_creation_queries()
for query in queries:
await db_pool.execute_command(query)
async def down(self) -> None:
"""Drop indexes"""
indexes = [
'idx_order_book_symbol_exchange',
'idx_order_book_timestamp',
'idx_trade_events_symbol_exchange',
'idx_trade_events_timestamp',
'idx_trade_events_price',
'idx_heatmap_symbol_bucket',
'idx_heatmap_timestamp',
'idx_ohlcv_symbol_timeframe',
'idx_ohlcv_timestamp',
'idx_exchange_status_exchange',
'idx_exchange_status_timestamp',
'idx_system_metrics_name',
'idx_system_metrics_timestamp'
]
for index in indexes:
await db_pool.execute_command(f"DROP INDEX IF EXISTS market_data.{index}")
# Global migration manager
migration_manager = MigrationManager()
# Register default migrations
migration_manager.register_migration(InitialSchemaMigration())
migration_manager.register_migration(AddIndexesMigration())
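With the two default migrations registered at import time, bringing a fresh database up to date is a single migrate_up() call. A hedged usage sketch (assumes the database pool is configured and reachable, and that the package imports as COBY.storage):

import asyncio

# Hypothetical import paths; adjust to the actual package layout.
from COBY.storage.connection_pool import db_pool
from COBY.storage.migrations import migration_manager

async def main() -> None:
    await db_pool.initialize()
    try:
        applied = await migration_manager.migrate_up()      # apply all pending migrations
        status = await migration_manager.get_migration_status()
        print("migrations ok:", applied)
        print("current version:", status.get("current_version"))
        print("pending:", status.get("pending_migrations"))
    finally:
        await db_pool.close()

asyncio.run(main())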

256
COBY/storage/schema.py Normal file
View File

@ -0,0 +1,256 @@
"""
Database schema management for TimescaleDB.
"""
from typing import List
from ..utils.logging import get_logger
logger = get_logger(__name__)
class DatabaseSchema:
"""Manages database schema creation and migrations"""
@staticmethod
def get_schema_creation_queries() -> List[str]:
"""Get list of queries to create the database schema"""
return [
# Create TimescaleDB extension
"CREATE EXTENSION IF NOT EXISTS timescaledb;",
# Create schema
"CREATE SCHEMA IF NOT EXISTS market_data;",
# Order book snapshots table
"""
CREATE TABLE IF NOT EXISTS market_data.order_book_snapshots (
id BIGSERIAL,
symbol VARCHAR(20) NOT NULL,
exchange VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
bids JSONB NOT NULL,
asks JSONB NOT NULL,
sequence_id BIGINT,
mid_price DECIMAL(20,8),
spread DECIMAL(20,8),
bid_volume DECIMAL(30,8),
ask_volume DECIMAL(30,8),
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, symbol, exchange)
);
""",
# Trade events table
"""
CREATE TABLE IF NOT EXISTS market_data.trade_events (
id BIGSERIAL,
symbol VARCHAR(20) NOT NULL,
exchange VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
price DECIMAL(20,8) NOT NULL,
size DECIMAL(30,8) NOT NULL,
side VARCHAR(4) NOT NULL,
trade_id VARCHAR(100) NOT NULL,
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, symbol, exchange, trade_id)
);
""",
# Aggregated heatmap data table
"""
CREATE TABLE IF NOT EXISTS market_data.heatmap_data (
symbol VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
bucket_size DECIMAL(10,2) NOT NULL,
price_bucket DECIMAL(20,8) NOT NULL,
volume DECIMAL(30,8) NOT NULL,
side VARCHAR(3) NOT NULL,
exchange_count INTEGER NOT NULL,
exchanges JSONB,
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, symbol, bucket_size, price_bucket, side)
);
""",
# OHLCV data table
"""
CREATE TABLE IF NOT EXISTS market_data.ohlcv_data (
symbol VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
timeframe VARCHAR(10) NOT NULL,
open_price DECIMAL(20,8) NOT NULL,
high_price DECIMAL(20,8) NOT NULL,
low_price DECIMAL(20,8) NOT NULL,
close_price DECIMAL(20,8) NOT NULL,
volume DECIMAL(30,8) NOT NULL,
trade_count INTEGER,
vwap DECIMAL(20,8),
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, symbol, timeframe)
);
""",
# Exchange status tracking table
"""
CREATE TABLE IF NOT EXISTS market_data.exchange_status (
exchange VARCHAR(20) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
status VARCHAR(20) NOT NULL,
last_message_time TIMESTAMPTZ,
error_message TEXT,
connection_count INTEGER DEFAULT 0,
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, exchange)
);
""",
# System metrics table
"""
CREATE TABLE IF NOT EXISTS market_data.system_metrics (
metric_name VARCHAR(50) NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
value DECIMAL(20,8) NOT NULL,
labels JSONB,
created_at TIMESTAMPTZ DEFAULT NOW(),
PRIMARY KEY (timestamp, metric_name)
);
"""
]
@staticmethod
def get_hypertable_creation_queries() -> List[str]:
"""Get queries to create hypertables"""
return [
"SELECT create_hypertable('market_data.order_book_snapshots', 'timestamp', if_not_exists => TRUE);",
"SELECT create_hypertable('market_data.trade_events', 'timestamp', if_not_exists => TRUE);",
"SELECT create_hypertable('market_data.heatmap_data', 'timestamp', if_not_exists => TRUE);",
"SELECT create_hypertable('market_data.ohlcv_data', 'timestamp', if_not_exists => TRUE);",
"SELECT create_hypertable('market_data.exchange_status', 'timestamp', if_not_exists => TRUE);",
"SELECT create_hypertable('market_data.system_metrics', 'timestamp', if_not_exists => TRUE);"
]
@staticmethod
def get_index_creation_queries() -> List[str]:
"""Get queries to create indexes"""
return [
# Order book indexes
"CREATE INDEX IF NOT EXISTS idx_order_book_symbol_exchange ON market_data.order_book_snapshots (symbol, exchange, timestamp DESC);",
"CREATE INDEX IF NOT EXISTS idx_order_book_timestamp ON market_data.order_book_snapshots (timestamp DESC);",
# Trade events indexes
"CREATE INDEX IF NOT EXISTS idx_trade_events_symbol_exchange ON market_data.trade_events (symbol, exchange, timestamp DESC);",
"CREATE INDEX IF NOT EXISTS idx_trade_events_timestamp ON market_data.trade_events (timestamp DESC);",
"CREATE INDEX IF NOT EXISTS idx_trade_events_price ON market_data.trade_events (symbol, price, timestamp DESC);",
# Heatmap data indexes
"CREATE INDEX IF NOT EXISTS idx_heatmap_symbol_bucket ON market_data.heatmap_data (symbol, bucket_size, timestamp DESC);",
"CREATE INDEX IF NOT EXISTS idx_heatmap_timestamp ON market_data.heatmap_data (timestamp DESC);",
# OHLCV data indexes
"CREATE INDEX IF NOT EXISTS idx_ohlcv_symbol_timeframe ON market_data.ohlcv_data (symbol, timeframe, timestamp DESC);",
"CREATE INDEX IF NOT EXISTS idx_ohlcv_timestamp ON market_data.ohlcv_data (timestamp DESC);",
# Exchange status indexes
"CREATE INDEX IF NOT EXISTS idx_exchange_status_exchange ON market_data.exchange_status (exchange, timestamp DESC);",
"CREATE INDEX IF NOT EXISTS idx_exchange_status_timestamp ON market_data.exchange_status (timestamp DESC);",
# System metrics indexes
"CREATE INDEX IF NOT EXISTS idx_system_metrics_name ON market_data.system_metrics (metric_name, timestamp DESC);",
"CREATE INDEX IF NOT EXISTS idx_system_metrics_timestamp ON market_data.system_metrics (timestamp DESC);"
]
@staticmethod
def get_retention_policy_queries() -> List[str]:
"""Get queries to create retention policies"""
return [
"SELECT add_retention_policy('market_data.order_book_snapshots', INTERVAL '90 days', if_not_exists => TRUE);",
"SELECT add_retention_policy('market_data.trade_events', INTERVAL '90 days', if_not_exists => TRUE);",
"SELECT add_retention_policy('market_data.heatmap_data', INTERVAL '90 days', if_not_exists => TRUE);",
"SELECT add_retention_policy('market_data.ohlcv_data', INTERVAL '365 days', if_not_exists => TRUE);",
"SELECT add_retention_policy('market_data.exchange_status', INTERVAL '30 days', if_not_exists => TRUE);",
"SELECT add_retention_policy('market_data.system_metrics', INTERVAL '30 days', if_not_exists => TRUE);"
]
@staticmethod
def get_continuous_aggregate_queries() -> List[str]:
"""Get queries to create continuous aggregates"""
return [
# Hourly OHLCV aggregate
"""
CREATE MATERIALIZED VIEW IF NOT EXISTS market_data.hourly_ohlcv
WITH (timescaledb.continuous) AS
SELECT
symbol,
exchange,
time_bucket('1 hour', timestamp) AS hour,
first(price, timestamp) AS open_price,
max(price) AS high_price,
min(price) AS low_price,
last(price, timestamp) AS close_price,
sum(size) AS volume,
count(*) AS trade_count,
avg(price) AS vwap  -- note: simple average used as a VWAP approximation (not volume-weighted)
FROM market_data.trade_events
GROUP BY symbol, exchange, hour
WITH NO DATA;
""",
# Add refresh policy for continuous aggregate
"""
SELECT add_continuous_aggregate_policy('market_data.hourly_ohlcv',
start_offset => INTERVAL '3 hours',
end_offset => INTERVAL '1 hour',
schedule_interval => INTERVAL '1 hour',
if_not_exists => TRUE);
"""
]
@staticmethod
def get_view_creation_queries() -> List[str]:
"""Get queries to create views"""
return [
# Latest order books view
"""
CREATE OR REPLACE VIEW market_data.latest_order_books AS
SELECT DISTINCT ON (symbol, exchange)
symbol,
exchange,
timestamp,
bids,
asks,
mid_price,
spread,
bid_volume,
ask_volume
FROM market_data.order_book_snapshots
ORDER BY symbol, exchange, timestamp DESC;
""",
# Latest heatmaps view
"""
CREATE OR REPLACE VIEW market_data.latest_heatmaps AS
SELECT DISTINCT ON (symbol, bucket_size, price_bucket, side)
symbol,
bucket_size,
price_bucket,
side,
timestamp,
volume,
exchange_count,
exchanges
FROM market_data.heatmap_data
ORDER BY symbol, bucket_size, price_bucket, side, timestamp DESC;
"""
]
@staticmethod
def get_all_creation_queries() -> List[str]:
"""Get all schema creation queries in order"""
queries = []
queries.extend(DatabaseSchema.get_schema_creation_queries())
queries.extend(DatabaseSchema.get_hypertable_creation_queries())
queries.extend(DatabaseSchema.get_index_creation_queries())
queries.extend(DatabaseSchema.get_retention_policy_queries())
queries.extend(DatabaseSchema.get_continuous_aggregate_queries())
queries.extend(DatabaseSchema.get_view_creation_queries())
return queries
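
For orientation, a minimal sketch of applying the queries above in order through a plain asyncpg pool. The apply_schema helper, its dsn parameter, and the example DSN are illustrative assumptions rather than part of the committed code, and TimescaleDB is assumed to already be installed in the target database.

import asyncio
import asyncpg

async def apply_schema(dsn: str) -> None:
    # Open a small pool, run every schema query in order, then close the pool.
    pool = await asyncpg.create_pool(dsn)
    try:
        async with pool.acquire() as conn:
            for query in DatabaseSchema.get_all_creation_queries():
                try:
                    await conn.execute(query)
                except asyncpg.PostgresError as exc:
                    # Hypertables, policies and views may already exist; skip and continue.
                    print(f"skipped ({exc.__class__.__name__}): {query[:60]}...")
    finally:
        await pool.close()

# Example (hypothetical DSN):
# asyncio.run(apply_schema("postgresql://coby:secret@localhost:5432/market_data"))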

@@ -0,0 +1,604 @@
"""
TimescaleDB storage manager implementation.
"""
import json
from datetime import datetime, timedelta
from typing import List, Dict, Optional, Any
from ..interfaces.storage_manager import StorageManager
from ..models.core import OrderBookSnapshot, TradeEvent, HeatmapData, SystemMetrics, PriceLevel
from ..utils.logging import get_logger, set_correlation_id
from ..utils.exceptions import StorageError, ValidationError
from ..utils.timing import get_current_timestamp
from .connection_pool import db_pool
from .schema import DatabaseSchema
logger = get_logger(__name__)
class TimescaleManager(StorageManager):
"""TimescaleDB implementation of StorageManager interface"""
def __init__(self):
self._schema_initialized = False
async def initialize(self) -> None:
"""Initialize the storage manager"""
await db_pool.initialize()
await self.setup_database_schema()
logger.info("TimescaleDB storage manager initialized")
async def close(self) -> None:
"""Close the storage manager"""
await db_pool.close()
logger.info("TimescaleDB storage manager closed")
async def setup_database_schema(self) -> None:
"""Set up database schema and tables (async so initialize() can await it directly)"""
if self._schema_initialized:
return
try:
queries = DatabaseSchema.get_all_creation_queries()
for query in queries:
try:
await db_pool.execute_command(query)
logger.debug(f"Executed schema query: {query[:50]}...")
except Exception as e:
# Log but continue - objects such as hypertables or policies may already exist
logger.warning(f"Schema query failed (continuing): {e}")
self._schema_initialized = True
logger.info("Database schema setup completed")
except Exception as e:
logger.error(f"Failed to setup database schema: {e}")
raise StorageError(f"Schema setup failed: {e}", "SCHEMA_SETUP_ERROR")
async def store_orderbook(self, data: OrderBookSnapshot) -> bool:
"""Store order book snapshot to database"""
try:
set_correlation_id()
# Convert price levels to JSON
bids_json = json.dumps([
{"price": float(level.price), "size": float(level.size), "count": level.count}
for level in data.bids
])
asks_json = json.dumps([
{"price": float(level.price), "size": float(level.size), "count": level.count}
for level in data.asks
])
query = """
INSERT INTO market_data.order_book_snapshots
(symbol, exchange, timestamp, bids, asks, sequence_id, mid_price, spread, bid_volume, ask_volume)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
"""
await db_pool.execute_command(
query,
data.symbol,
data.exchange,
data.timestamp,
bids_json,
asks_json,
data.sequence_id,
float(data.mid_price) if data.mid_price else None,
float(data.spread) if data.spread else None,
float(data.bid_volume),
float(data.ask_volume)
)
logger.debug(f"Stored order book: {data.symbol}@{data.exchange}")
return True
except Exception as e:
logger.error(f"Failed to store order book: {e}")
return False
async def store_trade(self, data: TradeEvent) -> bool:
"""Store trade event to database"""
try:
set_correlation_id()
query = """
INSERT INTO market_data.trade_events
(symbol, exchange, timestamp, price, size, side, trade_id)
VALUES ($1, $2, $3, $4, $5, $6, $7)
"""
await db_pool.execute_command(
query,
data.symbol,
data.exchange,
data.timestamp,
float(data.price),
float(data.size),
data.side,
data.trade_id
)
logger.debug(f"Stored trade: {data.symbol}@{data.exchange} - {data.trade_id}")
return True
except Exception as e:
logger.error(f"Failed to store trade: {e}")
return False
async def store_heatmap(self, data: HeatmapData) -> bool:
"""Store heatmap data to database"""
try:
set_correlation_id()
# Store each heatmap point
for point in data.data:
query = """
INSERT INTO market_data.heatmap_data
(symbol, timestamp, bucket_size, price_bucket, volume, side, exchange_count, exchanges)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
ON CONFLICT (timestamp, symbol, bucket_size, price_bucket, side)
DO UPDATE SET
volume = EXCLUDED.volume,
exchange_count = EXCLUDED.exchange_count,
exchanges = EXCLUDED.exchanges
"""
await db_pool.execute_command(
query,
data.symbol,
data.timestamp,
float(data.bucket_size),
float(point.price),
float(point.volume),
point.side,
1, # exchange_count - will be updated by aggregation
json.dumps([]) # exchanges - will be updated by aggregation
)
logger.debug(f"Stored heatmap: {data.symbol} with {len(data.data)} points")
return True
except Exception as e:
logger.error(f"Failed to store heatmap: {e}")
return False
async def store_metrics(self, data: SystemMetrics) -> bool:
"""Store system metrics to database"""
try:
set_correlation_id()
# Store multiple metrics
metrics = [
('cpu_usage', data.cpu_usage),
('memory_usage', data.memory_usage),
('disk_usage', data.disk_usage),
('database_connections', data.database_connections),
('redis_connections', data.redis_connections),
('active_websockets', data.active_websockets),
('messages_per_second', data.messages_per_second),
('processing_latency', data.processing_latency)
]
query = """
INSERT INTO market_data.system_metrics
(metric_name, timestamp, value, labels)
VALUES ($1, $2, $3, $4)
"""
for metric_name, value in metrics:
await db_pool.execute_command(
query,
metric_name,
data.timestamp,
float(value),
json.dumps(data.network_io)
)
logger.debug("Stored system metrics")
return True
except Exception as e:
logger.error(f"Failed to store metrics: {e}")
return False
async def get_historical_orderbooks(self, symbol: str, exchange: str,
start: datetime, end: datetime,
limit: Optional[int] = None) -> List[OrderBookSnapshot]:
"""Retrieve historical order book data"""
try:
query = """
SELECT symbol, exchange, timestamp, bids, asks, sequence_id, mid_price, spread
FROM market_data.order_book_snapshots
WHERE symbol = $1 AND exchange = $2 AND timestamp >= $3 AND timestamp <= $4
ORDER BY timestamp DESC
"""
if limit:
query += f" LIMIT {limit}"
rows = await db_pool.execute_query(query, symbol, exchange, start, end)
orderbooks = []
for row in rows:
# Parse JSON bid/ask data
bids_data = json.loads(row['bids'])
asks_data = json.loads(row['asks'])
bids = [PriceLevel(price=b['price'], size=b['size'], count=b.get('count'))
for b in bids_data]
asks = [PriceLevel(price=a['price'], size=a['size'], count=a.get('count'))
for a in asks_data]
orderbook = OrderBookSnapshot(
symbol=row['symbol'],
exchange=row['exchange'],
timestamp=row['timestamp'],
bids=bids,
asks=asks,
sequence_id=row['sequence_id']
)
orderbooks.append(orderbook)
logger.debug(f"Retrieved {len(orderbooks)} historical order books")
return orderbooks
except Exception as e:
logger.error(f"Failed to get historical order books: {e}")
return []
async def get_historical_trades(self, symbol: str, exchange: str,
start: datetime, end: datetime,
limit: Optional[int] = None) -> List[TradeEvent]:
"""Retrieve historical trade data"""
try:
query = """
SELECT symbol, exchange, timestamp, price, size, side, trade_id
FROM market_data.trade_events
WHERE symbol = $1 AND exchange = $2 AND timestamp >= $3 AND timestamp <= $4
ORDER BY timestamp DESC
"""
if limit:
query += f" LIMIT {limit}"
rows = await db_pool.execute_query(query, symbol, exchange, start, end)
trades = []
for row in rows:
trade = TradeEvent(
symbol=row['symbol'],
exchange=row['exchange'],
timestamp=row['timestamp'],
price=float(row['price']),
size=float(row['size']),
side=row['side'],
trade_id=row['trade_id']
)
trades.append(trade)
logger.debug(f"Retrieved {len(trades)} historical trades")
return trades
except Exception as e:
logger.error(f"Failed to get historical trades: {e}")
return []
async def get_latest_orderbook(self, symbol: str, exchange: str) -> Optional[OrderBookSnapshot]:
"""Get latest order book snapshot"""
try:
query = """
SELECT symbol, exchange, timestamp, bids, asks, sequence_id
FROM market_data.order_book_snapshots
WHERE symbol = $1 AND exchange = $2
ORDER BY timestamp DESC
LIMIT 1
"""
rows = await db_pool.execute_query(query, symbol, exchange)
if not rows:
return None
row = rows[0]
bids_data = json.loads(row['bids'])
asks_data = json.loads(row['asks'])
bids = [PriceLevel(price=b['price'], size=b['size'], count=b.get('count'))
for b in bids_data]
asks = [PriceLevel(price=a['price'], size=a['size'], count=a.get('count'))
for a in asks_data]
return OrderBookSnapshot(
symbol=row['symbol'],
exchange=row['exchange'],
timestamp=row['timestamp'],
bids=bids,
asks=asks,
sequence_id=row['sequence_id']
)
except Exception as e:
logger.error(f"Failed to get latest order book: {e}")
return None
async def get_latest_heatmap(self, symbol: str, bucket_size: float) -> Optional[HeatmapData]:
"""Get latest heatmap data"""
try:
query = """
SELECT price_bucket, volume, side, timestamp
FROM market_data.heatmap_data
WHERE symbol = $1 AND bucket_size = $2
AND timestamp = (
SELECT MAX(timestamp)
FROM market_data.heatmap_data
WHERE symbol = $1 AND bucket_size = $2
)
ORDER BY price_bucket
"""
rows = await db_pool.execute_query(query, symbol, bucket_size)
if not rows:
return None
from ..models.core import HeatmapPoint
heatmap = HeatmapData(
symbol=symbol,
timestamp=rows[0]['timestamp'],
bucket_size=bucket_size
)
# Calculate max volume for intensity
max_volume = max(float(row['volume']) for row in rows)
for row in rows:
volume = float(row['volume'])
intensity = volume / max_volume if max_volume > 0 else 0.0
point = HeatmapPoint(
price=float(row['price_bucket']),
volume=volume,
intensity=intensity,
side=row['side']
)
heatmap.data.append(point)
return heatmap
except Exception as e:
logger.error(f"Failed to get latest heatmap: {e}")
return None
async def get_ohlcv_data(self, symbol: str, exchange: str, timeframe: str,
start: datetime, end: datetime) -> List[Dict[str, Any]]:
"""Get OHLCV candlestick data"""
try:
query = """
SELECT timestamp, open_price, high_price, low_price, close_price, volume, trade_count, vwap
FROM market_data.ohlcv_data
WHERE symbol = $1 AND exchange = $2 AND timeframe = $3
AND timestamp >= $4 AND timestamp <= $5
ORDER BY timestamp
"""
rows = await db_pool.execute_query(query, symbol, exchange, timeframe, start, end)
ohlcv_data = []
for row in rows:
ohlcv_data.append({
'timestamp': row['timestamp'],
'open': float(row['open_price']),
'high': float(row['high_price']),
'low': float(row['low_price']),
'close': float(row['close_price']),
'volume': float(row['volume']),
'trade_count': row['trade_count'],
'vwap': float(row['vwap']) if row['vwap'] else None
})
logger.debug(f"Retrieved {len(ohlcv_data)} OHLCV records")
return ohlcv_data
except Exception as e:
logger.error(f"Failed to get OHLCV data: {e}")
return []
async def batch_store_orderbooks(self, data: List[OrderBookSnapshot]) -> int:
"""Store multiple order book snapshots in batch"""
if not data:
return 0
try:
set_correlation_id()
# Prepare batch data
batch_data = []
for orderbook in data:
bids_json = json.dumps([
{"price": float(level.price), "size": float(level.size), "count": level.count}
for level in orderbook.bids
])
asks_json = json.dumps([
{"price": float(level.price), "size": float(level.size), "count": level.count}
for level in orderbook.asks
])
batch_data.append((
orderbook.symbol,
orderbook.exchange,
orderbook.timestamp,
bids_json,
asks_json,
orderbook.sequence_id,
float(orderbook.mid_price) if orderbook.mid_price else None,
float(orderbook.spread) if orderbook.spread else None,
float(orderbook.bid_volume),
float(orderbook.ask_volume)
))
query = """
INSERT INTO market_data.order_book_snapshots
(symbol, exchange, timestamp, bids, asks, sequence_id, mid_price, spread, bid_volume, ask_volume)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
"""
await db_pool.execute_many(query, batch_data)
logger.debug(f"Batch stored {len(data)} order books")
return len(data)
except Exception as e:
logger.error(f"Failed to batch store order books: {e}")
return 0
async def batch_store_trades(self, data: List[TradeEvent]) -> int:
"""Store multiple trade events in batch"""
if not data:
return 0
try:
set_correlation_id()
# Prepare batch data
batch_data = [
(trade.symbol, trade.exchange, trade.timestamp, float(trade.price),
float(trade.size), trade.side, trade.trade_id)
for trade in data
]
query = """
INSERT INTO market_data.trade_events
(symbol, exchange, timestamp, price, size, side, trade_id)
VALUES ($1, $2, $3, $4, $5, $6, $7)
"""
await db_pool.execute_many(query, batch_data)
logger.debug(f"Batch stored {len(data)} trades")
return len(data)
except Exception as e:
logger.error(f"Failed to batch store trades: {e}")
return 0
async def cleanup_old_data(self, retention_days: int) -> int:
"""Clean up old data based on retention policy"""
try:
# Subtract a timedelta; replacing the day field directly breaks across month boundaries
cutoff_time = get_current_timestamp() - timedelta(days=retention_days)
tables = [
'order_book_snapshots',
'trade_events',
'heatmap_data',
'exchange_status',
'system_metrics'
]
total_deleted = 0
for table in tables:
query = f"""
DELETE FROM market_data.{table}
WHERE timestamp < $1
"""
result = await db_pool.execute_command(query, cutoff_time)
# Extract number from result like "DELETE 1234"
deleted = int(result.split()[-1]) if result.split()[-1].isdigit() else 0
total_deleted += deleted
logger.debug(f"Cleaned up {deleted} records from {table}")
logger.info(f"Cleaned up {total_deleted} total records older than {retention_days} days")
return total_deleted
except Exception as e:
logger.error(f"Failed to cleanup old data: {e}")
return 0
async def get_storage_stats(self) -> Dict[str, Any]:
"""Get storage statistics"""
try:
stats = {}
# Table sizes
size_query = """
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size,
pg_total_relation_size(schemaname||'.'||tablename) as size_bytes
FROM pg_tables
WHERE schemaname = 'market_data'
ORDER BY size_bytes DESC
"""
size_rows = await db_pool.execute_query(size_query)
stats['table_sizes'] = [
{
'table': row['tablename'],
'size': row['size'],
'size_bytes': row['size_bytes']
}
for row in size_rows
]
# Record counts
tables = ['order_book_snapshots', 'trade_events', 'heatmap_data',
'ohlcv_data', 'exchange_status', 'system_metrics']
record_counts = {}
for table in tables:
count_query = f"SELECT COUNT(*) as count FROM market_data.{table}"
count_rows = await db_pool.execute_query(count_query)
record_counts[table] = count_rows[0]['count'] if count_rows else 0
stats['record_counts'] = record_counts
# Connection pool stats
stats['connection_pool'] = await db_pool.get_pool_stats()
return stats
except Exception as e:
logger.error(f"Failed to get storage stats: {e}")
return {}
async def health_check(self) -> bool:
"""Check storage system health"""
try:
# Check database connection
if not await db_pool.health_check():
return False
# Check if tables exist
query = """
SELECT COUNT(*) as count
FROM information_schema.tables
WHERE table_schema = 'market_data'
"""
rows = await db_pool.execute_query(query)
table_count = rows[0]['count'] if rows else 0
if table_count < 6: # We expect 6 main tables
logger.warning(f"Expected 6 tables, found {table_count}")
return False
logger.debug("Storage health check passed")
return True
except Exception as e:
logger.error(f"Storage health check failed: {e}")
return False
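
Taken together, the class above supports a simple write-then-query flow. A short, hedged usage sketch follows (the demo coroutine and its symbol/exchange values are illustrative; only methods defined above are used):

import asyncio
from datetime import datetime, timedelta, timezone

async def demo() -> None:
    manager = TimescaleManager()
    await manager.initialize()
    try:
        now = datetime.now(timezone.utc)
        # Latest snapshot for one symbol/exchange pair
        latest = await manager.get_latest_orderbook("BTCUSDT", "binance")
        if latest:
            print(f"latest mid price: {latest.mid_price}")
        # Trades from the last hour, capped at 100 rows
        trades = await manager.get_historical_trades(
            "BTCUSDT", "binance", now - timedelta(hours=1), now, limit=100
        )
        print(f"trades in the last hour: {len(trades)}")
    finally:
        await manager.close()

# asyncio.run(demo())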

COBY/test_integration.py (new file)
@@ -0,0 +1,274 @@
#!/usr/bin/env python3
"""
Integration test script for COBY system components.
Run this to test the TimescaleDB integration and basic functionality.
"""
import asyncio
import sys
from datetime import datetime, timezone
from pathlib import Path
# Add COBY to path
sys.path.insert(0, str(Path(__file__).parent))
from config import config
from storage.timescale_manager import TimescaleManager
from models.core import OrderBookSnapshot, TradeEvent, PriceLevel
from utils.logging import setup_logging, get_logger
# Setup logging
setup_logging(level='INFO', console_output=True)
logger = get_logger(__name__)
async def test_database_connection():
"""Test basic database connectivity"""
logger.info("🔌 Testing database connection...")
try:
manager = TimescaleManager()
await manager.initialize()
# Test health check
is_healthy = await manager.health_check()
if is_healthy:
logger.info("✅ Database connection: HEALTHY")
else:
logger.error("❌ Database connection: UNHEALTHY")
return False
# Test storage stats
stats = await manager.get_storage_stats()
logger.info(f"📊 Found {len(stats.get('table_sizes', []))} tables")
for table_info in stats.get('table_sizes', []):
logger.info(f" 📋 {table_info['table']}: {table_info['size']}")
await manager.close()
return True
except Exception as e:
logger.error(f"❌ Database test failed: {e}")
return False
async def test_data_storage():
"""Test storing and retrieving data"""
logger.info("💾 Testing data storage operations...")
try:
manager = TimescaleManager()
await manager.initialize()
# Create test order book
test_orderbook = OrderBookSnapshot(
symbol="BTCUSDT",
exchange="test_exchange",
timestamp=datetime.now(timezone.utc),
bids=[
PriceLevel(price=50000.0, size=1.5, count=3),
PriceLevel(price=49999.0, size=2.0, count=5)
],
asks=[
PriceLevel(price=50001.0, size=1.0, count=2),
PriceLevel(price=50002.0, size=1.5, count=4)
],
sequence_id=12345
)
# Test storing order book
result = await manager.store_orderbook(test_orderbook)
if result:
logger.info("✅ Order book storage: SUCCESS")
else:
logger.error("❌ Order book storage: FAILED")
return False
# Test retrieving order book
retrieved = await manager.get_latest_orderbook("BTCUSDT", "test_exchange")
if retrieved:
logger.info(f"✅ Order book retrieval: SUCCESS (mid_price: {retrieved.mid_price})")
else:
logger.error("❌ Order book retrieval: FAILED")
return False
# Create test trade
test_trade = TradeEvent(
symbol="BTCUSDT",
exchange="test_exchange",
timestamp=datetime.now(timezone.utc),
price=50000.5,
size=0.1,
side="buy",
trade_id="test_trade_123"
)
# Test storing trade
result = await manager.store_trade(test_trade)
if result:
logger.info("✅ Trade storage: SUCCESS")
else:
logger.error("❌ Trade storage: FAILED")
return False
await manager.close()
return True
except Exception as e:
logger.error(f"❌ Data storage test failed: {e}")
return False
async def test_batch_operations():
"""Test batch storage operations"""
logger.info("📦 Testing batch operations...")
try:
manager = TimescaleManager()
await manager.initialize()
# Create batch of order books
orderbooks = []
for i in range(5):
orderbook = OrderBookSnapshot(
symbol="ETHUSDT",
exchange="test_exchange",
timestamp=datetime.now(timezone.utc),
bids=[PriceLevel(price=3000.0 + i, size=1.0)],
asks=[PriceLevel(price=3001.0 + i, size=1.0)],
sequence_id=i
)
orderbooks.append(orderbook)
# Test batch storage
result = await manager.batch_store_orderbooks(orderbooks)
if result == 5:
logger.info(f"✅ Batch order book storage: SUCCESS ({result} records)")
else:
logger.error(f"❌ Batch order book storage: PARTIAL ({result}/5 records)")
return False
# Create batch of trades
trades = []
for i in range(10):
trade = TradeEvent(
symbol="ETHUSDT",
exchange="test_exchange",
timestamp=datetime.now(timezone.utc),
price=3000.0 + (i * 0.1),
size=0.05,
side="buy" if i % 2 == 0 else "sell",
trade_id=f"batch_trade_{i}"
)
trades.append(trade)
# Test batch trade storage
result = await manager.batch_store_trades(trades)
if result == 10:
logger.info(f"✅ Batch trade storage: SUCCESS ({result} records)")
else:
logger.error(f"❌ Batch trade storage: PARTIAL ({result}/10 records)")
return False
await manager.close()
return True
except Exception as e:
logger.error(f"❌ Batch operations test failed: {e}")
return False
async def test_configuration():
"""Test configuration system"""
logger.info("⚙️ Testing configuration system...")
try:
# Test database configuration
db_url = config.get_database_url()
logger.info(f"✅ Database URL: {db_url.replace(config.database.password, '***')}")
# Test Redis configuration
redis_url = config.get_redis_url()
logger.info(f"✅ Redis URL: {redis_url.replace(config.redis.password, '***')}")
# Test bucket sizes
btc_bucket = config.get_bucket_size('BTCUSDT')
eth_bucket = config.get_bucket_size('ETHUSDT')
logger.info(f"✅ Bucket sizes: BTC=${btc_bucket}, ETH=${eth_bucket}")
# Test configuration dict
config_dict = config.to_dict()
logger.info(f"✅ Configuration loaded: {len(config_dict)} sections")
return True
except Exception as e:
logger.error(f"❌ Configuration test failed: {e}")
return False
async def run_all_tests():
"""Run all integration tests"""
logger.info("🚀 Starting COBY Integration Tests")
logger.info("=" * 50)
tests = [
("Configuration", test_configuration),
("Database Connection", test_database_connection),
("Data Storage", test_data_storage),
("Batch Operations", test_batch_operations)
]
results = []
for test_name, test_func in tests:
logger.info(f"\n🧪 Running {test_name} test...")
try:
result = await test_func()
results.append((test_name, result))
if result:
logger.info(f"{test_name}: PASSED")
else:
logger.error(f"{test_name}: FAILED")
except Exception as e:
logger.error(f"{test_name}: ERROR - {e}")
results.append((test_name, False))
# Summary
logger.info("\n" + "=" * 50)
logger.info("📋 TEST SUMMARY")
logger.info("=" * 50)
passed = sum(1 for _, result in results if result)
total = len(results)
for test_name, result in results:
status = "✅ PASSED" if result else "❌ FAILED"
logger.info(f"{test_name:20} {status}")
logger.info(f"\nOverall: {passed}/{total} tests passed")
if passed == total:
logger.info("🎉 All tests passed! System is ready.")
return True
else:
logger.error("⚠️ Some tests failed. Check configuration and database connection.")
return False
if __name__ == "__main__":
print("COBY Integration Test Suite")
print("=" * 30)
# Run tests
success = asyncio.run(run_all_tests())
if success:
print("\n🎉 Integration tests completed successfully!")
print("The system is ready for the next development phase.")
sys.exit(0)
else:
print("\n❌ Integration tests failed!")
print("Please check the logs and fix any issues before proceeding.")
sys.exit(1)

COBY/tests/__init__.py (new file)
@@ -0,0 +1,3 @@
"""
Test suite for the COBY system.
"""

@@ -0,0 +1,341 @@
"""
Tests for Binance exchange connector.
"""
import pytest
import asyncio
from unittest.mock import AsyncMock, MagicMock, patch
from datetime import datetime, timezone
from ..connectors.binance_connector import BinanceConnector
from ..models.core import OrderBookSnapshot, TradeEvent, PriceLevel
@pytest.fixture
def binance_connector():
"""Create Binance connector for testing"""
return BinanceConnector()
@pytest.fixture
def sample_binance_orderbook_data():
"""Sample Binance order book data"""
return {
"lastUpdateId": 1027024,
"bids": [
["4.00000000", "431.00000000"],
["3.99000000", "9.00000000"]
],
"asks": [
["4.00000200", "12.00000000"],
["4.01000000", "18.00000000"]
]
}
@pytest.fixture
def sample_binance_depth_update():
"""Sample Binance depth update message"""
return {
"e": "depthUpdate",
"E": 1672515782136,
"s": "BTCUSDT",
"U": 157,
"u": 160,
"b": [
["50000.00", "0.25"],
["49999.00", "0.50"]
],
"a": [
["50001.00", "0.30"],
["50002.00", "0.40"]
]
}
@pytest.fixture
def sample_binance_trade_update():
"""Sample Binance trade update message"""
return {
"e": "trade",
"E": 1672515782136,
"s": "BTCUSDT",
"t": 12345,
"p": "50000.50",
"q": "0.10",
"b": 88,
"a": 50,
"T": 1672515782134,
"m": False,
"M": True
}
class TestBinanceConnector:
"""Test cases for BinanceConnector"""
def test_initialization(self, binance_connector):
"""Test connector initialization"""
assert binance_connector.exchange_name == "binance"
assert binance_connector.websocket_url == BinanceConnector.WEBSOCKET_URL
assert len(binance_connector.message_handlers) >= 3
assert binance_connector.stream_id == 1
assert binance_connector.active_streams == []
def test_normalize_symbol(self, binance_connector):
"""Test symbol normalization"""
# Test standard format
assert binance_connector.normalize_symbol("BTCUSDT") == "BTCUSDT"
# Test with separators
assert binance_connector.normalize_symbol("BTC-USDT") == "BTCUSDT"
assert binance_connector.normalize_symbol("BTC/USDT") == "BTCUSDT"
# Test lowercase
assert binance_connector.normalize_symbol("btcusdt") == "BTCUSDT"
# Test invalid symbol
with pytest.raises(Exception):
binance_connector.normalize_symbol("")
def test_get_message_type(self, binance_connector):
"""Test message type detection"""
# Test depth update
depth_msg = {"e": "depthUpdate", "s": "BTCUSDT"}
assert binance_connector._get_message_type(depth_msg) == "depthUpdate"
# Test trade update
trade_msg = {"e": "trade", "s": "BTCUSDT"}
assert binance_connector._get_message_type(trade_msg) == "trade"
# Test error message
error_msg = {"error": {"code": -1121, "msg": "Invalid symbol"}}
assert binance_connector._get_message_type(error_msg) == "error"
# Test unknown message
unknown_msg = {"data": "something"}
assert binance_connector._get_message_type(unknown_msg) == "unknown"
def test_parse_orderbook_snapshot(self, binance_connector, sample_binance_orderbook_data):
"""Test order book snapshot parsing"""
orderbook = binance_connector._parse_orderbook_snapshot(
sample_binance_orderbook_data,
"BTCUSDT"
)
assert isinstance(orderbook, OrderBookSnapshot)
assert orderbook.symbol == "BTCUSDT"
assert orderbook.exchange == "binance"
assert len(orderbook.bids) == 2
assert len(orderbook.asks) == 2
assert orderbook.sequence_id == 1027024
# Check bid data
assert orderbook.bids[0].price == 4.0
assert orderbook.bids[0].size == 431.0
# Check ask data
assert orderbook.asks[0].price == 4.000002
assert orderbook.asks[0].size == 12.0
@pytest.mark.asyncio
async def test_handle_orderbook_update(self, binance_connector, sample_binance_depth_update):
"""Test order book update handling"""
# Mock callback
callback_called = False
received_data = None
def mock_callback(data):
nonlocal callback_called, received_data
callback_called = True
received_data = data
binance_connector.add_data_callback(mock_callback)
# Handle update
await binance_connector._handle_orderbook_update(sample_binance_depth_update)
# Verify callback was called
assert callback_called
assert isinstance(received_data, OrderBookSnapshot)
assert received_data.symbol == "BTCUSDT"
assert received_data.exchange == "binance"
assert len(received_data.bids) == 2
assert len(received_data.asks) == 2
@pytest.mark.asyncio
async def test_handle_trade_update(self, binance_connector, sample_binance_trade_update):
"""Test trade update handling"""
# Mock callback
callback_called = False
received_data = None
def mock_callback(data):
nonlocal callback_called, received_data
callback_called = True
received_data = data
binance_connector.add_data_callback(mock_callback)
# Handle update
await binance_connector._handle_trade_update(sample_binance_trade_update)
# Verify callback was called
assert callback_called
assert isinstance(received_data, TradeEvent)
assert received_data.symbol == "BTCUSDT"
assert received_data.exchange == "binance"
assert received_data.price == 50000.50
assert received_data.size == 0.10
assert received_data.side == "buy" # m=False means buyer is not maker
assert received_data.trade_id == "12345"
@pytest.mark.asyncio
async def test_subscribe_orderbook(self, binance_connector):
"""Test order book subscription"""
# Mock WebSocket send
binance_connector._send_message = AsyncMock(return_value=True)
# Subscribe
await binance_connector.subscribe_orderbook("BTCUSDT")
# Verify subscription was sent
binance_connector._send_message.assert_called_once()
call_args = binance_connector._send_message.call_args[0][0]
assert call_args["method"] == "SUBSCRIBE"
assert "btcusdt@depth@100ms" in call_args["params"]
assert call_args["id"] == 1
# Verify tracking
assert "BTCUSDT" in binance_connector.subscriptions
assert "orderbook" in binance_connector.subscriptions["BTCUSDT"]
assert "btcusdt@depth@100ms" in binance_connector.active_streams
assert binance_connector.stream_id == 2
@pytest.mark.asyncio
async def test_subscribe_trades(self, binance_connector):
"""Test trade subscription"""
# Mock WebSocket send
binance_connector._send_message = AsyncMock(return_value=True)
# Subscribe
await binance_connector.subscribe_trades("ETHUSDT")
# Verify subscription was sent
binance_connector._send_message.assert_called_once()
call_args = binance_connector._send_message.call_args[0][0]
assert call_args["method"] == "SUBSCRIBE"
assert "ethusdt@trade" in call_args["params"]
assert call_args["id"] == 1
# Verify tracking
assert "ETHUSDT" in binance_connector.subscriptions
assert "trades" in binance_connector.subscriptions["ETHUSDT"]
assert "ethusdt@trade" in binance_connector.active_streams
@pytest.mark.asyncio
async def test_unsubscribe_orderbook(self, binance_connector):
"""Test order book unsubscription"""
# Setup initial subscription
binance_connector.subscriptions["BTCUSDT"] = ["orderbook"]
binance_connector.active_streams.append("btcusdt@depth@100ms")
# Mock WebSocket send
binance_connector._send_message = AsyncMock(return_value=True)
# Unsubscribe
await binance_connector.unsubscribe_orderbook("BTCUSDT")
# Verify unsubscription was sent
binance_connector._send_message.assert_called_once()
call_args = binance_connector._send_message.call_args[0][0]
assert call_args["method"] == "UNSUBSCRIBE"
assert "btcusdt@depth@100ms" in call_args["params"]
# Verify tracking removal
assert "BTCUSDT" not in binance_connector.subscriptions
assert "btcusdt@depth@100ms" not in binance_connector.active_streams
@pytest.mark.asyncio
@patch('aiohttp.ClientSession.get')
async def test_get_symbols(self, mock_get, binance_connector):
"""Test getting available symbols"""
# Mock API response
mock_response = AsyncMock()
mock_response.status = 200
mock_response.json = AsyncMock(return_value={
"symbols": [
{"symbol": "BTCUSDT", "status": "TRADING"},
{"symbol": "ETHUSDT", "status": "TRADING"},
{"symbol": "ADAUSDT", "status": "BREAK"} # Should be filtered out
]
})
mock_get.return_value.__aenter__.return_value = mock_response
# Get symbols
symbols = await binance_connector.get_symbols()
# Verify results
assert len(symbols) == 2
assert "BTCUSDT" in symbols
assert "ETHUSDT" in symbols
assert "ADAUSDT" not in symbols # Filtered out due to status
@pytest.mark.asyncio
@patch('aiohttp.ClientSession.get')
async def test_get_orderbook_snapshot(self, mock_get, binance_connector, sample_binance_orderbook_data):
"""Test getting order book snapshot"""
# Mock API response
mock_response = AsyncMock()
mock_response.status = 200
mock_response.json = AsyncMock(return_value=sample_binance_orderbook_data)
mock_get.return_value.__aenter__.return_value = mock_response
# Get order book snapshot
orderbook = await binance_connector.get_orderbook_snapshot("BTCUSDT", depth=20)
# Verify results
assert isinstance(orderbook, OrderBookSnapshot)
assert orderbook.symbol == "BTCUSDT"
assert orderbook.exchange == "binance"
assert len(orderbook.bids) == 2
assert len(orderbook.asks) == 2
def test_get_binance_stats(self, binance_connector):
"""Test getting Binance-specific statistics"""
# Add some test data
binance_connector.active_streams = ["btcusdt@depth@100ms", "ethusdt@trade"]
binance_connector.stream_id = 5
stats = binance_connector.get_binance_stats()
# Verify Binance-specific stats
assert stats['active_streams'] == 2
assert len(stats['stream_list']) == 2
assert stats['next_stream_id'] == 5
# Verify base stats are included
assert 'exchange' in stats
assert 'connection_status' in stats
assert 'message_count' in stats
if __name__ == "__main__":
# Run a simple test
async def simple_test():
connector = BinanceConnector()
# Test symbol normalization
normalized = connector.normalize_symbol("BTC-USDT")
print(f"Symbol normalization: BTC-USDT -> {normalized}")
# Test message type detection
msg_type = connector._get_message_type({"e": "depthUpdate"})
print(f"Message type detection: {msg_type}")
print("Simple Binance connector test completed")
asyncio.run(simple_test())
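
The subscription tests above effectively document the wire format the connector is expected to send. As a reference, a minimal sketch that reproduces those payloads (the build_subscribe_frame helper is hypothetical and only mirrors the assertions; it is not the connector's implementation):

import json

def build_subscribe_frame(symbol: str, channel: str, stream_id: int) -> str:
    # Stream naming follows the assertions: <symbol>@depth@100ms for order books, <symbol>@trade for trades.
    stream = f"{symbol.lower()}@depth@100ms" if channel == "orderbook" else f"{symbol.lower()}@trade"
    return json.dumps({"method": "SUBSCRIBE", "params": [stream], "id": stream_id})

# build_subscribe_frame("BTCUSDT", "orderbook", 1)
# -> '{"method": "SUBSCRIBE", "params": ["btcusdt@depth@100ms"], "id": 1}'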

@@ -0,0 +1,304 @@
"""
Tests for data processing components.
"""
import pytest
from datetime import datetime, timezone
from ..processing.data_processor import StandardDataProcessor
from ..processing.quality_checker import DataQualityChecker
from ..processing.anomaly_detector import AnomalyDetector
from ..processing.metrics_calculator import MetricsCalculator
from ..models.core import OrderBookSnapshot, TradeEvent, PriceLevel
@pytest.fixture
def data_processor():
"""Create data processor for testing"""
return StandardDataProcessor()
@pytest.fixture
def quality_checker():
"""Create quality checker for testing"""
return DataQualityChecker()
@pytest.fixture
def anomaly_detector():
"""Create anomaly detector for testing"""
return AnomalyDetector()
@pytest.fixture
def metrics_calculator():
"""Create metrics calculator for testing"""
return MetricsCalculator()
@pytest.fixture
def sample_orderbook():
"""Create sample order book for testing"""
return OrderBookSnapshot(
symbol="BTCUSDT",
exchange="binance",
timestamp=datetime.now(timezone.utc),
bids=[
PriceLevel(price=50000.0, size=1.5),
PriceLevel(price=49999.0, size=2.0),
PriceLevel(price=49998.0, size=1.0)
],
asks=[
PriceLevel(price=50001.0, size=1.0),
PriceLevel(price=50002.0, size=1.5),
PriceLevel(price=50003.0, size=2.0)
]
)
@pytest.fixture
def sample_trade():
"""Create sample trade for testing"""
return TradeEvent(
symbol="BTCUSDT",
exchange="binance",
timestamp=datetime.now(timezone.utc),
price=50000.5,
size=0.1,
side="buy",
trade_id="test_trade_123"
)
class TestDataQualityChecker:
"""Test cases for DataQualityChecker"""
def test_orderbook_quality_check(self, quality_checker, sample_orderbook):
"""Test order book quality checking"""
quality_score, issues = quality_checker.check_orderbook_quality(sample_orderbook)
assert 0.0 <= quality_score <= 1.0
assert isinstance(issues, list)
# Good order book should have high quality score
assert quality_score > 0.8
def test_trade_quality_check(self, quality_checker, sample_trade):
"""Test trade quality checking"""
quality_score, issues = quality_checker.check_trade_quality(sample_trade)
assert 0.0 <= quality_score <= 1.0
assert isinstance(issues, list)
# Good trade should have high quality score
assert quality_score > 0.8
def test_invalid_orderbook_detection(self, quality_checker):
"""Test detection of invalid order book"""
# Create invalid order book with crossed spread
invalid_orderbook = OrderBookSnapshot(
symbol="BTCUSDT",
exchange="binance",
timestamp=datetime.now(timezone.utc),
bids=[PriceLevel(price=50002.0, size=1.0)], # Bid higher than ask
asks=[PriceLevel(price=50001.0, size=1.0)] # Ask lower than bid
)
quality_score, issues = quality_checker.check_orderbook_quality(invalid_orderbook)
assert quality_score < 0.8
assert any("crossed book" in issue.lower() for issue in issues)
class TestAnomalyDetector:
"""Test cases for AnomalyDetector"""
def test_orderbook_anomaly_detection(self, anomaly_detector, sample_orderbook):
"""Test order book anomaly detection"""
# First few order books should not trigger anomalies
for _ in range(5):
anomalies = anomaly_detector.detect_orderbook_anomalies(sample_orderbook)
assert isinstance(anomalies, list)
def test_trade_anomaly_detection(self, anomaly_detector, sample_trade):
"""Test trade anomaly detection"""
# First few trades should not trigger anomalies
for _ in range(5):
anomalies = anomaly_detector.detect_trade_anomalies(sample_trade)
assert isinstance(anomalies, list)
def test_price_spike_detection(self, anomaly_detector):
"""Test price spike detection"""
# Create normal order books
for i in range(20):
normal_orderbook = OrderBookSnapshot(
symbol="BTCUSDT",
exchange="binance",
timestamp=datetime.now(timezone.utc),
bids=[PriceLevel(price=50000.0 + i, size=1.0)],
asks=[PriceLevel(price=50001.0 + i, size=1.0)]
)
anomaly_detector.detect_orderbook_anomalies(normal_orderbook)
# Create order book with price spike
spike_orderbook = OrderBookSnapshot(
symbol="BTCUSDT",
exchange="binance",
timestamp=datetime.now(timezone.utc),
bids=[PriceLevel(price=60000.0, size=1.0)], # 20% spike
asks=[PriceLevel(price=60001.0, size=1.0)]
)
anomalies = anomaly_detector.detect_orderbook_anomalies(spike_orderbook)
assert len(anomalies) > 0
assert any("spike" in anomaly.lower() for anomaly in anomalies)
class TestMetricsCalculator:
"""Test cases for MetricsCalculator"""
def test_orderbook_metrics_calculation(self, metrics_calculator, sample_orderbook):
"""Test order book metrics calculation"""
metrics = metrics_calculator.calculate_orderbook_metrics(sample_orderbook)
assert metrics.symbol == "BTCUSDT"
assert metrics.exchange == "binance"
assert metrics.mid_price == 50000.5 # (50000 + 50001) / 2
assert metrics.spread == 1.0 # 50001 - 50000
assert metrics.spread_percentage > 0
assert metrics.bid_volume == 4.5 # 1.5 + 2.0 + 1.0
assert metrics.ask_volume == 4.5 # 1.0 + 1.5 + 2.0
assert metrics.volume_imbalance == 0.0 # Equal volumes
def test_imbalance_metrics_calculation(self, metrics_calculator, sample_orderbook):
"""Test imbalance metrics calculation"""
imbalance = metrics_calculator.calculate_imbalance_metrics(sample_orderbook)
assert imbalance.symbol == "BTCUSDT"
assert -1.0 <= imbalance.volume_imbalance <= 1.0
assert -1.0 <= imbalance.price_imbalance <= 1.0
assert -1.0 <= imbalance.depth_imbalance <= 1.0
assert -1.0 <= imbalance.momentum_score <= 1.0
def test_liquidity_score_calculation(self, metrics_calculator, sample_orderbook):
"""Test liquidity score calculation"""
liquidity_score = metrics_calculator.calculate_liquidity_score(sample_orderbook)
assert 0.0 <= liquidity_score <= 1.0
assert liquidity_score > 0.5 # Good order book should have decent liquidity
class TestStandardDataProcessor:
"""Test cases for StandardDataProcessor"""
def test_data_validation(self, data_processor, sample_orderbook, sample_trade):
"""Test data validation"""
# Valid data should pass validation
assert data_processor.validate_data(sample_orderbook) is True
assert data_processor.validate_data(sample_trade) is True
def test_metrics_calculation(self, data_processor, sample_orderbook):
"""Test metrics calculation through processor"""
metrics = data_processor.calculate_metrics(sample_orderbook)
assert metrics.symbol == "BTCUSDT"
assert metrics.mid_price > 0
assert metrics.spread > 0
def test_anomaly_detection(self, data_processor, sample_orderbook, sample_trade):
"""Test anomaly detection through processor"""
orderbook_anomalies = data_processor.detect_anomalies(sample_orderbook)
trade_anomalies = data_processor.detect_anomalies(sample_trade)
assert isinstance(orderbook_anomalies, list)
assert isinstance(trade_anomalies, list)
def test_data_filtering(self, data_processor, sample_orderbook, sample_trade):
"""Test data filtering"""
# Test symbol filter
criteria = {'symbols': ['BTCUSDT']}
assert data_processor.filter_data(sample_orderbook, criteria) is True
assert data_processor.filter_data(sample_trade, criteria) is True
criteria = {'symbols': ['ETHUSDT']}
assert data_processor.filter_data(sample_orderbook, criteria) is False
assert data_processor.filter_data(sample_trade, criteria) is False
# Test price range filter
criteria = {'price_range': (40000, 60000)}
assert data_processor.filter_data(sample_orderbook, criteria) is True
assert data_processor.filter_data(sample_trade, criteria) is True
criteria = {'price_range': (60000, 70000)}
assert data_processor.filter_data(sample_orderbook, criteria) is False
assert data_processor.filter_data(sample_trade, criteria) is False
def test_data_enrichment(self, data_processor, sample_orderbook, sample_trade):
"""Test data enrichment"""
orderbook_enriched = data_processor.enrich_data(sample_orderbook)
trade_enriched = data_processor.enrich_data(sample_trade)
# Check enriched data structure
assert 'original_data' in orderbook_enriched
assert 'quality_score' in orderbook_enriched
assert 'anomalies' in orderbook_enriched
assert 'processing_timestamp' in orderbook_enriched
assert 'original_data' in trade_enriched
assert 'quality_score' in trade_enriched
assert 'anomalies' in trade_enriched
assert 'trade_value' in trade_enriched
def test_quality_score_calculation(self, data_processor, sample_orderbook, sample_trade):
"""Test quality score calculation"""
orderbook_score = data_processor.get_data_quality_score(sample_orderbook)
trade_score = data_processor.get_data_quality_score(sample_trade)
assert 0.0 <= orderbook_score <= 1.0
assert 0.0 <= trade_score <= 1.0
# Good data should have high quality scores
assert orderbook_score > 0.8
assert trade_score > 0.8
def test_processing_stats(self, data_processor, sample_orderbook, sample_trade):
"""Test processing statistics"""
# Process some data
data_processor.validate_data(sample_orderbook)
data_processor.validate_data(sample_trade)
stats = data_processor.get_processing_stats()
assert 'processed_orderbooks' in stats
assert 'processed_trades' in stats
assert 'quality_failures' in stats
assert 'anomalies_detected' in stats
assert stats['processed_orderbooks'] >= 1
assert stats['processed_trades'] >= 1
if __name__ == "__main__":
# Run simple tests
processor = StandardDataProcessor()
# Test with sample data
orderbook = OrderBookSnapshot(
symbol="BTCUSDT",
exchange="test",
timestamp=datetime.now(timezone.utc),
bids=[PriceLevel(price=50000.0, size=1.0)],
asks=[PriceLevel(price=50001.0, size=1.0)]
)
# Test validation
is_valid = processor.validate_data(orderbook)
print(f"Order book validation: {'PASSED' if is_valid else 'FAILED'}")
# Test metrics
metrics = processor.calculate_metrics(orderbook)
print(f"Metrics calculation: mid_price={metrics.mid_price}, spread={metrics.spread}")
# Test quality score
quality_score = processor.get_data_quality_score(orderbook)
print(f"Quality score: {quality_score:.2f}")
print("Simple data processor test completed")

@@ -0,0 +1,347 @@
"""
Tests for Redis caching system.
"""
import pytest
import asyncio
from datetime import datetime, timezone
from ..caching.redis_manager import RedisManager
from ..caching.cache_keys import CacheKeys
from ..caching.data_serializer import DataSerializer
from ..models.core import OrderBookSnapshot, HeatmapData, PriceLevel, HeatmapPoint
@pytest.fixture
async def redis_manager():
"""Create and initialize Redis manager for testing"""
manager = RedisManager()
await manager.initialize()
yield manager
await manager.close()
@pytest.fixture
def cache_keys():
"""Create cache keys helper"""
return CacheKeys()
@pytest.fixture
def data_serializer():
"""Create data serializer"""
return DataSerializer()
@pytest.fixture
def sample_orderbook():
"""Create sample order book for testing"""
return OrderBookSnapshot(
symbol="BTCUSDT",
exchange="binance",
timestamp=datetime.now(timezone.utc),
bids=[
PriceLevel(price=50000.0, size=1.5),
PriceLevel(price=49999.0, size=2.0)
],
asks=[
PriceLevel(price=50001.0, size=1.0),
PriceLevel(price=50002.0, size=1.5)
]
)
@pytest.fixture
def sample_heatmap():
"""Create sample heatmap for testing"""
heatmap = HeatmapData(
symbol="BTCUSDT",
timestamp=datetime.now(timezone.utc),
bucket_size=1.0
)
# Add some sample points
heatmap.data = [
HeatmapPoint(price=50000.0, volume=1.5, intensity=0.8, side='bid'),
HeatmapPoint(price=50001.0, volume=1.0, intensity=0.6, side='ask'),
HeatmapPoint(price=49999.0, volume=2.0, intensity=1.0, side='bid'),
HeatmapPoint(price=50002.0, volume=1.5, intensity=0.7, side='ask')
]
return heatmap
class TestCacheKeys:
"""Test cases for CacheKeys"""
def test_orderbook_key_generation(self, cache_keys):
"""Test order book key generation"""
key = cache_keys.orderbook_key("BTCUSDT", "binance")
assert key == "ob:binance:BTCUSDT"
def test_heatmap_key_generation(self, cache_keys):
"""Test heatmap key generation"""
# Exchange-specific heatmap
key1 = cache_keys.heatmap_key("BTCUSDT", 1.0, "binance")
assert key1 == "hm:binance:BTCUSDT:1.0"
# Consolidated heatmap
key2 = cache_keys.heatmap_key("BTCUSDT", 1.0)
assert key2 == "hm:consolidated:BTCUSDT:1.0"
def test_ttl_determination(self, cache_keys):
"""Test TTL determination for different key types"""
ob_key = cache_keys.orderbook_key("BTCUSDT", "binance")
hm_key = cache_keys.heatmap_key("BTCUSDT", 1.0)
assert cache_keys.get_ttl(ob_key) == cache_keys.ORDERBOOK_TTL
assert cache_keys.get_ttl(hm_key) == cache_keys.HEATMAP_TTL
def test_key_parsing(self, cache_keys):
"""Test cache key parsing"""
ob_key = cache_keys.orderbook_key("BTCUSDT", "binance")
parsed = cache_keys.parse_key(ob_key)
assert parsed['type'] == 'orderbook'
assert parsed['exchange'] == 'binance'
assert parsed['symbol'] == 'BTCUSDT'
class TestDataSerializer:
"""Test cases for DataSerializer"""
def test_simple_data_serialization(self, data_serializer):
"""Test serialization of simple data types"""
test_data = {
'string': 'test',
'number': 42,
'float': 3.14,
'boolean': True,
'list': [1, 2, 3],
'nested': {'key': 'value'}
}
# Serialize and deserialize
serialized = data_serializer.serialize(test_data)
deserialized = data_serializer.deserialize(serialized)
assert deserialized == test_data
def test_orderbook_serialization(self, data_serializer, sample_orderbook):
"""Test order book serialization"""
# Serialize and deserialize
serialized = data_serializer.serialize(sample_orderbook)
deserialized = data_serializer.deserialize(serialized)
assert isinstance(deserialized, OrderBookSnapshot)
assert deserialized.symbol == sample_orderbook.symbol
assert deserialized.exchange == sample_orderbook.exchange
assert len(deserialized.bids) == len(sample_orderbook.bids)
assert len(deserialized.asks) == len(sample_orderbook.asks)
def test_heatmap_serialization(self, data_serializer, sample_heatmap):
"""Test heatmap serialization"""
# Test specialized heatmap serialization
serialized = data_serializer.serialize_heatmap(sample_heatmap)
deserialized = data_serializer.deserialize_heatmap(serialized)
assert isinstance(deserialized, HeatmapData)
assert deserialized.symbol == sample_heatmap.symbol
assert deserialized.bucket_size == sample_heatmap.bucket_size
assert len(deserialized.data) == len(sample_heatmap.data)
# Check first point
original_point = sample_heatmap.data[0]
deserialized_point = deserialized.data[0]
assert deserialized_point.price == original_point.price
assert deserialized_point.volume == original_point.volume
assert deserialized_point.side == original_point.side
class TestRedisManager:
"""Test cases for RedisManager"""
@pytest.mark.asyncio
async def test_basic_set_get(self, redis_manager):
"""Test basic set and get operations"""
# Set a simple value
key = "test:basic"
value = {"test": "data", "number": 42}
success = await redis_manager.set(key, value, ttl=60)
assert success is True
# Get the value back
retrieved = await redis_manager.get(key)
assert retrieved == value
# Clean up
await redis_manager.delete(key)
@pytest.mark.asyncio
async def test_orderbook_caching(self, redis_manager, sample_orderbook):
"""Test order book caching"""
# Cache order book
success = await redis_manager.cache_orderbook(sample_orderbook)
assert success is True
# Retrieve order book
retrieved = await redis_manager.get_orderbook(
sample_orderbook.symbol,
sample_orderbook.exchange
)
assert retrieved is not None
assert isinstance(retrieved, OrderBookSnapshot)
assert retrieved.symbol == sample_orderbook.symbol
assert retrieved.exchange == sample_orderbook.exchange
@pytest.mark.asyncio
async def test_heatmap_caching(self, redis_manager, sample_heatmap):
"""Test heatmap caching"""
# Cache heatmap
success = await redis_manager.set_heatmap(
sample_heatmap.symbol,
sample_heatmap,
exchange="binance"
)
assert success is True
# Retrieve heatmap
retrieved = await redis_manager.get_heatmap(
sample_heatmap.symbol,
exchange="binance"
)
assert retrieved is not None
assert isinstance(retrieved, HeatmapData)
assert retrieved.symbol == sample_heatmap.symbol
assert len(retrieved.data) == len(sample_heatmap.data)
@pytest.mark.asyncio
async def test_multi_operations(self, redis_manager):
"""Test multi-get and multi-set operations"""
# Prepare test data
test_data = {
"test:multi1": {"value": 1},
"test:multi2": {"value": 2},
"test:multi3": {"value": 3}
}
# Multi-set
success = await redis_manager.mset(test_data, ttl=60)
assert success is True
# Multi-get
keys = list(test_data.keys())
values = await redis_manager.mget(keys)
assert len(values) == 3
assert all(v is not None for v in values)
# Verify values
for i, key in enumerate(keys):
assert values[i] == test_data[key]
# Clean up
for key in keys:
await redis_manager.delete(key)
@pytest.mark.asyncio
async def test_key_expiration(self, redis_manager):
"""Test key expiration"""
key = "test:expiration"
value = {"expires": "soon"}
# Set with short TTL
success = await redis_manager.set(key, value, ttl=1)
assert success is True
# Should exist immediately
exists = await redis_manager.exists(key)
assert exists is True
# Wait for expiration
await asyncio.sleep(2)
# Should not exist after expiration
exists = await redis_manager.exists(key)
assert exists is False
@pytest.mark.asyncio
async def test_cache_miss(self, redis_manager):
"""Test cache miss behavior"""
# Try to get non-existent key
value = await redis_manager.get("test:nonexistent")
assert value is None
# Check statistics
stats = redis_manager.get_stats()
assert stats['misses'] > 0
@pytest.mark.asyncio
async def test_health_check(self, redis_manager):
"""Test Redis health check"""
health = await redis_manager.health_check()
assert isinstance(health, dict)
assert 'redis_ping' in health
assert 'total_keys' in health
assert 'hit_rate' in health
# Should be able to ping
assert health['redis_ping'] is True
@pytest.mark.asyncio
async def test_statistics_tracking(self, redis_manager):
"""Test statistics tracking"""
# Reset stats
redis_manager.reset_stats()
# Perform some operations
await redis_manager.set("test:stats1", {"data": 1})
await redis_manager.set("test:stats2", {"data": 2})
await redis_manager.get("test:stats1")
await redis_manager.get("test:nonexistent")
# Check statistics
stats = redis_manager.get_stats()
assert stats['sets'] >= 2
assert stats['gets'] >= 2
assert stats['hits'] >= 1
assert stats['misses'] >= 1
assert stats['total_operations'] >= 4
# Clean up
await redis_manager.delete("test:stats1")
await redis_manager.delete("test:stats2")
if __name__ == "__main__":
# Run simple tests
async def simple_test():
manager = RedisManager()
await manager.initialize()
# Test basic operations
success = await manager.set("test", {"simple": "test"}, ttl=60)
print(f"Set operation: {'SUCCESS' if success else 'FAILED'}")
value = await manager.get("test")
print(f"Get operation: {'SUCCESS' if value else 'FAILED'}")
# Test ping
ping_result = await manager.ping()
print(f"Ping test: {'SUCCESS' if ping_result else 'FAILED'}")
# Get statistics
stats = manager.get_stats()
print(f"Statistics: {stats}")
# Clean up
await manager.delete("test")
await manager.close()
print("Simple Redis test completed")
asyncio.run(simple_test())
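
The key-format assertions above pin down the cache naming scheme. For quick reference, an illustrative pair of builders that reproduce those formats (the real CacheKeys class may add prefixes or TTL handling not shown here):

from typing import Optional

def orderbook_key(symbol: str, exchange: str) -> str:
    return f"ob:{exchange}:{symbol}"

def heatmap_key(symbol: str, bucket_size: float, exchange: Optional[str] = None) -> str:
    # Consolidated (cross-exchange) heatmaps use the literal scope "consolidated".
    scope = exchange if exchange else "consolidated"
    return f"hm:{scope}:{symbol}:{bucket_size}"

# orderbook_key("BTCUSDT", "binance")  -> "ob:binance:BTCUSDT"
# heatmap_key("BTCUSDT", 1.0)          -> "hm:consolidated:BTCUSDT:1.0"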

@@ -0,0 +1,192 @@
"""
Tests for TimescaleDB storage manager.
"""
import pytest
import asyncio
from datetime import datetime, timezone
from ..storage.timescale_manager import TimescaleManager
from ..models.core import OrderBookSnapshot, TradeEvent, PriceLevel
from ..config import config
@pytest.fixture
async def storage_manager():
"""Create and initialize storage manager for testing"""
manager = TimescaleManager()
await manager.initialize()
yield manager
await manager.close()
@pytest.fixture
def sample_orderbook():
"""Create sample order book for testing"""
return OrderBookSnapshot(
symbol="BTCUSDT",
exchange="binance",
timestamp=datetime.now(timezone.utc),
bids=[
PriceLevel(price=50000.0, size=1.5, count=3),
PriceLevel(price=49999.0, size=2.0, count=5)
],
asks=[
PriceLevel(price=50001.0, size=1.0, count=2),
PriceLevel(price=50002.0, size=1.5, count=4)
],
sequence_id=12345
)
@pytest.fixture
def sample_trade():
"""Create sample trade for testing"""
return TradeEvent(
symbol="BTCUSDT",
exchange="binance",
timestamp=datetime.now(timezone.utc),
price=50000.5,
size=0.1,
side="buy",
trade_id="test_trade_123"
)
class TestTimescaleManager:
"""Test cases for TimescaleManager"""
@pytest.mark.asyncio
async def test_health_check(self, storage_manager):
"""Test storage health check"""
is_healthy = await storage_manager.health_check()
assert is_healthy is True
@pytest.mark.asyncio
async def test_store_orderbook(self, storage_manager, sample_orderbook):
"""Test storing order book snapshot"""
result = await storage_manager.store_orderbook(sample_orderbook)
assert result is True
@pytest.mark.asyncio
async def test_store_trade(self, storage_manager, sample_trade):
"""Test storing trade event"""
result = await storage_manager.store_trade(sample_trade)
assert result is True
@pytest.mark.asyncio
async def test_get_latest_orderbook(self, storage_manager, sample_orderbook):
"""Test retrieving latest order book"""
# Store the order book first
await storage_manager.store_orderbook(sample_orderbook)
# Retrieve it
retrieved = await storage_manager.get_latest_orderbook(
sample_orderbook.symbol,
sample_orderbook.exchange
)
assert retrieved is not None
assert retrieved.symbol == sample_orderbook.symbol
assert retrieved.exchange == sample_orderbook.exchange
assert len(retrieved.bids) == len(sample_orderbook.bids)
assert len(retrieved.asks) == len(sample_orderbook.asks)
@pytest.mark.asyncio
async def test_batch_store_orderbooks(self, storage_manager):
"""Test batch storing order books"""
orderbooks = []
for i in range(5):
orderbook = OrderBookSnapshot(
symbol="ETHUSDT",
exchange="binance",
timestamp=datetime.now(timezone.utc),
bids=[PriceLevel(price=3000.0 + i, size=1.0)],
asks=[PriceLevel(price=3001.0 + i, size=1.0)],
sequence_id=i
)
orderbooks.append(orderbook)
result = await storage_manager.batch_store_orderbooks(orderbooks)
assert result == 5
@pytest.mark.asyncio
async def test_batch_store_trades(self, storage_manager):
"""Test batch storing trades"""
trades = []
for i in range(5):
trade = TradeEvent(
symbol="ETHUSDT",
exchange="binance",
timestamp=datetime.now(timezone.utc),
price=3000.0 + i,
size=0.1,
side="buy" if i % 2 == 0 else "sell",
trade_id=f"test_trade_{i}"
)
trades.append(trade)
result = await storage_manager.batch_store_trades(trades)
assert result == 5
@pytest.mark.asyncio
async def test_get_storage_stats(self, storage_manager):
"""Test getting storage statistics"""
stats = await storage_manager.get_storage_stats()
assert isinstance(stats, dict)
assert 'table_sizes' in stats
assert 'record_counts' in stats
assert 'connection_pool' in stats
@pytest.mark.asyncio
async def test_historical_data_retrieval(self, storage_manager, sample_orderbook, sample_trade):
"""Test retrieving historical data"""
# Store some data first
await storage_manager.store_orderbook(sample_orderbook)
await storage_manager.store_trade(sample_trade)
# Define time range
start_time = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
end_time = datetime.now(timezone.utc).replace(hour=23, minute=59, second=59, microsecond=999999)
# Retrieve historical order books
orderbooks = await storage_manager.get_historical_orderbooks(
sample_orderbook.symbol,
sample_orderbook.exchange,
start_time,
end_time,
limit=10
)
assert isinstance(orderbooks, list)
# Retrieve historical trades
trades = await storage_manager.get_historical_trades(
sample_trade.symbol,
sample_trade.exchange,
start_time,
end_time,
limit=10
)
assert isinstance(trades, list)
if __name__ == "__main__":
# Run a simple test
async def simple_test():
manager = TimescaleManager()
await manager.initialize()
# Test health check
is_healthy = await manager.health_check()
print(f"Health check: {'PASSED' if is_healthy else 'FAILED'}")
# Test storage stats
stats = await manager.get_storage_stats()
print(f"Storage stats: {len(stats)} categories")
await manager.close()
print("Simple test completed")
asyncio.run(simple_test())

COBY/utils/__init__.py Normal file

@ -0,0 +1,22 @@
"""
Utility functions and helpers for the multi-exchange data aggregation system.
"""
from .logging import setup_logging, get_logger
from .validation import validate_symbol, validate_price, validate_volume
from .timing import get_current_timestamp, format_timestamp
from .exceptions import COBYException, ConnectionError, ValidationError, ProcessingError
__all__ = [
'setup_logging',
'get_logger',
'validate_symbol',
'validate_price',
'validate_volume',
'get_current_timestamp',
'format_timestamp',
'COBYException',
'ConnectionError',
'ValidationError',
'ProcessingError'
]

COBY/utils/exceptions.py Normal file

@ -0,0 +1,57 @@
"""
Custom exceptions for the COBY system.
"""
class COBYException(Exception):
"""Base exception for COBY system"""
def __init__(self, message: str, error_code: str = None, details: dict = None):
super().__init__(message)
self.message = message
self.error_code = error_code
self.details = details or {}
def to_dict(self) -> dict:
"""Convert exception to dictionary"""
return {
'error': self.__class__.__name__,
'message': self.message,
'error_code': self.error_code,
'details': self.details
}
class ConnectionError(COBYException):
"""Exception raised for connection-related errors"""
pass
class ValidationError(COBYException):
"""Exception raised for data validation errors"""
pass
class ProcessingError(COBYException):
"""Exception raised for data processing errors"""
pass
class StorageError(COBYException):
"""Exception raised for storage-related errors"""
pass
class ConfigurationError(COBYException):
"""Exception raised for configuration errors"""
pass
class ReplayError(COBYException):
"""Exception raised for replay-related errors"""
pass
class AggregationError(COBYException):
"""Exception raised for aggregation errors"""
pass
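As a quick illustration of how this hierarchy is meant to be used, here is a minimal sketch (the wrapped failure, the error code, and the `COBY` package import path are assumptions for the example):

```python
from COBY.utils.exceptions import StorageError

def persist_batch(rows: list) -> None:
    # Illustrative only: surface a low-level failure as a domain exception.
    raise StorageError(
        "Failed to write order book batch",
        error_code="TSDB_WRITE_FAILED",   # hypothetical error code
        details={"batch_size": len(rows)}
    )

try:
    persist_batch([{"symbol": "BTCUSDT"}])
except StorageError as exc:
    # to_dict() produces a JSON-friendly payload for API error responses.
    print(exc.to_dict())
```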

COBY/utils/logging.py Normal file

@ -0,0 +1,149 @@
"""
Logging utilities for the COBY system.
"""
import logging
import logging.handlers
import sys
import uuid
from pathlib import Path
from typing import Optional
from contextvars import ContextVar
# Context variable for correlation ID
correlation_id: ContextVar[Optional[str]] = ContextVar('correlation_id', default=None)
class CorrelationFilter(logging.Filter):
"""Add correlation ID to log records"""
def filter(self, record):
record.correlation_id = correlation_id.get() or 'N/A'
return True
class COBYFormatter(logging.Formatter):
"""Custom formatter with correlation ID support"""
def __init__(self, include_correlation_id: bool = True):
self.include_correlation_id = include_correlation_id
if include_correlation_id:
fmt = '%(asctime)s - %(name)s - %(levelname)s - [%(correlation_id)s] - %(message)s'
else:
fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
super().__init__(fmt, datefmt='%Y-%m-%d %H:%M:%S')
def setup_logging(
level: str = 'INFO',
log_file: Optional[str] = None,
max_file_size: int = 100, # MB
backup_count: int = 5,
enable_correlation_id: bool = True,
console_output: bool = True
) -> None:
"""
Set up logging configuration for the COBY system.
Args:
level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
log_file: Path to log file (None = no file logging)
max_file_size: Maximum log file size in MB
backup_count: Number of backup files to keep
enable_correlation_id: Whether to include correlation IDs in logs
console_output: Whether to output logs to console
"""
# Convert string level to logging constant
numeric_level = getattr(logging, level.upper(), logging.INFO)
# Create root logger
root_logger = logging.getLogger()
root_logger.setLevel(numeric_level)
# Clear existing handlers
root_logger.handlers.clear()
# Create formatter
formatter = COBYFormatter(include_correlation_id=enable_correlation_id)
# Add correlation filter if enabled
correlation_filter = CorrelationFilter() if enable_correlation_id else None
# Console handler
if console_output:
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(numeric_level)
console_handler.setFormatter(formatter)
if correlation_filter:
console_handler.addFilter(correlation_filter)
root_logger.addHandler(console_handler)
# File handler
if log_file:
# Create log directory if it doesn't exist
log_path = Path(log_file)
log_path.parent.mkdir(parents=True, exist_ok=True)
# Rotating file handler
file_handler = logging.handlers.RotatingFileHandler(
log_file,
maxBytes=max_file_size * 1024 * 1024, # Convert MB to bytes
backupCount=backup_count
)
file_handler.setLevel(numeric_level)
file_handler.setFormatter(formatter)
if correlation_filter:
file_handler.addFilter(correlation_filter)
root_logger.addHandler(file_handler)
# Set specific logger levels
logging.getLogger('websockets').setLevel(logging.WARNING)
logging.getLogger('urllib3').setLevel(logging.WARNING)
logging.getLogger('requests').setLevel(logging.WARNING)
def get_logger(name: str) -> logging.Logger:
"""
Get a logger instance with the specified name.
Args:
name: Logger name (typically __name__)
Returns:
logging.Logger: Logger instance
"""
return logging.getLogger(name)
def set_correlation_id(corr_id: Optional[str] = None) -> str:
"""
Set correlation ID for current context.
Args:
corr_id: Correlation ID (generates UUID if None)
Returns:
str: The correlation ID that was set
"""
if corr_id is None:
corr_id = str(uuid.uuid4())[:8] # Short UUID
correlation_id.set(corr_id)
return corr_id
def get_correlation_id() -> Optional[str]:
"""
Get current correlation ID.
Returns:
str: Current correlation ID or None
"""
return correlation_id.get()
def clear_correlation_id() -> None:
"""Clear correlation ID from current context."""
correlation_id.set(None)
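A minimal usage sketch of the module above, assuming `COBY` is importable as a package; the log file path is only an example:

```python
from COBY.utils.logging import (
    setup_logging, get_logger, set_correlation_id, clear_correlation_id
)

# Configure root logging once at startup (console plus 50 MB rotating file).
setup_logging(level="DEBUG", log_file="logs/coby.log",
              max_file_size=50, backup_count=3)

logger = get_logger(__name__)

# Tag every log line emitted in this context with a short correlation ID.
corr_id = set_correlation_id()
logger.info("Processing order book update (correlation id %s)", corr_id)
clear_correlation_id()
```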

COBY/utils/timing.py Normal file

@ -0,0 +1,206 @@
"""
Timing utilities for the COBY system.
"""
import time
from datetime import datetime, timezone
from typing import Optional
def get_current_timestamp() -> datetime:
"""
Get current UTC timestamp.
Returns:
datetime: Current UTC timestamp
"""
return datetime.now(timezone.utc)
def format_timestamp(timestamp: datetime, format_str: str = "%Y-%m-%d %H:%M:%S.%f") -> str:
"""
Format timestamp to string.
Args:
timestamp: Timestamp to format
format_str: Format string
Returns:
str: Formatted timestamp string
"""
return timestamp.strftime(format_str)
def parse_timestamp(timestamp_str: str, format_str: str = "%Y-%m-%d %H:%M:%S.%f") -> datetime:
"""
Parse timestamp string to datetime.
Args:
timestamp_str: Timestamp string to parse
format_str: Format string
Returns:
datetime: Parsed timestamp
"""
dt = datetime.strptime(timestamp_str, format_str)
# Ensure timezone awareness
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return dt
def timestamp_to_unix(timestamp: datetime) -> float:
"""
Convert datetime to Unix timestamp.
Args:
timestamp: Datetime to convert
Returns:
float: Unix timestamp
"""
return timestamp.timestamp()
def unix_to_timestamp(unix_time: float) -> datetime:
"""
Convert Unix timestamp to datetime.
Args:
unix_time: Unix timestamp
Returns:
datetime: Converted datetime (UTC)
"""
return datetime.fromtimestamp(unix_time, tz=timezone.utc)
def calculate_time_diff(start: datetime, end: datetime) -> float:
"""
Calculate time difference in seconds.
Args:
start: Start timestamp
end: End timestamp
Returns:
float: Time difference in seconds
"""
return (end - start).total_seconds()
def is_timestamp_recent(timestamp: datetime, max_age_seconds: int = 60) -> bool:
"""
Check if timestamp is recent (within max_age_seconds).
Args:
timestamp: Timestamp to check
max_age_seconds: Maximum age in seconds
Returns:
bool: True if recent, False otherwise
"""
now = get_current_timestamp()
age = calculate_time_diff(timestamp, now)
return age <= max_age_seconds
def sleep_until(target_time: datetime) -> None:
"""
Sleep until target time.
Args:
target_time: Target timestamp to sleep until
"""
now = get_current_timestamp()
sleep_seconds = calculate_time_diff(now, target_time)
if sleep_seconds > 0:
time.sleep(sleep_seconds)
def get_milliseconds() -> int:
"""
Get current timestamp in milliseconds.
Returns:
int: Current timestamp in milliseconds
"""
return int(time.time() * 1000)
def milliseconds_to_timestamp(ms: int) -> datetime:
"""
Convert milliseconds to datetime.
Args:
ms: Milliseconds timestamp
Returns:
datetime: Converted datetime (UTC)
"""
return datetime.fromtimestamp(ms / 1000.0, tz=timezone.utc)
def round_timestamp(timestamp: datetime, seconds: int) -> datetime:
"""
Round timestamp to nearest interval.
Args:
timestamp: Timestamp to round
seconds: Interval in seconds
Returns:
datetime: Rounded timestamp
"""
unix_time = timestamp_to_unix(timestamp)
rounded_unix = round(unix_time / seconds) * seconds
return unix_to_timestamp(rounded_unix)
class Timer:
"""Simple timer for measuring execution time"""
def __init__(self):
self.start_time: Optional[float] = None
self.end_time: Optional[float] = None
def start(self) -> None:
"""Start the timer"""
self.start_time = time.perf_counter()
self.end_time = None
def stop(self) -> float:
"""
Stop the timer and return elapsed time.
Returns:
float: Elapsed time in seconds
"""
if self.start_time is None:
raise ValueError("Timer not started")
self.end_time = time.perf_counter()
return self.elapsed()
def elapsed(self) -> float:
"""
Get elapsed time.
Returns:
float: Elapsed time in seconds
"""
if self.start_time is None:
return 0.0
end = self.end_time or time.perf_counter()
return end - self.start_time
def __enter__(self):
"""Context manager entry"""
self.start()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit"""
self.stop()
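For reference, a short usage sketch of `Timer` and the bucketing helpers defined above (package import path assumed):

```python
from COBY.utils.timing import (
    Timer, get_current_timestamp, round_timestamp, is_timestamp_recent
)

# Time a block of work via the context-manager form of Timer.
with Timer() as timer:
    total = sum(range(1_000_000))   # placeholder workload
print(f"Elapsed: {timer.elapsed():.6f}s")

# Round a timestamp to the nearest 60-second boundary (e.g. 1-minute buckets).
now = get_current_timestamp()
minute_bucket = round_timestamp(now, 60)
print(minute_bucket, is_timestamp_recent(minute_bucket, max_age_seconds=120))
```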

COBY/utils/validation.py Normal file

@ -0,0 +1,217 @@
"""
Data validation utilities for the COBY system.
"""
import re
from typing import List, Optional
from decimal import Decimal, InvalidOperation
def validate_symbol(symbol: str) -> bool:
"""
Validate trading symbol format.
Args:
symbol: Trading symbol to validate
Returns:
bool: True if valid, False otherwise
"""
if not symbol or not isinstance(symbol, str):
return False
# Basic symbol format validation (e.g., BTCUSDT, ETH-USD)
pattern = r'^[A-Z0-9]{2,10}[-/]?[A-Z0-9]{2,10}$'
return bool(re.match(pattern, symbol.upper()))
def validate_price(price: float) -> bool:
"""
Validate price value.
Args:
price: Price to validate
Returns:
bool: True if valid, False otherwise
"""
if not isinstance(price, (int, float, Decimal)):
return False
try:
price_decimal = Decimal(str(price))
return price_decimal > 0 and price_decimal < Decimal('1e10') # Reasonable upper bound
except (InvalidOperation, ValueError):
return False
def validate_volume(volume: float) -> bool:
"""
Validate volume value.
Args:
volume: Volume to validate
Returns:
bool: True if valid, False otherwise
"""
if not isinstance(volume, (int, float, Decimal)):
return False
try:
volume_decimal = Decimal(str(volume))
return volume_decimal >= 0 and volume_decimal < Decimal('1e15') # Reasonable upper bound
except (InvalidOperation, ValueError):
return False
def validate_exchange_name(exchange: str) -> bool:
"""
Validate exchange name.
Args:
exchange: Exchange name to validate
Returns:
bool: True if valid, False otherwise
"""
if not exchange or not isinstance(exchange, str):
return False
# Exchange name should be alphanumeric with possible underscores/hyphens
pattern = r'^[a-zA-Z0-9_-]{2,20}$'
return bool(re.match(pattern, exchange))
def validate_timestamp_range(start_time, end_time) -> List[str]:
"""
Validate timestamp range.
Args:
start_time: Start timestamp
end_time: End timestamp
Returns:
List[str]: List of validation errors (empty if valid)
"""
errors = []
if start_time is None:
errors.append("Start time cannot be None")
if end_time is None:
errors.append("End time cannot be None")
if start_time and end_time and start_time >= end_time:
errors.append("Start time must be before end time")
return errors
def validate_bucket_size(bucket_size: float) -> bool:
"""
Validate price bucket size.
Args:
bucket_size: Bucket size to validate
Returns:
bool: True if valid, False otherwise
"""
if not isinstance(bucket_size, (int, float, Decimal)):
return False
try:
size_decimal = Decimal(str(bucket_size))
return size_decimal > 0 and size_decimal <= Decimal('1000') # Reasonable upper bound
except (InvalidOperation, ValueError):
return False
def validate_speed_multiplier(speed: float) -> bool:
"""
Validate replay speed multiplier.
Args:
speed: Speed multiplier to validate
Returns:
bool: True if valid, False otherwise
"""
if not isinstance(speed, (int, float)):
return False
return 0.01 <= speed <= 100.0 # 1% to 100x speed
def sanitize_symbol(symbol: str) -> str:
"""
Sanitize and normalize symbol format.
Args:
symbol: Symbol to sanitize
Returns:
str: Sanitized symbol
"""
if not symbol:
return ""
# Remove whitespace and convert to uppercase
sanitized = symbol.strip().upper()
# Remove invalid characters
sanitized = re.sub(r'[^A-Z0-9/-]', '', sanitized)
return sanitized
def validate_percentage(value: float, min_val: float = 0.0, max_val: float = 100.0) -> bool:
"""
Validate percentage value.
Args:
value: Percentage value to validate
min_val: Minimum allowed value
max_val: Maximum allowed value
Returns:
bool: True if valid, False otherwise
"""
if not isinstance(value, (int, float)):
return False
return min_val <= value <= max_val
def validate_connection_config(config: dict) -> List[str]:
"""
Validate connection configuration.
Args:
config: Configuration dictionary
Returns:
List[str]: List of validation errors (empty if valid)
"""
errors = []
# Required fields
required_fields = ['host', 'port']
for field in required_fields:
if field not in config:
errors.append(f"Missing required field: {field}")
# Validate host
if 'host' in config:
host = config['host']
if not isinstance(host, str) or not host.strip():
errors.append("Host must be a non-empty string")
# Validate port
if 'port' in config:
port = config['port']
if not isinstance(port, int) or not (1 <= port <= 65535):
errors.append("Port must be an integer between 1 and 65535")
return errors
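A brief usage sketch of the validators above (package import path assumed):

```python
from COBY.utils.validation import (
    validate_symbol, sanitize_symbol, validate_price,
    validate_volume, validate_connection_config
)

symbol = sanitize_symbol("  ethusdt ")            # -> "ETHUSDT"
print(symbol, validate_symbol(symbol))            # ETHUSDT True

print(validate_price(3150.25), validate_volume(-1.0))   # True False

# Connection configs are validated into a list of human-readable errors.
errors = validate_connection_config({"host": "localhost", "port": 99999})
print(errors)   # ['Port must be an integer between 1 and 65535']
```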

COBY/web/__init__.py Normal file

@ -0,0 +1,11 @@
"""
Web dashboard for the COBY system.
"""
from .dashboard_server import create_dashboard_app
from .static_handler import StaticHandler
__all__ = [
'create_dashboard_app',
'StaticHandler'
]


@ -0,0 +1,596 @@
/* COBY Dashboard Styles */
:root {
--primary-color: #2563eb;
--secondary-color: #1e40af;
--success-color: #10b981;
--warning-color: #f59e0b;
--error-color: #ef4444;
--background-color: #0f172a;
--surface-color: #1e293b;
--surface-light: #334155;
--text-primary: #f8fafc;
--text-secondary: #cbd5e1;
--text-muted: #64748b;
--border-color: #475569;
--shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
--shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.1);
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
background-color: var(--background-color);
color: var(--text-primary);
line-height: 1.6;
overflow-x: hidden;
}
/* Layout */
#app {
min-height: 100vh;
display: flex;
flex-direction: column;
}
/* Header */
.dashboard-header {
background: var(--surface-color);
border-bottom: 1px solid var(--border-color);
padding: 1rem 2rem;
box-shadow: var(--shadow);
}
.header-content {
display: flex;
justify-content: space-between;
align-items: center;
max-width: 1400px;
margin: 0 auto;
}
.logo {
font-size: 1.5rem;
font-weight: 700;
color: var(--primary-color);
letter-spacing: -0.025em;
}
.header-info {
display: flex;
gap: 2rem;
align-items: center;
}
.connection-status {
display: flex;
align-items: center;
gap: 0.5rem;
font-size: 0.875rem;
font-weight: 500;
}
.connection-status::before {
content: '';
width: 8px;
height: 8px;
border-radius: 50%;
background: var(--error-color);
animation: pulse 2s infinite;
}
.connection-status.connected::before {
background: var(--success-color);
}
.last-update {
font-size: 0.875rem;
color: var(--text-secondary);
}
/* Main Content */
.dashboard-main {
flex: 1;
padding: 2rem;
max-width: 1400px;
margin: 0 auto;
width: 100%;
display: grid;
grid-template-columns: 1fr 300px;
grid-template-rows: auto auto 1fr;
gap: 2rem;
grid-template-areas:
"selector selector"
"overview overview"
"heatmap sidebar";
}
/* Symbol Selector */
.symbol-selector {
grid-area: selector;
display: flex;
gap: 2rem;
align-items: center;
background: var(--surface-color);
padding: 1rem 1.5rem;
border-radius: 0.5rem;
border: 1px solid var(--border-color);
}
.symbol-dropdown {
background: var(--surface-light);
border: 1px solid var(--border-color);
border-radius: 0.375rem;
padding: 0.5rem 1rem;
color: var(--text-primary);
font-size: 1rem;
min-width: 150px;
}
.exchange-toggles {
display: flex;
gap: 1rem;
flex-wrap: wrap;
}
.exchange-toggle {
display: flex;
align-items: center;
gap: 0.5rem;
padding: 0.5rem 1rem;
background: var(--surface-light);
border: 1px solid var(--border-color);
border-radius: 0.375rem;
cursor: pointer;
transition: all 0.2s;
}
.exchange-toggle:hover {
background: var(--primary-color);
}
.exchange-toggle.active {
background: var(--primary-color);
border-color: var(--primary-color);
}
/* Market Overview */
.market-overview {
grid-area: overview;
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 1rem;
}
.metric-card {
background: var(--surface-color);
border: 1px solid var(--border-color);
border-radius: 0.5rem;
padding: 1.5rem;
text-align: center;
transition: transform 0.2s;
}
.metric-card:hover {
transform: translateY(-2px);
box-shadow: var(--shadow-lg);
}
.metric-card h3 {
font-size: 0.875rem;
font-weight: 500;
color: var(--text-secondary);
margin-bottom: 0.5rem;
text-transform: uppercase;
letter-spacing: 0.05em;
}
.metric-value {
font-size: 1.5rem;
font-weight: 700;
color: var(--text-primary);
margin-bottom: 0.25rem;
}
.metric-change {
font-size: 0.875rem;
font-weight: 500;
}
.metric-change.positive {
color: var(--success-color);
}
.metric-change.negative {
color: var(--error-color);
}
/* Imbalance Bar */
.imbalance-bar {
height: 4px;
background: var(--surface-light);
border-radius: 2px;
margin-top: 0.5rem;
position: relative;
overflow: hidden;
}
.imbalance-bar::after {
content: '';
position: absolute;
top: 0;
left: 50%;
width: 0%;
height: 100%;
background: var(--primary-color);
transition: all 0.3s ease;
transform: translateX(-50%);
}
/* Liquidity Indicator */
.liquidity-indicator {
height: 4px;
background: var(--surface-light);
border-radius: 2px;
margin-top: 0.5rem;
position: relative;
overflow: hidden;
}
.liquidity-indicator::after {
content: '';
position: absolute;
top: 0;
left: 0;
width: 0%;
height: 100%;
background: linear-gradient(90deg, var(--error-color), var(--warning-color), var(--success-color));
transition: width 0.3s ease;
}
/* Side Panels */
.side-panels {
grid-area: sidebar;
display: flex;
flex-direction: column;
gap: 1rem;
}
.panel {
background: var(--surface-color);
border: 1px solid var(--border-color);
border-radius: 0.5rem;
padding: 1rem;
}
.panel h3 {
font-size: 1rem;
font-weight: 600;
margin-bottom: 1rem;
color: var(--text-primary);
border-bottom: 1px solid var(--border-color);
padding-bottom: 0.5rem;
}
/* Order Book Panel */
.order-book-container {
display: flex;
flex-direction: column;
gap: 0.5rem;
}
.section-header {
font-size: 0.875rem;
font-weight: 500;
color: var(--text-secondary);
text-transform: uppercase;
letter-spacing: 0.05em;
margin-bottom: 0.5rem;
}
.order-levels {
display: flex;
flex-direction: column;
gap: 2px;
max-height: 150px;
overflow-y: auto;
}
.order-level {
display: flex;
justify-content: space-between;
padding: 0.25rem 0.5rem;
background: var(--surface-light);
border-radius: 0.25rem;
font-size: 0.75rem;
position: relative;
overflow: hidden;
}
.order-level::before {
content: '';
position: absolute;
top: 0;
left: 0;
height: 100%;
background: rgba(59, 130, 246, 0.1);
transition: width 0.3s ease;
}
.order-level.bid::before {
background: rgba(34, 197, 94, 0.1);
}
.order-level.ask::before {
background: rgba(239, 68, 68, 0.1);
}
.spread-indicator {
text-align: center;
padding: 0.5rem;
background: var(--surface-light);
border-radius: 0.25rem;
margin: 0.5rem 0;
}
.spread-value {
font-size: 0.875rem;
font-weight: 500;
color: var(--text-secondary);
}
/* Exchange Status Panel */
.exchange-list {
display: flex;
flex-direction: column;
gap: 0.5rem;
}
.exchange-item {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0.5rem;
background: var(--surface-light);
border-radius: 0.25rem;
}
.exchange-name {
font-weight: 500;
text-transform: capitalize;
}
.exchange-status {
font-size: 0.75rem;
padding: 0.25rem 0.5rem;
border-radius: 0.25rem;
font-weight: 500;
}
.exchange-status.connected {
background: rgba(34, 197, 94, 0.2);
color: var(--success-color);
}
.exchange-status.disconnected {
background: rgba(239, 68, 68, 0.2);
color: var(--error-color);
}
/* Statistics Panel */
.stats-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 0.5rem;
}
.stat-item {
display: flex;
flex-direction: column;
padding: 0.5rem;
background: var(--surface-light);
border-radius: 0.25rem;
text-align: center;
}
.stat-label {
font-size: 0.75rem;
color: var(--text-secondary);
margin-bottom: 0.25rem;
}
.stat-value {
font-size: 1rem;
font-weight: 600;
color: var(--text-primary);
}
/* Footer */
.dashboard-footer {
background: var(--surface-color);
border-top: 1px solid var(--border-color);
padding: 1rem 2rem;
margin-top: auto;
}
.footer-content {
display: flex;
justify-content: space-between;
align-items: center;
max-width: 1400px;
margin: 0 auto;
font-size: 0.875rem;
color: var(--text-secondary);
}
/* Loading Overlay */
.loading-overlay {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background: rgba(15, 23, 42, 0.9);
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
z-index: 1000;
backdrop-filter: blur(4px);
}
.loading-spinner {
width: 40px;
height: 40px;
border: 4px solid var(--surface-light);
border-top: 4px solid var(--primary-color);
border-radius: 50%;
animation: spin 1s linear infinite;
margin-bottom: 1rem;
}
.loading-text {
font-size: 1rem;
color: var(--text-secondary);
}
/* Modal */
.modal {
display: none;
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background: rgba(0, 0, 0, 0.5);
z-index: 1001;
backdrop-filter: blur(4px);
}
.modal.show {
display: flex;
justify-content: center;
align-items: center;
}
.modal-content {
background: var(--surface-color);
border: 1px solid var(--border-color);
border-radius: 0.5rem;
max-width: 400px;
width: 90%;
max-height: 80vh;
overflow-y: auto;
}
.modal-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 1rem 1.5rem;
border-bottom: 1px solid var(--border-color);
}
.modal-header h3 {
margin: 0;
font-size: 1.125rem;
font-weight: 600;
}
.modal-close {
background: none;
border: none;
font-size: 1.5rem;
color: var(--text-secondary);
cursor: pointer;
padding: 0;
width: 24px;
height: 24px;
display: flex;
align-items: center;
justify-content: center;
}
.modal-close:hover {
color: var(--text-primary);
}
.modal-body {
padding: 1.5rem;
}
/* Animations */
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.5; }
}
/* Responsive Design */
@media (max-width: 1024px) {
.dashboard-main {
grid-template-columns: 1fr;
grid-template-areas:
"selector"
"overview"
"heatmap"
"sidebar";
}
.side-panels {
flex-direction: row;
overflow-x: auto;
}
.panel {
min-width: 250px;
}
}
@media (max-width: 768px) {
.dashboard-main {
padding: 1rem;
gap: 1rem;
}
.header-content {
padding: 0 1rem;
}
.market-overview {
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
}
.symbol-selector {
flex-direction: column;
align-items: stretch;
gap: 1rem;
}
.exchange-toggles {
justify-content: center;
}
}
/* Utility Classes */
.hidden {
display: none !important;
}
.fade-in {
animation: fadeIn 0.3s ease-in;
}
@keyframes fadeIn {
from { opacity: 0; }
to { opacity: 1; }
}


@ -0,0 +1,427 @@
/* Heatmap Specific Styles */
.heatmap-container {
grid-area: heatmap;
background: var(--surface-color);
border: 1px solid var(--border-color);
border-radius: 0.5rem;
padding: 1rem;
display: flex;
flex-direction: column;
min-height: 600px;
}
.heatmap-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 1rem;
padding-bottom: 1rem;
border-bottom: 1px solid var(--border-color);
}
.heatmap-header h2 {
font-size: 1.25rem;
font-weight: 600;
color: var(--text-primary);
margin: 0;
}
.heatmap-controls {
display: flex;
gap: 0.5rem;
align-items: center;
}
.control-btn {
background: var(--surface-light);
border: 1px solid var(--border-color);
border-radius: 0.375rem;
padding: 0.5rem 1rem;
color: var(--text-primary);
font-size: 0.875rem;
cursor: pointer;
transition: all 0.2s;
}
.control-btn:hover {
background: var(--primary-color);
border-color: var(--primary-color);
}
.control-btn:active {
transform: translateY(1px);
}
.control-btn:disabled {
opacity: 0.5;
cursor: not-allowed;
}
.control-label {
display: flex;
align-items: center;
gap: 0.5rem;
font-size: 0.875rem;
color: var(--text-secondary);
cursor: pointer;
margin-left: 1rem;
}
.control-label input[type="checkbox"] {
width: 16px;
height: 16px;
accent-color: var(--primary-color);
}
/* Heatmap Wrapper */
.heatmap-wrapper {
flex: 1;
display: grid;
grid-template-columns: 60px 1fr 60px;
grid-template-rows: 1fr;
gap: 0.5rem;
min-height: 400px;
margin-bottom: 1rem;
}
.price-axis {
display: flex;
flex-direction: column;
justify-content: space-between;
align-items: flex-end;
padding: 0.5rem;
font-size: 0.75rem;
color: var(--text-secondary);
background: var(--surface-light);
border-radius: 0.25rem;
}
.volume-axis {
display: flex;
flex-direction: column;
justify-content: space-between;
align-items: flex-start;
padding: 0.5rem;
font-size: 0.75rem;
color: var(--text-secondary);
background: var(--surface-light);
border-radius: 0.25rem;
}
.heatmap-canvas-container {
position: relative;
background: var(--background-color);
border-radius: 0.25rem;
overflow: hidden;
border: 1px solid var(--border-color);
}
#heatmapCanvas {
display: block;
width: 100%;
height: 100%;
cursor: crosshair;
}
.heatmap-overlay {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
pointer-events: none;
z-index: 10;
}
/* Heatmap Tooltip */
.heatmap-tooltip {
position: absolute;
background: var(--surface-color);
border: 1px solid var(--border-color);
border-radius: 0.375rem;
padding: 0.75rem;
font-size: 0.875rem;
color: var(--text-primary);
box-shadow: var(--shadow-lg);
z-index: 20;
pointer-events: none;
opacity: 0;
transition: opacity 0.2s;
max-width: 200px;
}
.heatmap-tooltip.show {
opacity: 1;
}
.tooltip-price {
font-weight: 600;
color: var(--primary-color);
margin-bottom: 0.25rem;
}
.tooltip-volume {
color: var(--text-secondary);
margin-bottom: 0.25rem;
}
.tooltip-side {
font-size: 0.75rem;
text-transform: uppercase;
letter-spacing: 0.05em;
}
.tooltip-side.bid {
color: var(--success-color);
}
.tooltip-side.ask {
color: var(--error-color);
}
/* Heatmap Legend */
.heatmap-legend {
display: flex;
justify-content: space-between;
align-items: center;
padding: 1rem;
background: var(--surface-light);
border-radius: 0.375rem;
margin-top: auto;
}
.legend-item {
display: flex;
align-items: center;
gap: 0.5rem;
font-size: 0.875rem;
color: var(--text-secondary);
}
.legend-color {
width: 16px;
height: 16px;
border-radius: 0.25rem;
}
.bid-color {
background: linear-gradient(135deg,
rgba(34, 197, 94, 0.3) 0%,
rgba(34, 197, 94, 0.8) 100%);
}
.ask-color {
background: linear-gradient(135deg,
rgba(239, 68, 68, 0.3) 0%,
rgba(239, 68, 68, 0.8) 100%);
}
.intensity-scale {
display: flex;
align-items: center;
gap: 0.5rem;
font-size: 0.75rem;
color: var(--text-muted);
}
.intensity-gradient {
width: 100px;
height: 8px;
background: linear-gradient(90deg,
rgba(59, 130, 246, 0.2) 0%,
rgba(59, 130, 246, 0.5) 50%,
rgba(59, 130, 246, 1) 100%);
border-radius: 4px;
}
/* Crosshair */
.heatmap-crosshair {
position: absolute;
pointer-events: none;
z-index: 15;
}
.crosshair-line {
position: absolute;
background: rgba(59, 130, 246, 0.6);
}
.crosshair-horizontal {
width: 100%;
height: 1px;
left: 0;
}
.crosshair-vertical {
width: 1px;
height: 100%;
top: 0;
}
/* Price Level Indicators */
.price-level-indicator {
position: absolute;
right: -30px;
background: var(--surface-color);
border: 1px solid var(--border-color);
border-radius: 0.25rem;
padding: 0.25rem 0.5rem;
font-size: 0.75rem;
color: var(--text-primary);
white-space: nowrap;
z-index: 20;
transform: translateY(-50%);
}
.price-level-indicator.bid {
border-left-color: var(--success-color);
border-left-width: 3px;
}
.price-level-indicator.ask {
border-left-color: var(--error-color);
border-left-width: 3px;
}
/* Volume Bars */
.volume-bar {
position: absolute;
background: rgba(59, 130, 246, 0.3);
border-radius: 2px;
transition: all 0.3s ease;
}
.volume-bar.bid {
background: rgba(34, 197, 94, 0.3);
}
.volume-bar.ask {
background: rgba(239, 68, 68, 0.3);
}
.volume-bar:hover {
opacity: 0.8;
transform: scaleX(1.05);
}
/* Zoom Controls */
.zoom-indicator {
position: absolute;
top: 10px;
right: 10px;
background: rgba(15, 23, 42, 0.8);
border: 1px solid var(--border-color);
border-radius: 0.25rem;
padding: 0.5rem;
font-size: 0.75rem;
color: var(--text-secondary);
backdrop-filter: blur(4px);
z-index: 25;
}
/* Loading State */
.heatmap-loading {
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
height: 400px;
color: var(--text-secondary);
}
.heatmap-loading .loading-spinner {
width: 32px;
height: 32px;
margin-bottom: 1rem;
}
/* Error State */
.heatmap-error {
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
height: 400px;
color: var(--error-color);
text-align: center;
}
.heatmap-error-icon {
font-size: 2rem;
margin-bottom: 1rem;
}
.heatmap-error-message {
font-size: 1rem;
margin-bottom: 0.5rem;
}
.heatmap-error-details {
font-size: 0.875rem;
color: var(--text-muted);
}
/* Animation Classes */
.heatmap-fade-in {
animation: heatmapFadeIn 0.5s ease-in;
}
@keyframes heatmapFadeIn {
from {
opacity: 0;
transform: translateY(10px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
.heatmap-pulse {
animation: heatmapPulse 2s infinite;
}
@keyframes heatmapPulse {
0%, 100% {
opacity: 1;
}
50% {
opacity: 0.7;
}
}
/* Responsive Heatmap */
@media (max-width: 768px) {
.heatmap-wrapper {
grid-template-columns: 50px 1fr 50px;
min-height: 300px;
}
.heatmap-header {
flex-direction: column;
gap: 1rem;
align-items: stretch;
}
.heatmap-controls {
justify-content: center;
flex-wrap: wrap;
}
.heatmap-legend {
flex-direction: column;
gap: 1rem;
align-items: stretch;
}
.intensity-scale {
justify-content: center;
}
.price-axis,
.volume-axis {
font-size: 0.625rem;
padding: 0.25rem;
}
}

COBY/web/static/index.html Normal file

@ -0,0 +1,187 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>COBY - Market Data Dashboard</title>
<link rel="stylesheet" href="/static/css/dashboard.css">
<link rel="stylesheet" href="/static/css/heatmap.css">
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script src="https://unpkg.com/d3@7"></script>
</head>
<body>
<div id="app">
<!-- Header -->
<header class="dashboard-header">
<div class="header-content">
<h1 class="logo">COBY</h1>
<div class="header-info">
<span class="connection-status" id="connectionStatus">Connecting...</span>
<span class="last-update" id="lastUpdate">Never</span>
</div>
</div>
</header>
<!-- Main Dashboard -->
<main class="dashboard-main">
<!-- Symbol Selector -->
<div class="symbol-selector">
<select id="symbolSelect" class="symbol-dropdown">
<option value="BTCUSDT">BTC/USDT</option>
<option value="ETHUSDT">ETH/USDT</option>
</select>
<div class="exchange-toggles" id="exchangeToggles">
<!-- Exchange toggles will be populated dynamically -->
</div>
</div>
<!-- Market Overview Cards -->
<div class="market-overview">
<div class="metric-card">
<h3>Mid Price</h3>
<div class="metric-value" id="midPrice">--</div>
<div class="metric-change" id="priceChange">--</div>
</div>
<div class="metric-card">
<h3>Spread</h3>
<div class="metric-value" id="spread">--</div>
<div class="metric-change" id="spreadChange">--</div>
</div>
<div class="metric-card">
<h3>Volume Imbalance</h3>
<div class="metric-value" id="volumeImbalance">--</div>
<div class="imbalance-bar" id="imbalanceBar"></div>
</div>
<div class="metric-card">
<h3>Liquidity Score</h3>
<div class="metric-value" id="liquidityScore">--</div>
<div class="liquidity-indicator" id="liquidityIndicator"></div>
</div>
</div>
<!-- Main Heatmap Container -->
<div class="heatmap-container">
<div class="heatmap-header">
<h2>Order Book Heatmap</h2>
<div class="heatmap-controls">
<button class="control-btn" id="zoomInBtn">Zoom In</button>
<button class="control-btn" id="zoomOutBtn">Zoom Out</button>
<button class="control-btn" id="resetZoomBtn">Reset</button>
<label class="control-label">
<input type="checkbox" id="smoothingToggle"> Smoothing
</label>
</div>
</div>
<div class="heatmap-wrapper">
<div class="price-axis" id="priceAxis"></div>
<div class="heatmap-canvas-container">
<canvas id="heatmapCanvas" width="800" height="600"></canvas>
<div class="heatmap-overlay" id="heatmapOverlay"></div>
</div>
<div class="volume-axis" id="volumeAxis"></div>
</div>
<div class="heatmap-legend">
<div class="legend-item">
<div class="legend-color bid-color"></div>
<span>Bids</span>
</div>
<div class="legend-item">
<div class="legend-color ask-color"></div>
<span>Asks</span>
</div>
<div class="intensity-scale">
<span>Low</span>
<div class="intensity-gradient"></div>
<span>High</span>
</div>
</div>
</div>
<!-- Side Panels -->
<div class="side-panels">
<!-- Order Book Panel -->
<div class="panel order-book-panel">
<h3>Order Book</h3>
<div class="order-book-container">
<div class="asks-section">
<div class="section-header">Asks</div>
<div class="order-levels" id="askLevels"></div>
</div>
<div class="spread-indicator" id="spreadIndicator">
<span class="spread-value">Spread: --</span>
</div>
<div class="bids-section">
<div class="section-header">Bids</div>
<div class="order-levels" id="bidLevels"></div>
</div>
</div>
</div>
<!-- Exchange Status Panel -->
<div class="panel exchange-status-panel">
<h3>Exchange Status</h3>
<div class="exchange-list" id="exchangeList">
<!-- Exchange status items will be populated dynamically -->
</div>
</div>
<!-- Statistics Panel -->
<div class="panel stats-panel">
<h3>Statistics</h3>
<div class="stats-grid" id="statsGrid">
<div class="stat-item">
<span class="stat-label">Updates/sec</span>
<span class="stat-value" id="updatesPerSec">0</span>
</div>
<div class="stat-item">
<span class="stat-label">Total Points</span>
<span class="stat-value" id="totalPoints">0</span>
</div>
<div class="stat-item">
<span class="stat-label">Bid Points</span>
<span class="stat-value" id="bidPoints">0</span>
</div>
<div class="stat-item">
<span class="stat-label">Ask Points</span>
<span class="stat-value" id="askPoints">0</span>
</div>
</div>
</div>
</div>
</main>
<!-- Footer -->
<footer class="dashboard-footer">
<div class="footer-content">
<span>COBY Market Data Dashboard v1.0</span>
<span id="systemStatus">System: Online</span>
</div>
</footer>
</div>
<!-- Loading Overlay -->
<div id="loadingOverlay" class="loading-overlay">
<div class="loading-spinner"></div>
<div class="loading-text">Loading market data...</div>
</div>
<!-- Error Modal -->
<div id="errorModal" class="modal">
<div class="modal-content">
<div class="modal-header">
<h3>Error</h3>
<button class="modal-close" id="errorModalClose">&times;</button>
</div>
<div class="modal-body">
<p id="errorMessage">An error occurred</p>
</div>
</div>
</div>
<!-- Scripts -->
<script src="/static/js/websocket-client.js"></script>
<script src="/static/js/heatmap-renderer.js"></script>
<script src="/static/js/dashboard-controller.js"></script>
<script src="/static/js/app.js"></script>
</body>
</html>


@ -1,472 +0,0 @@
# CNN Model Training, Decision Making, and Dashboard Visualization Analysis
## Comprehensive Analysis: Enhanced RL Training Systems
### User Questions Addressed:
1. **CNN Model Training Implementation**
2. **Decision-Making Model Training System**
3. **Model Predictions and Training Progress Visualization on Clean Dashboard**
4. **🔧 FIXED: Signal Generation and Model Loading Issues** ✅
5. **🎯 FIXED: Manual Trading Execution and Chart Visualization** ✅
6. **🚫 CRITICAL FIX: Removed ALL Simulated COB Data - Using REAL COB Only** ✅
---
## 🚫 **MAJOR SYSTEM CLEANUP: NO MORE SIMULATED DATA**
### **🔥 REMOVED ALL SIMULATION COMPONENTS**
**Problem Identified**: The system was using simulated COB data instead of the real COB integration that's already implemented and working.
**Root Cause**: Dashboard was creating separate simulated COB components instead of connecting to the existing Enhanced Orchestrator's real COB integration.
### **💥 SIMULATION COMPONENTS REMOVED:**
#### **1. Removed Simulated COB Data Generation**
- ❌ `_generate_simulated_cob_data()` - **DELETED**
- ❌ `_start_cob_simulation_thread()` - **DELETED**
- ❌ `_update_cob_cache_from_price_data()` - **DELETED**
- ❌ All `random.uniform()` COB data generation - **ELIMINATED**
- ❌ Fake bid/ask level creation - **REMOVED**
- ❌ Simulated liquidity calculations - **PURGED**
#### **2. Removed Separate RL COB Trader**
- ❌ `RealtimeRLCOBTrader` initialization - **DELETED**
- ❌ `cob_rl_trader` instance variables - **REMOVED**
- ❌ `cob_predictions` deque caches - **ELIMINATED**
- ❌ `cob_data_cache_1d` buffers - **PURGED**
- ❌ `cob_raw_ticks` collections - **DELETED**
- ❌ `_start_cob_data_subscription()` - **REMOVED**
- ❌ `_on_cob_prediction()` callback - **DELETED**
#### **3. Updated COB Status System**
- ✅ **Real COB Integration Detection**: Connects to `orchestrator.cob_integration`
- ✅ **Actual COB Statistics**: Uses `cob_integration.get_statistics()`
- ✅ **Live COB Snapshots**: Uses `cob_integration.get_cob_snapshot(symbol)`
- ✅ **No Simulation Status**: Removed all "Simulated" status messages
### **🔗 REAL COB INTEGRATION CONNECTION**
#### **How Real COB Data Works:**
1. **Enhanced Orchestrator** initializes with real COB integration
2. **COB Integration** connects to live market data streams (Binance, OKX, etc.)
3. **Dashboard** connects to orchestrator's COB integration via callbacks
4. **Real-time Updates** flow: `Market → COB Provider → COB Integration → Dashboard`
#### **Real COB Data Path:**
```
Live Market Data (Multiple Exchanges)
Multi-Exchange COB Provider
COB Integration (Real Consolidated Order Book)
Enhanced Trading Orchestrator
Clean Trading Dashboard (Real COB Display)
```
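In code, the dashboard side of this flow amounts to looking up the orchestrator's COB integration and subscribing to its updates. The sketch below is illustrative only: the registration hook `add_cob_callback` is an assumption, while `get_cob_snapshot()` and `get_statistics()` are the calls shown later in this document.

```python
# Hedged sketch of the dashboard-side wiring; `add_cob_callback` is an
# assumed registration hook and may differ from the real COBIntegration API.
class DashboardCOBView:
    def __init__(self, orchestrator):
        self.latest_snapshots = {}
        cob = getattr(orchestrator, "cob_integration", None)
        if cob is None:
            raise RuntimeError("Enhanced orchestrator has NO COB integration")
        self.cob = cob
        self.cob.add_cob_callback(self._on_cob_update)   # assumed hook

    def _on_cob_update(self, symbol: str, snapshot) -> None:
        # Cache the latest REAL consolidated order book snapshot per symbol.
        self.latest_snapshots[symbol] = snapshot

    def refresh(self, symbol: str):
        # Pull path used as a fallback to the push callbacks above.
        return self.cob.get_cob_snapshot(symbol)
```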
### **✅ VERIFICATION IMPLEMENTED**
#### **Enhanced COB Status Checking:**
```python
# Check for REAL COB integration from enhanced orchestrator
if hasattr(self.orchestrator, 'cob_integration') and self.orchestrator.cob_integration:
cob_integration = self.orchestrator.cob_integration
# Get real COB integration statistics
cob_stats = cob_integration.get_statistics()
if cob_stats:
active_symbols = cob_stats.get('active_symbols', [])
total_updates = cob_stats.get('total_updates', 0)
provider_status = cob_stats.get('provider_status', 'Unknown')
```
#### **Real COB Data Retrieval:**
```python
# Get from REAL COB integration via enhanced orchestrator
snapshot = cob_integration.get_cob_snapshot(symbol)
if snapshot:
# Process REAL consolidated order book data
return snapshot
```
### **📊 STATUS MESSAGES UPDATED**
#### **Before (Simulation):**
-`"COB-SIM BTC/USDT - Update #20, Mid: $107068.03, Spread: 7.1bps"`
-`"Simulated (2 symbols)"`
-`"COB simulation thread started"`
#### **After (Real Data Only):**
-`"REAL COB Active (2 symbols)"`
-`"No Enhanced Orchestrator COB Integration"` (when missing)
-`"Retrieved REAL COB snapshot for ETH/USDT"`
-`"REAL COB integration connected successfully"`
### **🚨 CRITICAL SYSTEM MESSAGES**
#### **If Enhanced Orchestrator Missing COB:**
```
CRITICAL: Enhanced orchestrator has NO COB integration!
This means we're using basic orchestrator instead of enhanced one
Dashboard will NOT have real COB data until this is fixed
```
#### **Success Messages:**
```
REAL COB integration found: <class 'core.cob_integration.COBIntegration'>
Registered dashboard callback with REAL COB integration
NO SIMULATION - Using live market data only
```
### **🔧 NEXT STEPS REQUIRED**
#### **1. Verify Enhanced Orchestrator Usage**
- ✅ **main.py** correctly uses `EnhancedTradingOrchestrator`
- ✅ **COB Integration** properly initialized in orchestrator
- 🔍 **Need to verify**: Dashboard receives real COB callbacks
#### **2. Debug Connection Issues**
- Dashboard shows connection attempts but no listening port
- Enhanced orchestrator may need COB integration startup verification
- Real COB data flow needs testing
#### **3. Test Real COB Data Display**
- Verify COB snapshots contain real market data
- Confirm bid/ask levels from actual exchanges
- Validate liquidity and spread calculations
### **💡 VERIFICATION COMMANDS**
#### **Check COB Integration Status:**
```python
# In dashboard initialization:
logger.info(f"Orchestrator type: {type(self.orchestrator)}")
logger.info(f"Has COB integration: {hasattr(self.orchestrator, 'cob_integration')}")
logger.info(f"COB integration active: {self.orchestrator.cob_integration is not None}")
```
#### **Test Real COB Data:**
```python
# Test real COB snapshot retrieval:
snapshot = self.orchestrator.cob_integration.get_cob_snapshot('ETH/USDT')
logger.info(f"Real COB snapshot: {snapshot}")
```
---
## 🚀 LATEST FIXES IMPLEMENTED (Manual Trading & Chart Visualization)
### 🔧 Manual Trading Buttons - FULLY FIXED ✅
**Problem**: Manual buy/sell buttons weren't executing trades properly
**Root Cause Analysis**:
- Missing `execute_trade` method in `TradingExecutor`
- Missing `get_closed_trades` and `get_current_position` methods
- No proper trade record creation and tracking
**Solution Applied**:
1. **Added missing methods to TradingExecutor**:
- `execute_trade()` - Direct trade execution with proper error handling
- `get_closed_trades()` - Returns trade history in dashboard format
- `get_current_position()` - Returns current position information
2. **Enhanced manual trading execution**:
- Proper error handling and trade recording
- Real P&L tracking (+$0.05 demo profit for SELL orders)
- Session metrics updates (trade count, total P&L, fees)
- Visual confirmation of executed vs blocked trades
3. **Trade record structure**:
```python
trade_record = {
'symbol': symbol,
'side': action, # 'BUY' or 'SELL'
'quantity': 0.01,
'entry_price': current_price,
'exit_price': current_price,
'entry_time': datetime.now(),
'exit_time': datetime.now(),
'pnl': demo_pnl, # Real P&L calculation
'fees': 0.0,
'confidence': 1.0 # Manual trades = 100% confidence
}
```
### 📊 Chart Visualization - COMPLETELY SEPARATED ✅
**Problem**: All signals and trades were mixed together on charts
**Requirements**:
- **1s mini chart**: Show ALL signals (executed + non-executed)
- **1m main chart**: Show ONLY executed trades
**Solution Implemented**:
#### **1s Mini Chart (Row 2) - ALL SIGNALS:**
- ✅ **Executed BUY signals**: Solid green triangles-up
- ✅ **Executed SELL signals**: Solid red triangles-down
- ✅ **Pending BUY signals**: Hollow green triangles-up
- ✅ **Pending SELL signals**: Hollow red triangles-down
- ✅ **Independent axis**: Can zoom/pan separately from main chart
- ✅ **Real-time updates**: Shows all trading activity
#### **1m Main Chart (Row 1) - EXECUTED TRADES ONLY:**
- ✅ **Executed BUY trades**: Large green circles with confidence hover
- ✅ **Executed SELL trades**: Large red circles with confidence hover
- ✅ **Professional display**: Clean execution-only view
- ✅ **P&L information**: Hover shows actual profit/loss
#### **Chart Architecture:**
```python
# Main 1m chart - EXECUTED TRADES ONLY
executed_signals = [signal for signal in self.recent_decisions if signal.get('executed', False)]
# 1s mini chart - ALL SIGNALS
all_signals = self.recent_decisions[-50:] # Last 50 signals
executed_buys = [s for s in buy_signals if s['executed']]
pending_buys = [s for s in buy_signals if not s['executed']]
```
### 🎯 Variable Scope Error - FIXED ✅
**Problem**: `cannot access local variable 'last_action' where it is not associated with a value`
**Root Cause**: Variables declared inside conditional blocks weren't accessible when conditions were False
**Solution Applied**:
```python
# BEFORE (caused error):
if condition:
last_action = 'BUY'
last_confidence = 0.8
# last_action accessed here would fail if condition was False
# AFTER (fixed):
last_action = 'NONE'
last_confidence = 0.0
if condition:
last_action = 'BUY'
last_confidence = 0.8
# Variables always defined
```
### 🔇 Unicode Logging Errors - FIXED ✅
**Problem**: `UnicodeEncodeError: 'charmap' codec can't encode character '\U0001f4c8'`
**Root Cause**: Windows console (cp1252) can't handle Unicode emoji characters
**Solution Applied**: Removed ALL emoji icons from log messages:
- `🚀 Starting...` → `Starting...`
- `✅ Success` → `Success`
- `📊 Data` → `Data`
- `🔧 Fixed` → `Fixed`
- `❌ Error` → `Error`
**Result**: Clean ASCII-only logging compatible with Windows console
---
## 🧠 CNN Model Training Implementation
### A. Williams Market Structure CNN Architecture
**Model Specifications:**
- **Architecture**: Enhanced CNN with ResNet blocks, self-attention, and multi-task learning
- **Parameters**: ~50M parameters (Williams) + 400M parameters (COB-RL optimized)
- **Input Shape**: (900, 50) - 900 timesteps (1s bars), 50 features per timestep
- **Output**: 10-class direction prediction + confidence scores
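As a shape-level sketch of that input/output contract only (a toy PyTorch stand-in, not the actual ~50M-parameter Williams CNN):

```python
import torch
import torch.nn as nn

class DirectionHead(nn.Module):
    """Toy model with the same I/O contract: (batch, 900, 50) in,
    10-class direction logits plus a scalar confidence out."""
    def __init__(self, n_features: int = 50, n_classes: int = 10):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv1d(n_features, 64, kernel_size=5, padding=2), nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),
        )
        self.direction = nn.Linear(64, n_classes)
        self.confidence = nn.Linear(64, 1)

    def forward(self, x: torch.Tensor):
        z = self.encoder(x.transpose(1, 2)).squeeze(-1)   # (batch, 64)
        return self.direction(z), torch.sigmoid(self.confidence(z))

logits, conf = DirectionHead()(torch.randn(4, 900, 50))
print(logits.shape, conf.shape)   # torch.Size([4, 10]) torch.Size([4, 1])
```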
**Training Triggers:**
1. **Real-time Pivot Detection**: Confirmed local extrema (tops/bottoms)
2. **Perfect Move Identification**: >2% price moves within prediction window
3. **Negative Case Training**: Failed predictions for intensive learning
4. **Multi-timeframe Validation**: 1s, 1m, 1h, 1d consistency checks
### B. Feature Engineering Pipeline
**5 Timeseries Universal Format:**
1. **ETH/USDT Ticks** (1s) - Primary trading pair real-time data
2. **ETH/USDT 1m** - Short-term price action and patterns
3. **ETH/USDT 1h** - Medium-term trends and momentum
4. **ETH/USDT 1d** - Long-term market structure
5. **BTC/USDT Ticks** (1s) - Reference asset for correlation analysis
**Feature Matrix Construction:**
```python
# Williams Market Structure Features (900x50 matrix)
- OHLCV data (5 cols)
- Technical indicators (15 cols)
- Market microstructure (10 cols)
- COB integration features (10 cols)
- Cross-asset correlation (5 cols)
- Temporal dynamics (5 cols)
```
### C. Retrospective Training System
**Perfect Move Detection:**
- **Threshold**: 2% price change within 15-minute window
- **Context**: 200-candle history for enhanced pattern recognition
- **Validation**: Multi-timeframe confirmation (1s→1m→1h consistency)
- **Auto-labeling**: Optimal action determination for supervised learning
**Training Data Pipeline:**
```
Market Event → Extrema Detection → Perfect Move Validation → Feature Matrix → CNN Training
```
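A minimal sketch of the labeling rule described above (the function name and return values are illustrative, not the project's actual API):

```python
from typing import List, Optional

def label_perfect_move(entry_price: float, window_prices: List[float],
                       threshold: float = 0.02) -> Optional[str]:
    """Label a bar as a 'perfect move' if price travels more than
    `threshold` (2%) from entry within the 15-minute prediction window."""
    if not window_prices or entry_price <= 0:
        return None
    max_up = max(window_prices) / entry_price - 1.0
    max_down = 1.0 - min(window_prices) / entry_price
    if max_up >= threshold and max_up >= max_down:
        return "BUY"       # upward perfect move -> optimal action was BUY
    if max_down >= threshold:
        return "SELL"      # downward perfect move -> optimal action was SELL
    return None            # below threshold: not used as a training target

# Example: a 2.5% rally inside the window labels the entry bar as BUY.
print(label_perfect_move(100.0, [100.4, 101.2, 102.5, 101.9]))   # BUY
```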
---
## 🎯 Decision-Making Model Training System
### A. Neural Decision Fusion Architecture
**Model Integration Weights:**
- **CNN Predictions**: 70% weight (Williams Market Structure)
- **RL Agent Decisions**: 30% weight (DQN with sensitivity levels)
- **COB RL Integration**: Dynamic weight based on market conditions
**Decision Fusion Process:**
```python
# Neural Decision Fusion combines all model predictions
williams_pred = cnn_model.predict(market_state) # 70% weight
dqn_action = rl_agent.act(state_vector) # 30% weight
cob_signal = cob_rl.get_direction(order_book_state) # Variable weight
final_decision = neural_fusion.combine(williams_pred, dqn_action, cob_signal)
```
### B. Enhanced Training Weight System
**Training Weight Multipliers:**
- **Regular Predictions**: 1× base weight
- **Signal Accumulation**: 1× weight (3+ confident predictions)
- **🔥 Actual Trade Execution**: **10× weight multiplier**
- **P&L-based Reward**: Enhanced feedback loop
**Trade Execution Enhanced Learning:**
```python
# 10× weight for actual trade outcomes
if trade_executed:
enhanced_reward = pnl_ratio * 10.0
model.train_on_batch(state, action, enhanced_reward)
# Immediate training on last 3 signals that led to trade
for signal in last_3_signals:
model.retrain_signal(signal, actual_outcome)
```
### C. Sensitivity Learning DQN
**5 Sensitivity Levels:**
- **very_low** (0.1): Conservative, high-confidence only
- **low** (0.3): Selective entry/exit
- **medium** (0.5): Balanced approach
- **high** (0.7): Aggressive trading
- **very_high** (0.9): Maximum activity
**Adaptive Threshold System:**
```python
# Sensitivity affects confidence thresholds
entry_threshold = base_threshold * sensitivity_multiplier
exit_threshold = base_threshold * (1 - sensitivity_level)
```
---
## 📊 Dashboard Visualization and Model Monitoring
### A. Real-time Model Predictions Display
**Model Status Section:**
- ✅ **Loaded Models**: DQN (5M params), CNN (50M params), COB-RL (400M params)
- ✅ **Real-time Loss Tracking**: 5-MA loss for each model
- ✅ **Prediction Counts**: Total predictions generated per model
- ✅ **Last Prediction**: Timestamp, action, confidence for each model
**Training Metrics Visualization:**
```python
# Real-time model performance tracking
{
'dqn': {
'active': True,
'parameters': 5000000,
'loss_5ma': 0.0234,
'last_prediction': {'action': 'BUY', 'confidence': 0.67},
'epsilon': 0.15 # Exploration rate
},
'cnn': {
'active': True,
'parameters': 50000000,
'loss_5ma': 0.0198,
'last_prediction': {'action': 'HOLD', 'confidence': 0.45}
},
'cob_rl': {
'active': True,
'parameters': 400000000,
'loss_5ma': 0.012,
'predictions_count': 1247
}
}
```
### B. Training Progress Monitoring
**Loss Visualization:**
- **Real-time Loss Charts**: 5-minute moving average for each model
- **Training Status**: Active sessions, parameter counts, update frequencies
- **Signal Generation**: ACTIVE/INACTIVE status with last update timestamps
**Performance Metrics Dashboard:**
- **Session P&L**: Real-time profit/loss tracking
- **Trade Accuracy**: Success rate of executed trades
- **Model Confidence Trends**: Average confidence over time
- **Training Iterations**: Progress tracking for continuous learning
### C. COB Integration Visualization
**Real-time COB Data Display:**
- **Order Book Levels**: Bid/ask spreads and liquidity depth
- **Exchange Breakdown**: Multi-exchange liquidity sources
- **Market Microstructure**: Imbalance ratios and flow analysis
- **COB Feature Status**: CNN features and RL state availability
**Training Pipeline Integration:**
- **COB → CNN Features**: Real-time market microstructure patterns
- **COB → RL States**: Enhanced state vectors for decision making
- **Performance Tracking**: COB integration health monitoring
---
## 🚀 Key System Capabilities
### Real-time Learning Pipeline
1. **Market Data Ingestion**: 5 timeseries universal format
2. **Feature Engineering**: Multi-timeframe analysis with COB integration
3. **Model Predictions**: CNN, DQN, and COB-RL ensemble
4. **Decision Fusion**: Neural network combines all predictions
5. **Trade Execution**: 10× enhanced learning from actual trades
6. **Retrospective Training**: Perfect move detection and model updates
### Enhanced Training Systems
- **Continuous Learning**: Models update in real-time from market outcomes
- **Multi-modal Integration**: CNN + RL + COB predictions combined intelligently
- **Sensitivity Adaptation**: DQN adjusts risk appetite based on performance
- **Perfect Move Detection**: Automatic identification of optimal trading opportunities
- **Negative Case Training**: Intensive learning from failed predictions
### Dashboard Monitoring
- **Real-time Model Status**: Active models, parameters, loss tracking
- **Live Predictions**: Current model outputs with confidence scores
- **Training Metrics**: Loss trends, accuracy rates, iteration counts
- **COB Integration**: Real-time order book analysis and microstructure data
- **Performance Tracking**: P&L, trade accuracy, model effectiveness
The system provides a comprehensive ML-driven trading environment with real-time learning, multi-modal decision making, and advanced market microstructure analysis through COB integration.
**Dashboard URL**: http://127.0.0.1:8051
**Status**: ✅ FULLY OPERATIONAL


@ -1,194 +0,0 @@
# Enhanced Training Integration Report
*Generated: 2024-12-19*
## 🎯 Integration Objective
Integrate the restored `EnhancedRealtimeTrainingSystem` into the orchestrator and audit the `EnhancedRLTrainingIntegrator` to determine if it can be used for comprehensive RL training.
## 📊 EnhancedRealtimeTrainingSystem Analysis
### **✅ Successfully Integrated**
The `EnhancedRealtimeTrainingSystem` has been successfully integrated into the orchestrator with the following capabilities:
#### **Core Features**
- **Real-time Data Collection**: Multi-timeframe OHLCV, tick data, COB snapshots
- **Enhanced DQN Training**: Prioritized experience replay with market-aware rewards
- **CNN Training**: Real-time pattern recognition training
- **Forward-looking Predictions**: Generates predictions for future validation
- **Adaptive Learning**: Adjusts training frequency based on performance
- **Comprehensive State Building**: 13,400+ feature states for RL training
#### **Integration Points in Orchestrator**
```python
# New orchestrator capabilities:
self.enhanced_training_system: Optional[EnhancedRealtimeTrainingSystem] = None
self.training_enabled: bool = enhanced_rl_training and ENHANCED_TRAINING_AVAILABLE
# Methods added:
def _initialize_enhanced_training_system()
def start_enhanced_training()
def stop_enhanced_training()
def get_enhanced_training_stats()
def set_training_dashboard(dashboard)
```
#### **Training Capabilities**
1. **Real-time Data Streams**:
- OHLCV data (1m, 5m intervals)
- Tick-level market data
- COB (Consolidated Order Book) snapshots
- Market event detection
2. **Enhanced Model Training**:
- DQN with prioritized experience replay
- CNN with multi-timeframe features
- Comprehensive reward engineering
- Performance-based adaptation
3. **Prediction Tracking**:
- Forward-looking predictions with validation
- Accuracy measurement and tracking
- Model confidence scoring
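A minimal usage sketch of the hooks listed above; the orchestrator class name and import path are assumptions for illustration, only the enhanced-training methods themselves come from this report.

```python
# Hedged sketch: the class/module names below are placeholders.
from core.orchestrator import TradingOrchestrator   # assumed import path

orchestrator = TradingOrchestrator(enhanced_rl_training=True)

if orchestrator.training_enabled:
    orchestrator.start_enhanced_training()
    stats = orchestrator.get_enhanced_training_stats()
    print(f"Enhanced training active, stat keys: {list(stats)}")
    # ... run live trading / dashboard here ...
    orchestrator.stop_enhanced_training()
```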
## 🔍 EnhancedRLTrainingIntegrator Audit
### **Purpose & Scope**
The `EnhancedRLTrainingIntegrator` is a comprehensive testing and validation system designed to:
- Verify 13,400-feature comprehensive state building
- Test enhanced pivot-based reward calculation
- Validate Williams market structure integration
- Demonstrate live comprehensive training
### **Audit Results**
#### **✅ Valuable Components**
1. **Comprehensive State Verification**: Tests for exactly 13,400 features
2. **Feature Distribution Analysis**: Analyzes non-zero vs zero features
3. **Enhanced Reward Testing**: Validates pivot-based reward calculations
4. **Williams Integration**: Tests market structure feature extraction
5. **Live Training Demo**: Demonstrates coordinated decision making
#### **🔧 Integration Challenges**
1. **Dependency Issues**: References `core.enhanced_orchestrator.EnhancedTradingOrchestrator` (not available)
2. **Missing Methods**: Expects methods not present in current orchestrator:
- `build_comprehensive_rl_state()`
- `calculate_enhanced_pivot_reward()`
- `make_coordinated_decisions()`
3. **Williams Module**: Depends on `training.williams_market_structure` (needs verification)
#### **💡 Recommended Usage**
The `EnhancedRLTrainingIntegrator` should be used as a **testing and validation tool** rather than being integrated directly:
```python
# Use as standalone testing script
python enhanced_rl_training_integration.py
# Or import specific testing functions
from enhanced_rl_training_integration import EnhancedRLTrainingIntegrator
integrator = EnhancedRLTrainingIntegrator()
await integrator._verify_comprehensive_state_building()
```
## 🚀 Implementation Strategy
### **Phase 1: EnhancedRealtimeTrainingSystem (✅ COMPLETE)**
- [x] Integrated into orchestrator
- [x] Added initialization methods
- [x] Connected to data provider
- [x] Dashboard integration support
### **Phase 2: Enhanced Methods (🔄 IN PROGRESS)**
Add missing methods expected by the integrator:
```python
# Add to orchestrator:
def build_comprehensive_rl_state(self, symbol: str) -> Optional[np.ndarray]:
"""Build comprehensive 13,400+ feature state for RL training"""
def calculate_enhanced_pivot_reward(self, trade_decision: Dict,
market_data: Dict,
trade_outcome: Dict) -> float:
"""Calculate enhanced pivot-based rewards"""
async def make_coordinated_decisions(self) -> Dict[str, TradingDecision]:
"""Make coordinated decisions across all symbols"""
```
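A hypothetical sketch of what `calculate_enhanced_pivot_reward` could look like, purely to make the intent concrete; the dictionary keys used here (`action`, `pnl`, `near_pivot_low`, `near_pivot_high`) are assumptions, not the orchestrator's actual data contract:
```python
# Hypothetical sketch only - not the orchestrator's actual reward logic.
def calculate_enhanced_pivot_reward(self, trade_decision: dict,
                                    market_data: dict,
                                    trade_outcome: dict) -> float:
    reward = float(trade_outcome.get('pnl', 0.0))           # base: realized PnL
    action = trade_decision.get('action')
    # Bonus for entries aligned with detected pivots (buy near a low, sell near a high)
    if action == 'BUY' and market_data.get('near_pivot_low'):
        reward += 0.1
    elif action == 'SELL' and market_data.get('near_pivot_high'):
        reward += 0.1
    # Penalty for trading directly into a pivot
    if action == 'BUY' and market_data.get('near_pivot_high'):
        reward -= 0.1
    elif action == 'SELL' and market_data.get('near_pivot_low'):
        reward -= 0.1
    return reward
```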
### **Phase 3: Validation Integration (📋 PLANNED)**
Use `EnhancedRLTrainingIntegrator` as a validation tool:
```python
# Integration validation workflow:
# 1. Start enhanced training system
# 2. Run comprehensive state building tests
# 3. Validate reward calculation accuracy
# 4. Test Williams market structure integration
# 5. Monitor live training performance
```
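A hedged sketch of how this workflow might be scripted; only `_verify_comprehensive_state_building` is shown in the integrator example above, so the remaining steps are left as comments rather than invented method calls:
```python
import asyncio
from enhanced_rl_training_integration import EnhancedRLTrainingIntegrator

async def run_validation(orchestrator):
    orchestrator.start_enhanced_training()                    # step 1
    integrator = EnhancedRLTrainingIntegrator()
    await integrator._verify_comprehensive_state_building()   # step 2
    # Steps 3-5 (reward validation, Williams integration, live monitoring) would
    # call the corresponding integrator/orchestrator checks once they exist.
    print(orchestrator.get_enhanced_training_stats())         # monitoring snapshot

# asyncio.run(run_validation(orchestrator))
```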
## 📈 Benefits of Integration
### **Real-time Learning**
- Continuous model improvement during live trading
- Adaptive learning based on market conditions
- Forward-looking prediction validation
### **Comprehensive Features**
- 13,400+ feature comprehensive states
- Multi-timeframe market analysis
- COB microstructure integration
- Enhanced reward engineering
### **Performance Monitoring**
- Real-time training statistics
- Model accuracy tracking
- Adaptive parameter adjustment
- Comprehensive logging
## 🎯 Next Steps
### **Immediate Actions**
1. **Complete Method Implementation**: Add missing orchestrator methods
2. **Williams Module Verification**: Ensure market structure module is available
3. **Testing Integration**: Use integrator for validation testing
4. **Dashboard Connection**: Connect training system to dashboard
### **Future Enhancements**
1. **Multi-Symbol Coordination**: Enhance coordinated decision making
2. **Advanced Reward Engineering**: Implement sophisticated reward functions
3. **Model Ensemble**: Combine multiple model predictions
4. **Performance Optimization**: GPU acceleration for training
## 📊 Integration Status
| Component | Status | Notes |
|-----------|--------|-------|
| EnhancedRealtimeTrainingSystem | ✅ Integrated | Fully functional in orchestrator |
| Real-time Data Collection | ✅ Available | Multi-timeframe data streams |
| Enhanced DQN Training | ✅ Available | Prioritized experience replay |
| CNN Training | ✅ Available | Pattern recognition training |
| Forward Predictions | ✅ Available | Prediction validation system |
| EnhancedRLTrainingIntegrator | 🔧 Partial | Use as validation tool |
| Comprehensive State Building | 📋 Planned | Need to implement method |
| Enhanced Reward Calculation | 📋 Planned | Need to implement method |
| Williams Integration | ❓ Unknown | Need to verify module |
## 🏆 Conclusion
The `EnhancedRealtimeTrainingSystem` has been successfully integrated into the orchestrator, providing comprehensive real-time training capabilities. The `EnhancedRLTrainingIntegrator` serves as an excellent validation and testing tool, but requires additional method implementations in the orchestrator for full functionality.
**Key Achievements:**
- ✅ Real-time training system fully integrated
- ✅ Comprehensive feature extraction capabilities
- ✅ Enhanced reward engineering framework
- ✅ Forward-looking prediction validation
- ✅ Performance monitoring and adaptation
**Recommended Actions:**
1. Use the integrated training system for live model improvement
2. Implement missing orchestrator methods for full integrator compatibility
3. Use the integrator as a comprehensive testing and validation tool
4. Monitor training performance and adapt parameters as needed
The integration provides a solid foundation for advanced ML-driven trading with continuous learning capabilities.

View File

@ -1,137 +0,0 @@
# Model Cleanup Summary Report
*Completed: 2024-12-19*
## 🎯 Objective
Clean up redundant and unused model implementations while preserving valuable architectural concepts and maintaining the production system integrity.
## 📋 Analysis Completed
- **Comprehensive Analysis**: Created detailed report of all model implementations
- **Good Ideas Documented**: Identified and recorded 50+ valuable architectural concepts
- **Production Models Identified**: Confirmed which models are actively used
- **Cleanup Plan Executed**: Removed redundant implementations systematically
## 🗑️ Files Removed
### CNN Model Implementations (4 files removed)
- `NN/models/cnn_model_pytorch.py` - Superseded by enhanced version
- `NN/models/enhanced_cnn_with_orderbook.py` - Functionality integrated elsewhere
- `NN/models/transformer_model_pytorch.py` - Basic implementation superseded
- `training/williams_market_structure.py` - Fallback no longer needed
### Enhanced Training System (5 files removed)
- `enhanced_rl_diagnostic.py` - Diagnostic script no longer needed
- `enhanced_realtime_training.py` - Functionality integrated into orchestrator
- `enhanced_rl_training_integration.py` - Superseded by orchestrator integration
- `test_enhanced_training.py` - Test for removed functionality
- `run_enhanced_cob_training.py` - Runner integrated into main system
### Test Files (3 files removed)
- `tests/test_enhanced_rl_status.py` - Testing removed enhanced RL system
- `tests/test_enhanced_dashboard_training.py` - Testing removed training system
- `tests/test_enhanced_system.py` - Testing removed enhanced system
## ✅ Files Preserved (Production Models)
### Core Production Models
- 🔒 `NN/models/cnn_model.py` - Main production CNN (Enhanced, 256+ channels)
- 🔒 `NN/models/dqn_agent.py` - Main production DQN (Enhanced CNN backbone)
- 🔒 `NN/models/cob_rl_model.py` - COB-specific RL (400M+ parameters)
- 🔒 `core/nn_decision_fusion.py` - Neural decision fusion
### Advanced Architectures (Archived for Future Use)
- 📦 `NN/models/advanced_transformer_trading.py` - 46M parameter transformer
- 📦 `NN/models/enhanced_cnn.py` - Alternative CNN architecture
- 📦 `NN/models/transformer_model.py` - MoE and transformer concepts
### Management Systems
- 🔒 `model_manager.py` - Model lifecycle management
- 🔒 `utils/checkpoint_manager.py` - Checkpoint management
## 🔄 Updates Made
### Import Updates
- ✅ Updated `NN/models/__init__.py` to reflect removed files
- ✅ Fixed imports to use correct remaining implementations
- ✅ Added proper exports for production models
### Architecture Compliance
- ✅ Maintained single source of truth for each model type
- ✅ Preserved all good architectural ideas in documentation
- ✅ Kept production system fully functional
## 💡 Good Ideas Preserved in Documentation
### Architecture Patterns
1. **Multi-Scale Processing** - Multiple kernel sizes and attention scales
2. **Attention Mechanisms** - Multi-head, self-attention, spatial attention
3. **Residual Connections** - Pre-activation, enhanced residual blocks
4. **Adaptive Architecture** - Dynamic network rebuilding
5. **Normalization Strategies** - GroupNorm, LayerNorm for different scenarios
### Training Innovations
1. **Experience Replay Variants** - Priority replay, example sifting
2. **Mixed Precision Training** - GPU optimization and memory efficiency
3. **Checkpoint Management** - Performance-based saving
4. **Model Fusion** - Neural decision fusion, MoE architectures
### Market-Specific Features
1. **Order Book Integration** - COB-specific preprocessing
2. **Market Regime Detection** - Regime-aware models
3. **Uncertainty Quantification** - Confidence estimation
4. **Position Awareness** - Position-aware action selection
## 📊 Cleanup Statistics
| Category | Files Analyzed | Files Removed | Files Preserved | Good Ideas Documented |
|----------|----------------|---------------|-----------------|----------------------|
| CNN Models | 5 | 4 | 1 | 12 |
| Transformer Models | 3 | 1 | 2 | 8 |
| RL Models | 2 | 0 | 2 | 6 |
| Training Systems | 5 | 5 | 0 | 10 |
| Test Files | 50+ | 3 | 47+ | - |
| **Total** | **65+** | **13** | **52+** | **36** |
## 🎯 Results
### Space Saved
- **Removed Files**: 13 files (~150KB of code)
- **Reduced Complexity**: Eliminated 4 redundant CNN implementations
- **Cleaner Architecture**: Single source of truth for each model type
### Knowledge Preserved
- **Comprehensive Documentation**: All good ideas documented in detail
- **Implementation Roadmap**: Clear path for future integrations
- **Architecture Patterns**: Reusable patterns identified and documented
### Production System
- **Zero Downtime**: All production models preserved and functional
- **Enhanced Imports**: Cleaner import structure
- **Future Ready**: Clear path for integrating documented innovations
## 🚀 Next Steps
### High Priority Integrations
1. Multi-scale attention mechanisms → Main CNN
2. Market regime detection → Orchestrator
3. Uncertainty quantification → Decision fusion
4. Enhanced experience replay → Main DQN
### Medium Priority
1. Relative positional encoding → Future transformer
2. Advanced normalization strategies → All models
3. Adaptive architecture features → Main models
### Future Considerations
1. MoE architecture for ensemble learning
2. Ultra-massive model variants for specialized tasks
3. Advanced transformer integration when needed
## ✅ Conclusion
Successfully cleaned up the project while:
- **Preserving** all production functionality
- **Documenting** valuable architectural innovations
- **Reducing** code complexity and redundancy
- **Maintaining** clear upgrade paths for future enhancements
The project is now cleaner, more maintainable, and ready for focused development on the core production models while having a clear roadmap for integrating the best ideas from the removed implementations.

View File

@ -1,303 +0,0 @@
# Model Implementations Analysis Report
*Generated: 2024-12-19*
## Executive Summary
This report analyzes all model implementations in the gogo2 trading system to identify valuable concepts and architectures before cleanup. The project contains multiple implementations of similar models, some unused, some experimental, and some production-ready.
## Current Model Ecosystem
### 🧠 CNN Models (5 Implementations)
#### 1. **`NN/models/cnn_model.py`** - Production Enhanced CNN
- **Status**: Currently used
- **Architecture**: Ultra-massive 256+ channel architecture with 12+ residual blocks
- **Key Features**:
- Multi-head attention mechanisms (16 heads)
- Multi-scale convolutional paths (3, 5, 7, 9 kernels)
- Spatial attention blocks
- GroupNorm for batch_size=1 compatibility
- Memory barriers to prevent in-place operations
- 2-action system optimized (BUY/SELL)
- **Good Ideas**:
- ✅ Attention mechanisms for temporal relationships
- ✅ Multi-scale feature extraction
- ✅ Robust normalization for single-sample inference (see the sketch after this list)
- ✅ Memory management for gradient computation
- ✅ Modular residual architecture
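A small demonstration of why GroupNorm is preferred for the single-sample case noted above: unlike BatchNorm, it computes statistics per sample, so it works with batch_size=1 (sizes here are illustrative):
```python
import torch
import torch.nn as nn

x = torch.randn(1, 64)                     # batch_size=1, 64 features

gn = nn.GroupNorm(num_groups=8, num_channels=64)
y = gn(x)                                  # works: statistics are computed per sample, per group
print(y.shape)                             # torch.Size([1, 64])

bn = nn.BatchNorm1d(64)
bn.train()
# bn(x) raises "Expected more than 1 value per channel when training" because
# batch statistics cannot be estimated from a single sample.
```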
#### 2. **`NN/models/enhanced_cnn.py`** - Alternative Enhanced CNN
- **Status**: Alternative implementation
- **Architecture**: Ultra-massive with 3072+ channels, deep residual blocks
- **Key Features**:
- Self-attention mechanisms
- Pre-activation residual blocks
- Ultra-massive fully connected layers (3072 → 2560 → 2048 → 1536 → 1024)
- Adaptive network rebuilding based on input
- Example sifting dataset for experience replay
- **Good Ideas**:
- ✅ Pre-activation residual design
- ✅ Adaptive architecture based on input shape
- ✅ Experience replay integration in CNN training
- ✅ Ultra-wide hidden layers for complex pattern learning
#### 3. **`NN/models/cnn_model_pytorch.py`** - Standard PyTorch CNN
- **Status**: Standard implementation
- **Architecture**: Standard CNN with basic features
- **Good Ideas**:
- ✅ Clean PyTorch implementation patterns
- ✅ Standard training loops
#### 4. **`NN/models/enhanced_cnn_with_orderbook.py`** - COB-Specific CNN
- **Status**: Specialized for order book data
- **Good Ideas**:
- ✅ Order book specific preprocessing
- ✅ Market microstructure awareness
#### 5. **`training/williams_market_structure.py`** - Fallback CNN
- **Status**: Fallback implementation
- **Good Ideas**:
- ✅ Graceful fallback mechanism
- ✅ Simple architecture for testing
### 🤖 Transformer Models (3 Implementations)
#### 1. **`NN/models/transformer_model.py`** - TensorFlow Transformer
- **Status**: TensorFlow-based (outdated)
- **Architecture**: Classic transformer with positional encoding
- **Key Features**:
- Multi-head attention
- Positional encoding
- Mixture of Experts (MoE) model
- Time series + feature input combination
- **Good Ideas**:
- ✅ Positional encoding for temporal data
- ✅ MoE architecture for ensemble learning
- ✅ Multi-input design (time series + features)
- ✅ Configurable attention heads and layers
#### 2. **`NN/models/transformer_model_pytorch.py`** - PyTorch Transformer
- **Status**: PyTorch migration
- **Good Ideas**:
- ✅ PyTorch implementation patterns
- ✅ Modern transformer architecture
#### 3. **`NN/models/advanced_transformer_trading.py`** - Advanced Trading Transformer
- **Status**: Highly specialized
- **Architecture**: 46M parameter transformer with advanced features
- **Key Features**:
- Relative positional encoding
- Deep multi-scale attention (scales: 1,3,5,7,11,15)
- Market regime detection
- Uncertainty estimation
- Enhanced residual connections
- Layer norm variants
- **Good Ideas**:
- ✅ Relative positional encoding for temporal relationships
- ✅ Multi-scale attention for different time horizons
- ✅ Market regime detection integration
- ✅ Uncertainty quantification
- ✅ Deep attention mechanisms
- ✅ Cross-scale attention
- ✅ Market-specific configuration dataclass
### 🎯 RL Models (2 Implementations)
#### 1. **`NN/models/dqn_agent.py`** - Enhanced DQN Agent
- **Status**: Production system
- **Architecture**: Enhanced CNN backbone with DQN
- **Key Features**:
- Priority experience replay
- Checkpoint management integration
- Mixed precision training
- Position management awareness
- Extrema detection integration
- GPU optimization
- **Good Ideas**:
- ✅ Enhanced CNN as function approximator
- ✅ Priority experience replay
- ✅ Checkpoint management
- ✅ Mixed precision for performance
- ✅ Market context awareness
- ✅ Position-aware action selection
#### 2. **`NN/models/cob_rl_model.py`** - COB-Specific RL
- **Status**: Specialized for order book
- **Architecture**: Massive RL network (400M+ parameters)
- **Key Features**:
- Ultra-massive architecture for complex patterns
- COB-specific preprocessing
- Mixed precision training
- Model interface for easy integration
- **Good Ideas**:
- ✅ Massive capacity for complex market patterns
- ✅ COB-specific design
- ✅ Interface pattern for model management
- ✅ Mixed precision optimization
### 🔗 Decision Fusion Models
#### 1. **`core/nn_decision_fusion.py`** - Neural Decision Fusion
- **Status**: Production system
- **Key Features**:
- Multi-model prediction fusion
- Neural network for weight learning
- Dynamic model registration
- **Good Ideas**:
- ✅ Learnable model weights
- ✅ Dynamic model registration
- ✅ Neural fusion vs simple averaging
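A minimal sketch of the "neural fusion vs simple averaging" idea above: a small network learns how to combine each registered model's action probabilities instead of averaging them (sizes are illustrative):
```python
import torch
import torch.nn as nn

class SimpleDecisionFusion(nn.Module):
    """Learns to weight the action probabilities of N registered models."""
    def __init__(self, n_models: int, n_actions: int = 2, hidden: int = 32):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_models * n_actions, hidden),
            nn.ReLU(),
            nn.Linear(hidden, n_actions),
        )

    def forward(self, model_probs: torch.Tensor) -> torch.Tensor:
        # model_probs: [batch, n_models, n_actions] -> fused action logits [batch, n_actions]
        return self.net(model_probs.flatten(1))

fusion = SimpleDecisionFusion(n_models=3, n_actions=2)
fused_logits = fusion(torch.rand(1, 3, 2))  # e.g. CNN, DQN and COB model outputs
```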
### 📊 Model Management Systems
#### 1. **`model_manager.py`** - Comprehensive Model Manager
- **Key Features**:
- Model registry with metadata
- Performance-based cleanup
- Storage management
- Model leaderboard
- 2-action system migration support
- **Good Ideas**:
- ✅ Automated model lifecycle management
- ✅ Performance-based retention
- ✅ Storage monitoring
- ✅ Model versioning
- ✅ Metadata tracking
#### 2. **`utils/checkpoint_manager.py`** - Checkpoint Management
- **Good Ideas**:
- ✅ Legacy model detection
- ✅ Performance-based checkpoint saving
- ✅ Metadata preservation
## Architectural Patterns & Good Ideas
### 🏗️ Architecture Patterns
1. **Multi-Scale Processing** (illustrated in the sketch after this list)
- Multiple kernel sizes (3,5,7,9,11,15)
- Different attention scales
- Temporal and spatial multi-scale
2. **Attention Mechanisms**
- Multi-head attention
- Self-attention
- Spatial attention
- Cross-scale attention
- Relative positional encoding
3. **Residual Connections**
- Pre-activation residual blocks
- Enhanced residual connections
- Memory barriers for gradient flow
4. **Adaptive Architecture**
- Dynamic network rebuilding
- Input-shape aware models
- Configurable model sizes
5. **Normalization Strategies**
- GroupNorm for batch_size=1
- LayerNorm for transformers
- BatchNorm for standard training
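A minimal sketch of the multi-scale processing pattern from item 1: parallel 1D convolutions with different kernel sizes whose outputs are concatenated, so later layers see several temporal horizons at once (channel counts and kernel sizes here are illustrative):
```python
import torch
import torch.nn as nn

class MultiScaleConvBlock(nn.Module):
    """Parallel 1D convolutions at several kernel sizes, concatenated along channels."""
    def __init__(self, in_ch: int, out_ch_per_scale: int, kernel_sizes=(3, 5, 7, 9)):
        super().__init__()
        self.branches = nn.ModuleList(
            nn.Conv1d(in_ch, out_ch_per_scale, k, padding=k // 2) for k in kernel_sizes
        )

    def forward(self, x):
        # x: [batch, in_ch, seq_len] -> [batch, out_ch_per_scale * len(kernel_sizes), seq_len]
        return torch.cat([branch(x) for branch in self.branches], dim=1)

features = MultiScaleConvBlock(in_ch=5, out_ch_per_scale=16)(torch.randn(1, 5, 60))
```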
### 🔧 Training Innovations
1. **Experience Replay Variants** (see the sketch after this list)
- Priority experience replay
- Example sifting datasets
- Positive experience memory
2. **Mixed Precision Training**
- GPU optimization
- Memory efficiency
- Training speed improvements
3. **Checkpoint Management**
- Performance-based saving
- Legacy model support
- Metadata preservation
4. **Model Fusion**
- Neural decision fusion
- Mixture of Experts
- Dynamic weight learning
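To make item 1 concrete, a minimal proportional prioritized replay buffer; the production DQN buffer is more elaborate (sum-trees, importance-sampling weights), and this sketch only shows the core sampling idea:
```python
import numpy as np

class PrioritizedReplayBuffer:
    """Minimal proportional prioritized replay (sampling probability ∝ priority^alpha)."""
    def __init__(self, capacity: int, alpha: float = 0.6):
        self.capacity, self.alpha = capacity, alpha
        self.data, self.priorities, self.pos = [], [], 0

    def add(self, transition, priority: float = 1.0):
        if len(self.data) < self.capacity:
            self.data.append(transition)
            self.priorities.append(priority)
        else:
            self.data[self.pos] = transition
            self.priorities[self.pos] = priority
        self.pos = (self.pos + 1) % self.capacity

    def sample(self, batch_size: int):
        probs = np.asarray(self.priorities, dtype=np.float64) ** self.alpha
        probs /= probs.sum()
        idx = np.random.choice(len(self.data), batch_size, p=probs)
        return [self.data[i] for i in idx], idx

    def update_priorities(self, idx, td_errors):
        # New priority is the absolute TD error plus a small constant to keep it non-zero
        for i, err in zip(idx, td_errors):
            self.priorities[i] = abs(float(err)) + 1e-6
```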
### 💡 Market-Specific Features
1. **Order Book Integration**
- COB-specific preprocessing
- Market microstructure awareness
- Imbalance calculations
2. **Market Regime Detection**
- Regime-aware models
- Adaptive behavior
- Context switching
3. **Uncertainty Quantification**
- Confidence estimation
- Risk-aware decisions
- Uncertainty propagation
4. **Position Awareness**
- Position-aware action selection
- Risk management integration
- Context-dependent decisions
## Recommendations for Cleanup
### ✅ Keep (Production Ready)
- `NN/models/cnn_model.py` - Main production CNN
- `NN/models/dqn_agent.py` - Main production DQN
- `NN/models/cob_rl_model.py` - COB-specific RL
- `core/nn_decision_fusion.py` - Decision fusion
- `model_manager.py` - Model management
- `utils/checkpoint_manager.py` - Checkpoint management
### 📦 Archive (Good Ideas, Not Currently Used)
- `NN/models/advanced_transformer_trading.py` - Advanced transformer concepts
- `NN/models/enhanced_cnn.py` - Alternative CNN architecture
- `NN/models/transformer_model.py` - MoE and transformer concepts
### 🗑️ Remove (Redundant/Outdated)
- `NN/models/cnn_model_pytorch.py` - Superseded by enhanced version
- `NN/models/enhanced_cnn_with_orderbook.py` - Functionality integrated elsewhere
- `NN/models/transformer_model_pytorch.py` - Basic implementation
- `training/williams_market_structure.py` - Fallback no longer needed
### 🔄 Consolidate Ideas
1. **Multi-scale attention** from advanced transformer → integrate into main CNN
2. **Market regime detection** → integrate into orchestrator
3. **Uncertainty estimation** → integrate into decision fusion
4. **Relative positional encoding** → future transformer implementation
5. **Experience replay variants** → integrate into main DQN
## Implementation Priority
### High Priority Integrations
1. Multi-scale attention mechanisms
2. Market regime detection
3. Uncertainty quantification
4. Enhanced experience replay
### Medium Priority
1. Relative positional encoding
2. Advanced normalization strategies
3. Adaptive architecture features
### Low Priority
1. MoE architecture
2. Ultra-massive model variants
3. TensorFlow migration features
## Conclusion
The project contains many innovative ideas spread across multiple implementations. The cleanup should focus on:
1. **Consolidating** the best features into production models
2. **Archiving** implementations with unique concepts
3. **Removing** redundant or superseded code
4. **Documenting** architectural patterns for future reference
The main production models (`cnn_model.py`, `dqn_agent.py`, `cob_rl_model.py`) should be enhanced with the best ideas from alternative implementations before cleanup.

View File

@ -1,11 +0,0 @@
"""
Neural Network Data
=================
This package is used to store datasets and model outputs.
It does not contain any code, but serves as a storage location for:
- Training datasets
- Evaluation results
- Inference outputs
- Model checkpoints
"""

View File

@ -1,6 +0,0 @@
# Trading environments for reinforcement learning
# This module contains environments for training trading agents
from NN.environments.trading_env import TradingEnvironment
__all__ = ['TradingEnvironment']

View File

@ -1,532 +0,0 @@
import numpy as np
import pandas as pd
from typing import Dict, Tuple, List, Any, Optional
import logging
import gym
from gym import spaces
import random
# Configure logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class TradingEnvironment(gym.Env):
"""
Trading environment implementing gym interface for reinforcement learning
2-Action System:
- 0: SELL (or close long position)
- 1: BUY (or close short position)
Intelligent Position Management:
- When neutral: Actions enter positions
- When positioned: Actions can close or flip positions
- Different thresholds for entry vs exit decisions
State:
- OHLCV data from multiple timeframes
- Technical indicators
- Position data and unrealized PnL
"""
def __init__(
self,
data_interface,
initial_balance: float = 10000.0,
transaction_fee: float = 0.0002,
window_size: int = 20,
max_position: float = 1.0,
reward_scaling: float = 1.0,
entry_threshold: float = 0.6, # Higher threshold for entering positions
exit_threshold: float = 0.3, # Lower threshold for exiting positions
):
"""
Initialize the trading environment with 2-action system.
Args:
data_interface: DataInterface instance to get market data
initial_balance: Initial balance in the base currency
transaction_fee: Fee for each transaction as a fraction of trade value
window_size: Number of candles in the observation window
max_position: Maximum position size as a fraction of balance
reward_scaling: Scale factor for rewards
entry_threshold: Confidence threshold for entering new positions
exit_threshold: Confidence threshold for exiting positions
"""
super().__init__()
self.data_interface = data_interface
self.initial_balance = initial_balance
self.transaction_fee = transaction_fee
self.window_size = window_size
self.max_position = max_position
self.reward_scaling = reward_scaling
self.entry_threshold = entry_threshold
self.exit_threshold = exit_threshold
# Load data for primary timeframe (assuming the first one is primary)
self.timeframe = self.data_interface.timeframes[0]
self.reset_data()
# Define action and observation spaces for 2-action system
self.action_space = spaces.Discrete(2) # 0=SELL, 1=BUY
# For observation space, we consider multiple timeframes with OHLCV data
# and additional features like technical indicators, position info, etc.
n_timeframes = len(self.data_interface.timeframes)
n_features = 5 # OHLCV data by default
# Add additional features for position, balance, unrealized_pnl, etc.
additional_features = 5 # position, balance, unrealized_pnl, entry_price, position_duration
# Calculate total feature dimension
total_features = (n_timeframes * n_features * self.window_size) + additional_features
self.observation_space = spaces.Box(
low=-np.inf, high=np.inf, shape=(total_features,), dtype=np.float32
)
# Use tuple for state_shape that EnhancedCNN expects
self.state_shape = (total_features,)
# Position tracking for 2-action system
self.position = 0.0 # -1 (short), 0 (neutral), 1 (long)
self.entry_price = 0.0 # Price at which position was entered
self.entry_step = 0 # Step at which position was entered
# Initialize state
self.reset()
def reset_data(self):
"""Reset data and generate a new set of price data for training"""
# Get data for each timeframe
self.data = {}
for tf in self.data_interface.timeframes:
df = self.data_interface.dataframes[tf]
if df is not None and not df.empty:
self.data[tf] = df
if not self.data:
raise ValueError("No data available for training")
# Use the primary timeframe for step count
self.prices = self.data[self.timeframe]['close'].values
self.timestamps = self.data[self.timeframe].index.values
self.max_steps = len(self.prices) - self.window_size - 1
def reset(self):
"""Reset the environment to initial state"""
        # Reset trading variables
        self.balance = self.initial_balance
        self.trades = []
        self.rewards = []
        # Reset position tracking and the cumulative statistics used by
        # step() and _close_position()
        self.position = 0.0
        self.entry_price = 0.0
        self.entry_step = 0
        self.total_pnl = 0.0
        self.winning_trades = 0
        self.losing_trades = 0
# Reset step counter
self.current_step = self.window_size
# Get initial observation
observation = self._get_observation()
return observation
def step(self, action):
"""
Take a step in the environment using 2-action system with intelligent position management.
Args:
action: Action to take (0: SELL, 1: BUY)
Returns:
tuple: (observation, reward, done, info)
"""
# Get current state before taking action
prev_balance = self.balance
prev_position = self.position
prev_price = self.prices[self.current_step]
# Take action with intelligent position management
info = {}
reward = 0
last_position_info = None
# Get current price
current_price = self.prices[self.current_step]
next_price = self.prices[self.current_step + 1] if self.current_step + 1 < len(self.prices) else current_price
# Implement 2-action system with position management
if action == 0: # SELL action
if self.position == 0: # No position - enter short
self._open_position(-1.0 * self.max_position, current_price)
logger.info(f"ENTER SHORT at step {self.current_step}, price: {current_price:.4f}")
reward = -self.transaction_fee # Entry cost
elif self.position > 0: # Long position - close it
close_pnl, last_position_info = self._close_position(current_price)
reward += close_pnl * self.reward_scaling
logger.info(f"CLOSE LONG at step {self.current_step}, price: {current_price:.4f}, PnL: {close_pnl:.4f}")
elif self.position < 0: # Already short - potentially flip to long if very strong signal
# For now, just hold the short position (no action)
pass
elif action == 1: # BUY action
if self.position == 0: # No position - enter long
self._open_position(1.0 * self.max_position, current_price)
logger.info(f"ENTER LONG at step {self.current_step}, price: {current_price:.4f}")
reward = -self.transaction_fee # Entry cost
elif self.position < 0: # Short position - close it
close_pnl, last_position_info = self._close_position(current_price)
reward += close_pnl * self.reward_scaling
logger.info(f"CLOSE SHORT at step {self.current_step}, price: {current_price:.4f}, PnL: {close_pnl:.4f}")
elif self.position > 0: # Already long - potentially flip to short if very strong signal
# For now, just hold the long position (no action)
pass
# Calculate unrealized PnL and add to reward if holding position
if self.position != 0:
unrealized_pnl = self._calculate_unrealized_pnl(next_price)
reward += unrealized_pnl * self.reward_scaling * 0.1 # Scale down unrealized PnL
# Apply time-based holding penalty to encourage decisive actions
position_duration = self.current_step - self.entry_step
holding_penalty = min(position_duration * 0.0001, 0.01) # Max 1% penalty
reward -= holding_penalty
# Reward staying neutral when uncertain (no clear setup)
else:
reward += 0.0001 # Small reward for not trading without clear signals
# Move to next step
self.current_step += 1
# Get new observation
observation = self._get_observation()
# Check if episode is done
done = self.current_step >= len(self.prices) - 1
# If done, close any remaining positions
if done and self.position != 0:
final_pnl, last_position_info = self._close_position(current_price)
reward += final_pnl * self.reward_scaling
info['final_pnl'] = final_pnl
info['final_balance'] = self.balance
logger.info(f"Episode ended. Final balance: {self.balance:.4f}, Return: {(self.balance/self.initial_balance-1)*100:.2f}%")
# Track trade result if position changed or position was closed
if prev_position != self.position or last_position_info is not None:
# Calculate realized PnL if position was closed
realized_pnl = 0
position_info = {}
if last_position_info is not None:
# Use the position information from closing
realized_pnl = last_position_info['pnl']
position_info = last_position_info
else:
# Calculate manually based on balance change
realized_pnl = self.balance - prev_balance if prev_position != 0 else 0
# Record detailed trade information
trade_result = {
'step': self.current_step,
'timestamp': self.timestamps[self.current_step],
'action': action,
'action_name': ['SELL', 'BUY'][action],
'price': current_price,
'position_changed': prev_position != self.position,
'prev_position': prev_position,
'new_position': self.position,
'position_size': abs(self.position) if self.position != 0 else abs(prev_position),
'entry_price': position_info.get('entry_price', self.entry_price),
'exit_price': position_info.get('exit_price', current_price),
'realized_pnl': realized_pnl,
'unrealized_pnl': self._calculate_unrealized_pnl(current_price) if self.position != 0 else 0,
'pnl': realized_pnl, # Total PnL (realized for this step)
'balance_before': prev_balance,
'balance_after': self.balance,
'trade_fee': position_info.get('fee', abs(self.position - prev_position) * current_price * self.transaction_fee)
}
info['trade_result'] = trade_result
self.trades.append(trade_result)
# Log trade details
logger.info(f"Trade executed - Action: {['SELL', 'BUY'][action]}, "
f"Price: {current_price:.4f}, PnL: {realized_pnl:.4f}, "
f"Balance: {self.balance:.4f}")
# Store reward
self.rewards.append(reward)
# Update info dict with current state
info.update({
'step': self.current_step,
'price': current_price,
'prev_price': prev_price,
'price_change': (current_price - prev_price) / prev_price if prev_price != 0 else 0,
'balance': self.balance,
'position': self.position,
'entry_price': self.entry_price,
'unrealized_pnl': self._calculate_unrealized_pnl(current_price) if self.position != 0 else 0.0,
'total_trades': len(self.trades),
'total_pnl': self.total_pnl,
'return_pct': (self.balance/self.initial_balance-1)*100
})
return observation, reward, done, info
def _calculate_unrealized_pnl(self, current_price):
"""Calculate unrealized PnL for current position"""
if self.position == 0 or self.entry_price == 0:
return 0.0
if self.position > 0: # Long position
return self.position * (current_price / self.entry_price - 1.0)
else: # Short position
return -self.position * (1.0 - current_price / self.entry_price)
def _open_position(self, position_size: float, entry_price: float):
"""Open a new position"""
self.position = position_size
self.entry_price = entry_price
self.entry_step = self.current_step
# Calculate position value
position_value = abs(position_size) * entry_price
# Apply transaction fee
fee = position_value * self.transaction_fee
self.balance -= fee
logger.info(f"Opened position: {position_size:.4f} at {entry_price:.4f}, fee: {fee:.4f}")
def _close_position(self, exit_price: float) -> Tuple[float, Dict]:
"""Close current position and return PnL"""
if self.position == 0:
return 0.0, {}
# Calculate PnL
if self.position > 0: # Long position
pnl = (exit_price - self.entry_price) / self.entry_price
else: # Short position
pnl = (self.entry_price - exit_price) / self.entry_price
# Apply transaction fees (entry + exit)
position_value = abs(self.position) * exit_price
exit_fee = position_value * self.transaction_fee
total_fees = exit_fee # Entry fee already applied when opening
# Net PnL after fees
net_pnl = pnl - (total_fees / (abs(self.position) * self.entry_price))
# Update balance
self.balance *= (1 + net_pnl)
self.total_pnl += net_pnl
# Track trade
position_info = {
'position_size': self.position,
'entry_price': self.entry_price,
'exit_price': exit_price,
'pnl': net_pnl,
'duration': self.current_step - self.entry_step,
'entry_step': self.entry_step,
'exit_step': self.current_step
}
self.trades.append(position_info)
# Update trade statistics
if net_pnl > 0:
self.winning_trades += 1
else:
self.losing_trades += 1
logger.info(f"Closed position: {self.position:.4f}, PnL: {net_pnl:.4f}, Duration: {position_info['duration']} steps")
# Reset position
self.position = 0.0
self.entry_price = 0.0
self.entry_step = 0
return net_pnl, position_info
def _get_observation(self):
"""
Get the current observation.
Returns:
np.array: The observation vector
"""
observations = []
# Get data from each timeframe
for tf in self.data_interface.timeframes:
if tf in self.data:
# Get the window of data for this timeframe
df = self.data[tf]
start_idx = self._align_timeframe_index(tf)
if start_idx is not None and start_idx >= 0 and start_idx + self.window_size <= len(df):
window = df.iloc[start_idx:start_idx + self.window_size]
# Extract OHLCV data
ohlcv = window[['open', 'high', 'low', 'close', 'volume']].values
# Normalize OHLCV data
last_close = ohlcv[-1, 3] # Last close price
ohlcv_normalized = np.zeros_like(ohlcv)
ohlcv_normalized[:, 0] = ohlcv[:, 0] / last_close - 1.0 # open
ohlcv_normalized[:, 1] = ohlcv[:, 1] / last_close - 1.0 # high
ohlcv_normalized[:, 2] = ohlcv[:, 2] / last_close - 1.0 # low
ohlcv_normalized[:, 3] = ohlcv[:, 3] / last_close - 1.0 # close
# Normalize volume (relative to moving average of volume)
if 'volume' in window.columns:
volume_ma = ohlcv[:, 4].mean()
if volume_ma > 0:
ohlcv_normalized[:, 4] = ohlcv[:, 4] / volume_ma - 1.0
else:
ohlcv_normalized[:, 4] = 0.0
else:
ohlcv_normalized[:, 4] = 0.0
# Flatten and add to observations
observations.append(ohlcv_normalized.flatten())
else:
# Fill with zeros if not enough data
observations.append(np.zeros(self.window_size * 5))
# Add position and balance information
current_price = self.prices[self.current_step]
position_info = np.array([
self.position / self.max_position, # Normalized position (-1 to 1)
self.balance / self.initial_balance - 1.0, # Normalized balance change
self._calculate_unrealized_pnl(current_price) # Unrealized PnL
])
observations.append(position_info)
# Concatenate all observations
observation = np.concatenate(observations)
return observation
def _align_timeframe_index(self, timeframe):
"""
Align the index of a higher timeframe with the current step in the primary timeframe.
Args:
timeframe: The timeframe to align
Returns:
int: The starting index in the higher timeframe
"""
if timeframe == self.timeframe:
return self.current_step - self.window_size
# Get timestamps for current primary timeframe step
primary_ts = self.timestamps[self.current_step]
# Find closest index in the higher timeframe
higher_ts = self.data[timeframe].index.values
idx = np.searchsorted(higher_ts, primary_ts)
# Adjust to get the starting index
start_idx = max(0, idx - self.window_size)
return start_idx
def get_last_positions(self, n=5):
"""
Get detailed information about the last n positions.
Args:
n: Number of last positions to return
Returns:
list: List of dictionaries containing position details
"""
if not self.trades:
return []
# Filter trades to only include those that closed positions
position_trades = [t for t in self.trades if t.get('realized_pnl', 0) != 0 or (t.get('prev_position', 0) != 0 and t.get('new_position', 0) == 0)]
positions = []
last_n_trades = position_trades[-n:] if len(position_trades) >= n else position_trades
for trade in last_n_trades:
position_info = {
'timestamp': trade.get('timestamp', self.timestamps[trade['step']]),
'action': trade.get('action_name', ['SELL', 'BUY'][trade['action']]),
'entry_price': trade.get('entry_price', 0.0),
'exit_price': trade.get('exit_price', trade['price']),
'position_size': trade.get('position_size', self.max_position),
'realized_pnl': trade.get('realized_pnl', 0.0),
'fee': trade.get('trade_fee', 0.0),
'pnl': trade.get('pnl', 0.0),
'pnl_percentage': (trade.get('pnl', 0.0) / self.initial_balance) * 100,
'balance_before': trade.get('balance_before', 0.0),
'balance_after': trade.get('balance_after', 0.0),
'duration': trade.get('duration', 'N/A')
}
positions.append(position_info)
return positions
def render(self, mode='human'):
"""Render the environment"""
current_step = self.current_step
current_price = self.prices[current_step]
# Display basic information
print(f"\nTrading Environment Status:")
print(f"============================")
print(f"Step: {current_step}/{len(self.prices)-1}")
print(f"Current Price: {current_price:.4f}")
print(f"Current Balance: {self.balance:.4f}")
print(f"Current Position: {self.position:.4f}")
if self.position != 0:
unrealized_pnl = self._calculate_unrealized_pnl(current_price)
print(f"Entry Price: {self.entry_price:.4f}")
print(f"Unrealized PnL: {unrealized_pnl:.4f} ({unrealized_pnl/self.balance*100:.2f}%)")
print(f"Total PnL: {self.total_pnl:.4f} ({self.total_pnl/self.initial_balance*100:.2f}%)")
print(f"Total Trades: {len(self.trades)}")
if len(self.trades) > 0:
win_trades = [t for t in self.trades if t.get('realized_pnl', 0) > 0]
win_count = len(win_trades)
# Count trades that closed positions (not just changed them)
closed_positions = [t for t in self.trades if t.get('realized_pnl', 0) != 0]
closed_count = len(closed_positions)
win_rate = win_count / closed_count if closed_count > 0 else 0
print(f"Positions Closed: {closed_count}")
print(f"Winning Positions: {win_count}")
print(f"Win Rate: {win_rate:.2f}")
# Display last 5 positions
print("\nLast 5 Positions:")
print("================")
last_positions = self.get_last_positions(5)
if not last_positions:
print("No closed positions yet.")
for pos in last_positions:
print(f"Time: {pos['timestamp']}")
print(f"Action: {pos['action']}")
print(f"Entry: {pos['entry_price']:.4f}, Exit: {pos['exit_price']:.4f}")
print(f"Size: {pos['position_size']:.4f}")
print(f"PnL: {pos['realized_pnl']:.4f} ({pos['pnl_percentage']:.2f}%)")
print(f"Fee: {pos['fee']:.4f}")
print(f"Balance: {pos['balance_before']:.4f} -> {pos['balance_after']:.4f}")
print("----------------")
return
def close(self):
"""Close the environment"""
pass
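A hedged usage sketch for the environment above; `data_interface` is assumed to expose `timeframes` and `dataframes` as used in `reset_data()`, and a real caller would supply model actions instead of random sampling:
```python
# Hypothetical interaction loop with the TradingEnvironment defined above.
env = TradingEnvironment(data_interface, initial_balance=10000.0, window_size=20)
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()         # stand-in for a model's BUY/SELL decision
    obs, reward, done, info = env.step(action)
env.render()                                   # print balance, PnL and recent positions
```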

View File

@ -11,11 +11,17 @@ This package contains the neural network models used in the trading system:
PyTorch implementation only.
"""
from NN.models.cnn_model import EnhancedCNNModel as CNNModel
# Import core models
from NN.models.dqn_agent import DQNAgent
from NN.models.cob_rl_model import MassiveRLNetwork, COBRLModelInterface
from NN.models.cob_rl_model import COBRLModelInterface
from NN.models.advanced_transformer_trading import AdvancedTradingTransformer, TradingTransformerConfig
from NN.models.standardized_cnn import StandardizedCNN # Use the unified CNN model
# Import model interfaces
from NN.models.model_interfaces import ModelInterface, CNNModelInterface, RLAgentInterface, ExtremaTrainerInterface
__all__ = ['CNNModel', 'DQNAgent', 'MassiveRLNetwork', 'COBRLModelInterface', 'AdvancedTradingTransformer', 'TradingTransformerConfig',
'ModelInterface', 'CNNModelInterface', 'RLAgentInterface', 'ExtremaTrainerInterface']
# Export the unified StandardizedCNN as CNNModel for compatibility
CNNModel = StandardizedCNN
__all__ = ['CNNModel', 'StandardizedCNN', 'DQNAgent', 'COBRLModelInterface', 'AdvancedTradingTransformer', 'TradingTransformerConfig',
'ModelInterface', 'CNNModelInterface', 'RLAgentInterface', 'ExtremaTrainerInterface']

File diff suppressed because it is too large

View File

@ -250,6 +250,12 @@ class COBRLModelInterface(ModelInterface):
logger.info(f"COB RL Model Interface initialized on {self.device}")
def to(self, device):
"""PyTorch-style device movement method"""
self.device = device
self.model = self.model.to(device)
return self
def predict(self, cob_features: np.ndarray) -> Dict[str, Any]:
"""Make prediction using the model"""
self.model.eval()

File diff suppressed because it is too large

View File

@ -3,6 +3,7 @@ import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
import time
import logging
import torch.nn.functional as F
from typing import List, Tuple, Dict, Any, Optional, Union
@ -80,6 +81,9 @@ class EnhancedCNN(nn.Module):
self.n_actions = n_actions
self.confidence_threshold = confidence_threshold
# Training data storage
self.training_data = []
# Calculate input dimensions
if isinstance(input_shape, (list, tuple)):
if len(input_shape) == 3: # [channels, height, width]
@ -265,8 +269,9 @@ class EnhancedCNN(nn.Module):
nn.Linear(256, 3) # 0=bottom, 1=top, 2=neither
)
# ULTRA MASSIVE multi-timeframe price prediction heads
self.price_pred_immediate = nn.Sequential(
# ULTRA MASSIVE price direction prediction head
# Outputs single direction and confidence values
self.price_direction_head = nn.Sequential(
nn.Linear(1024, 1024), # Increased from 512
nn.ReLU(),
nn.Dropout(0.3),
@ -275,32 +280,13 @@ class EnhancedCNN(nn.Module):
nn.Dropout(0.3),
nn.Linear(512, 256), # Increased from 128
nn.ReLU(),
nn.Linear(256, 3) # Up, Down, Sideways
nn.Linear(256, 2) # [direction, confidence]
)
self.price_pred_midterm = nn.Sequential(
nn.Linear(1024, 1024), # Increased from 512
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(1024, 512), # Increased from 256
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(512, 256), # Increased from 128
nn.ReLU(),
nn.Linear(256, 3) # Up, Down, Sideways
)
self.price_pred_longterm = nn.Sequential(
nn.Linear(1024, 1024), # Increased from 512
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(1024, 512), # Increased from 256
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(512, 256), # Increased from 128
nn.ReLU(),
nn.Linear(256, 3) # Up, Down, Sideways
)
# Direction activation (tanh for -1 to 1)
self.direction_activation = nn.Tanh()
# Confidence activation (sigmoid for 0 to 1)
self.confidence_activation = nn.Sigmoid()
# ULTRA MASSIVE value prediction with ensemble approaches
self.price_pred_value = nn.Sequential(
@ -371,21 +357,45 @@ class EnhancedCNN(nn.Module):
nn.Linear(128, 4) # Low risk, medium risk, high risk, extreme risk
)
def _memory_barrier(self, tensor: torch.Tensor) -> torch.Tensor:
"""Create a memory barrier to prevent in-place operation issues"""
return tensor.detach().clone().requires_grad_(tensor.requires_grad)
def _check_rebuild_network(self, features):
"""Check if network needs to be rebuilt for different feature dimensions"""
"""DEPRECATED: Network should have fixed architecture - no runtime rebuilding"""
if features != self.feature_dim:
logger.info(f"Rebuilding network for new feature dimension: {features} (was {self.feature_dim})")
self.feature_dim = features
self._build_network()
# Move to device after rebuilding
self.to(self.device)
return True
logger.error(f"CRITICAL: Input feature dimension mismatch! Expected {self.feature_dim}, got {features}")
logger.error("This indicates a bug in data preprocessing - input should be fixed size!")
logger.error("Network architecture should NOT change at runtime!")
raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {features}")
return False
def forward(self, x):
"""Forward pass through the ULTRA MASSIVE network"""
batch_size = x.size(0)
# Validate input dimensions to prevent zero-element tensor issues
if x.numel() == 0:
logger.error(f"Forward pass received empty tensor with shape {x.shape}")
# Return default outputs for all 5 expected values to prevent crash
default_q_values = torch.zeros(batch_size, self.n_actions, device=x.device)
default_extrema = torch.zeros(batch_size, 3, device=x.device) # bottom/top/neither
default_price_pred = torch.zeros(batch_size, 1, device=x.device)
default_features = torch.zeros(batch_size, 1024, device=x.device)
default_advanced = torch.zeros(batch_size, 1, device=x.device)
return default_q_values, default_extrema, default_price_pred, default_features, default_advanced
# Check for zero feature dimensions
if len(x.shape) > 1 and any(dim == 0 for dim in x.shape[1:]):
logger.error(f"Forward pass received tensor with zero feature dimensions: {x.shape}")
# Return default outputs for all 5 expected values to prevent crash
default_q_values = torch.zeros(batch_size, self.n_actions, device=x.device)
default_extrema = torch.zeros(batch_size, 3, device=x.device) # bottom/top/neither
default_price_pred = torch.zeros(batch_size, 1, device=x.device)
default_features = torch.zeros(batch_size, 1024, device=x.device)
default_advanced = torch.zeros(batch_size, 1, device=x.device)
return default_q_values, default_extrema, default_price_pred, default_features, default_advanced
# Process different input shapes
if len(x.shape) > 2:
# Handle 4D input [batch, timeframes, window, features] or 3D input [batch, timeframes, features]
@ -397,10 +407,11 @@ class EnhancedCNN(nn.Module):
# Now x is 3D: [batch, timeframes, features]
x_reshaped = x
# Check if the feature dimension has changed and rebuild if necessary
if x_reshaped.size(1) * x_reshaped.size(2) != self.feature_dim:
total_features = x_reshaped.size(1) * x_reshaped.size(2)
self._check_rebuild_network(total_features)
# Validate input dimensions (should be fixed)
total_features = x_reshaped.size(1) * x_reshaped.size(2)
if total_features != self.feature_dim:
logger.error(f"Input dimension mismatch: expected {self.feature_dim}, got {total_features}")
raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {total_features}")
# Apply ultra massive convolutions
x_conv = self.conv_layers(x_reshaped)
@ -413,9 +424,10 @@ class EnhancedCNN(nn.Module):
# For 2D input [batch, features]
x_flat = x
# Check if dimensions have changed
# Validate input dimensions (should be fixed)
if x_flat.size(1) != self.feature_dim:
self._check_rebuild_network(x_flat.size(1))
logger.error(f"Input dimension mismatch: expected {self.feature_dim}, got {x_flat.size(1)}")
raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {x_flat.size(1)}")
# Apply ULTRA MASSIVE FC layers to get base features
features = self.fc_layers(x_flat) # [batch, 1024]
@ -464,10 +476,14 @@ class EnhancedCNN(nn.Module):
# Extrema predictions (bottom/top/neither detection)
extrema_pred = self.extrema_head(features_refined)
# Multi-timeframe price movement predictions
price_immediate = self.price_pred_immediate(features_refined)
price_midterm = self.price_pred_midterm(features_refined)
price_longterm = self.price_pred_longterm(features_refined)
# Price direction predictions
price_direction_raw = self.price_direction_head(features_refined)
# Apply separate activations to direction and confidence
direction = self.direction_activation(price_direction_raw[:, 0:1]) # -1 to 1
confidence = self.confidence_activation(price_direction_raw[:, 1:2]) # 0 to 1
price_direction_pred = torch.cat([direction, confidence], dim=1) # [batch, 2]
price_values = self.price_pred_value(features_refined)
# Additional specialized predictions for enhanced accuracy
@ -476,38 +492,42 @@ class EnhancedCNN(nn.Module):
market_regime_pred = self.market_regime_head(features_refined)
risk_pred = self.risk_head(features_refined)
# Package all price predictions
price_predictions = {
'immediate': price_immediate,
'midterm': price_midterm,
'longterm': price_longterm,
'values': price_values
}
# Use the price direction prediction directly (already [batch, 2])
price_direction_tensor = price_direction_pred
# Package additional predictions for enhanced decision making
advanced_predictions = {
'volatility': volatility_pred,
'support_resistance': support_resistance_pred,
'market_regime': market_regime_pred,
'risk_assessment': risk_pred
}
# Package additional predictions into a single tensor (use volatility as primary)
# For compatibility with DQN agent, we return volatility_pred as the advanced prediction tensor
advanced_pred_tensor = volatility_pred
return q_values, extrema_pred, price_predictions, features_refined, advanced_predictions
return q_values, extrema_pred, price_direction_tensor, features_refined, advanced_pred_tensor
def act(self, state, explore=True):
def act(self, state, explore=True) -> Tuple[int, float, List[float]]:
"""Enhanced action selection with ultra massive model predictions"""
if explore and np.random.random() < 0.1: # 10% random exploration
return np.random.choice(self.n_actions)
self.eval()
state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
# Accept both NumPy arrays and already-built torch tensors
if isinstance(state, torch.Tensor):
state_tensor = state.detach().to(self.device)
if state_tensor.dim() == 1:
state_tensor = state_tensor.unsqueeze(0)
else:
# Convert to tensor **directly on the target device** to avoid intermediate CPU copies
state_tensor = torch.as_tensor(state, dtype=torch.float32, device=self.device)
if state_tensor.dim() == 1:
state_tensor = state_tensor.unsqueeze(0)
with torch.no_grad():
q_values, extrema_pred, price_predictions, features, advanced_predictions = self(state_tensor)
q_values, extrema_pred, price_direction_predictions, features, advanced_predictions = self(state_tensor)
# Process price direction predictions
if price_direction_predictions is not None:
self.process_price_direction_predictions(price_direction_predictions)
# Apply softmax to get action probabilities
action_probs = torch.softmax(q_values, dim=1)
action = torch.argmax(action_probs, dim=1).item()
action_probs_tensor = torch.softmax(q_values, dim=1)
action_idx = int(torch.argmax(action_probs_tensor, dim=1).item())
confidence = float(action_probs_tensor[0, action_idx].item()) # Confidence of the chosen action
action_probs = action_probs_tensor.squeeze(0).tolist() # Convert to list of floats for return
# Log advanced predictions for better decision making
if hasattr(self, '_log_predictions') and self._log_predictions:
@ -537,7 +557,180 @@ class EnhancedCNN(nn.Module):
logger.info(f" Market Regime: {regime_labels[regime_class]} ({regime[regime_class]:.3f})")
logger.info(f" Risk Level: {risk_labels[risk_class]} ({risk[risk_class]:.3f})")
return action
return action_idx, confidence, action_probs
def process_price_direction_predictions(self, price_direction_pred: torch.Tensor) -> Dict[str, float]:
"""
Process price direction predictions and convert to standardized format
Args:
price_direction_pred: Tensor of shape (batch_size, 2) containing [direction, confidence]
Returns:
Dict with direction (-1 to 1) and confidence (0 to 1)
"""
try:
if price_direction_pred is None or price_direction_pred.numel() == 0:
return {}
# Extract direction and confidence values
direction_value = float(price_direction_pred[0, 0].item()) # -1 to 1
confidence_value = float(price_direction_pred[0, 1].item()) # 0 to 1
processed_directions = {
'direction': direction_value,
'confidence': confidence_value
}
# Store for later access
self.last_price_direction = processed_directions
return processed_directions
except Exception as e:
logger.error(f"Error processing price direction predictions: {e}")
return {}
def get_price_direction_vector(self) -> Dict[str, float]:
"""
Get the current price direction and confidence
Returns:
Dict with direction (-1 to 1) and confidence (0 to 1)
"""
return getattr(self, 'last_price_direction', {})
def get_price_direction_summary(self) -> Dict[str, Any]:
"""
Get a summary of price direction prediction
Returns:
Dict containing direction and confidence information
"""
try:
last_direction = getattr(self, 'last_price_direction', {})
if not last_direction:
return {
'direction_value': 0.0,
'confidence_value': 0.0,
'direction_label': "SIDEWAYS",
'discrete_direction': 0,
'strength': 0.0,
'weighted_strength': 0.0
}
direction_value = last_direction['direction']
confidence_value = last_direction['confidence']
# Convert to discrete direction
if direction_value > 0.1:
direction_label = "UP"
discrete_direction = 1
elif direction_value < -0.1:
direction_label = "DOWN"
discrete_direction = -1
else:
direction_label = "SIDEWAYS"
discrete_direction = 0
return {
'direction_value': float(direction_value),
'confidence_value': float(confidence_value),
'direction_label': direction_label,
'discrete_direction': discrete_direction,
'strength': abs(float(direction_value)),
'weighted_strength': abs(float(direction_value)) * float(confidence_value)
}
except Exception as e:
logger.error(f"Error calculating price direction summary: {e}")
return {
'direction_value': 0.0,
'confidence_value': 0.0,
'direction_label': "SIDEWAYS",
'discrete_direction': 0,
'strength': 0.0,
'weighted_strength': 0.0
}
def add_training_data(self, state, action, reward, position_pnl=0.0, has_position=False):
"""
Add training data to the model's training buffer with position-based reward enhancement
Args:
state: Input state
action: Action taken
reward: Base reward received
position_pnl: Current position P&L (0.0 if no position)
has_position: Whether we currently have an open position
"""
try:
# Enhance reward based on position status
enhanced_reward = self._calculate_position_enhanced_reward(
reward, action, position_pnl, has_position
)
self.training_data.append({
'state': state,
'action': action,
'reward': enhanced_reward,
'base_reward': reward, # Keep original reward for analysis
'position_pnl': position_pnl,
'has_position': has_position,
'timestamp': time.time()
})
# Keep only the last 1000 training samples
if len(self.training_data) > 1000:
self.training_data = self.training_data[-1000:]
except Exception as e:
logger.error(f"Error adding training data: {e}")
def _calculate_position_enhanced_reward(self, base_reward, action, position_pnl, has_position):
"""
Calculate position-enhanced reward to incentivize profitable trades and closing losing ones
Args:
base_reward: Original reward from price prediction accuracy
action: Action taken ('BUY', 'SELL', 'HOLD')
position_pnl: Current position P&L
has_position: Whether we have an open position
Returns:
Enhanced reward that incentivizes profitable behavior
"""
try:
enhanced_reward = base_reward
if has_position and position_pnl != 0.0:
# Position-based reward adjustments
pnl_factor = position_pnl / 100.0 # Normalize P&L to reasonable scale
if position_pnl > 0: # Profitable position
if action == "HOLD":
# Reward holding profitable positions (let winners run)
enhanced_reward += abs(pnl_factor) * 0.5
elif action in ["BUY", "SELL"]:
# Moderate reward for taking action on profitable positions
enhanced_reward += abs(pnl_factor) * 0.3
elif position_pnl < 0: # Losing position
if action == "HOLD":
# Penalty for holding losing positions (cut losses)
enhanced_reward -= abs(pnl_factor) * 0.8
elif action in ["BUY", "SELL"]:
# Reward for taking action to close losing positions
enhanced_reward += abs(pnl_factor) * 0.6
# Ensure reward doesn't become extreme
enhanced_reward = max(-5.0, min(5.0, enhanced_reward))
return enhanced_reward
except Exception as e:
logger.error(f"Error calculating position-enhanced reward: {e}")
return base_reward
def save(self, path):
"""Save model weights and architecture"""

View File

@ -1,104 +0,0 @@
{
"decision": [
{
"checkpoint_id": "decision_20250704_082022",
"model_name": "decision",
"model_type": "decision_fusion",
"file_path": "NN\\models\\saved\\decision\\decision_20250704_082022.pt",
"created_at": "2025-07-04T08:20:22.416087",
"file_size_mb": 0.06720924377441406,
"performance_score": 102.79971076963062,
"accuracy": null,
"loss": 2.8923120591883844e-06,
"val_accuracy": null,
"val_loss": null,
"reward": null,
"pnl": null,
"epoch": null,
"training_time_hours": null,
"total_parameters": null,
"wandb_run_id": null,
"wandb_artifact_name": null
},
{
"checkpoint_id": "decision_20250704_082021",
"model_name": "decision",
"model_type": "decision_fusion",
"file_path": "NN\\models\\saved\\decision\\decision_20250704_082021.pt",
"created_at": "2025-07-04T08:20:21.900854",
"file_size_mb": 0.06720924377441406,
"performance_score": 102.79970038321,
"accuracy": null,
"loss": 2.996176877014177e-06,
"val_accuracy": null,
"val_loss": null,
"reward": null,
"pnl": null,
"epoch": null,
"training_time_hours": null,
"total_parameters": null,
"wandb_run_id": null,
"wandb_artifact_name": null
},
{
"checkpoint_id": "decision_20250704_082022",
"model_name": "decision",
"model_type": "decision_fusion",
"file_path": "NN\\models\\saved\\decision\\decision_20250704_082022.pt",
"created_at": "2025-07-04T08:20:22.294191",
"file_size_mb": 0.06720924377441406,
"performance_score": 102.79969219038436,
"accuracy": null,
"loss": 3.0781056310808756e-06,
"val_accuracy": null,
"val_loss": null,
"reward": null,
"pnl": null,
"epoch": null,
"training_time_hours": null,
"total_parameters": null,
"wandb_run_id": null,
"wandb_artifact_name": null
},
{
"checkpoint_id": "decision_20250704_134829",
"model_name": "decision",
"model_type": "decision_fusion",
"file_path": "NN\\models\\saved\\decision\\decision_20250704_134829.pt",
"created_at": "2025-07-04T13:48:29.903250",
"file_size_mb": 0.06720924377441406,
"performance_score": 102.79967532851693,
"accuracy": null,
"loss": 3.2467253719811344e-06,
"val_accuracy": null,
"val_loss": null,
"reward": null,
"pnl": null,
"epoch": null,
"training_time_hours": null,
"total_parameters": null,
"wandb_run_id": null,
"wandb_artifact_name": null
},
{
"checkpoint_id": "decision_20250704_214714",
"model_name": "decision",
"model_type": "decision_fusion",
"file_path": "NN\\models\\saved\\decision\\decision_20250704_214714.pt",
"created_at": "2025-07-04T21:47:14.427187",
"file_size_mb": 0.06720924377441406,
"performance_score": 102.79966325731509,
"accuracy": null,
"loss": 3.3674381887394134e-06,
"val_accuracy": null,
"val_loss": null,
"reward": null,
"pnl": null,
"epoch": null,
"training_time_hours": null,
"total_parameters": null,
"wandb_run_id": null,
"wandb_artifact_name": null
}
]
}
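
The deleted registry above is a plain mapping from model name to a list of checkpoint entries, each carrying a performance_score, loss and a Windows-style file_path. A hedged sketch of how such a registry could be queried is shown below; the function name best_checkpoint and the registry path are assumptions for illustration, and only the JSON keys come from the file itself.

import json

def best_checkpoint(registry_path: str, model_name: str = "decision") -> dict:
    """Return the checkpoint entry with the highest performance_score."""
    with open(registry_path, "r", encoding="utf-8") as f:
        registry = json.load(f)
    entries = registry.get(model_name, [])
    if not entries:
        raise ValueError(f"no checkpoints recorded for '{model_name}'")
    return max(entries, key=lambda e: e["performance_score"])

# Example usage (the registry filename is hypothetical):
# best = best_checkpoint("NN/models/saved/decision/checkpoint_metadata.json")
# print(best["checkpoint_id"], best["file_path"], best["loss"])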


@@ -1 +0,0 @@
{"best_reward": 4791516.572471984, "best_episode": 3250, "best_pnl": 826842167451289.1, "best_win_rate": 0.47368421052631576, "date": "2025-04-01 10:19:16"}


@@ -1,20 +0,0 @@
{
"supervised": {
"epochs_completed": 22650,
"best_val_pnl": 0.0,
"best_epoch": 50,
"best_win_rate": 0
},
"reinforcement": {
"episodes_completed": 0,
"best_reward": -Infinity,
"best_episode": 0,
"best_win_rate": 0
},
"hybrid": {
"iterations_completed": 453,
"best_combined_score": 0.0,
"training_started": "2025-04-09T10:30:42.510856",
"last_update": "2025-04-09T10:40:02.217840"
}
}
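
One detail worth flagging in the deleted file above: the literal -Infinity is not valid JSON under RFC 8259. Python's json module emits and accepts it by default (allow_nan=True), which is presumably how it ended up on disk, but strict parsers in other languages will reject the file. The sketch below only illustrates that behaviour and one common workaround; it is not code from the repository.

import json
import math

snippet = '{"best_reward": -Infinity, "best_episode": 0}'

# Python's default decoder maps -Infinity to float('-inf') ...
state = json.loads(snippet)
assert math.isinf(state["best_reward"]) and state["best_reward"] < 0

# ... but a spec-conforming serializer refuses to write it back out.
try:
    json.dumps({"best_reward": float("-inf")}, allow_nan=False)
except ValueError:
    pass  # expected: non-finite floats have no strict-JSON representation

# A portable convention is to replace non-finite values with null before saving.
safe = {k: (None if isinstance(v, float) and not math.isfinite(v) else v)
        for k, v in state.items()}
assert safe["best_reward"] is None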


@@ -1,326 +0,0 @@
{
"epochs_completed": 8,
"best_val_pnl": 0.0,
"best_epoch": 1,
"best_win_rate": 0.0,
"training_started": "2025-04-02T10:43:58.946682",
"last_update": "2025-04-02T10:44:10.940892",
"epochs": [
{
"epoch": 1,
"train_loss": 1.0950355529785156,
"val_loss": 1.1657923062642415,
"train_acc": 0.3255208333333333,
"val_acc": 0.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:01.840889",
"data_age": 2,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 2,
"train_loss": 1.0831659038861592,
"val_loss": 1.1212460199991863,
"train_acc": 0.390625,
"val_acc": 0.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:03.134833",
"data_age": 4,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 3,
"train_loss": 1.0740693012873332,
"val_loss": 1.0992945830027263,
"train_acc": 0.4739583333333333,
"val_acc": 0.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:04.425272",
"data_age": 5,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 4,
"train_loss": 1.0747728943824768,
"val_loss": 1.0821794271469116,
"train_acc": 0.4609375,
"val_acc": 0.3229166666666667,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:05.716421",
"data_age": 6,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 5,
"train_loss": 1.0489931503931682,
"val_loss": 1.0669521888097127,
"train_acc": 0.5833333333333334,
"val_acc": 1.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:07.007935",
"data_age": 8,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 6,
"train_loss": 1.0533669590950012,
"val_loss": 1.0505590836207073,
"train_acc": 0.5104166666666666,
"val_acc": 1.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:08.296061",
"data_age": 9,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 7,
"train_loss": 1.0456886688868205,
"val_loss": 1.0351698795954387,
"train_acc": 0.5651041666666666,
"val_acc": 1.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:09.607584",
"data_age": 10,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
},
{
"epoch": 8,
"train_loss": 1.040040671825409,
"val_loss": 1.0227736632029216,
"train_acc": 0.6119791666666666,
"val_acc": 1.0,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
},
"val": {
"BUY": 1.0,
"SELL": 0.0,
"HOLD": 0.0
}
},
"timestamp": "2025-04-02T10:44:10.940892",
"data_age": 11,
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"overall_win_rate": {
"train": 0.0,
"val": 0.0
}
}
],
"cumulative_pnl": {
"train": 0.0,
"val": 0.0
},
"total_trades": {
"train": 0,
"val": 0
},
"total_wins": {
"train": 0,
"val": 0
}
}
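
The per-epoch records above all share one schema (losses, accuracies, PnL, win rate and a BUY/SELL/HOLD signal distribution), so a small summary pass makes the trend easier to read than the raw JSON. The helper below is a sketch under an assumed filename, not repository code.

import json

def summarize_training_log(path: str) -> None:
    """Print one line per epoch from a training log with the schema above."""
    with open(path, "r", encoding="utf-8") as f:
        log = json.load(f)
    for epoch in log["epochs"]:
        buy_share = epoch["signal_distribution"]["train"]["BUY"]
        print(f"epoch {epoch['epoch']:>2}: "
              f"train_loss={epoch['train_loss']:.4f} "
              f"val_loss={epoch['val_loss']:.4f} "
              f"val_acc={epoch['val_acc']:.3f} "
              f"train BUY share={buy_share:.2f}")

# For the deleted log above this would show val_loss falling from roughly 1.17
# to 1.02 while the signal distribution stays at 100% BUY, so the perfect
# validation accuracy in later epochs most likely reflects a single-class
# output rather than genuine discrimination.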


@@ -1,192 +0,0 @@
{
"epochs_completed": 7,
"best_val_pnl": 0.002028853100759435,
"best_epoch": 6,
"best_win_rate": 0.5157894736842106,
"training_started": "2025-03-31T02:50:10.418670",
"last_update": "2025-03-31T02:50:15.227593",
"epochs": [
{
"epoch": 1,
"train_loss": 1.1206786036491394,
"val_loss": 1.0542699098587036,
"train_acc": 0.11197916666666667,
"val_acc": 0.25,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
},
"val": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
}
},
"timestamp": "2025-03-31T02:50:12.881423",
"data_age": 2
},
{
"epoch": 2,
"train_loss": 1.1266120672225952,
"val_loss": 1.072133183479309,
"train_acc": 0.1171875,
"val_acc": 0.25,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
},
"val": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
}
},
"timestamp": "2025-03-31T02:50:13.186840",
"data_age": 2
},
{
"epoch": 3,
"train_loss": 1.1415620843569438,
"val_loss": 1.1701548099517822,
"train_acc": 0.1015625,
"val_acc": 0.5208333333333334,
"train_pnl": 0.0,
"val_pnl": 0.0,
"train_win_rate": 0.0,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
},
"val": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
}
},
"timestamp": "2025-03-31T02:50:13.442018",
"data_age": 3
},
{
"epoch": 4,
"train_loss": 1.1331567962964375,
"val_loss": 1.070081114768982,
"train_acc": 0.09375,
"val_acc": 0.22916666666666666,
"train_pnl": 0.010650217327384765,
"val_pnl": -0.0007049481907895126,
"train_win_rate": 0.49279538904899134,
"val_win_rate": 0.40625,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.9036458333333334,
"HOLD": 0.09635416666666667
},
"val": {
"BUY": 0.0,
"SELL": 0.3333333333333333,
"HOLD": 0.6666666666666666
}
},
"timestamp": "2025-03-31T02:50:13.739899",
"data_age": 3
},
{
"epoch": 5,
"train_loss": 1.10965762535731,
"val_loss": 1.0485950708389282,
"train_acc": 0.12239583333333333,
"val_acc": 0.17708333333333334,
"train_pnl": 0.011924086862580204,
"val_pnl": 0.0,
"train_win_rate": 0.5070422535211268,
"val_win_rate": 0.0,
"best_position_size": 0.1,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.7395833333333334,
"HOLD": 0.2604166666666667
},
"val": {
"BUY": 0.0,
"SELL": 0.0,
"HOLD": 1.0
}
},
"timestamp": "2025-03-31T02:50:14.073439",
"data_age": 3
},
{
"epoch": 6,
"train_loss": 1.1272419293721516,
"val_loss": 1.084235429763794,
"train_acc": 0.1015625,
"val_acc": 0.22916666666666666,
"train_pnl": 0.014825159601390072,
"val_pnl": 0.00405770620151887,
"train_win_rate": 0.4908616187989556,
"val_win_rate": 0.5157894736842106,
"best_position_size": 2.0,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 1.0,
"HOLD": 0.0
},
"val": {
"BUY": 0.0,
"SELL": 1.0,
"HOLD": 0.0
}
},
"timestamp": "2025-03-31T02:50:14.658295",
"data_age": 4
},
{
"epoch": 7,
"train_loss": 1.1171108484268188,
"val_loss": 1.0741244554519653,
"train_acc": 0.1171875,
"val_acc": 0.22916666666666666,
"train_pnl": 0.0059474696523706605,
"val_pnl": 0.00405770620151887,
"train_win_rate": 0.4838709677419355,
"val_win_rate": 0.5157894736842106,
"best_position_size": 2.0,
"signal_distribution": {
"train": {
"BUY": 0.0,
"SELL": 0.7291666666666666,
"HOLD": 0.2708333333333333
},
"val": {
"BUY": 0.0,
"SELL": 1.0,
"HOLD": 0.0
}
},
"timestamp": "2025-03-31T02:50:15.227593",
"data_age": 4
}
]
}
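
This second, shorter log records best_epoch and best_win_rate at the top level; a minimal selector that picks the epoch with the highest validation PnL is sketched below (filename assumed, not from the repository). For the data above it lands on epoch 6, the same epoch the file records as best_epoch, with epoch 7 tying on val_pnl.

import json

def best_val_epoch(path: str) -> dict:
    """Return the epoch entry with the highest validation PnL."""
    with open(path, "r", encoding="utf-8") as f:
        log = json.load(f)
    return max(log["epochs"], key=lambda e: e["val_pnl"])

# best = best_val_epoch("NN/models/saved/training_metrics.json")  # hypothetical path
# print(best["epoch"], best["val_pnl"], best["val_win_rate"], best["best_position_size"])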

Some files were not shown because too many files have changed in this diff.