working with errors

Dobromir Popov
2025-07-20 01:52:36 +03:00
parent 92919cb1ef
commit 469269e809
7 changed files with 1237 additions and 149 deletions


@@ -196,11 +196,39 @@ class DataProvider:
# Load existing pivot bounds from cache
self._load_all_pivot_bounds()
# Centralized data collection for models and dashboard
self.cob_data_cache: Dict[str, deque] = {} # COB data for models
self.training_data_cache: Dict[str, deque] = {} # Training data for models
self.model_data_subscribers: Dict[str, List[Callable]] = {} # Model-specific data callbacks
# Callbacks for data distribution
self.cob_data_callbacks: List[Callable] = [] # COB data callbacks
self.training_data_callbacks: List[Callable] = [] # Training data callbacks
self.model_prediction_callbacks: List[Callable] = [] # Model prediction callbacks
# Initialize data caches
for symbol in self.symbols:
binance_symbol = symbol.replace('/', '').upper()
self.cob_data_cache[binance_symbol] = deque(maxlen=300) # 5 minutes of COB data
self.training_data_cache[binance_symbol] = deque(maxlen=1000) # Training data buffer
# Data collection threads
self.data_collection_active = False
# COB data collection
self.cob_collection_active = False
self.cob_collection_thread = None
# Training data collection
self.training_data_collection_active = False
self.training_data_thread = None
logger.info(f"DataProvider initialized for symbols: {self.symbols}")
logger.info(f"Timeframes: {self.timeframes}")
logger.info("Centralized data distribution enabled")
logger.info("Pivot-based normalization system enabled")
logger.info("Williams Market Structure integration enabled")
logger.info("COB and training data collection enabled")
# Rate limiting
self.last_request_time = {}
@@ -2559,4 +2587,591 @@ class DataProvider:
if attempt < self.max_retries - 1:
time.sleep(5 * (attempt + 1))
return None
return None
# ===== CENTRALIZED DATA COLLECTION METHODS =====
def start_centralized_data_collection(self):
"""Start all centralized data collection processes"""
logger.info("Starting centralized data collection for all models and dashboard")
# Start COB data collection
self.start_cob_data_collection()
# Start training data collection
self.start_training_data_collection()
logger.info("All centralized data collection processes started")
def stop_centralized_data_collection(self):
"""Stop all centralized data collection processes"""
logger.info("Stopping centralized data collection")
# Stop COB collection
self.cob_collection_active = False
if self.cob_collection_thread and self.cob_collection_thread.is_alive():
self.cob_collection_thread.join(timeout=5)
# Stop training data collection
self.training_data_collection_active = False
if self.training_data_thread and self.training_data_thread.is_alive():
self.training_data_thread.join(timeout=5)
logger.info("Centralized data collection stopped")
def start_cob_data_collection(self):
"""Start COB (Consolidated Order Book) data collection"""
if self.cob_collection_active:
logger.warning("COB data collection already active")
return
self.cob_collection_active = True
self.cob_collection_thread = Thread(target=self._cob_collection_worker, daemon=True)
self.cob_collection_thread.start()
logger.info("COB data collection started")
def _cob_collection_worker(self):
"""Worker thread for COB data collection"""
import requests
import time
import threading
logger.info("COB data collection worker started")
# Use separate threads for each symbol to achieve higher update frequency
def collect_symbol_data(symbol):
while self.cob_collection_active:
try:
self._collect_cob_data_for_symbol(symbol)
                    # Sleep briefly: 0.016s is ~62 polls/sec per symbol (~120/sec across
                    # two symbols). Note: polling the public REST depth endpoint this fast
                    # can easily exceed Binance request-weight limits; a depth websocket
                    # stream is the usual way to sustain this update rate.
                    time.sleep(0.016)
except Exception as e:
logger.error(f"Error collecting COB data for {symbol}: {e}")
time.sleep(1) # Short recovery time
# Start a thread for each symbol
threads = []
for symbol in self.symbols:
thread = threading.Thread(target=collect_symbol_data, args=(symbol,), daemon=True)
thread.start()
threads.append(thread)
# Keep the main thread alive
while self.cob_collection_active:
time.sleep(1)
# Join threads when collection is stopped
for thread in threads:
thread.join(timeout=1)
def _collect_cob_data_for_symbol(self, symbol: str):
"""Collect COB data for a specific symbol using Binance REST API"""
try:
import requests
# Convert symbol format
binance_symbol = symbol.replace('/', '').upper()
# Get order book data
url = f"https://api.binance.com/api/v3/depth"
params = {
'symbol': binance_symbol,
'limit': 100 # Get top 100 levels
}
response = requests.get(url, params=params, timeout=5)
if response.status_code == 200:
order_book = response.json()
# Process and cache the data
cob_snapshot = self._process_order_book_data(symbol, order_book)
# Store in cache (ensure cache exists)
if binance_symbol not in self.cob_data_cache:
self.cob_data_cache[binance_symbol] = deque(maxlen=300)
self.cob_data_cache[binance_symbol].append(cob_snapshot)
# Distribute to COB data subscribers
self._distribute_cob_data(symbol, cob_snapshot)
else:
logger.debug(f"Failed to fetch COB data for {symbol}: {response.status_code}")
except Exception as e:
logger.debug(f"Error collecting COB data for {symbol}: {e}")
def _process_order_book_data(self, symbol: str, order_book: dict) -> dict:
"""Process raw order book data into structured COB snapshot with multi-timeframe imbalance metrics"""
try:
bids = [[float(price), float(qty)] for price, qty in order_book.get('bids', [])]
asks = [[float(price), float(qty)] for price, qty in order_book.get('asks', [])]
# Calculate statistics
total_bid_volume = sum(qty for _, qty in bids)
total_ask_volume = sum(qty for _, qty in asks)
best_bid = bids[0][0] if bids else 0
best_ask = asks[0][0] if asks else 0
mid_price = (best_bid + best_ask) / 2 if best_bid and best_ask else 0
spread = best_ask - best_bid if best_bid and best_ask else 0
spread_bps = (spread / mid_price * 10000) if mid_price > 0 else 0
# Calculate current imbalance
imbalance = (total_bid_volume - total_ask_volume) / (total_bid_volume + total_ask_volume) if (total_bid_volume + total_ask_volume) > 0 else 0
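            # Worked example with made-up numbers: total_bid_volume=120 and total_ask_volume=80
            # give imbalance = (120 - 80) / 200 = 0.2 (bid-heavy). The value is bounded in
            # [-1, 1], with 0 meaning a perfectly balanced book.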
# Calculate multi-timeframe imbalances
binance_symbol = symbol.replace('/', '').upper()
# Initialize imbalance metrics
imbalance_1s = imbalance # Current imbalance is 1s
imbalance_5s = imbalance # Default to current if not enough history
imbalance_15s = imbalance
imbalance_60s = imbalance
# Calculate historical imbalances if we have enough data
if binance_symbol in self.cob_data_cache:
cache = self.cob_data_cache[binance_symbol]
now = datetime.now()
# Get snapshots for different timeframes
snapshots_5s = [s for s in cache if (now - s['timestamp']).total_seconds() <= 5]
snapshots_15s = [s for s in cache if (now - s['timestamp']).total_seconds() <= 15]
snapshots_60s = [s for s in cache if (now - s['timestamp']).total_seconds() <= 60]
# Calculate imbalances for each timeframe
if snapshots_5s:
bid_vol_5s = sum(s['stats']['bid_liquidity'] for s in snapshots_5s)
ask_vol_5s = sum(s['stats']['ask_liquidity'] for s in snapshots_5s)
total_vol_5s = bid_vol_5s + ask_vol_5s
imbalance_5s = (bid_vol_5s - ask_vol_5s) / total_vol_5s if total_vol_5s > 0 else 0
if snapshots_15s:
bid_vol_15s = sum(s['stats']['bid_liquidity'] for s in snapshots_15s)
ask_vol_15s = sum(s['stats']['ask_liquidity'] for s in snapshots_15s)
total_vol_15s = bid_vol_15s + ask_vol_15s
imbalance_15s = (bid_vol_15s - ask_vol_15s) / total_vol_15s if total_vol_15s > 0 else 0
if snapshots_60s:
bid_vol_60s = sum(s['stats']['bid_liquidity'] for s in snapshots_60s)
ask_vol_60s = sum(s['stats']['ask_liquidity'] for s in snapshots_60s)
total_vol_60s = bid_vol_60s + ask_vol_60s
imbalance_60s = (bid_vol_60s - ask_vol_60s) / total_vol_60s if total_vol_60s > 0 else 0
cob_snapshot = {
'symbol': symbol,
'timestamp': datetime.now(),
'bids': bids[:20], # Top 20 levels
'asks': asks[:20], # Top 20 levels
'stats': {
'best_bid': best_bid,
'best_ask': best_ask,
'mid_price': mid_price,
'spread': spread,
'spread_bps': spread_bps,
'bid_liquidity': total_bid_volume,
'ask_liquidity': total_ask_volume,
'total_liquidity': total_bid_volume + total_ask_volume,
'imbalance': imbalance,
'imbalance_1s': imbalance_1s,
'imbalance_5s': imbalance_5s,
'imbalance_15s': imbalance_15s,
'imbalance_60s': imbalance_60s,
'levels': len(bids) + len(asks)
}
}
return cob_snapshot
except Exception as e:
logger.error(f"Error processing order book data for {symbol}: {e}")
return {}
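    # Illustrative check of the snapshot schema above (hypothetical values; 'provider'
    # stands for a constructed DataProvider instance and is not defined in this file):
    #
    #   raw = {'bids': [['100.0', '2.0']], 'asks': [['100.2', '1.0']]}
    #   snap = provider._process_order_book_data('ETH/USDT', raw)
    #   snap['stats']['mid_price']   # -> 100.1
    #   snap['stats']['spread_bps']  # -> 0.2 / 100.1 * 10000, about 19.98
    #   snap['stats']['imbalance']   # -> (2.0 - 1.0) / 3.0, about 0.333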
def start_training_data_collection(self):
"""Start training data collection for models"""
if self.training_data_collection_active:
logger.warning("Training data collection already active")
return
self.training_data_collection_active = True
self.training_data_thread = Thread(target=self._training_data_collection_worker, daemon=True)
self.training_data_thread.start()
logger.info("Training data collection started")
def _training_data_collection_worker(self):
"""Worker thread for training data collection"""
import time
logger.info("Training data collection worker started")
while self.training_data_collection_active:
try:
# Collect training data for all symbols
for symbol in self.symbols:
training_sample = self._collect_training_sample(symbol)
if training_sample:
binance_symbol = symbol.replace('/', '').upper()
self.training_data_cache[binance_symbol].append(training_sample)
# Distribute to training data subscribers
self._distribute_training_data(symbol, training_sample)
# Sleep for 5 seconds between collections
time.sleep(5)
except Exception as e:
logger.error(f"Error in training data collection worker: {e}")
time.sleep(10) # Wait longer on error
def _collect_training_sample(self, symbol: str) -> Optional[dict]:
"""Collect a training sample for a specific symbol"""
try:
# Get recent market data
recent_data = self.get_historical_data(symbol, '1m', limit=100)
if recent_data is None or len(recent_data) < 50:
return None
# Get recent ticks
recent_ticks = self.get_recent_ticks(symbol, count=100)
if len(recent_ticks) < 10:
return None
# Get COB data
binance_symbol = symbol.replace('/', '').upper()
            recent_cob = list(self.cob_data_cache.get(binance_symbol, []))[-10:]  # .get already handles missing symbols
# Create training sample
training_sample = {
'symbol': symbol,
'timestamp': datetime.now(),
'ohlcv_data': recent_data.tail(50).to_dict('records'),
'tick_data': [
{
'price': tick.price,
'volume': tick.volume,
'timestamp': tick.timestamp
} for tick in recent_ticks[-50:]
],
'cob_data': recent_cob,
'features': self._extract_training_features(symbol, recent_data, recent_ticks, recent_cob)
}
return training_sample
except Exception as e:
logger.error(f"Error collecting training sample for {symbol}: {e}")
return None
def _extract_training_features(self, symbol: str, ohlcv_data: pd.DataFrame,
recent_ticks: List[MarketTick], recent_cob: List[dict]) -> dict:
"""Extract features for training from various data sources"""
try:
features = {}
# OHLCV features
if len(ohlcv_data) > 0:
latest = ohlcv_data.iloc[-1]
features.update({
'price': latest['close'],
'volume': latest['volume'],
'price_change_1m': (latest['close'] - ohlcv_data.iloc[-2]['close']) / ohlcv_data.iloc[-2]['close'] if len(ohlcv_data) > 1 else 0,
'volume_ratio': latest['volume'] / ohlcv_data['volume'].mean() if len(ohlcv_data) > 1 else 1,
'volatility': ohlcv_data['close'].pct_change().std() if len(ohlcv_data) > 1 else 0
})
# Tick features
if recent_ticks:
tick_prices = [tick.price for tick in recent_ticks]
tick_volumes = [tick.volume for tick in recent_ticks]
features.update({
'tick_price_std': np.std(tick_prices) if len(tick_prices) > 1 else 0,
'tick_volume_mean': np.mean(tick_volumes),
'tick_count': len(recent_ticks)
})
# COB features
if recent_cob:
latest_cob = recent_cob[-1]
if 'stats' in latest_cob:
stats = latest_cob['stats']
features.update({
'spread_bps': stats.get('spread_bps', 0),
'imbalance': stats.get('imbalance', 0),
'liquidity': stats.get('total_liquidity', 0),
'cob_levels': stats.get('levels', 0)
})
return features
except Exception as e:
logger.error(f"Error extracting training features for {symbol}: {e}")
return {}
# ===== SUBSCRIPTION METHODS FOR MODELS AND DASHBOARD =====
def subscribe_to_cob_data(self, callback: Callable[[str, dict], None]) -> str:
"""Subscribe to COB data updates"""
subscriber_id = str(uuid.uuid4())
self.cob_data_callbacks.append(callback)
logger.info(f"COB data subscriber added: {subscriber_id}")
return subscriber_id
def subscribe_to_training_data(self, callback: Callable[[str, dict], None]) -> str:
"""Subscribe to training data updates"""
subscriber_id = str(uuid.uuid4())
self.training_data_callbacks.append(callback)
logger.info(f"Training data subscriber added: {subscriber_id}")
return subscriber_id
def subscribe_to_model_predictions(self, callback: Callable[[str, dict], None]) -> str:
"""Subscribe to model prediction updates"""
subscriber_id = str(uuid.uuid4())
self.model_prediction_callbacks.append(callback)
logger.info(f"Model prediction subscriber added: {subscriber_id}")
return subscriber_id
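    # Hedged usage sketch for the subscribe_* API above (the names 'provider' and
    # 'on_cob' are illustrative, not part of this class). Note that the returned ids
    # are informational only here: callbacks live in plain lists, so there is no
    # unsubscribe path yet.
    #
    #   def on_cob(symbol: str, snapshot: dict) -> None:
    #       imb = snapshot['stats']['imbalance_5s']
    #       ...  # feed the model's feature pipeline
    #
    #   cob_id = provider.subscribe_to_cob_data(on_cob)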
def _distribute_cob_data(self, symbol: str, cob_snapshot: dict):
"""Distribute COB data to all subscribers"""
for callback in self.cob_data_callbacks:
try:
                # Pass the callback and args explicitly: a loop-level lambda would
                # late-bind `callback`, so every spawned thread could end up calling
                # the last subscriber in the list.
                Thread(target=callback, args=(symbol, cob_snapshot), daemon=True).start()
except Exception as e:
logger.error(f"Error distributing COB data: {e}")
def _distribute_training_data(self, symbol: str, training_sample: dict):
"""Distribute training data to all subscribers"""
for callback in self.training_data_callbacks:
try:
                Thread(target=callback, args=(symbol, training_sample), daemon=True).start()  # args=... avoids the late-binding lambda pitfall
except Exception as e:
logger.error(f"Error distributing training data: {e}")
def _distribute_model_predictions(self, symbol: str, prediction: dict):
"""Distribute model predictions to all subscribers"""
for callback in self.model_prediction_callbacks:
try:
                Thread(target=callback, args=(symbol, prediction), daemon=True).start()  # args=... avoids the late-binding lambda pitfall
except Exception as e:
logger.error(f"Error distributing model prediction: {e}")
# ===== DATA ACCESS METHODS FOR MODELS AND DASHBOARD =====
def get_cob_data(self, symbol: str, count: int = 50) -> List[dict]:
"""Get recent COB data for a symbol"""
binance_symbol = symbol.replace('/', '').upper()
if binance_symbol in self.cob_data_cache:
return list(self.cob_data_cache[binance_symbol])[-count:]
return []
def get_training_data(self, symbol: str, count: int = 100) -> List[dict]:
"""Get recent training data for a symbol"""
binance_symbol = symbol.replace('/', '').upper()
if binance_symbol in self.training_data_cache:
return list(self.training_data_cache[binance_symbol])[-count:]
return []
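    # Pull-based alternative to the callbacks (sketch; window sizes are arbitrary):
    #
    #   cob_window = provider.get_cob_data('ETH/USDT', count=60)    # ~last minute of snapshots
    #   batch = provider.get_training_data('ETH/USDT', count=32)    # recent training samples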
def collect_cob_data(self, symbol: str) -> dict:
"""
Collect Consolidated Order Book (COB) data for a symbol using REST API
This centralized method collects COB data for all consumers (models, dashboard, etc.)
"""
try:
import requests
import time
# Use Binance REST API for order book data
            binance_symbol = symbol.replace('/', '').upper()  # normalize like the rest of the class
url = f"https://api.binance.com/api/v3/depth?symbol={binance_symbol}&limit=500"
response = requests.get(url, timeout=5)
if response.status_code == 200:
data = response.json()
# Process order book data
bids = [[float(price), float(qty)] for price, qty in data.get('bids', [])]
asks = [[float(price), float(qty)] for price, qty in data.get('asks', [])]
# Calculate mid price
best_bid = bids[0][0] if bids else 0
best_ask = asks[0][0] if asks else 0
mid_price = (best_bid + best_ask) / 2 if best_bid and best_ask else 0
# Calculate order book stats
bid_liquidity = sum(qty for _, qty in bids[:20])
ask_liquidity = sum(qty for _, qty in asks[:20])
total_liquidity = bid_liquidity + ask_liquidity
# Calculate imbalance
imbalance = (bid_liquidity - ask_liquidity) / total_liquidity if total_liquidity > 0 else 0
# Calculate spread in basis points
spread = (best_ask - best_bid) / mid_price * 10000 if mid_price > 0 else 0
# Create COB snapshot
cob_snapshot = {
'symbol': symbol,
                    'timestamp': datetime.now(),  # datetime, not epoch ms, so snapshots mix safely with _process_order_book_data entries in the shared cache
'bids': bids[:50], # Limit to top 50 levels
'asks': asks[:50], # Limit to top 50 levels
'stats': {
'mid_price': mid_price,
'best_bid': best_bid,
'best_ask': best_ask,
'bid_liquidity': bid_liquidity,
'ask_liquidity': ask_liquidity,
'total_liquidity': total_liquidity,
'imbalance': imbalance,
'spread_bps': spread
}
}
                # Store under the Binance-formatted key so the readers
                # (get_latest_cob_data / get_cob_data) can find it
                with self.subscriber_lock:
                    if not hasattr(self, 'cob_data_cache'):
                        self.cob_data_cache = {}
                    if binance_symbol not in self.cob_data_cache:
                        # deque(maxlen=300) keeps ~5 minutes of 1s data and evicts automatically
                        self.cob_data_cache[binance_symbol] = deque(maxlen=300)
                    self.cob_data_cache[binance_symbol].append(cob_snapshot)
# Notify subscribers
self._notify_cob_subscribers(symbol, cob_snapshot)
return cob_snapshot
else:
logger.warning(f"Failed to fetch COB data for {symbol}: {response.status_code}")
return {}
except Exception as e:
logger.debug(f"Error collecting COB data for {symbol}: {e}")
return {}
def start_cob_collection(self):
"""
Start COB data collection in background thread
"""
try:
import threading
import time
def cob_collector():
"""Collect COB data using REST API calls"""
logger.info("Starting centralized COB data collection")
while True:
try:
                    # Collect data for every configured symbol rather than a hardcoded pair
                    for symbol in self.symbols:
                        self.collect_cob_data(symbol)
# Sleep for 1 second between collections
time.sleep(1)
except Exception as e:
logger.debug(f"Error in COB collection: {e}")
time.sleep(5) # Wait longer on error
# Start collector in background thread
if not hasattr(self, '_cob_thread_started') or not self._cob_thread_started:
cob_thread = threading.Thread(target=cob_collector, daemon=True)
cob_thread.start()
self._cob_thread_started = True
logger.info("Centralized COB data collection started")
except Exception as e:
logger.error(f"Error starting COB collection: {e}")
def _notify_cob_subscribers(self, symbol: str, cob_snapshot: dict):
"""Notify subscribers of new COB data"""
with self.subscriber_lock:
if not hasattr(self, 'cob_subscribers'):
self.cob_subscribers = {}
# Notify all subscribers for this symbol
for subscriber_id, callback in self.cob_subscribers.items():
try:
callback(symbol, cob_snapshot)
except Exception as e:
logger.debug(f"Error notifying COB subscriber {subscriber_id}: {e}")
def subscribe_to_cob(self, callback) -> str:
"""Subscribe to COB data updates"""
with self.subscriber_lock:
if not hasattr(self, 'cob_subscribers'):
self.cob_subscribers = {}
subscriber_id = str(uuid.uuid4())
self.cob_subscribers[subscriber_id] = callback
# Start collection if not already started
self.start_cob_collection()
return subscriber_id
def get_latest_cob_data(self, symbol: str) -> dict:
"""Get latest COB data for a symbol"""
with self.subscriber_lock:
# Convert symbol to Binance format for cache lookup
binance_symbol = symbol.replace('/', '').upper()
logger.debug(f"Getting COB data for {symbol} (binance: {binance_symbol})")
if not hasattr(self, 'cob_data_cache'):
logger.debug("COB data cache not initialized")
return {}
if binance_symbol not in self.cob_data_cache:
logger.debug(f"Symbol {binance_symbol} not in COB cache. Available: {list(self.cob_data_cache.keys())}")
return {}
if not self.cob_data_cache[binance_symbol]:
logger.debug(f"COB cache for {binance_symbol} is empty")
return {}
latest_data = self.cob_data_cache[binance_symbol][-1]
logger.debug(f"Latest COB data type for {binance_symbol}: {type(latest_data)}")
return latest_data
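    # NOTE: the get_cob_data below shadows the earlier definition of the same name
    # higher up (in a class body the later def wins), so this locked variant is the
    # one actually bound on the class.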
def get_cob_data(self, symbol: str, count: int = 50) -> List[dict]:
"""Get recent COB data for a symbol"""
with self.subscriber_lock:
# Convert symbol to Binance format for cache lookup
binance_symbol = symbol.replace('/', '').upper()
if not hasattr(self, 'cob_data_cache') or binance_symbol not in self.cob_data_cache:
return []
# Return the most recent 'count' snapshots
return list(self.cob_data_cache[binance_symbol])[-count:]
def get_data_summary(self) -> dict:
"""Get summary of all collected data"""
summary = {
'symbols': self.symbols,
'subscribers': {
'tick_subscribers': len(self.subscribers),
'cob_subscribers': len(self.cob_data_callbacks),
'training_subscribers': len(self.training_data_callbacks),
'prediction_subscribers': len(self.model_prediction_callbacks)
},
'data_counts': {},
'collection_status': {
'cob_collection': self.cob_collection_active,
'training_collection': self.training_data_collection_active,
'streaming': self.is_streaming
}
}
# Add data counts for each symbol
for symbol in self.symbols:
binance_symbol = symbol.replace('/', '').upper()
summary['data_counts'][symbol] = {
'ticks': len(self.tick_buffers.get(binance_symbol, [])),
'cob_snapshots': len(self.cob_data_cache.get(binance_symbol, [])),
'training_samples': len(self.training_data_cache.get(binance_symbol, []))
}
return summary
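# Hedged end-to-end sketch (module level, after the class). Assumes DataProvider()
# is constructible with defaults; adjust construction to the project's real config.
if __name__ == "__main__":
    import time
    from pprint import pprint

    provider = DataProvider()
    provider.start_centralized_data_collection()
    time.sleep(10)  # let the REST pollers warm the caches
    pprint(provider.get_data_summary())
    provider.stop_centralized_data_collection()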