try fixing COB MA and COB data quality

This commit is contained in:
Dobromir Popov
2025-08-09 23:03:45 +03:00
parent 87193f3d6f
commit 31a41785d6
5 changed files with 174 additions and 95 deletions

View File

@@ -1059,20 +1059,43 @@ class DataProvider:
return df
def get_historical_data(self, symbol: str, timeframe: str, limit: int = 1000, refresh: bool = False) -> Optional[pd.DataFrame]:
    """Get historical OHLCV data.

    - Prefer cached data for low latency.
    - If cache is empty or refresh=True, fetch real data from exchanges.
    - Never generate synthetic data.

    Args:
        symbol: Trading pair, e.g. "ETH/USDT".
        timeframe: Candle timeframe key, e.g. "1m".
        limit: Maximum number of most-recent candles to return.
        refresh: When True, bypass the cache and fetch fresh data.

    Returns:
        A DataFrame of up to `limit` candles, or None when no real data
        is available (errors are logged, never raised to the caller).
    """
    try:
        # Serve from cache when available and a refresh was not requested
        if symbol in self.cached_data and timeframe in self.cached_data[symbol]:
            cached_df = self.cached_data[symbol][timeframe]
            if not cached_df.empty and not refresh:
                return cached_df.tail(limit)

        # Cache empty or refresh requested: fetch real data now.
        # Binance is the primary source; MEXC is the fallback.
        df = self._fetch_from_binance(symbol, timeframe, limit)
        if df is None or df.empty:
            df = self._fetch_from_mexc(symbol, timeframe, limit)

        if df is not None and not df.empty:
            df = self._ensure_datetime_index(df)

            # Store/merge into cache (cache is capped at 1500 candles per series)
            if symbol not in self.cached_data:
                self.cached_data[symbol] = {}
            if timeframe not in self.cached_data[symbol] or self.cached_data[symbol][timeframe].empty:
                self.cached_data[symbol][timeframe] = df.tail(1500)
            else:
                # Merge on the datetime index, keeping the newest row for
                # any duplicated timestamp, and keep the index sorted.
                combined_df = pd.concat([self.cached_data[symbol][timeframe], df], ignore_index=False)
                combined_df = combined_df[~combined_df.index.duplicated(keep='last')]
                combined_df = combined_df.sort_index()
                self.cached_data[symbol][timeframe] = combined_df.tail(1500)

            logger.info(f"Cached {len(self.cached_data[symbol][timeframe])} candles for {symbol} {timeframe}")
            return self.cached_data[symbol][timeframe].tail(limit)

        logger.warning(f"No real data available for {symbol} {timeframe} at request time")
        return None
    except Exception as e:
        logger.error(f"Error getting historical data for {symbol} {timeframe}: {e}")
        return None

View File

@@ -271,41 +271,70 @@ class StandardizedDataProvider(DataProvider):
with self.ma_calculation_lock:
# Add current imbalance data to history
self.cob_imbalance_history[symbol].append((timestamp, bid_ask_imbalance))
# Calculate MAs for different timeframes
ma_results = {'1s': {}, '5s': {}, '15s': {}, '60s': {}}
# Get current price for ±5 bucket calculation
current_price = self.current_prices.get(symbol.replace('/', '').upper(), 0.0)
if current_price <= 0:
return ma_results
bucket_size = 1.0 if 'ETH' in symbol else 10.0
# Helper: quantize any floating price to the nearest COB bucket center used in snapshots
def quantize_to_bucket(p: float) -> float:
try:
# Align bucket to integer multiples of bucket_size around the rounded current price
base = round(current_price / bucket_size) * bucket_size
steps = round((p - base) / bucket_size)
return base + steps * bucket_size
except Exception:
return p
# Calculate MAs for ±5 buckets around current price
for i in range(-5, 6):
price = current_price + (i * bucket_size)
raw_price = current_price + (i * bucket_size)
price = quantize_to_bucket(raw_price)
if price <= 0:
continue
# Get historical imbalance data for this price bucket
history = self.cob_imbalance_history[symbol]
# Calculate different MA periods
for period, period_name in [(1, '1s'), (5, '5s'), (15, '15s'), (60, '60s')]:
recent_data = []
cutoff_time = timestamp - timedelta(seconds=period)
for hist_timestamp, hist_imbalance in history:
if hist_timestamp >= cutoff_time and price in hist_imbalance:
if hist_timestamp < cutoff_time:
continue
# Attempt exact price key match; if not found, match nearest bucket key
if price in hist_imbalance:
recent_data.append(hist_imbalance[price])
else:
# Find nearest key within half a bucket
try:
nearest_key = None
min_diff = bucket_size / 2.0
for k in hist_imbalance.keys():
diff = abs(float(k) - price)
if diff <= min_diff:
min_diff = diff
nearest_key = k
if nearest_key is not None:
recent_data.append(hist_imbalance[nearest_key])
except Exception:
pass
# Calculate moving average
if recent_data:
ma_results[period_name][price] = sum(recent_data) / len(recent_data)
ma_results[period_name][price] = float(sum(recent_data) / len(recent_data))
else:
# Respect rule: no synthetic metadata; use 0.0 for unavailable
ma_results[period_name][price] = 0.0
return ma_results
except Exception as e: