cleanup and removed dummy data
This commit is contained in:
@ -224,6 +224,12 @@ class DataProvider:
|
||||
self.cob_data_cache[binance_symbol] = deque(maxlen=300) # 5 minutes of COB data
|
||||
self.training_data_cache[binance_symbol] = deque(maxlen=1000) # Training data buffer
|
||||
|
||||
# Pre-built OHLCV cache for instant BaseDataInput building (optimization from SimplifiedDataIntegration)
|
||||
self._ohlcv_cache = {} # {symbol: {timeframe: List[OHLCVBar]}}
|
||||
self._ohlcv_cache_lock = Lock()
|
||||
self._last_cache_update = {} # {symbol: {timeframe: datetime}}
|
||||
self._cache_refresh_interval = 5 # seconds
|
||||
|
||||
# Data collection threads
|
||||
self.data_collection_active = False
|
||||
|
||||
@ -1387,6 +1393,175 @@ class DataProvider:
|
||||
logger.error(f"Error applying pivot normalization for {symbol}: {e}")
|
||||
return df
|
||||
|
||||
def build_base_data_input(self, symbol: str) -> Optional['BaseDataInput']:
    """
    Assemble a BaseDataInput snapshot for a symbol from cached data.

    OHLCV bars for the 1s/1m/1h/1d timeframes (at most 300 each) are read
    through ``_get_cached_ohlcv_bars``; BTC/USDT 1s bars are attached as
    reference data. The collected data is passed on without validation.

    Args:
        symbol: Trading symbol

    Returns:
        The populated BaseDataInput, or None if any step raised (the error
        is logged).
    """
    try:
        from .data_models import BaseDataInput

        bar_limit = 300

        # Looked up in the same order the fields are filled below.
        bars_by_timeframe = {
            tf: self._get_cached_ohlcv_bars(symbol, tf, bar_limit)
            for tf in ('1s', '1m', '1h', '1d')
        }

        # BTC reference series. When nothing is available for BTC, the
        # requested symbol's own 1s bars are reused so the field is filled.
        btc_reference_bars = self._get_cached_ohlcv_bars('BTC/USDT', '1s', bar_limit)
        if not btc_reference_bars:
            btc_reference_bars = bars_by_timeframe['1s']

        indicators = self._get_latest_technical_indicators(symbol)
        cob_snapshot = self._get_latest_cob_data_object(symbol)

        return BaseDataInput(
            symbol=symbol,
            timestamp=datetime.now(),
            ohlcv_1s=bars_by_timeframe['1s'],
            ohlcv_1m=bars_by_timeframe['1m'],
            ohlcv_1h=bars_by_timeframe['1h'],
            ohlcv_1d=bars_by_timeframe['1d'],
            btc_ohlcv_1s=btc_reference_bars,
            technical_indicators=indicators,
            cob_data=cob_snapshot,
            last_predictions={},  # TODO: Implement model prediction caching
        )

    except Exception as e:
        logger.error(f"Error building BaseDataInput for {symbol}: {e}")
        return None
|
||||
|
||||
def _get_cached_ohlcv_bars(self, symbol: str, timeframe: str, max_count: int) -> List['OHLCVBar']:
    """Return up to ``max_count`` of the most recent OHLCV bars from the cache.

    A cache entry is reused while it is younger than
    ``self._cache_refresh_interval`` seconds; otherwise it is rebuilt via
    ``_build_ohlcv_bar_cache`` and stored again.

    Args:
        symbol: Trading symbol.
        timeframe: Timeframe label used as part of the cache key.
        max_count: Maximum number of bars to return.

    Returns:
        A new list holding at most ``max_count`` trailing bars. The list is
        always a copy, so callers may mutate it without touching the cache.
        Empty for a non-positive ``max_count`` or when an error occurred
        (the error is logged).
    """
    try:
        if max_count <= 0:
            # bars[-0:] is the *whole* list, so a zero request must be
            # answered explicitly instead of by slicing.
            return []

        with self._ohlcv_cache_lock:
            cache_key = f"{symbol}_{timeframe}"

            last_update = self._last_cache_update.get(cache_key)
            bars = self._ohlcv_cache.get(cache_key)
            is_fresh = (
                last_update is not None
                and bars is not None
                and (datetime.now() - last_update).total_seconds() < self._cache_refresh_interval
            )

            if not is_fresh:
                # NOTE(review): the rebuild runs while the lock is held, so a
                # slow history fetch blocks other readers and invalidation.
                # NOTE(review): the key ignores max_count; an entry built for
                # a small request is served to larger requests until it
                # expires or is invalidated.
                bars = self._build_ohlcv_bar_cache(symbol, timeframe, max_count)
                self._ohlcv_cache[cache_key] = bars
                self._last_cache_update[cache_key] = datetime.now()

            # Slicing always yields a new list, also when fewer than
            # max_count bars exist, so the cached list is never handed out.
            return bars[-max_count:]

    except Exception as e:
        logger.error(f"Error getting cached OHLCV bars for {symbol}/{timeframe}: {e}")
        return []
|
||||
|
||||
def _build_ohlcv_bar_cache(self, symbol: str, timeframe: str, max_count: int) -> List['OHLCVBar']:
    """Convert the most recent historical rows into OHLCVBar objects.

    Only ``get_historical_data`` is consulted here; the last ``max_count``
    rows of its result become one bar each.

    Returns:
        The bars in frame order, or an empty list when no history is
        available or an error occurred (the error is logged).
    """
    try:
        from .data_models import OHLCVBar

        history = self.get_historical_data(symbol, timeframe, limit=max_count)
        if history is None or history.empty:
            return []

        # NOTE(review): when the index label is not timestamp-like, every bar
        # is stamped with the current wall-clock time - confirm that this
        # fallback is intended.
        return [
            OHLCVBar(
                symbol=symbol,
                timestamp=label if hasattr(label, 'to_pydatetime') else datetime.now(),
                open=float(candle['open']),
                high=float(candle['high']),
                low=float(candle['low']),
                close=float(candle['close']),
                volume=float(candle['volume']),
                timeframe=timeframe,
            )
            for label, candle in history.tail(max_count).iterrows()
        ]

    except Exception as e:
        logger.error(f"Error building OHLCV bar cache for {symbol}/{timeframe}: {e}")
        return []
|
||||
|
||||
def _get_latest_technical_indicators(self, symbol: str) -> Dict[str, float]:
    """Return the most recent technical-indicator values for a symbol.

    Fetches up to 50 rows of 1h history, runs them through
    ``_add_technical_indicators`` and reads the last row. Every column other
    than the raw OHLCV/timestamp columns is reported as an indicator.

    Args:
        symbol: Trading symbol.

    Returns:
        Mapping of column name to float value. Missing values (NaN/NaT) are
        reported as 0.0; columns whose latest value cannot be converted to
        float are left out. Empty when no data is available or an error
        occurred (the error is logged).
    """
    raw_columns = ('open', 'high', 'low', 'close', 'volume', 'timestamp')
    try:
        df = self.get_historical_data(symbol, '1h', limit=50)
        if df is None or df.empty:
            return {}

        df_with_indicators = self._add_technical_indicators(df)
        if df_with_indicators is None or df_with_indicators.empty:
            return {}

        latest_row = df_with_indicators.iloc[-1]
        indicators = {}
        for col in df_with_indicators.columns:
            if col in raw_columns:
                continue
            value = latest_row[col]
            try:
                indicators[col] = float(value) if pd.notna(value) else 0.0
            except (TypeError, ValueError):
                # A non-numeric column (label, datetime, ...) must not wipe
                # out the indicators that did convert; skip just this one.
                continue
        return indicators

    except Exception as e:
        logger.error(f"Error getting technical indicators for {symbol}: {e}")
        return {}
|
||||
|
||||
def _get_latest_cob_data_object(self, symbol: str) -> Optional['COBData']:
    """Wrap the latest COB snapshot for a symbol in a COBData object.

    The snapshot comes from ``get_latest_cob_data``. It is only converted
    when it is non-empty and carries a 'current_price' entry; all other
    sections default to an empty dict when absent.

    Returns:
        A COBData stamped with the current time, or None when no usable
        snapshot exists or an error occurred (the error is logged).
    """
    try:
        from .data_models import COBData

        snapshot = self.get_latest_cob_data(symbol)
        if not snapshot or 'current_price' not in snapshot:
            return None

        # Sections copied over by name, each falling back to {}.
        section_names = (
            'price_buckets',
            'bid_ask_imbalance',
            'volume_weighted_prices',
            'order_flow_metrics',
            'ma_1s_imbalance',
            'ma_5s_imbalance',
            'ma_15s_imbalance',
            'ma_60s_imbalance',
        )
        return COBData(
            symbol=symbol,
            timestamp=datetime.now(),
            current_price=snapshot['current_price'],
            # Bucket size is 1.0 for symbols containing 'ETH', 10.0 otherwise.
            bucket_size=1.0 if 'ETH' in symbol else 10.0,
            **{name: snapshot.get(name, {}) for name in section_names},
        )

    except Exception as e:
        logger.error(f"Error getting COB data object for {symbol}: {e}")
        return None
|
||||
|
||||
def invalidate_ohlcv_cache(self, symbol: str):
    """Drop every cached OHLCV entry of a symbol (all timeframes).

    Intended to be called when new data arrives. Keys are selected from
    ``_ohlcv_cache`` by the ``"<symbol>_"`` prefix; the matching
    ``_last_cache_update`` entries are removed along with them. Errors are
    logged and not re-raised.
    """
    try:
        with self._ohlcv_cache_lock:
            prefix = f"{symbol}_"
            # Collect first so the dict is not modified while iterating it.
            stale_keys = [cached_key for cached_key in self._ohlcv_cache if cached_key.startswith(prefix)]
            for cached_key in stale_keys:
                self._ohlcv_cache.pop(cached_key, None)
                self._last_cache_update.pop(cached_key, None)
    except Exception as e:
        logger.error(f"Error invalidating OHLCV cache for {symbol}: {e}")
|
||||
|
||||
def _add_basic_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Add basic indicators for small datasets"""
|
||||
try:
|
||||
|
Reference in New Issue
Block a user