277 lines
11 KiB
Python
277 lines
11 KiB
Python
"""
|
|
Simplified Data Integration for Orchestrator
|
|
|
|
Replaces complex FIFO queues with simple cache-based data access.
|
|
Integrates with SmartDataUpdater for efficient data management.
|
|
"""
|
|
|
|
import logging
|
|
from datetime import datetime
|
|
from typing import Dict, List, Optional, Any
|
|
import pandas as pd
|
|
|
|
from .data_cache import get_data_cache
|
|
from .smart_data_updater import SmartDataUpdater
|
|
from .data_models import BaseDataInput, OHLCVBar
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
class SimplifiedDataIntegration:
    """
    Simplified data integration that replaces FIFO queues with efficient caching.

    Holds the shared data cache (``get_data_cache()``) and a
    ``SmartDataUpdater``. Incoming ticks are forwarded to the updater, and
    ``build_base_data_input`` assembles a ``BaseDataInput`` from what the
    cache currently returns.
    """

    # Timeframes checked before a BaseDataInput is built.
    _REQUIRED_TIMEFRAMES = ('1s', '1m', '1h', '1d')
    # Passed as ``max_age_seconds`` to ``cache.has_data``.
    _MAX_DATA_AGE_SECONDS = 300
    # Length of every OHLCV series handed to BaseDataInput.
    _BARS_PER_TIMEFRAME = 300
    # Passed as ``min_bars`` to ``cache.has_historical_data``.
    _MIN_HISTORICAL_BARS = 50
    # Symbol whose 1s series is used as the BTC reference.
    _BTC_SYMBOL = 'BTC/USDT'
    # Model types whose cached predictions are collected.
    _PREDICTION_MODEL_TYPES = ('cnn', 'rl', 'extrema')

    def __init__(self, data_provider, symbols: List[str]):
        """
        Wire up the cache, the data updater and the tick callbacks.

        Args:
            data_provider: Source of market data; also handed to SmartDataUpdater.
            symbols: Trading symbols to track.
        """
        self.data_provider = data_provider
        self.symbols = symbols
        self.cache = get_data_cache()

        # Initialize smart data updater
        self.data_updater = SmartDataUpdater(data_provider, symbols)

        # Register for tick data if available
        self._setup_tick_integration()

        logger.info(f"SimplifiedDataIntegration initialized for {symbols}")

    def start(self):
        """Start the data integration system"""
        self.data_updater.start()
        logger.info("SimplifiedDataIntegration started")

    def stop(self):
        """Stop the data integration system"""
        self.data_updater.stop()
        logger.info("SimplifiedDataIntegration stopped")

    def _setup_tick_integration(self):
        """
        Setup integration with tick data sources.

        Registers callbacks only for the hooks the provider actually exposes.
        Registration is best-effort: a failure is logged as a warning and
        does not propagate.
        """
        try:
            # Register callbacks for tick data if available
            if hasattr(self.data_provider, 'register_tick_callback'):
                self.data_provider.register_tick_callback(self._on_tick_data)

            # Register for WebSocket data if available
            if hasattr(self.data_provider, 'register_websocket_callback'):
                self.data_provider.register_websocket_callback(self._on_websocket_data)

        except Exception as e:
            logger.warning(f"Tick integration setup failed: {e}")

    def _on_tick_data(self, symbol: str, price: float, volume: float,
                      timestamp: Optional[datetime] = None):
        """Handle incoming tick data by forwarding it to the data updater."""
        self.data_updater.add_tick(symbol, price, volume, timestamp)

    def _on_websocket_data(self, symbol: str, data: Dict[str, Any]):
        """
        Handle WebSocket data updates.

        Messages that lack either 'price' or 'volume' are ignored; errors
        raised while forwarding are logged and swallowed.
        """
        try:
            # Extract price and volume from WebSocket data
            if 'price' in data and 'volume' in data:
                self.data_updater.add_tick(symbol, data['price'], data['volume'])
        except Exception as e:
            logger.error(f"Error processing WebSocket data: {e}")

    def build_base_data_input(self, symbol: str) -> Optional["BaseDataInput"]:
        """
        Build BaseDataInput from cached data (much simpler than FIFO queues)

        Args:
            symbol: Trading symbol

        Returns:
            BaseDataInput with consistent data structure, or None when a
            required timeframe has neither fresh nor historical data, when
            validation fails, or when an error occurs.
        """
        try:
            # Check if we have minimum required data
            missing_timeframes = [
                timeframe
                for timeframe in self._REQUIRED_TIMEFRAMES
                if not self.cache.has_data(
                    f'ohlcv_{timeframe}', symbol,
                    max_age_seconds=self._MAX_DATA_AGE_SECONDS,
                )
            ]

            if missing_timeframes:
                logger.warning(f"Missing data for {symbol}: {missing_timeframes}")

                # Try to use historical data as fallback
                if not self._try_historical_fallback(symbol, missing_timeframes):
                    return None

            # Get current OHLCV data
            bar_count = self._BARS_PER_TIMEFRAME
            ohlcv_1s_list = self._get_ohlcv_data_list(symbol, '1s', bar_count)
            ohlcv_1m_list = self._get_ohlcv_data_list(symbol, '1m', bar_count)
            ohlcv_1h_list = self._get_ohlcv_data_list(symbol, '1h', bar_count)
            ohlcv_1d_list = self._get_ohlcv_data_list(symbol, '1d', bar_count)

            # Get BTC reference data
            btc_ohlcv_1s_list = self._get_ohlcv_data_list(self._BTC_SYMBOL, '1s', bar_count)
            if not btc_ohlcv_1s_list:
                # No BTC series could be produced: reuse the requested
                # symbol's own 1s series as the reference.
                btc_ohlcv_1s_list = ohlcv_1s_list
                logger.debug(f"Using {symbol} data as BTC fallback")

            # Get technical indicators
            technical_indicators = self.cache.get('technical_indicators', symbol) or {}

            # Get COB data if available
            cob_data = self.cache.get('cob_data', symbol)

            # Get recent model predictions
            last_predictions = self._get_recent_predictions(symbol)

            # Build BaseDataInput
            base_data = BaseDataInput(
                symbol=symbol,
                timestamp=datetime.now(),
                ohlcv_1s=ohlcv_1s_list,
                ohlcv_1m=ohlcv_1m_list,
                ohlcv_1h=ohlcv_1h_list,
                ohlcv_1d=ohlcv_1d_list,
                btc_ohlcv_1s=btc_ohlcv_1s_list,
                technical_indicators=technical_indicators,
                cob_data=cob_data,
                last_predictions=last_predictions,
            )

            # Validate the data
            if not base_data.validate():
                logger.warning(f"BaseDataInput validation failed for {symbol}")
                return None

            return base_data

        except Exception as e:
            logger.error(f"Error building BaseDataInput for {symbol}: {e}")
            return None

    @staticmethod
    def _bar_from_row(symbol: str, timeframe: str, index_value: Any, row: Any) -> "OHLCVBar":
        """Build an OHLCVBar from one row of a historical OHLCV DataFrame."""
        # Index labels that lack ``to_pydatetime`` are replaced by the
        # current time instead of being passed through.
        timestamp = index_value if hasattr(index_value, 'to_pydatetime') else datetime.now()
        return OHLCVBar(
            symbol=symbol,
            timestamp=timestamp,
            open=float(row['open']),
            high=float(row['high']),
            low=float(row['low']),
            close=float(row['close']),
            volume=float(row['volume']),
            timeframe=timeframe,
        )

    def _get_ohlcv_data_list(self, symbol: str, timeframe: str, max_count: int) -> List["OHLCVBar"]:
        """
        Get OHLCV data list from cache and historical data.

        The result is historical bars followed by the current cached bar,
        padded at the end with flat zero-volume bars up to ``max_count``.

        Args:
            symbol: Trading symbol
            timeframe: Timeframe key such as '1s' or '1m'
            max_count: Number of bars to return

        Returns:
            The last ``max_count`` bars, or an empty list if an error occurs.
        """
        try:
            data_list = []

            # Get historical data first
            historical_df = self.cache.get_historical_data(symbol, timeframe)
            if historical_df is not None and not historical_df.empty:
                # Take one bar fewer than requested: the last slot is for the
                # current bar appended below.
                for idx, row in historical_df.tail(max_count - 1).iterrows():
                    data_list.append(self._bar_from_row(symbol, timeframe, idx, row))

            # Add current data from cache
            current_ohlcv = self.cache.get(f'ohlcv_{timeframe}', symbol)
            if current_ohlcv and isinstance(current_ohlcv, OHLCVBar):
                data_list.append(current_ohlcv)

            # Ensure we have the right amount of data (pad if necessary)
            shortfall = max_count - len(data_list)
            if shortfall > 0:
                if data_list:
                    # Repeat the last known close as a flat, zero-volume bar.
                    pad_timestamp = data_list[-1].timestamp
                    pad_price = data_list[-1].close
                else:
                    # Nothing available at all: completely dummy data.
                    pad_timestamp = datetime.now()
                    pad_price = 0.0
                data_list.extend(
                    OHLCVBar(
                        symbol=symbol,
                        timestamp=pad_timestamp,
                        open=pad_price,
                        high=pad_price,
                        low=pad_price,
                        close=pad_price,
                        volume=0.0,
                        timeframe=timeframe,
                    )
                    for _ in range(shortfall)
                )

            return data_list[-max_count:]  # Return last max_count items

        except Exception as e:
            logger.error(f"Error getting OHLCV data list for {symbol} {timeframe}: {e}")
            return []

    def _try_historical_fallback(self, symbol: str, missing_timeframes: List[str]) -> bool:
        """
        Try to use historical data for missing timeframes.

        For each missing timeframe that has historical data, the latest
        historical row is written to the cache as the current bar.

        Args:
            symbol: Trading symbol
            missing_timeframes: Timeframes without fresh cached data

        Returns:
            True only if every missing timeframe was filled from history;
            False if any timeframe has no historical data or an error occurs.
        """
        try:
            all_filled = True
            for timeframe in missing_timeframes:
                historical_df = self.cache.get_historical_data(symbol, timeframe)
                if historical_df is None or historical_df.empty:
                    # Keep going so the remaining timeframes are still
                    # refreshed, but report the overall fallback as failed.
                    all_filled = False
                    continue

                # Use latest historical data as current data
                ohlcv_bar = self._bar_from_row(
                    symbol, timeframe, historical_df.index[-1], historical_df.iloc[-1]
                )
                self.cache.update(f'ohlcv_{timeframe}', symbol, ohlcv_bar, 'historical_fallback')
                logger.info(f"Used historical fallback for {symbol} {timeframe}")

            return all_filled

        except Exception as e:
            logger.error(f"Error in historical fallback: {e}")
            return False

    def _get_recent_predictions(self, symbol: str) -> Dict[str, Any]:
        """
        Get recent model predictions.

        Returns:
            Mapping of model type to its cached prediction; model types with
            no (or falsy) cached value are omitted. Empty dict on error.
        """
        try:
            predictions = {}

            # Get predictions from cache
            for model_type in self._PREDICTION_MODEL_TYPES:
                prediction_data = self.cache.get(f'prediction_{model_type}', symbol)
                if prediction_data:
                    predictions[model_type] = prediction_data

            return predictions

        except Exception as e:
            logger.error(f"Error getting recent predictions for {symbol}: {e}")
            return {}

    def update_model_prediction(self, model_name: str, symbol: str, prediction_data: Any):
        """Update model prediction in cache under the key 'prediction_<model_name>'."""
        self.cache.update(f'prediction_{model_name}', symbol, prediction_data, model_name)

    def get_current_price(self, symbol: str) -> Optional[float]:
        """Get current price for a symbol, as reported by the data updater."""
        return self.data_updater.get_current_price(symbol)

    def get_cache_status(self) -> Dict[str, Any]:
        """Get cache status for monitoring (cache and updater status combined)."""
        return {
            'cache_status': self.cache.get_status(),
            'updater_status': self.data_updater.get_status(),
        }

    def has_sufficient_data(self, symbol: str) -> bool:
        """
        Check if we have sufficient data for model predictions.

        Every required timeframe must have either fresh cached data or
        enough historical bars.
        """
        for timeframe in self._REQUIRED_TIMEFRAMES:
            if self.cache.has_data(
                f'ohlcv_{timeframe}', symbol,
                max_age_seconds=self._MAX_DATA_AGE_SECONDS,
            ):
                continue

            # Check historical data as fallback
            if not self.cache.has_historical_data(
                symbol, timeframe, min_bars=self._MIN_HISTORICAL_BARS
            ):
                return False

        return True