Files
gogo2/core/simplified_data_integration.py
2025-07-26 22:17:29 +03:00

277 lines
11 KiB
Python

"""
Simplified Data Integration for Orchestrator
Replaces complex FIFO queues with simple cache-based data access.
Integrates with SmartDataUpdater for efficient data management.
"""
import logging
from datetime import datetime
from typing import Dict, List, Optional, Any
import pandas as pd
from .data_cache import get_data_cache
from .smart_data_updater import SmartDataUpdater
from .data_models import BaseDataInput, OHLCVBar
logger = logging.getLogger(__name__)
class SimplifiedDataIntegration:
"""
Simplified data integration that replaces FIFO queues with efficient caching
"""
def __init__(self, data_provider, symbols: List[str]):
self.data_provider = data_provider
self.symbols = symbols
self.cache = get_data_cache()
# Initialize smart data updater
self.data_updater = SmartDataUpdater(data_provider, symbols)
# Register for tick data if available
self._setup_tick_integration()
logger.info(f"SimplifiedDataIntegration initialized for {symbols}")
def start(self):
"""Start the data integration system"""
self.data_updater.start()
logger.info("SimplifiedDataIntegration started")
def stop(self):
"""Stop the data integration system"""
self.data_updater.stop()
logger.info("SimplifiedDataIntegration stopped")
def _setup_tick_integration(self):
"""Setup integration with tick data sources"""
try:
# Register callbacks for tick data if available
if hasattr(self.data_provider, 'register_tick_callback'):
self.data_provider.register_tick_callback(self._on_tick_data)
# Register for WebSocket data if available
if hasattr(self.data_provider, 'register_websocket_callback'):
self.data_provider.register_websocket_callback(self._on_websocket_data)
except Exception as e:
logger.warning(f"Tick integration setup failed: {e}")
def _on_tick_data(self, symbol: str, price: float, volume: float, timestamp: datetime = None):
"""Handle incoming tick data"""
self.data_updater.add_tick(symbol, price, volume, timestamp)
def _on_websocket_data(self, symbol: str, data: Dict[str, Any]):
"""Handle WebSocket data updates"""
try:
# Extract price and volume from WebSocket data
if 'price' in data and 'volume' in data:
self.data_updater.add_tick(symbol, data['price'], data['volume'])
except Exception as e:
logger.error(f"Error processing WebSocket data: {e}")
def build_base_data_input(self, symbol: str) -> Optional[BaseDataInput]:
"""
Build BaseDataInput from cached data (much simpler than FIFO queues)
Args:
symbol: Trading symbol
Returns:
BaseDataInput with consistent data structure
"""
try:
# Check if we have minimum required data
required_timeframes = ['1s', '1m', '1h', '1d']
missing_timeframes = []
for timeframe in required_timeframes:
if not self.cache.has_data(f'ohlcv_{timeframe}', symbol, max_age_seconds=300):
missing_timeframes.append(timeframe)
if missing_timeframes:
logger.warning(f"Missing data for {symbol}: {missing_timeframes}")
# Try to use historical data as fallback
if not self._try_historical_fallback(symbol, missing_timeframes):
return None
# Get current OHLCV data
ohlcv_1s_list = self._get_ohlcv_data_list(symbol, '1s', 300)
ohlcv_1m_list = self._get_ohlcv_data_list(symbol, '1m', 300)
ohlcv_1h_list = self._get_ohlcv_data_list(symbol, '1h', 300)
ohlcv_1d_list = self._get_ohlcv_data_list(symbol, '1d', 300)
# Get BTC reference data
btc_symbol = 'BTC/USDT'
btc_ohlcv_1s_list = self._get_ohlcv_data_list(btc_symbol, '1s', 300)
if not btc_ohlcv_1s_list:
# Use ETH data as fallback
btc_ohlcv_1s_list = ohlcv_1s_list
logger.debug(f"Using {symbol} data as BTC fallback")
# Get technical indicators
technical_indicators = self.cache.get('technical_indicators', symbol) or {}
# Get COB data if available
cob_data = self.cache.get('cob_data', symbol)
# Get recent model predictions
last_predictions = self._get_recent_predictions(symbol)
# Build BaseDataInput
base_data = BaseDataInput(
symbol=symbol,
timestamp=datetime.now(),
ohlcv_1s=ohlcv_1s_list,
ohlcv_1m=ohlcv_1m_list,
ohlcv_1h=ohlcv_1h_list,
ohlcv_1d=ohlcv_1d_list,
btc_ohlcv_1s=btc_ohlcv_1s_list,
technical_indicators=technical_indicators,
cob_data=cob_data,
last_predictions=last_predictions
)
# Validate the data
if not base_data.validate():
logger.warning(f"BaseDataInput validation failed for {symbol}")
return None
return base_data
except Exception as e:
logger.error(f"Error building BaseDataInput for {symbol}: {e}")
return None
def _get_ohlcv_data_list(self, symbol: str, timeframe: str, max_count: int) -> List[OHLCVBar]:
"""Get OHLCV data list from cache and historical data"""
try:
data_list = []
# Get historical data first
historical_df = self.cache.get_historical_data(symbol, timeframe)
if historical_df is not None and not historical_df.empty:
# Convert historical data to OHLCVBar objects
for idx, row in historical_df.tail(max_count - 1).iterrows():
bar = OHLCVBar(
symbol=symbol,
timestamp=idx if hasattr(idx, 'to_pydatetime') else datetime.now(),
open=float(row['open']),
high=float(row['high']),
low=float(row['low']),
close=float(row['close']),
volume=float(row['volume']),
timeframe=timeframe
)
data_list.append(bar)
# Add current data from cache
current_ohlcv = self.cache.get(f'ohlcv_{timeframe}', symbol)
if current_ohlcv and isinstance(current_ohlcv, OHLCVBar):
data_list.append(current_ohlcv)
# Ensure we have the right amount of data (pad if necessary)
while len(data_list) < max_count:
# Pad with the last available data or create dummy data
if data_list:
last_bar = data_list[-1]
dummy_bar = OHLCVBar(
symbol=symbol,
timestamp=last_bar.timestamp,
open=last_bar.close,
high=last_bar.close,
low=last_bar.close,
close=last_bar.close,
volume=0.0,
timeframe=timeframe
)
else:
# Create completely dummy data
dummy_bar = OHLCVBar(
symbol=symbol,
timestamp=datetime.now(),
open=0.0, high=0.0, low=0.0, close=0.0, volume=0.0,
timeframe=timeframe
)
data_list.append(dummy_bar)
return data_list[-max_count:] # Return last max_count items
except Exception as e:
logger.error(f"Error getting OHLCV data list for {symbol} {timeframe}: {e}")
return []
def _try_historical_fallback(self, symbol: str, missing_timeframes: List[str]) -> bool:
"""Try to use historical data for missing timeframes"""
try:
for timeframe in missing_timeframes:
historical_df = self.cache.get_historical_data(symbol, timeframe)
if historical_df is not None and not historical_df.empty:
# Use latest historical data as current data
latest_row = historical_df.iloc[-1]
ohlcv_bar = OHLCVBar(
symbol=symbol,
timestamp=historical_df.index[-1] if hasattr(historical_df.index[-1], 'to_pydatetime') else datetime.now(),
open=float(latest_row['open']),
high=float(latest_row['high']),
low=float(latest_row['low']),
close=float(latest_row['close']),
volume=float(latest_row['volume']),
timeframe=timeframe
)
self.cache.update(f'ohlcv_{timeframe}', symbol, ohlcv_bar, 'historical_fallback')
logger.info(f"Used historical fallback for {symbol} {timeframe}")
return True
except Exception as e:
logger.error(f"Error in historical fallback: {e}")
return False
def _get_recent_predictions(self, symbol: str) -> Dict[str, Any]:
"""Get recent model predictions"""
try:
predictions = {}
# Get predictions from cache
for model_type in ['cnn', 'rl', 'extrema']:
prediction_data = self.cache.get(f'prediction_{model_type}', symbol)
if prediction_data:
predictions[model_type] = prediction_data
return predictions
except Exception as e:
logger.error(f"Error getting recent predictions for {symbol}: {e}")
return {}
def update_model_prediction(self, model_name: str, symbol: str, prediction_data: Any):
"""Update model prediction in cache"""
self.cache.update(f'prediction_{model_name}', symbol, prediction_data, model_name)
def get_current_price(self, symbol: str) -> Optional[float]:
"""Get current price for a symbol"""
return self.data_updater.get_current_price(symbol)
def get_cache_status(self) -> Dict[str, Any]:
"""Get cache status for monitoring"""
return {
'cache_status': self.cache.get_status(),
'updater_status': self.data_updater.get_status()
}
def has_sufficient_data(self, symbol: str) -> bool:
"""Check if we have sufficient data for model predictions"""
required_data = ['ohlcv_1s', 'ohlcv_1m', 'ohlcv_1h', 'ohlcv_1d']
for data_type in required_data:
if not self.cache.has_data(data_type, symbol, max_age_seconds=300):
# Check historical data as fallback
timeframe = data_type.split('_')[1]
if not self.cache.has_historical_data(symbol, timeframe, min_bars=50):
return False
return True