scalping dash also works initially
This commit is contained in:
@ -7,6 +7,7 @@ This enhanced orchestrator implements:
|
||||
3. Multi-symbol (ETH, BTC) coordinated decision making
|
||||
4. Perfect move marking for CNN backpropagation training
|
||||
5. Market environment adaptation through RL evaluation
|
||||
6. Universal data format compliance (5 timeseries streams)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
@ -22,6 +23,8 @@ import torch
|
||||
|
||||
from .config import get_config
|
||||
from .data_provider import DataProvider
|
||||
from .universal_data_adapter import UniversalDataAdapter, UniversalDataStream
|
||||
from .realtime_tick_processor import RealTimeTickProcessor, ProcessedTickFeatures, integrate_with_orchestrator
|
||||
from models import get_model_registry, ModelInterface, CNNModelInterface, RLAgentInterface
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -70,6 +73,7 @@ class MarketState:
|
||||
volume: float
|
||||
trend_strength: float
|
||||
market_regime: str # 'trending', 'ranging', 'volatile'
|
||||
universal_data: UniversalDataStream # Universal format data
|
||||
|
||||
@dataclass
|
||||
class PerfectMove:
|
||||
@ -86,6 +90,7 @@ class PerfectMove:
|
||||
class EnhancedTradingOrchestrator:
|
||||
"""
|
||||
Enhanced orchestrator with sophisticated multi-modal decision making
|
||||
and universal data format compliance
|
||||
"""
|
||||
|
||||
def __init__(self, data_provider: DataProvider = None):
|
||||
@ -94,6 +99,15 @@ class EnhancedTradingOrchestrator:
|
||||
self.data_provider = data_provider or DataProvider()
|
||||
self.model_registry = get_model_registry()
|
||||
|
||||
# Initialize universal data adapter
|
||||
self.universal_adapter = UniversalDataAdapter(self.data_provider)
|
||||
|
||||
# Initialize real-time tick processor for ultra-low latency processing
|
||||
self.tick_processor = RealTimeTickProcessor(symbols=self.config.symbols)
|
||||
|
||||
# Real-time tick features storage
|
||||
self.realtime_tick_features = {symbol: deque(maxlen=100) for symbol in self.config.symbols}
|
||||
|
||||
# Multi-symbol configuration
|
||||
self.symbols = self.config.symbols
|
||||
self.timeframes = self.config.timeframes
|
||||
@ -123,22 +137,28 @@ class EnhancedTradingOrchestrator:
|
||||
self.decision_callbacks = []
|
||||
self.learning_callbacks = []
|
||||
|
||||
logger.info("Enhanced TradingOrchestrator initialized")
|
||||
# Integrate tick processor with orchestrator
|
||||
integrate_with_orchestrator(self, self.tick_processor)
|
||||
|
||||
logger.info("Enhanced TradingOrchestrator initialized with Universal Data Format")
|
||||
logger.info(f"Symbols: {self.symbols}")
|
||||
logger.info(f"Timeframes: {self.timeframes}")
|
||||
logger.info(f"Universal format: ETH ticks, 1m, 1h, 1d + BTC reference ticks")
|
||||
logger.info(f"Enhanced confidence threshold: {self.confidence_threshold}")
|
||||
logger.info("Real-time tick processor integrated for ultra-low latency processing")
|
||||
|
||||
def _initialize_timeframe_weights(self) -> Dict[str, float]:
|
||||
"""Initialize weights for different timeframes"""
|
||||
# Higher timeframes get more weight for trend direction
|
||||
# Lower timeframes get more weight for entry/exit timing
|
||||
base_weights = {
|
||||
'1m': 0.05, # Noise filtering
|
||||
'1s': 0.60, # Primary scalping signal (ticks)
|
||||
'1m': 0.20, # Short-term confirmation
|
||||
'5m': 0.10, # Short-term momentum
|
||||
'15m': 0.15, # Entry/exit timing
|
||||
'1h': 0.25, # Medium-term trend
|
||||
'1h': 0.15, # Medium-term trend
|
||||
'4h': 0.25, # Stronger trend confirmation
|
||||
'1d': 0.20 # Long-term direction
|
||||
'1d': 0.05 # Long-term direction (minimal for scalping)
|
||||
}
|
||||
|
||||
# Normalize weights for configured timeframes
|
||||
@ -163,19 +183,42 @@ class EnhancedTradingOrchestrator:
|
||||
|
||||
async def make_coordinated_decisions(self) -> Dict[str, Optional[TradingAction]]:
|
||||
"""
|
||||
Make coordinated trading decisions across all symbols
|
||||
Make coordinated trading decisions across all symbols using universal data format
|
||||
"""
|
||||
decisions = {}
|
||||
|
||||
try:
|
||||
# Get market states for all symbols
|
||||
market_states = await self._get_all_market_states()
|
||||
# Get universal data stream (5 timeseries)
|
||||
universal_stream = self.universal_adapter.get_universal_data_stream()
|
||||
|
||||
if universal_stream is None:
|
||||
logger.warning("Failed to get universal data stream")
|
||||
return decisions
|
||||
|
||||
# Validate universal format
|
||||
is_valid, issues = self.universal_adapter.validate_universal_format(universal_stream)
|
||||
if not is_valid:
|
||||
logger.warning(f"Universal data format validation failed: {issues}")
|
||||
return decisions
|
||||
|
||||
logger.info("UNIVERSAL DATA STREAM ACTIVE:")
|
||||
logger.info(f" ETH ticks: {len(universal_stream.eth_ticks)} samples")
|
||||
logger.info(f" ETH 1m: {len(universal_stream.eth_1m)} candles")
|
||||
logger.info(f" ETH 1h: {len(universal_stream.eth_1h)} candles")
|
||||
logger.info(f" ETH 1d: {len(universal_stream.eth_1d)} candles")
|
||||
logger.info(f" BTC reference: {len(universal_stream.btc_ticks)} samples")
|
||||
logger.info(f" Data quality: {universal_stream.metadata['data_quality']['overall_score']:.2f}")
|
||||
|
||||
# Get market states for all symbols using universal data
|
||||
market_states = await self._get_all_market_states_universal(universal_stream)
|
||||
|
||||
# Get enhanced predictions for all symbols
|
||||
symbol_predictions = {}
|
||||
for symbol in self.symbols:
|
||||
if symbol in market_states:
|
||||
predictions = await self._get_enhanced_predictions(symbol, market_states[symbol])
|
||||
predictions = await self._get_enhanced_predictions_universal(
|
||||
symbol, market_states[symbol], universal_stream
|
||||
)
|
||||
symbol_predictions[symbol] = predictions
|
||||
|
||||
# Coordinate decisions considering symbol correlations
|
||||
@ -198,76 +241,125 @@ class EnhancedTradingOrchestrator:
|
||||
|
||||
return decisions
|
||||
|
||||
async def _get_all_market_states(self) -> Dict[str, MarketState]:
|
||||
"""Get current market state for all symbols"""
|
||||
async def _get_all_market_states_universal(self, universal_stream: UniversalDataStream) -> Dict[str, MarketState]:
|
||||
"""Get current market state for all symbols using universal data format"""
|
||||
market_states = {}
|
||||
|
||||
for symbol in self.symbols:
|
||||
try:
|
||||
# Get current market data for all timeframes
|
||||
prices = {}
|
||||
features = {}
|
||||
try:
|
||||
# Create market state for ETH/USDT (primary trading pair)
|
||||
if 'ETH/USDT' in self.symbols:
|
||||
eth_prices = {}
|
||||
eth_features = {}
|
||||
|
||||
for timeframe in self.timeframes:
|
||||
# Get current price
|
||||
current_price = self.data_provider.get_current_price(symbol)
|
||||
if current_price:
|
||||
prices[timeframe] = current_price
|
||||
|
||||
# Get feature matrix for this timeframe
|
||||
feature_matrix = self.data_provider.get_feature_matrix(
|
||||
symbol=symbol,
|
||||
timeframes=[timeframe],
|
||||
window_size=20 # Standard window
|
||||
)
|
||||
if feature_matrix is not None:
|
||||
features[timeframe] = feature_matrix
|
||||
# Extract prices from universal stream
|
||||
if len(universal_stream.eth_ticks) > 0:
|
||||
eth_prices['1s'] = float(universal_stream.eth_ticks[-1, 4]) # Close price from ticks
|
||||
if len(universal_stream.eth_1m) > 0:
|
||||
eth_prices['1m'] = float(universal_stream.eth_1m[-1, 4]) # Close price from 1m
|
||||
if len(universal_stream.eth_1h) > 0:
|
||||
eth_prices['1h'] = float(universal_stream.eth_1h[-1, 4]) # Close price from 1h
|
||||
if len(universal_stream.eth_1d) > 0:
|
||||
eth_prices['1d'] = float(universal_stream.eth_1d[-1, 4]) # Close price from 1d
|
||||
|
||||
if prices and features:
|
||||
# Calculate market metrics
|
||||
volatility = self._calculate_volatility(symbol)
|
||||
volume = self._get_current_volume(symbol)
|
||||
trend_strength = self._calculate_trend_strength(symbol)
|
||||
market_regime = self._determine_market_regime(symbol)
|
||||
|
||||
market_state = MarketState(
|
||||
symbol=symbol,
|
||||
timestamp=datetime.now(),
|
||||
prices=prices,
|
||||
features=features,
|
||||
volatility=volatility,
|
||||
volume=volume,
|
||||
trend_strength=trend_strength,
|
||||
market_regime=market_regime
|
||||
)
|
||||
|
||||
market_states[symbol] = market_state
|
||||
|
||||
# Store for historical tracking
|
||||
self.market_states[symbol].append(market_state)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting market state for {symbol}: {e}")
|
||||
# Extract features from universal stream (OHLCV data)
|
||||
eth_features['1s'] = universal_stream.eth_ticks[:, 1:] if universal_stream.eth_ticks.shape[1] > 5 else universal_stream.eth_ticks
|
||||
eth_features['1m'] = universal_stream.eth_1m[:, 1:] if universal_stream.eth_1m.shape[1] > 5 else universal_stream.eth_1m
|
||||
eth_features['1h'] = universal_stream.eth_1h[:, 1:] if universal_stream.eth_1h.shape[1] > 5 else universal_stream.eth_1h
|
||||
eth_features['1d'] = universal_stream.eth_1d[:, 1:] if universal_stream.eth_1d.shape[1] > 5 else universal_stream.eth_1d
|
||||
|
||||
# Calculate market metrics
|
||||
volatility = self._calculate_volatility_from_universal('ETH/USDT', universal_stream)
|
||||
volume = self._get_current_volume_from_universal('ETH/USDT', universal_stream)
|
||||
trend_strength = self._calculate_trend_strength_from_universal('ETH/USDT', universal_stream)
|
||||
market_regime = self._determine_market_regime_from_universal('ETH/USDT', universal_stream)
|
||||
|
||||
eth_market_state = MarketState(
|
||||
symbol='ETH/USDT',
|
||||
timestamp=universal_stream.timestamp,
|
||||
prices=eth_prices,
|
||||
features=eth_features,
|
||||
volatility=volatility,
|
||||
volume=volume,
|
||||
trend_strength=trend_strength,
|
||||
market_regime=market_regime,
|
||||
universal_data=universal_stream
|
||||
)
|
||||
|
||||
market_states['ETH/USDT'] = eth_market_state
|
||||
self.market_states['ETH/USDT'].append(eth_market_state)
|
||||
|
||||
# Create market state for BTC/USDT (reference pair)
|
||||
if 'BTC/USDT' in self.symbols:
|
||||
btc_prices = {}
|
||||
btc_features = {}
|
||||
|
||||
# Extract BTC reference data
|
||||
if len(universal_stream.btc_ticks) > 0:
|
||||
btc_prices['1s'] = float(universal_stream.btc_ticks[-1, 4]) # Close price from BTC ticks
|
||||
|
||||
btc_features['1s'] = universal_stream.btc_ticks[:, 1:] if universal_stream.btc_ticks.shape[1] > 5 else universal_stream.btc_ticks
|
||||
|
||||
# Calculate BTC metrics
|
||||
btc_volatility = self._calculate_volatility_from_universal('BTC/USDT', universal_stream)
|
||||
btc_volume = self._get_current_volume_from_universal('BTC/USDT', universal_stream)
|
||||
btc_trend_strength = self._calculate_trend_strength_from_universal('BTC/USDT', universal_stream)
|
||||
btc_market_regime = self._determine_market_regime_from_universal('BTC/USDT', universal_stream)
|
||||
|
||||
btc_market_state = MarketState(
|
||||
symbol='BTC/USDT',
|
||||
timestamp=universal_stream.timestamp,
|
||||
prices=btc_prices,
|
||||
features=btc_features,
|
||||
volatility=btc_volatility,
|
||||
volume=btc_volume,
|
||||
trend_strength=btc_trend_strength,
|
||||
market_regime=btc_market_regime,
|
||||
universal_data=universal_stream
|
||||
)
|
||||
|
||||
market_states['BTC/USDT'] = btc_market_state
|
||||
self.market_states['BTC/USDT'].append(btc_market_state)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating market states from universal data: {e}")
|
||||
|
||||
return market_states
|
||||
|
||||
async def _get_enhanced_predictions(self, symbol: str, market_state: MarketState) -> List[EnhancedPrediction]:
|
||||
"""Get enhanced predictions with timeframe breakdown"""
|
||||
async def _get_enhanced_predictions_universal(self, symbol: str, market_state: MarketState,
|
||||
universal_stream: UniversalDataStream) -> List[EnhancedPrediction]:
|
||||
"""Get enhanced predictions using universal data format"""
|
||||
predictions = []
|
||||
|
||||
for model_name, model in self.model_registry.models.items():
|
||||
try:
|
||||
if isinstance(model, CNNModelInterface):
|
||||
# Get CNN predictions for each timeframe
|
||||
# Format universal data for CNN model
|
||||
cnn_data = self.universal_adapter.format_for_model(universal_stream, 'cnn')
|
||||
|
||||
# Get CNN predictions for each timeframe using universal data
|
||||
timeframe_predictions = []
|
||||
|
||||
for timeframe in self.timeframes:
|
||||
if timeframe in market_state.features:
|
||||
feature_matrix = market_state.features[timeframe]
|
||||
|
||||
# Get timeframe-specific prediction
|
||||
action_probs, confidence = await self._get_timeframe_prediction(
|
||||
model, feature_matrix, timeframe, market_state
|
||||
# ETH timeframes (primary trading pair)
|
||||
if symbol == 'ETH/USDT':
|
||||
timeframe_data_map = {
|
||||
'1s': cnn_data.get('eth_ticks'),
|
||||
'1m': cnn_data.get('eth_1m'),
|
||||
'1h': cnn_data.get('eth_1h'),
|
||||
'1d': cnn_data.get('eth_1d')
|
||||
}
|
||||
# BTC reference
|
||||
elif symbol == 'BTC/USDT':
|
||||
timeframe_data_map = {
|
||||
'1s': cnn_data.get('btc_ticks')
|
||||
}
|
||||
else:
|
||||
continue
|
||||
|
||||
for timeframe, feature_matrix in timeframe_data_map.items():
|
||||
if feature_matrix is not None and len(feature_matrix) > 0:
|
||||
# Get timeframe-specific prediction using universal data
|
||||
action_probs, confidence = await self._get_timeframe_prediction_universal(
|
||||
model, feature_matrix, timeframe, market_state, universal_stream
|
||||
)
|
||||
|
||||
if action_probs is not None:
|
||||
@ -285,7 +377,8 @@ class EnhancedTradingOrchestrator:
|
||||
market_features={
|
||||
'volatility': market_state.volatility,
|
||||
'volume': market_state.volume,
|
||||
'trend_strength': market_state.trend_strength
|
||||
'trend_strength': market_state.trend_strength,
|
||||
'data_quality': universal_stream.metadata['data_quality']['overall_score']
|
||||
}
|
||||
)
|
||||
timeframe_predictions.append(tf_prediction)
|
||||
@ -305,7 +398,9 @@ class EnhancedTradingOrchestrator:
|
||||
timestamp=datetime.now(),
|
||||
metadata={
|
||||
'market_regime': market_state.market_regime,
|
||||
'symbol_correlation': self._get_symbol_correlation(symbol)
|
||||
'symbol_correlation': self._get_symbol_correlation(symbol),
|
||||
'universal_data_quality': universal_stream.metadata['data_quality'],
|
||||
'data_freshness': universal_stream.metadata['data_freshness']
|
||||
}
|
||||
)
|
||||
predictions.append(enhanced_pred)
|
||||
@ -315,9 +410,10 @@ class EnhancedTradingOrchestrator:
|
||||
|
||||
return predictions
|
||||
|
||||
async def _get_timeframe_prediction(self, model: CNNModelInterface, feature_matrix: np.ndarray,
|
||||
timeframe: str, market_state: MarketState) -> Tuple[Optional[np.ndarray], float]:
|
||||
"""Get prediction for specific timeframe with enhanced context"""
|
||||
async def _get_timeframe_prediction_universal(self, model: CNNModelInterface, feature_matrix: np.ndarray,
|
||||
timeframe: str, market_state: MarketState,
|
||||
universal_stream: UniversalDataStream) -> Tuple[Optional[np.ndarray], float]:
|
||||
"""Get prediction for specific timeframe using universal data format"""
|
||||
try:
|
||||
# Check if model supports timeframe-specific prediction
|
||||
if hasattr(model, 'predict_timeframe'):
|
||||
@ -326,9 +422,9 @@ class EnhancedTradingOrchestrator:
|
||||
action_probs, confidence = model.predict(feature_matrix)
|
||||
|
||||
if action_probs is not None and confidence is not None:
|
||||
# Enhance confidence based on market conditions
|
||||
enhanced_confidence = self._enhance_confidence_with_context(
|
||||
confidence, timeframe, market_state
|
||||
# Enhance confidence based on universal data quality and market conditions
|
||||
enhanced_confidence = self._enhance_confidence_with_universal_context(
|
||||
confidence, timeframe, market_state, universal_stream
|
||||
)
|
||||
return action_probs, enhanced_confidence
|
||||
|
||||
@ -337,20 +433,39 @@ class EnhancedTradingOrchestrator:
|
||||
|
||||
return None, 0.0
|
||||
|
||||
def _enhance_confidence_with_context(self, base_confidence: float, timeframe: str,
|
||||
market_state: MarketState) -> float:
|
||||
"""Enhance confidence score based on market context"""
|
||||
def _enhance_confidence_with_universal_context(self, base_confidence: float, timeframe: str,
|
||||
market_state: MarketState,
|
||||
universal_stream: UniversalDataStream) -> float:
|
||||
"""Enhance confidence score based on universal data context"""
|
||||
enhanced = base_confidence
|
||||
|
||||
# Adjust based on data quality from universal stream
|
||||
data_quality = universal_stream.metadata['data_quality']['overall_score']
|
||||
enhanced *= data_quality
|
||||
|
||||
# Adjust based on data freshness
|
||||
freshness = universal_stream.metadata.get('data_freshness', {})
|
||||
if timeframe in ['1s', '1m']:
|
||||
# For short timeframes, penalize stale data more heavily
|
||||
eth_freshness = freshness.get(f'eth_{timeframe}', 0)
|
||||
if eth_freshness > 60: # More than 1 minute old
|
||||
enhanced *= 0.8
|
||||
|
||||
# Adjust based on market regime
|
||||
if market_state.market_regime == 'trending':
|
||||
enhanced *= 1.1 # More confident in trending markets
|
||||
elif market_state.market_regime == 'volatile':
|
||||
enhanced *= 0.8 # Less confident in volatile markets
|
||||
|
||||
# Adjust based on timeframe reliability
|
||||
# Adjust based on timeframe reliability for scalping
|
||||
timeframe_reliability = {
|
||||
'1m': 0.7, '5m': 0.8, '15m': 0.9, '1h': 1.0, '4h': 1.1, '1d': 1.2
|
||||
'1s': 1.0, # Primary scalping timeframe
|
||||
'1m': 0.9, # Short-term confirmation
|
||||
'5m': 0.8, # Short-term momentum
|
||||
'15m': 0.9, # Entry/exit timing
|
||||
'1h': 0.8, # Medium-term trend
|
||||
'4h': 0.7, # Longer-term (less relevant for scalping)
|
||||
'1d': 0.6 # Long-term direction (minimal for scalping)
|
||||
}
|
||||
enhanced *= timeframe_reliability.get(timeframe, 1.0)
|
||||
|
||||
@ -360,6 +475,18 @@ class EnhancedTradingOrchestrator:
|
||||
elif market_state.volume < 0.5: # Low volume
|
||||
enhanced *= 0.9
|
||||
|
||||
# Adjust based on correlation with BTC (for ETH trades)
|
||||
if market_state.symbol == 'ETH/USDT' and len(universal_stream.btc_ticks) > 1:
|
||||
# Check ETH-BTC correlation strength
|
||||
eth_momentum = (universal_stream.eth_ticks[-1, 4] - universal_stream.eth_ticks[-2, 4]) / universal_stream.eth_ticks[-2, 4]
|
||||
btc_momentum = (universal_stream.btc_ticks[-1, 4] - universal_stream.btc_ticks[-2, 4]) / universal_stream.btc_ticks[-2, 4]
|
||||
|
||||
# If ETH and BTC are moving in same direction, increase confidence
|
||||
if (eth_momentum > 0 and btc_momentum > 0) or (eth_momentum < 0 and btc_momentum < 0):
|
||||
enhanced *= 1.05
|
||||
else:
|
||||
enhanced *= 0.95
|
||||
|
||||
return min(enhanced, 1.0) # Cap at 1.0
|
||||
|
||||
def _combine_timeframe_predictions(self, timeframe_predictions: List[TimeframePrediction],
|
||||
@ -524,7 +651,7 @@ class EnhancedTradingOrchestrator:
|
||||
initial_state = evaluation_item['market_state_before']
|
||||
|
||||
# Get current market state for comparison
|
||||
current_market_states = await self._get_all_market_states()
|
||||
current_market_states = await self._get_all_market_states_universal(self.universal_adapter.get_universal_data_stream())
|
||||
current_state = current_market_states.get(action.symbol)
|
||||
|
||||
if current_state:
|
||||
@ -625,38 +752,165 @@ class EnhancedTradingOrchestrator:
|
||||
except Exception as e:
|
||||
logger.error(f"Error marking perfect move: {e}")
|
||||
|
||||
def get_recent_perfect_moves(self, limit: int = 10) -> List[PerfectMove]:
|
||||
"""Get recent perfect moves for display/monitoring"""
|
||||
return list(self.perfect_moves)[-limit:]
|
||||
|
||||
async def queue_action_for_evaluation(self, action: TradingAction):
|
||||
"""Queue a trading action for future RL evaluation"""
|
||||
try:
|
||||
# Get current market state
|
||||
market_states = await self._get_all_market_states_universal(self.universal_adapter.get_universal_data_stream())
|
||||
if action.symbol in market_states:
|
||||
evaluation_item = {
|
||||
'action': action,
|
||||
'market_state_before': market_states[action.symbol],
|
||||
'timestamp': datetime.now()
|
||||
}
|
||||
self.rl_evaluation_queue.append(evaluation_item)
|
||||
logger.debug(f"Queued action for RL evaluation: {action.action} {action.symbol}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error queuing action for evaluation: {e}")
|
||||
|
||||
def get_perfect_moves_for_training(self, symbol: str = None, timeframe: str = None,
|
||||
limit: int = 1000) -> List[PerfectMove]:
|
||||
"""Get perfect moves for CNN training"""
|
||||
moves = list(self.perfect_moves)
|
||||
|
||||
# Filter by symbol if specified
|
||||
if symbol:
|
||||
moves = [m for m in moves if m.symbol == symbol]
|
||||
moves = [move for move in moves if move.symbol == symbol]
|
||||
|
||||
# Filter by timeframe if specified
|
||||
if timeframe:
|
||||
moves = [m for m in moves if m.timeframe == timeframe]
|
||||
moves = [move for move in moves if move.timeframe == timeframe]
|
||||
|
||||
return moves[-limit:] if limit else moves
|
||||
return moves[-limit:] # Return most recent moves
|
||||
|
||||
# Helper methods for market analysis
|
||||
# Helper methods for market analysis using universal data
|
||||
def _calculate_volatility_from_universal(self, symbol: str, universal_stream: UniversalDataStream) -> float:
|
||||
"""Calculate current volatility for symbol using universal data"""
|
||||
try:
|
||||
if symbol == 'ETH/USDT' and len(universal_stream.eth_ticks) > 10:
|
||||
# Calculate volatility from tick data
|
||||
prices = universal_stream.eth_ticks[-10:, 4] # Last 10 close prices
|
||||
returns = np.diff(prices) / prices[:-1]
|
||||
volatility = np.std(returns) * np.sqrt(86400) # Annualized volatility
|
||||
return float(volatility)
|
||||
elif symbol == 'BTC/USDT' and len(universal_stream.btc_ticks) > 10:
|
||||
# Calculate volatility from BTC tick data
|
||||
prices = universal_stream.btc_ticks[-10:, 4] # Last 10 close prices
|
||||
returns = np.diff(prices) / prices[:-1]
|
||||
volatility = np.std(returns) * np.sqrt(86400) # Annualized volatility
|
||||
return float(volatility)
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating volatility from universal data: {e}")
|
||||
|
||||
return 0.02 # Default 2% volatility
|
||||
|
||||
def _get_current_volume_from_universal(self, symbol: str, universal_stream: UniversalDataStream) -> float:
|
||||
"""Get current volume ratio compared to average using universal data"""
|
||||
try:
|
||||
if symbol == 'ETH/USDT':
|
||||
# Use 1m data for volume analysis
|
||||
if len(universal_stream.eth_1m) > 10:
|
||||
volumes = universal_stream.eth_1m[-10:, 5] # Last 10 volume values
|
||||
current_volume = universal_stream.eth_1m[-1, 5]
|
||||
avg_volume = np.mean(volumes[:-1])
|
||||
if avg_volume > 0:
|
||||
return float(current_volume / avg_volume)
|
||||
elif symbol == 'BTC/USDT':
|
||||
# Use BTC tick data for volume analysis
|
||||
if len(universal_stream.btc_ticks) > 10:
|
||||
volumes = universal_stream.btc_ticks[-10:, 5] # Last 10 volume values
|
||||
current_volume = universal_stream.btc_ticks[-1, 5]
|
||||
avg_volume = np.mean(volumes[:-1])
|
||||
if avg_volume > 0:
|
||||
return float(current_volume / avg_volume)
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating volume from universal data: {e}")
|
||||
|
||||
return 1.0 # Normal volume
|
||||
|
||||
def _calculate_trend_strength_from_universal(self, symbol: str, universal_stream: UniversalDataStream) -> float:
|
||||
"""Calculate trend strength using universal data"""
|
||||
try:
|
||||
if symbol == 'ETH/USDT':
|
||||
# Use multiple timeframes to determine trend strength
|
||||
trend_scores = []
|
||||
|
||||
# Check 1m trend
|
||||
if len(universal_stream.eth_1m) > 20:
|
||||
prices = universal_stream.eth_1m[-20:, 4] # Last 20 close prices
|
||||
slope = np.polyfit(range(len(prices)), prices, 1)[0]
|
||||
trend_scores.append(abs(slope) / np.mean(prices))
|
||||
|
||||
# Check 1h trend
|
||||
if len(universal_stream.eth_1h) > 10:
|
||||
prices = universal_stream.eth_1h[-10:, 4] # Last 10 close prices
|
||||
slope = np.polyfit(range(len(prices)), prices, 1)[0]
|
||||
trend_scores.append(abs(slope) / np.mean(prices))
|
||||
|
||||
if trend_scores:
|
||||
return float(np.mean(trend_scores))
|
||||
|
||||
elif symbol == 'BTC/USDT':
|
||||
# Use BTC tick data for trend analysis
|
||||
if len(universal_stream.btc_ticks) > 20:
|
||||
prices = universal_stream.btc_ticks[-20:, 4] # Last 20 close prices
|
||||
slope = np.polyfit(range(len(prices)), prices, 1)[0]
|
||||
return float(abs(slope) / np.mean(prices))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error calculating trend strength from universal data: {e}")
|
||||
|
||||
return 0.5 # Moderate trend
|
||||
|
||||
def _determine_market_regime_from_universal(self, symbol: str, universal_stream: UniversalDataStream) -> str:
|
||||
"""Determine current market regime using universal data"""
|
||||
try:
|
||||
if symbol == 'ETH/USDT':
|
||||
# Analyze volatility and trend from multiple timeframes
|
||||
volatility = self._calculate_volatility_from_universal(symbol, universal_stream)
|
||||
trend_strength = self._calculate_trend_strength_from_universal(symbol, universal_stream)
|
||||
|
||||
# Determine regime based on volatility and trend
|
||||
if volatility > 0.05: # High volatility
|
||||
return 'volatile'
|
||||
elif trend_strength > 0.002: # Strong trend
|
||||
return 'trending'
|
||||
else:
|
||||
return 'ranging'
|
||||
|
||||
elif symbol == 'BTC/USDT':
|
||||
# Analyze BTC regime
|
||||
volatility = self._calculate_volatility_from_universal(symbol, universal_stream)
|
||||
|
||||
if volatility > 0.04: # High volatility for BTC
|
||||
return 'volatile'
|
||||
else:
|
||||
return 'trending' # Default for BTC
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error determining market regime from universal data: {e}")
|
||||
|
||||
return 'trending' # Default regime
|
||||
|
||||
# Legacy helper methods (kept for compatibility)
|
||||
def _calculate_volatility(self, symbol: str) -> float:
|
||||
"""Calculate current volatility for symbol"""
|
||||
# Placeholder - implement based on your data provider
|
||||
"""Calculate current volatility for symbol (legacy method)"""
|
||||
return 0.02 # 2% default volatility
|
||||
|
||||
def _get_current_volume(self, symbol: str) -> float:
|
||||
"""Get current volume ratio compared to average"""
|
||||
# Placeholder - implement based on your data provider
|
||||
"""Get current volume ratio compared to average (legacy method)"""
|
||||
return 1.0 # Normal volume
|
||||
|
||||
def _calculate_trend_strength(self, symbol: str) -> float:
|
||||
"""Calculate trend strength (0 = no trend, 1 = strong trend)"""
|
||||
# Placeholder - implement based on your data provider
|
||||
"""Calculate trend strength (legacy method)"""
|
||||
return 0.5 # Moderate trend
|
||||
|
||||
def _determine_market_regime(self, symbol: str) -> str:
|
||||
"""Determine current market regime"""
|
||||
# Placeholder - implement based on your analysis
|
||||
"""Determine current market regime (legacy method)"""
|
||||
return 'trending' # Default to trending
|
||||
|
||||
def _get_symbol_correlation(self, symbol: str) -> Dict[str, float]:
|
||||
@ -697,6 +951,47 @@ class EnhancedTradingOrchestrator:
|
||||
|
||||
return np.array(state_components, dtype=np.float32)
|
||||
|
||||
def process_realtime_features(self, feature_dict: Dict[str, Any]):
|
||||
"""Process real-time tick features from the tick processor"""
|
||||
try:
|
||||
symbol = feature_dict['symbol']
|
||||
|
||||
# Store the features
|
||||
if symbol in self.realtime_tick_features:
|
||||
self.realtime_tick_features[symbol].append(feature_dict)
|
||||
|
||||
# Log high-confidence features
|
||||
if feature_dict['confidence'] > 0.8:
|
||||
logger.info(f"High-confidence tick features for {symbol}: confidence={feature_dict['confidence']:.3f}")
|
||||
|
||||
# Trigger immediate decision if we have very high confidence features
|
||||
if feature_dict['confidence'] > 0.9:
|
||||
logger.info(f"Ultra-high confidence tick signal for {symbol} - triggering immediate analysis")
|
||||
# Could trigger immediate decision making here
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing real-time features: {e}")
|
||||
|
||||
async def start_realtime_processing(self):
|
||||
"""Start real-time tick processing"""
|
||||
try:
|
||||
await self.tick_processor.start_processing()
|
||||
logger.info("Real-time tick processing started")
|
||||
except Exception as e:
|
||||
logger.error(f"Error starting real-time tick processing: {e}")
|
||||
|
||||
async def stop_realtime_processing(self):
|
||||
"""Stop real-time tick processing"""
|
||||
try:
|
||||
await self.tick_processor.stop_processing()
|
||||
logger.info("Real-time tick processing stopped")
|
||||
except Exception as e:
|
||||
logger.error(f"Error stopping real-time tick processing: {e}")
|
||||
|
||||
def get_realtime_tick_stats(self) -> Dict[str, Any]:
|
||||
"""Get real-time tick processing statistics"""
|
||||
return self.tick_processor.get_processing_stats()
|
||||
|
||||
def get_performance_metrics(self) -> Dict[str, Any]:
|
||||
"""Get performance metrics for dashboard compatibility"""
|
||||
total_actions = sum(len(actions) for actions in self.recent_actions.values())
|
||||
@ -706,6 +1001,9 @@ class EnhancedTradingOrchestrator:
|
||||
win_rate = 0.78 # 78% win rate
|
||||
total_pnl = 247.85 # Strong positive P&L from 500x leverage
|
||||
|
||||
# Add tick processing stats
|
||||
tick_stats = self.get_realtime_tick_stats()
|
||||
|
||||
return {
|
||||
'total_actions': total_actions,
|
||||
'perfect_moves': perfect_moves_count,
|
||||
@ -716,5 +1014,57 @@ class EnhancedTradingOrchestrator:
|
||||
'confidence_threshold': self.confidence_threshold,
|
||||
'decision_frequency': self.decision_frequency,
|
||||
'leverage': '500x', # Ultra-fast scalping
|
||||
'primary_timeframe': '1s' # Main scalping timeframe
|
||||
}
|
||||
'primary_timeframe': '1s', # Main scalping timeframe
|
||||
'tick_processing': tick_stats # Real-time tick processing stats
|
||||
}
|
||||
|
||||
def analyze_market_conditions(self, symbol: str) -> Dict[str, Any]:
|
||||
"""Analyze current market conditions for a given symbol"""
|
||||
try:
|
||||
# Get basic market data
|
||||
data = self.data_provider.get_historical_data(symbol, '1m', limit=50)
|
||||
|
||||
if data is None or data.empty:
|
||||
return {
|
||||
'status': 'no_data',
|
||||
'symbol': symbol,
|
||||
'analysis': 'No market data available'
|
||||
}
|
||||
|
||||
# Basic market analysis
|
||||
current_price = data['close'].iloc[-1]
|
||||
price_change = (current_price - data['close'].iloc[-2]) / data['close'].iloc[-2] * 100
|
||||
|
||||
# Volatility calculation
|
||||
volatility = data['close'].pct_change().std() * 100
|
||||
|
||||
# Volume analysis
|
||||
avg_volume = data['volume'].mean()
|
||||
current_volume = data['volume'].iloc[-1]
|
||||
volume_ratio = current_volume / avg_volume if avg_volume > 0 else 1.0
|
||||
|
||||
# Trend analysis
|
||||
ma_short = data['close'].rolling(10).mean().iloc[-1]
|
||||
ma_long = data['close'].rolling(30).mean().iloc[-1]
|
||||
trend = 'bullish' if ma_short > ma_long else 'bearish'
|
||||
|
||||
return {
|
||||
'status': 'success',
|
||||
'symbol': symbol,
|
||||
'current_price': current_price,
|
||||
'price_change': price_change,
|
||||
'volatility': volatility,
|
||||
'volume_ratio': volume_ratio,
|
||||
'trend': trend,
|
||||
'analysis': f"{symbol} is {trend} with {volatility:.2f}% volatility",
|
||||
'timestamp': datetime.now().isoformat()
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error analyzing market conditions for {symbol}: {e}")
|
||||
return {
|
||||
'status': 'error',
|
||||
'symbol': symbol,
|
||||
'error': str(e),
|
||||
'analysis': f'Error analyzing {symbol}'
|
||||
}
|
BIN
core/prediction_tracker.py
Normal file
BIN
core/prediction_tracker.py
Normal file
Binary file not shown.
649
core/realtime_tick_processor.py
Normal file
649
core/realtime_tick_processor.py
Normal file
@ -0,0 +1,649 @@
|
||||
"""
|
||||
Real-Time Tick Processing Neural Network Module
|
||||
|
||||
This module acts as a Neural Network DPS (Data Processing System) alternative,
|
||||
processing raw tick data with ultra-low latency and feeding processed features
|
||||
to trading models in real-time.
|
||||
|
||||
Features:
|
||||
- Real-time tick ingestion with volume processing
|
||||
- Neural network feature extraction from tick streams
|
||||
- Ultra-low latency processing (sub-millisecond)
|
||||
- Volume-weighted price analysis
|
||||
- Microstructure pattern detection
|
||||
- Real-time feature streaming to models
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Optional, Tuple, Any, Deque
|
||||
from collections import deque
|
||||
from threading import Thread, Lock
|
||||
import websockets
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@dataclass
|
||||
class TickData:
|
||||
"""Raw tick data structure"""
|
||||
timestamp: datetime
|
||||
price: float
|
||||
volume: float
|
||||
side: str # 'buy' or 'sell'
|
||||
trade_id: Optional[str] = None
|
||||
|
||||
@dataclass
|
||||
class ProcessedTickFeatures:
|
||||
"""Processed tick features for model consumption"""
|
||||
timestamp: datetime
|
||||
price_features: np.ndarray # Price-based features
|
||||
volume_features: np.ndarray # Volume-based features
|
||||
microstructure_features: np.ndarray # Market microstructure features
|
||||
neural_features: np.ndarray # Neural network extracted features
|
||||
confidence: float # Feature quality confidence
|
||||
|
||||
class TickProcessingNN(nn.Module):
|
||||
"""
|
||||
Neural Network for real-time tick processing
|
||||
Extracts high-level features from raw tick data
|
||||
"""
|
||||
|
||||
def __init__(self, input_size: int = 9, hidden_size: int = 128, output_size: int = 64):
|
||||
super(TickProcessingNN, self).__init__()
|
||||
|
||||
# Tick sequence processing layers
|
||||
self.tick_encoder = nn.Sequential(
|
||||
nn.Linear(input_size, hidden_size),
|
||||
nn.ReLU(),
|
||||
nn.Dropout(0.1),
|
||||
nn.Linear(hidden_size, hidden_size),
|
||||
nn.ReLU(),
|
||||
nn.Dropout(0.1)
|
||||
)
|
||||
|
||||
# LSTM for temporal patterns
|
||||
self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True, num_layers=2)
|
||||
|
||||
# Attention mechanism for important tick selection
|
||||
self.attention = nn.MultiheadAttention(hidden_size, num_heads=8, batch_first=True)
|
||||
|
||||
# Feature extraction heads
|
||||
self.price_head = nn.Linear(hidden_size, 16) # Price pattern features
|
||||
self.volume_head = nn.Linear(hidden_size, 16) # Volume pattern features
|
||||
self.microstructure_head = nn.Linear(hidden_size, 16) # Microstructure features
|
||||
|
||||
# Final feature fusion
|
||||
self.feature_fusion = nn.Sequential(
|
||||
nn.Linear(48, output_size), # 16+16+16 = 48
|
||||
nn.ReLU(),
|
||||
nn.Linear(output_size, output_size)
|
||||
)
|
||||
|
||||
# Confidence estimation
|
||||
self.confidence_head = nn.Sequential(
|
||||
nn.Linear(output_size, 32),
|
||||
nn.ReLU(),
|
||||
nn.Linear(32, 1),
|
||||
nn.Sigmoid()
|
||||
)
|
||||
|
||||
def forward(self, tick_sequence: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
|
||||
"""
|
||||
Process tick sequence and extract features
|
||||
|
||||
Args:
|
||||
tick_sequence: [batch, sequence_length, features]
|
||||
|
||||
Returns:
|
||||
features: [batch, output_size] - extracted features
|
||||
confidence: [batch, 1] - feature confidence
|
||||
"""
|
||||
batch_size, seq_len, _ = tick_sequence.shape
|
||||
|
||||
# Encode each tick
|
||||
encoded = self.tick_encoder(tick_sequence) # [batch, seq_len, hidden_size]
|
||||
|
||||
# LSTM processing for temporal patterns
|
||||
lstm_out, _ = self.lstm(encoded) # [batch, seq_len, hidden_size]
|
||||
|
||||
# Attention to focus on important ticks
|
||||
attended, _ = self.attention(lstm_out, lstm_out, lstm_out) # [batch, seq_len, hidden_size]
|
||||
|
||||
# Use the last attended output
|
||||
final_features = attended[:, -1, :] # [batch, hidden_size]
|
||||
|
||||
# Extract specialized features
|
||||
price_features = self.price_head(final_features)
|
||||
volume_features = self.volume_head(final_features)
|
||||
microstructure_features = self.microstructure_head(final_features)
|
||||
|
||||
# Fuse all features
|
||||
combined_features = torch.cat([price_features, volume_features, microstructure_features], dim=1)
|
||||
final_features = self.feature_fusion(combined_features)
|
||||
|
||||
# Estimate confidence
|
||||
confidence = self.confidence_head(final_features)
|
||||
|
||||
return final_features, confidence
|
||||
|
||||
class RealTimeTickProcessor:
|
||||
"""
|
||||
Real-time tick processing system with neural network feature extraction
|
||||
Acts as a DPS alternative for ultra-low latency tick processing
|
||||
"""
|
||||
|
||||
def __init__(self, symbols: List[str] = None, tick_buffer_size: int = 1000):
|
||||
"""Initialize the real-time tick processor"""
|
||||
self.symbols = symbols or ['ETH/USDT', 'BTC/USDT']
|
||||
self.tick_buffer_size = tick_buffer_size
|
||||
|
||||
# Tick storage buffers
|
||||
self.tick_buffers: Dict[str, Deque[TickData]] = {}
|
||||
self.processed_features: Dict[str, Deque[ProcessedTickFeatures]] = {}
|
||||
|
||||
# Initialize buffers for each symbol
|
||||
for symbol in self.symbols:
|
||||
self.tick_buffers[symbol] = deque(maxlen=tick_buffer_size)
|
||||
self.processed_features[symbol] = deque(maxlen=100) # Keep last 100 processed features
|
||||
|
||||
# Neural network for feature extraction
|
||||
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
self.tick_nn = TickProcessingNN(input_size=9).to(self.device)
|
||||
self.tick_nn.eval() # Start in evaluation mode
|
||||
|
||||
# Processing parameters
|
||||
self.processing_window = 50 # Number of ticks to process at once
|
||||
self.min_ticks_for_processing = 10 # Minimum ticks before processing
|
||||
|
||||
# Real-time streaming
|
||||
self.streaming = False
|
||||
self.websocket_tasks = {}
|
||||
self.processing_threads = {}
|
||||
|
||||
# Performance tracking
|
||||
self.processing_times = deque(maxlen=1000)
|
||||
self.tick_counts = {symbol: 0 for symbol in self.symbols}
|
||||
|
||||
# Thread safety
|
||||
self.data_lock = Lock()
|
||||
|
||||
# Feature subscribers (models that want real-time features)
|
||||
self.feature_subscribers = []
|
||||
|
||||
logger.info(f"RealTimeTickProcessor initialized for symbols: {self.symbols}")
|
||||
logger.info(f"Neural network device: {self.device}")
|
||||
logger.info(f"Tick buffer size: {tick_buffer_size}")
|
||||
|
||||
def add_feature_subscriber(self, callback):
|
||||
"""Add a callback function to receive processed features"""
|
||||
self.feature_subscribers.append(callback)
|
||||
logger.info(f"Added feature subscriber: {callback.__name__}")
|
||||
|
||||
def remove_feature_subscriber(self, callback):
|
||||
"""Remove a feature subscriber"""
|
||||
if callback in self.feature_subscribers:
|
||||
self.feature_subscribers.remove(callback)
|
||||
logger.info(f"Removed feature subscriber: {callback.__name__}")
|
||||
|
||||
async def start_processing(self):
|
||||
"""Start real-time tick processing"""
|
||||
logger.info("Starting real-time tick processing...")
|
||||
self.streaming = True
|
||||
|
||||
# Start WebSocket streams for each symbol
|
||||
for symbol in self.symbols:
|
||||
task = asyncio.create_task(self._websocket_stream(symbol))
|
||||
self.websocket_tasks[symbol] = task
|
||||
|
||||
# Start processing thread for each symbol
|
||||
thread = Thread(target=self._processing_loop, args=(symbol,), daemon=True)
|
||||
thread.start()
|
||||
self.processing_threads[symbol] = thread
|
||||
|
||||
logger.info("Real-time tick processing started")
|
||||
|
||||
async def stop_processing(self):
|
||||
"""Stop real-time tick processing"""
|
||||
logger.info("Stopping real-time tick processing...")
|
||||
self.streaming = False
|
||||
|
||||
# Cancel WebSocket tasks
|
||||
for symbol, task in self.websocket_tasks.items():
|
||||
if not task.done():
|
||||
task.cancel()
|
||||
try:
|
||||
await task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
self.websocket_tasks.clear()
|
||||
logger.info("Real-time tick processing stopped")
|
||||
|
||||
async def _websocket_stream(self, symbol: str):
|
||||
"""WebSocket stream for real-time tick data"""
|
||||
binance_symbol = symbol.replace('/', '').lower()
|
||||
url = f"wss://stream.binance.com:9443/ws/{binance_symbol}@trade"
|
||||
|
||||
while self.streaming:
|
||||
try:
|
||||
async with websockets.connect(url) as websocket:
|
||||
logger.info(f"Tick WebSocket connected for {symbol}")
|
||||
|
||||
async for message in websocket:
|
||||
if not self.streaming:
|
||||
break
|
||||
|
||||
try:
|
||||
data = json.loads(message)
|
||||
await self._process_raw_tick(symbol, data)
|
||||
except Exception as e:
|
||||
logger.warning(f"Error processing tick for {symbol}: {e}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"WebSocket error for {symbol}: {e}")
|
||||
if self.streaming:
|
||||
logger.info(f"Reconnecting tick WebSocket for {symbol} in 2 seconds...")
|
||||
await asyncio.sleep(2)
|
||||
|
||||
async def _process_raw_tick(self, symbol: str, raw_data: Dict):
|
||||
"""Process raw tick data from WebSocket"""
|
||||
try:
|
||||
# Extract tick information
|
||||
tick = TickData(
|
||||
timestamp=datetime.fromtimestamp(int(raw_data['T']) / 1000),
|
||||
price=float(raw_data['p']),
|
||||
volume=float(raw_data['q']),
|
||||
side='buy' if raw_data['m'] == False else 'sell', # m=true means buyer is market maker (sell)
|
||||
trade_id=raw_data.get('t')
|
||||
)
|
||||
|
||||
# Add to buffer
|
||||
with self.data_lock:
|
||||
self.tick_buffers[symbol].append(tick)
|
||||
self.tick_counts[symbol] += 1
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing raw tick for {symbol}: {e}")
|
||||
|
||||
def _processing_loop(self, symbol: str):
|
||||
"""Main processing loop for a symbol"""
|
||||
logger.info(f"Starting processing loop for {symbol}")
|
||||
|
||||
while self.streaming:
|
||||
try:
|
||||
# Check if we have enough ticks to process
|
||||
with self.data_lock:
|
||||
tick_count = len(self.tick_buffers[symbol])
|
||||
|
||||
if tick_count >= self.min_ticks_for_processing:
|
||||
start_time = time.time()
|
||||
|
||||
# Process ticks
|
||||
features = self._extract_neural_features(symbol)
|
||||
|
||||
if features is not None:
|
||||
# Store processed features
|
||||
with self.data_lock:
|
||||
self.processed_features[symbol].append(features)
|
||||
|
||||
# Notify subscribers
|
||||
self._notify_feature_subscribers(symbol, features)
|
||||
|
||||
# Track processing time
|
||||
processing_time = (time.time() - start_time) * 1000 # Convert to ms
|
||||
self.processing_times.append(processing_time)
|
||||
|
||||
if len(self.processing_times) % 100 == 0:
|
||||
avg_time = np.mean(list(self.processing_times))
|
||||
logger.info(f"Average processing time: {avg_time:.2f}ms")
|
||||
|
||||
# Small sleep to prevent CPU overload
|
||||
time.sleep(0.001) # 1ms sleep for ultra-low latency
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in processing loop for {symbol}: {e}")
|
||||
time.sleep(0.01) # Longer sleep on error
|
||||
|
||||
def _extract_neural_features(self, symbol: str) -> Optional[ProcessedTickFeatures]:
|
||||
"""Extract neural network features from recent ticks"""
|
||||
try:
|
||||
with self.data_lock:
|
||||
# Get recent ticks
|
||||
recent_ticks = list(self.tick_buffers[symbol])[-self.processing_window:]
|
||||
|
||||
if len(recent_ticks) < self.min_ticks_for_processing:
|
||||
return None
|
||||
|
||||
# Convert ticks to neural network input
|
||||
tick_features = self._ticks_to_features(recent_ticks)
|
||||
|
||||
# Process with neural network
|
||||
with torch.no_grad():
|
||||
tick_tensor = torch.FloatTensor(tick_features).unsqueeze(0).to(self.device)
|
||||
neural_features, confidence = self.tick_nn(tick_tensor)
|
||||
|
||||
neural_features = neural_features.cpu().numpy().flatten()
|
||||
confidence = confidence.cpu().numpy().item()
|
||||
|
||||
# Extract traditional features
|
||||
price_features = self._extract_price_features(recent_ticks)
|
||||
volume_features = self._extract_volume_features(recent_ticks)
|
||||
microstructure_features = self._extract_microstructure_features(recent_ticks)
|
||||
|
||||
# Create processed features object
|
||||
processed = ProcessedTickFeatures(
|
||||
timestamp=recent_ticks[-1].timestamp,
|
||||
price_features=price_features,
|
||||
volume_features=volume_features,
|
||||
microstructure_features=microstructure_features,
|
||||
neural_features=neural_features,
|
||||
confidence=confidence
|
||||
)
|
||||
|
||||
return processed
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting neural features for {symbol}: {e}")
|
||||
return None
|
||||
|
||||
def _ticks_to_features(self, ticks: List[TickData]) -> np.ndarray:
|
||||
"""Convert tick data to neural network input features"""
|
||||
features = []
|
||||
|
||||
for i, tick in enumerate(ticks):
|
||||
tick_features = [
|
||||
tick.price,
|
||||
tick.volume,
|
||||
1.0 if tick.side == 'buy' else 0.0, # Buy/sell indicator
|
||||
tick.timestamp.timestamp(), # Timestamp
|
||||
]
|
||||
|
||||
# Add relative features if we have previous ticks
|
||||
if i > 0:
|
||||
prev_tick = ticks[i-1]
|
||||
price_change = (tick.price - prev_tick.price) / prev_tick.price
|
||||
volume_ratio = tick.volume / (prev_tick.volume + 1e-8)
|
||||
time_delta = (tick.timestamp - prev_tick.timestamp).total_seconds()
|
||||
|
||||
tick_features.extend([
|
||||
price_change,
|
||||
volume_ratio,
|
||||
time_delta
|
||||
])
|
||||
else:
|
||||
tick_features.extend([0.0, 1.0, 0.0]) # Default values for first tick
|
||||
|
||||
# Add moving averages if we have enough data
|
||||
if i >= 5:
|
||||
recent_prices = [t.price for t in ticks[max(0, i-4):i+1]]
|
||||
recent_volumes = [t.volume for t in ticks[max(0, i-4):i+1]]
|
||||
|
||||
price_ma = np.mean(recent_prices)
|
||||
volume_ma = np.mean(recent_volumes)
|
||||
|
||||
tick_features.extend([
|
||||
(tick.price - price_ma) / price_ma, # Price deviation from MA
|
||||
(tick.volume - volume_ma) / (volume_ma + 1e-8) # Volume deviation from MA
|
||||
])
|
||||
else:
|
||||
tick_features.extend([0.0, 0.0])
|
||||
|
||||
features.append(tick_features)
|
||||
|
||||
# Pad or truncate to fixed size
|
||||
target_length = self.processing_window
|
||||
if len(features) < target_length:
|
||||
# Pad with zeros
|
||||
padding = [[0.0] * len(features[0])] * (target_length - len(features))
|
||||
features = padding + features
|
||||
elif len(features) > target_length:
|
||||
# Take the most recent ticks
|
||||
features = features[-target_length:]
|
||||
|
||||
return np.array(features, dtype=np.float32)
|
||||
|
||||
def _extract_price_features(self, ticks: List[TickData]) -> np.ndarray:
|
||||
"""Extract price-based features"""
|
||||
prices = np.array([tick.price for tick in ticks])
|
||||
|
||||
features = [
|
||||
prices[-1], # Current price
|
||||
np.mean(prices), # Average price
|
||||
np.std(prices), # Price volatility
|
||||
np.max(prices), # High
|
||||
np.min(prices), # Low
|
||||
(prices[-1] - prices[0]) / prices[0] if prices[0] != 0 else 0, # Total return
|
||||
]
|
||||
|
||||
# Price momentum features
|
||||
if len(prices) >= 10:
|
||||
short_ma = np.mean(prices[-5:])
|
||||
long_ma = np.mean(prices[-10:])
|
||||
momentum = (short_ma - long_ma) / long_ma if long_ma != 0 else 0
|
||||
features.append(momentum)
|
||||
else:
|
||||
features.append(0.0)
|
||||
|
||||
return np.array(features, dtype=np.float32)
|
||||
|
||||
def _extract_volume_features(self, ticks: List[TickData]) -> np.ndarray:
|
||||
"""Extract volume-based features"""
|
||||
volumes = np.array([tick.volume for tick in ticks])
|
||||
buy_volumes = np.array([tick.volume for tick in ticks if tick.side == 'buy'])
|
||||
sell_volumes = np.array([tick.volume for tick in ticks if tick.side == 'sell'])
|
||||
|
||||
features = [
|
||||
np.sum(volumes), # Total volume
|
||||
np.mean(volumes), # Average volume
|
||||
np.std(volumes), # Volume volatility
|
||||
np.sum(buy_volumes) if len(buy_volumes) > 0 else 0, # Buy volume
|
||||
np.sum(sell_volumes) if len(sell_volumes) > 0 else 0, # Sell volume
|
||||
]
|
||||
|
||||
# Volume imbalance
|
||||
total_buy = np.sum(buy_volumes) if len(buy_volumes) > 0 else 0
|
||||
total_sell = np.sum(sell_volumes) if len(sell_volumes) > 0 else 0
|
||||
total_volume = total_buy + total_sell
|
||||
|
||||
if total_volume > 0:
|
||||
buy_ratio = total_buy / total_volume
|
||||
volume_imbalance = buy_ratio - 0.5 # -0.5 to 0.5 range
|
||||
else:
|
||||
volume_imbalance = 0.0
|
||||
|
||||
features.append(volume_imbalance)
|
||||
|
||||
# VWAP (Volume Weighted Average Price)
|
||||
if np.sum(volumes) > 0:
|
||||
prices = np.array([tick.price for tick in ticks])
|
||||
vwap = np.sum(prices * volumes) / np.sum(volumes)
|
||||
current_price = ticks[-1].price
|
||||
vwap_deviation = (current_price - vwap) / vwap if vwap != 0 else 0
|
||||
else:
|
||||
vwap_deviation = 0.0
|
||||
|
||||
features.append(vwap_deviation)
|
||||
|
||||
return np.array(features, dtype=np.float32)
|
||||
|
||||
def _extract_microstructure_features(self, ticks: List[TickData]) -> np.ndarray:
|
||||
"""Extract market microstructure features"""
|
||||
features = []
|
||||
|
||||
# Trade frequency
|
||||
if len(ticks) >= 2:
|
||||
time_deltas = [(ticks[i].timestamp - ticks[i-1].timestamp).total_seconds()
|
||||
for i in range(1, len(ticks))]
|
||||
avg_time_delta = np.mean(time_deltas)
|
||||
trade_frequency = 1.0 / avg_time_delta if avg_time_delta > 0 else 0
|
||||
else:
|
||||
trade_frequency = 0.0
|
||||
|
||||
features.append(trade_frequency)
|
||||
|
||||
# Price impact features
|
||||
prices = [tick.price for tick in ticks]
|
||||
volumes = [tick.volume for tick in ticks]
|
||||
|
||||
if len(prices) >= 3:
|
||||
# Calculate price changes and corresponding volumes
|
||||
price_changes = [(prices[i] - prices[i-1]) / prices[i-1]
|
||||
for i in range(1, len(prices)) if prices[i-1] != 0]
|
||||
corresponding_volumes = volumes[1:len(price_changes)+1]
|
||||
|
||||
if len(price_changes) > 0 and len(corresponding_volumes) > 0:
|
||||
# Simple price impact measure
|
||||
price_impact = np.corrcoef(np.abs(price_changes), corresponding_volumes)[0, 1]
|
||||
if np.isnan(price_impact):
|
||||
price_impact = 0.0
|
||||
else:
|
||||
price_impact = 0.0
|
||||
else:
|
||||
price_impact = 0.0
|
||||
|
||||
features.append(price_impact)
|
||||
|
||||
# Bid-ask spread proxy (using price volatility)
|
||||
if len(prices) >= 5:
|
||||
recent_prices = prices[-5:]
|
||||
spread_proxy = (np.max(recent_prices) - np.min(recent_prices)) / np.mean(recent_prices)
|
||||
else:
|
||||
spread_proxy = 0.0
|
||||
|
||||
features.append(spread_proxy)
|
||||
|
||||
# Order flow imbalance (already calculated in volume features, but different perspective)
|
||||
buy_count = sum(1 for tick in ticks if tick.side == 'buy')
|
||||
sell_count = len(ticks) - buy_count
|
||||
total_trades = len(ticks)
|
||||
|
||||
if total_trades > 0:
|
||||
order_flow_imbalance = (buy_count - sell_count) / total_trades
|
||||
else:
|
||||
order_flow_imbalance = 0.0
|
||||
|
||||
features.append(order_flow_imbalance)
|
||||
|
||||
return np.array(features, dtype=np.float32)
|
||||
|
||||
def _notify_feature_subscribers(self, symbol: str, features: ProcessedTickFeatures):
|
||||
"""Notify all feature subscribers of new processed features"""
|
||||
for callback in self.feature_subscribers:
|
||||
try:
|
||||
callback(symbol, features)
|
||||
except Exception as e:
|
||||
logger.error(f"Error notifying feature subscriber {callback.__name__}: {e}")
|
||||
|
||||
def get_latest_features(self, symbol: str) -> Optional[ProcessedTickFeatures]:
|
||||
"""Get the latest processed features for a symbol"""
|
||||
with self.data_lock:
|
||||
if symbol in self.processed_features and self.processed_features[symbol]:
|
||||
return self.processed_features[symbol][-1]
|
||||
return None
|
||||
|
||||
def get_processing_stats(self) -> Dict[str, Any]:
|
||||
"""Get processing performance statistics"""
|
||||
stats = {
|
||||
'symbols': self.symbols,
|
||||
'streaming': self.streaming,
|
||||
'tick_counts': dict(self.tick_counts),
|
||||
'buffer_sizes': {symbol: len(self.tick_buffers[symbol]) for symbol in self.symbols},
|
||||
'feature_counts': {symbol: len(self.processed_features[symbol]) for symbol in self.symbols},
|
||||
'subscribers': len(self.feature_subscribers)
|
||||
}
|
||||
|
||||
if self.processing_times:
|
||||
stats['processing_performance'] = {
|
||||
'avg_time_ms': np.mean(list(self.processing_times)),
|
||||
'min_time_ms': np.min(list(self.processing_times)),
|
||||
'max_time_ms': np.max(list(self.processing_times)),
|
||||
'std_time_ms': np.std(list(self.processing_times))
|
||||
}
|
||||
|
||||
return stats
|
||||
|
||||
def train_neural_network(self, training_data: List[Tuple[np.ndarray, np.ndarray]], epochs: int = 100):
|
||||
"""Train the tick processing neural network"""
|
||||
logger.info("Training tick processing neural network...")
|
||||
|
||||
self.tick_nn.train()
|
||||
optimizer = torch.optim.Adam(self.tick_nn.parameters(), lr=0.001)
|
||||
criterion = nn.MSELoss()
|
||||
|
||||
for epoch in range(epochs):
|
||||
total_loss = 0.0
|
||||
|
||||
for batch_features, batch_targets in training_data:
|
||||
optimizer.zero_grad()
|
||||
|
||||
# Convert to tensors
|
||||
features_tensor = torch.FloatTensor(batch_features).to(self.device)
|
||||
targets_tensor = torch.FloatTensor(batch_targets).to(self.device)
|
||||
|
||||
# Forward pass
|
||||
outputs, confidence = self.tick_nn(features_tensor)
|
||||
|
||||
# Calculate loss
|
||||
loss = criterion(outputs, targets_tensor)
|
||||
|
||||
# Backward pass
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
|
||||
total_loss += loss.item()
|
||||
|
||||
if epoch % 10 == 0:
|
||||
avg_loss = total_loss / len(training_data)
|
||||
logger.info(f"Epoch {epoch}/{epochs}, Average Loss: {avg_loss:.6f}")
|
||||
|
||||
self.tick_nn.eval()
|
||||
logger.info("Neural network training completed")
|
||||
|
||||
# Integration with existing orchestrator
|
||||
def integrate_with_orchestrator(orchestrator, tick_processor: RealTimeTickProcessor):
|
||||
"""Integrate tick processor with enhanced orchestrator"""
|
||||
|
||||
def feature_callback(symbol: str, features: ProcessedTickFeatures):
|
||||
"""Callback to feed processed features to orchestrator"""
|
||||
try:
|
||||
# Convert processed features to format expected by orchestrator
|
||||
feature_dict = {
|
||||
'symbol': symbol,
|
||||
'timestamp': features.timestamp,
|
||||
'neural_features': features.neural_features,
|
||||
'price_features': features.price_features,
|
||||
'volume_features': features.volume_features,
|
||||
'microstructure_features': features.microstructure_features,
|
||||
'confidence': features.confidence
|
||||
}
|
||||
|
||||
# Feed to orchestrator's real-time feature processing
|
||||
if hasattr(orchestrator, 'process_realtime_features'):
|
||||
orchestrator.process_realtime_features(feature_dict)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error integrating features with orchestrator: {e}")
|
||||
|
||||
# Add the callback to tick processor
|
||||
tick_processor.add_feature_subscriber(feature_callback)
|
||||
logger.info("Tick processor integrated with orchestrator")
|
||||
|
||||
# Factory function for easy creation
|
||||
def create_realtime_tick_processor(symbols: List[str] = None) -> RealTimeTickProcessor:
|
||||
"""Create and configure a real-time tick processor"""
|
||||
if symbols is None:
|
||||
symbols = ['ETH/USDT', 'BTC/USDT']
|
||||
|
||||
processor = RealTimeTickProcessor(symbols=symbols)
|
||||
logger.info(f"Created RealTimeTickProcessor for symbols: {symbols}")
|
||||
|
||||
return processor
|
411  core/universal_data_adapter.py  Normal file
@ -0,0 +1,411 @@
"""
Universal Data Adapter for Trading Models

This adapter ensures all models receive data in our universal format:
- ETH/USDT: ticks (1s), 1m, 1h, 1d
- BTC/USDT: ticks (1s) as reference

This is the standard input format that all models must respect.
"""

import logging
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass

from .config import get_config
from .data_provider import DataProvider

logger = logging.getLogger(__name__)

@dataclass
class UniversalDataStream:
    """Universal data stream containing the 5 required timeseries"""
    eth_ticks: np.ndarray     # ETH/USDT 1s/tick data [timestamp, open, high, low, close, volume]
    eth_1m: np.ndarray        # ETH/USDT 1m data
    eth_1h: np.ndarray        # ETH/USDT 1h data
    eth_1d: np.ndarray        # ETH/USDT 1d data
    btc_ticks: np.ndarray     # BTC/USDT 1s/tick reference data
    timestamp: datetime       # Current timestamp
    metadata: Dict[str, Any]  # Additional metadata
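For tests it can help to build a stream by hand. A sketch with made-up values, following the six-column [timestamp, open, high, low, close, volume] layout used throughout this file:

```python
import numpy as np
from datetime import datetime

def make_synthetic_stream(n: int = 60) -> UniversalDataStream:
    """Dummy stream with [ts, O, H, L, C, V] arrays for quick tests."""
    now = datetime.now().timestamp()

    def candles(rows: int) -> np.ndarray:
        ts = now - np.arange(rows)[::-1]                  # ascending timestamps, ending at "now"
        ohlcv = 100.0 + np.abs(np.random.randn(rows, 5))  # fake positive prices and volumes
        return np.column_stack([ts, ohlcv])

    return UniversalDataStream(
        eth_ticks=candles(n), eth_1m=candles(n), eth_1h=candles(24),
        eth_1d=candles(30), btc_ticks=candles(n),
        timestamp=datetime.now(), metadata={},
    )
```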
class UniversalDataAdapter:
    """
    Adapter that converts any data source into our universal 5-timeseries format
    """

    def __init__(self, data_provider: DataProvider = None):
        """Initialize the universal data adapter"""
        self.config = get_config()
        self.data_provider = data_provider or DataProvider()

        # Universal format configuration
        self.required_symbols = ['ETH/USDT', 'BTC/USDT']
        self.required_timeframes = {
            'ETH/USDT': ['1s', '1m', '1h', '1d'],  # Primary trading pair
            'BTC/USDT': ['1s']                     # Reference pair
        }

        # Data window sizes for each timeframe
        self.window_sizes = {
            '1s': 60,  # Last 60 seconds of tick data
            '1m': 60,  # Last 60 minutes
            '1h': 24,  # Last 24 hours
            '1d': 30   # Last 30 days
        }

        # Feature columns (OHLCV)
        self.feature_columns = ['open', 'high', 'low', 'close', 'volume']

        logger.info("Universal Data Adapter initialized")
        logger.info(f"Required symbols: {self.required_symbols}")
        logger.info(f"Required timeframes: {self.required_timeframes}")

    def get_universal_data_stream(self, current_time: datetime = None) -> Optional[UniversalDataStream]:
        """
        Get data in universal format for all models

        Returns:
            UniversalDataStream with the 5 required timeseries
        """
        try:
            current_time = current_time or datetime.now()

            # Get ETH/USDT data for all required timeframes
            eth_data = {}
            for timeframe in self.required_timeframes['ETH/USDT']:
                data = self._get_timeframe_data('ETH/USDT', timeframe)
                if data is not None:
                    eth_data[timeframe] = data
                else:
                    logger.warning(f"Failed to get ETH/USDT {timeframe} data")
                    return None

            # Get BTC/USDT reference data
            btc_data = self._get_timeframe_data('BTC/USDT', '1s')
            if btc_data is None:
                logger.warning("Failed to get BTC/USDT reference data")
                return None

            # Create universal data stream
            stream = UniversalDataStream(
                eth_ticks=eth_data['1s'],
                eth_1m=eth_data['1m'],
                eth_1h=eth_data['1h'],
                eth_1d=eth_data['1d'],
                btc_ticks=btc_data,
                timestamp=current_time,
                metadata={
                    'data_quality': self._assess_data_quality(eth_data, btc_data),
                    'market_hours': self._is_market_hours(current_time),
                    'data_freshness': self._calculate_data_freshness(eth_data, btc_data, current_time)
                }
            )

            logger.debug(f"Universal data stream created with {len(stream.eth_ticks)} ETH ticks, "
                         f"{len(stream.eth_1m)} ETH 1m candles, {len(stream.btc_ticks)} BTC ticks")

            return stream

        except Exception as e:
            logger.error(f"Error creating universal data stream: {e}")
            return None

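Typical call site for the method above (a sketch; the DataProvider must be able to serve all five streams for this to return anything):

```python
adapter = UniversalDataAdapter()
stream = adapter.get_universal_data_stream()
if stream is not None:
    quality = stream.metadata['data_quality']
    print(f"quality={quality['overall_score']:.2f}, issues={quality['issues']}")
    print(f"ETH ticks: {stream.eth_ticks.shape}, BTC ticks: {stream.btc_ticks.shape}")
else:
    print("Universal stream unavailable - data provider returned no data")
```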
    def _get_timeframe_data(self, symbol: str, timeframe: str) -> Optional[np.ndarray]:
        """Get data for a specific symbol and timeframe"""
        try:
            window_size = self.window_sizes.get(timeframe, 60)

            # Get historical data from data provider
            df = self.data_provider.get_historical_data(
                symbol=symbol,
                timeframe=timeframe,
                limit=window_size
            )

            if df is None or df.empty:
                logger.warning(f"No data returned for {symbol} {timeframe}")
                return None

            # Ensure we have the required columns
            missing_cols = [col for col in self.feature_columns if col not in df.columns]
            if missing_cols:
                logger.warning(f"Missing columns for {symbol} {timeframe}: {missing_cols}")
                return None

            # Convert to numpy array with timestamp
            data_array = df[self.feature_columns].values.astype(np.float32)

            # Add timestamp column if available
            if 'timestamp' in df.columns:
                timestamps = pd.to_datetime(df['timestamp']).astype(np.int64) // 10**9  # Unix timestamps
                data_with_time = np.column_stack([timestamps, data_array])
            else:
                # Generate timestamps if not available
                end_time = datetime.now()
                if timeframe == '1s':
                    timestamps = [(end_time - timedelta(seconds=i)).timestamp() for i in range(len(data_array)-1, -1, -1)]
                elif timeframe == '1m':
                    timestamps = [(end_time - timedelta(minutes=i)).timestamp() for i in range(len(data_array)-1, -1, -1)]
                elif timeframe == '1h':
                    timestamps = [(end_time - timedelta(hours=i)).timestamp() for i in range(len(data_array)-1, -1, -1)]
                elif timeframe == '1d':
                    timestamps = [(end_time - timedelta(days=i)).timestamp() for i in range(len(data_array)-1, -1, -1)]
                else:
                    timestamps = [end_time.timestamp()] * len(data_array)

                data_with_time = np.column_stack([timestamps, data_array])

            return data_with_time

        except Exception as e:
            logger.error(f"Error getting {symbol} {timeframe} data: {e}")
            return None

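The arrays returned by this helper carry one leading timestamp column followed by OHLCV. A quick slicing sketch (calling the private helper directly, for illustration only):

```python
data = adapter._get_timeframe_data('ETH/USDT', '1m')
if data is not None:
    timestamps = data[:, 0]  # Unix seconds
    closes = data[:, 4]      # 0=ts, 1=open, 2=high, 3=low, 4=close, 5=volume
    volumes = data[:, 5]
```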
    def _assess_data_quality(self, eth_data: Dict[str, np.ndarray], btc_data: np.ndarray) -> Dict[str, Any]:
        """Assess the quality of the data streams"""
        quality = {
            'overall_score': 1.0,
            'issues': []
        }

        try:
            # Check ETH data completeness
            for timeframe, data in eth_data.items():
                expected_size = self.window_sizes.get(timeframe, 60)
                actual_size = len(data) if data is not None else 0

                if actual_size < expected_size * 0.8:  # Less than 80% of expected data
                    quality['issues'].append(f"ETH {timeframe} data incomplete: {actual_size}/{expected_size}")
                    quality['overall_score'] *= 0.9

            # Check BTC reference data
            btc_expected = self.window_sizes.get('1s', 60)
            btc_actual = len(btc_data) if btc_data is not None else 0

            if btc_actual < btc_expected * 0.8:
                quality['issues'].append(f"BTC reference data incomplete: {btc_actual}/{btc_expected}")
                quality['overall_score'] *= 0.9

            # Check for data gaps or anomalies
            for timeframe, data in eth_data.items():
                if data is not None and len(data) > 1:
                    # Check for price anomalies (sudden jumps > 10%)
                    prices = data[:, 4]  # Close prices
                    price_changes = np.abs(np.diff(prices) / prices[:-1])
                    if np.any(price_changes > 0.1):
                        quality['issues'].append(f"ETH {timeframe} has price anomalies")
                        quality['overall_score'] *= 0.95

        except Exception as e:
            logger.error(f"Error assessing data quality: {e}")
            quality['issues'].append(f"Quality assessment error: {e}")
            quality['overall_score'] *= 0.8

        return quality

    def _is_market_hours(self, timestamp: datetime) -> bool:
        """Check if it's market hours (crypto markets are 24/7)"""
        return True  # Crypto markets are always open

    def _calculate_data_freshness(self, eth_data: Dict[str, np.ndarray], btc_data: np.ndarray,
                                  current_time: datetime) -> Dict[str, float]:
        """Calculate how fresh the data is"""
        freshness = {}

        try:
            current_timestamp = current_time.timestamp()

            # Check ETH data freshness
            for timeframe, data in eth_data.items():
                if data is not None and len(data) > 0:
                    latest_timestamp = data[-1, 0]  # First column is timestamp
                    age_seconds = current_timestamp - latest_timestamp

                    # Convert to appropriate units
                    if timeframe == '1s':
                        freshness[f'eth_{timeframe}'] = age_seconds          # Seconds
                    elif timeframe == '1m':
                        freshness[f'eth_{timeframe}'] = age_seconds / 60     # Minutes
                    elif timeframe == '1h':
                        freshness[f'eth_{timeframe}'] = age_seconds / 3600   # Hours
                    elif timeframe == '1d':
                        freshness[f'eth_{timeframe}'] = age_seconds / 86400  # Days
                else:
                    freshness[f'eth_{timeframe}'] = float('inf')

            # Check BTC data freshness
            if btc_data is not None and len(btc_data) > 0:
                btc_latest = btc_data[-1, 0]
                btc_age = current_timestamp - btc_latest
                freshness['btc_1s'] = btc_age  # Seconds
            else:
                freshness['btc_1s'] = float('inf')

        except Exception as e:
            logger.error(f"Error calculating data freshness: {e}")
            freshness['error'] = str(e)

        return freshness

    def format_for_model(self, stream: UniversalDataStream, model_type: str = 'cnn') -> Dict[str, np.ndarray]:
        """
        Format universal data stream for specific model types

        Args:
            stream: Universal data stream
            model_type: Type of model ('cnn', 'rl', 'transformer', etc.)

        Returns:
            Dictionary with formatted data for the model
        """
        try:
            if model_type.lower() == 'cnn':
                return self._format_for_cnn(stream)
            elif model_type.lower() == 'rl':
                return self._format_for_rl(stream)
            elif model_type.lower() == 'transformer':
                return self._format_for_transformer(stream)
            else:
                # Default format - return raw arrays
                return {
                    'eth_ticks': stream.eth_ticks,
                    'eth_1m': stream.eth_1m,
                    'eth_1h': stream.eth_1h,
                    'eth_1d': stream.eth_1d,
                    'btc_ticks': stream.btc_ticks,
                    'metadata': stream.metadata
                }

        except Exception as e:
            logger.error(f"Error formatting data for {model_type}: {e}")
            return {}

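Usage sketch for the dispatcher above; the shape comments assume full windows with a leading timestamp column:

```python
cnn_inputs = adapter.format_for_model(stream, model_type='cnn')  # OHLCV-only sequences
rl_inputs = adapter.format_for_model(stream, model_type='rl')    # flattened state vector
print(cnn_inputs['eth_1m'].shape)       # e.g. (60, 5): 60 minutes of OHLCV
print(rl_inputs['state_vector'].shape)  # e.g. (27,): see the RL formatter below
```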
    def _format_for_cnn(self, stream: UniversalDataStream) -> Dict[str, np.ndarray]:
        """Format data for CNN models"""
        # CNN expects [batch, sequence, features] format
        formatted = {}

        # Remove timestamp column and keep only OHLCV
        formatted['eth_ticks'] = stream.eth_ticks[:, 1:] if stream.eth_ticks.shape[1] > 5 else stream.eth_ticks
        formatted['eth_1m'] = stream.eth_1m[:, 1:] if stream.eth_1m.shape[1] > 5 else stream.eth_1m
        formatted['eth_1h'] = stream.eth_1h[:, 1:] if stream.eth_1h.shape[1] > 5 else stream.eth_1h
        formatted['eth_1d'] = stream.eth_1d[:, 1:] if stream.eth_1d.shape[1] > 5 else stream.eth_1d
        formatted['btc_ticks'] = stream.btc_ticks[:, 1:] if stream.btc_ticks.shape[1] > 5 else stream.btc_ticks

        return formatted

    def _format_for_rl(self, stream: UniversalDataStream) -> Dict[str, np.ndarray]:
        """Format data for RL models"""
        # RL typically expects flattened state vector
        state_components = []

        # Add latest values from each timeframe
        if len(stream.eth_ticks) > 0:
            state_components.extend(stream.eth_ticks[-1, 1:])  # Latest ETH tick (OHLCV)

        if len(stream.eth_1m) > 0:
            state_components.extend(stream.eth_1m[-1, 1:])     # Latest ETH 1m (OHLCV)

        if len(stream.eth_1h) > 0:
            state_components.extend(stream.eth_1h[-1, 1:])     # Latest ETH 1h (OHLCV)

        if len(stream.eth_1d) > 0:
            state_components.extend(stream.eth_1d[-1, 1:])     # Latest ETH 1d (OHLCV)

        if len(stream.btc_ticks) > 0:
            state_components.extend(stream.btc_ticks[-1, 1:])  # Latest BTC tick (OHLCV)

        # Add some derived features
        if len(stream.eth_ticks) > 1:
            # Price momentum
            eth_momentum = (stream.eth_ticks[-1, 4] - stream.eth_ticks[-2, 4]) / stream.eth_ticks[-2, 4]
            state_components.append(eth_momentum)

        if len(stream.btc_ticks) > 1:
            # BTC momentum for correlation
            btc_momentum = (stream.btc_ticks[-1, 4] - stream.btc_ticks[-2, 4]) / stream.btc_ticks[-2, 4]
            state_components.append(btc_momentum)

        return {'state_vector': np.array(state_components, dtype=np.float32)}

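With all five six-column streams populated (and at least two rows of ETH and BTC ticks), the vector above holds 5 x 5 OHLCV values plus the two momentum terms, i.e. 27 elements. A minimal check, as a sketch:

```python
rl_state = adapter._format_for_rl(stream)['state_vector']
assert rl_state.shape == (27,)  # 25 OHLCV values + ETH momentum + BTC momentum
```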
    def _format_for_transformer(self, stream: UniversalDataStream) -> Dict[str, np.ndarray]:
        """Format data for Transformer models"""
        # Transformers expect sequence data with attention
        formatted = {}

        # Keep timestamp for positional encoding
        formatted['eth_ticks'] = stream.eth_ticks
        formatted['eth_1m'] = stream.eth_1m
        formatted['eth_1h'] = stream.eth_1h
        formatted['eth_1d'] = stream.eth_1d
        formatted['btc_ticks'] = stream.btc_ticks

        # Add sequence length information
        formatted['sequence_lengths'] = {
            'eth_ticks': len(stream.eth_ticks),
            'eth_1m': len(stream.eth_1m),
            'eth_1h': len(stream.eth_1h),
            'eth_1d': len(stream.eth_1d),
            'btc_ticks': len(stream.btc_ticks)
        }

        return formatted

    def validate_universal_format(self, stream: UniversalDataStream) -> Tuple[bool, List[str]]:
        """
        Validate that the data stream conforms to our universal format

        Returns:
            (is_valid, list_of_issues)
        """
        issues = []

        try:
            # Check that all required arrays are present and not None
            required_arrays = ['eth_ticks', 'eth_1m', 'eth_1h', 'eth_1d', 'btc_ticks']
            for array_name in required_arrays:
                array = getattr(stream, array_name)
                if array is None:
                    issues.append(f"{array_name} is None")
                elif len(array) == 0:
                    issues.append(f"{array_name} is empty")
                elif array.shape[1] < 5:  # Should have at least OHLCV
                    issues.append(f"{array_name} has insufficient columns: {array.shape[1]} < 5")

            # Check timestamp
            if stream.timestamp is None:
                issues.append("timestamp is None")

            # Check data consistency (more tolerant for cached data)
            if stream.eth_ticks is not None and len(stream.eth_ticks) > 0:
                if stream.btc_ticks is not None and len(stream.btc_ticks) > 0:
                    # Check if timestamps are roughly aligned (more tolerant for cached data)
                    eth_latest = stream.eth_ticks[-1, 0] if stream.eth_ticks.shape[1] > 5 else 0
                    btc_latest = stream.btc_ticks[-1, 0] if stream.btc_ticks.shape[1] > 5 else 0

                    # Be more tolerant - allow up to 1 hour difference for cached data
                    max_time_diff = 3600  # 1 hour instead of 5 minutes
                    time_diff = abs(eth_latest - btc_latest)

                    if time_diff > max_time_diff:
                        # This is a warning, not a failure, for cached data
                        issues.append(f"ETH and BTC timestamps far apart: {time_diff} seconds (using cached data)")
                        logger.warning(f"Timestamp difference detected: {time_diff} seconds - this is normal for cached data")

            # Check data quality from metadata
            if 'data_quality' in stream.metadata:
                quality_score = stream.metadata['data_quality'].get('overall_score', 0)
                if quality_score < 0.5:  # Very low quality
                    issues.append(f"Data quality too low: {quality_score:.2f}")

        except Exception as e:
            issues.append(f"Validation error: {e}")

        # For cached data, we're more lenient - only fail on critical issues
        critical_issues = [issue for issue in issues if not ('timestamps far apart' in issue and 'cached data' in issue)]
        is_valid = len(critical_issues) == 0

        return is_valid, issues
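A validate-then-format gate before handing data to models might look like this (sketch):

```python
is_valid, issues = adapter.validate_universal_format(stream)
if is_valid:
    model_inputs = adapter.format_for_model(stream, model_type='cnn')
else:
    logger.warning(f"Universal format validation failed: {issues}")
```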