scalping dash also works initially

Dobromir Popov
2025-05-26 16:02:40 +03:00
parent 39942386b1
commit c97177aa88
39 changed files with 7272 additions and 1076 deletions

View File

@@ -7,6 +7,7 @@ This enhanced orchestrator implements:
3. Multi-symbol (ETH, BTC) coordinated decision making
4. Perfect move marking for CNN backpropagation training
5. Market environment adaptation through RL evaluation
6. Universal data format compliance (5 timeseries streams)
"""
import asyncio
@@ -22,6 +23,8 @@ import torch
from .config import get_config
from .data_provider import DataProvider
from .universal_data_adapter import UniversalDataAdapter, UniversalDataStream
from .realtime_tick_processor import RealTimeTickProcessor, ProcessedTickFeatures, integrate_with_orchestrator
from models import get_model_registry, ModelInterface, CNNModelInterface, RLAgentInterface
logger = logging.getLogger(__name__)
@@ -70,6 +73,7 @@ class MarketState:
volume: float
trend_strength: float
market_regime: str # 'trending', 'ranging', 'volatile'
universal_data: UniversalDataStream # Universal format data
@dataclass
class PerfectMove:
@@ -86,6 +90,7 @@ class PerfectMove:
class EnhancedTradingOrchestrator:
"""
Enhanced orchestrator with sophisticated multi-modal decision making
and universal data format compliance
"""
def __init__(self, data_provider: DataProvider = None):
@@ -94,6 +99,15 @@ class EnhancedTradingOrchestrator:
self.data_provider = data_provider or DataProvider()
self.model_registry = get_model_registry()
# Initialize universal data adapter
self.universal_adapter = UniversalDataAdapter(self.data_provider)
# Initialize real-time tick processor for ultra-low latency processing
self.tick_processor = RealTimeTickProcessor(symbols=self.config.symbols)
# Real-time tick features storage
self.realtime_tick_features = {symbol: deque(maxlen=100) for symbol in self.config.symbols}
# Multi-symbol configuration
self.symbols = self.config.symbols
self.timeframes = self.config.timeframes
@@ -123,22 +137,28 @@ class EnhancedTradingOrchestrator:
self.decision_callbacks = []
self.learning_callbacks = []
logger.info("Enhanced TradingOrchestrator initialized")
# Integrate tick processor with orchestrator
integrate_with_orchestrator(self, self.tick_processor)
logger.info("Enhanced TradingOrchestrator initialized with Universal Data Format")
logger.info(f"Symbols: {self.symbols}")
logger.info(f"Timeframes: {self.timeframes}")
logger.info(f"Universal format: ETH ticks, 1m, 1h, 1d + BTC reference ticks")
logger.info(f"Enhanced confidence threshold: {self.confidence_threshold}")
logger.info("Real-time tick processor integrated for ultra-low latency processing")
def _initialize_timeframe_weights(self) -> Dict[str, float]:
"""Initialize weights for different timeframes"""
# Higher timeframes get more weight for trend direction
# Lower timeframes get more weight for entry/exit timing
base_weights = {
'1m': 0.05, # Noise filtering
'1s': 0.60, # Primary scalping signal (ticks)
'1m': 0.20, # Short-term confirmation
'5m': 0.10, # Short-term momentum
'15m': 0.15, # Entry/exit timing
'1h': 0.25, # Medium-term trend
'1h': 0.15, # Medium-term trend
'4h': 0.25, # Stronger trend confirmation
'1d': 0.20 # Long-term direction
'1d': 0.05 # Long-term direction (minimal for scalping)
}
# Normalize weights for configured timeframes
@@ -163,19 +183,42 @@ class EnhancedTradingOrchestrator:
async def make_coordinated_decisions(self) -> Dict[str, Optional[TradingAction]]:
"""
Make coordinated trading decisions across all symbols
Make coordinated trading decisions across all symbols using universal data format
"""
decisions = {}
try:
# Get market states for all symbols
market_states = await self._get_all_market_states()
# Get universal data stream (5 timeseries)
universal_stream = self.universal_adapter.get_universal_data_stream()
if universal_stream is None:
logger.warning("Failed to get universal data stream")
return decisions
# Validate universal format
is_valid, issues = self.universal_adapter.validate_universal_format(universal_stream)
if not is_valid:
logger.warning(f"Universal data format validation failed: {issues}")
return decisions
logger.info("UNIVERSAL DATA STREAM ACTIVE:")
logger.info(f" ETH ticks: {len(universal_stream.eth_ticks)} samples")
logger.info(f" ETH 1m: {len(universal_stream.eth_1m)} candles")
logger.info(f" ETH 1h: {len(universal_stream.eth_1h)} candles")
logger.info(f" ETH 1d: {len(universal_stream.eth_1d)} candles")
logger.info(f" BTC reference: {len(universal_stream.btc_ticks)} samples")
logger.info(f" Data quality: {universal_stream.metadata['data_quality']['overall_score']:.2f}")
# Get market states for all symbols using universal data
market_states = await self._get_all_market_states_universal(universal_stream)
# Get enhanced predictions for all symbols
symbol_predictions = {}
for symbol in self.symbols:
if symbol in market_states:
predictions = await self._get_enhanced_predictions(symbol, market_states[symbol])
predictions = await self._get_enhanced_predictions_universal(
symbol, market_states[symbol], universal_stream
)
symbol_predictions[symbol] = predictions
# Coordinate decisions considering symbol correlations
@@ -198,76 +241,125 @@ class EnhancedTradingOrchestrator:
return decisions
async def _get_all_market_states(self) -> Dict[str, MarketState]:
"""Get current market state for all symbols"""
async def _get_all_market_states_universal(self, universal_stream: UniversalDataStream) -> Dict[str, MarketState]:
"""Get current market state for all symbols using universal data format"""
market_states = {}
for symbol in self.symbols:
try:
# Get current market data for all timeframes
prices = {}
features = {}
try:
# Create market state for ETH/USDT (primary trading pair)
if 'ETH/USDT' in self.symbols:
eth_prices = {}
eth_features = {}
for timeframe in self.timeframes:
# Get current price
current_price = self.data_provider.get_current_price(symbol)
if current_price:
prices[timeframe] = current_price
# Get feature matrix for this timeframe
feature_matrix = self.data_provider.get_feature_matrix(
symbol=symbol,
timeframes=[timeframe],
window_size=20 # Standard window
)
if feature_matrix is not None:
features[timeframe] = feature_matrix
# Extract prices from universal stream
if len(universal_stream.eth_ticks) > 0:
eth_prices['1s'] = float(universal_stream.eth_ticks[-1, 4]) # Close price from ticks
if len(universal_stream.eth_1m) > 0:
eth_prices['1m'] = float(universal_stream.eth_1m[-1, 4]) # Close price from 1m
if len(universal_stream.eth_1h) > 0:
eth_prices['1h'] = float(universal_stream.eth_1h[-1, 4]) # Close price from 1h
if len(universal_stream.eth_1d) > 0:
eth_prices['1d'] = float(universal_stream.eth_1d[-1, 4]) # Close price from 1d
if prices and features:
# Calculate market metrics
volatility = self._calculate_volatility(symbol)
volume = self._get_current_volume(symbol)
trend_strength = self._calculate_trend_strength(symbol)
market_regime = self._determine_market_regime(symbol)
market_state = MarketState(
symbol=symbol,
timestamp=datetime.now(),
prices=prices,
features=features,
volatility=volatility,
volume=volume,
trend_strength=trend_strength,
market_regime=market_regime
)
market_states[symbol] = market_state
# Store for historical tracking
self.market_states[symbol].append(market_state)
except Exception as e:
logger.error(f"Error getting market state for {symbol}: {e}")
# Extract features from universal stream (OHLCV data)
eth_features['1s'] = universal_stream.eth_ticks[:, 1:] if universal_stream.eth_ticks.shape[1] > 5 else universal_stream.eth_ticks
eth_features['1m'] = universal_stream.eth_1m[:, 1:] if universal_stream.eth_1m.shape[1] > 5 else universal_stream.eth_1m
eth_features['1h'] = universal_stream.eth_1h[:, 1:] if universal_stream.eth_1h.shape[1] > 5 else universal_stream.eth_1h
eth_features['1d'] = universal_stream.eth_1d[:, 1:] if universal_stream.eth_1d.shape[1] > 5 else universal_stream.eth_1d
# Calculate market metrics
volatility = self._calculate_volatility_from_universal('ETH/USDT', universal_stream)
volume = self._get_current_volume_from_universal('ETH/USDT', universal_stream)
trend_strength = self._calculate_trend_strength_from_universal('ETH/USDT', universal_stream)
market_regime = self._determine_market_regime_from_universal('ETH/USDT', universal_stream)
eth_market_state = MarketState(
symbol='ETH/USDT',
timestamp=universal_stream.timestamp,
prices=eth_prices,
features=eth_features,
volatility=volatility,
volume=volume,
trend_strength=trend_strength,
market_regime=market_regime,
universal_data=universal_stream
)
market_states['ETH/USDT'] = eth_market_state
self.market_states['ETH/USDT'].append(eth_market_state)
# Create market state for BTC/USDT (reference pair)
if 'BTC/USDT' in self.symbols:
btc_prices = {}
btc_features = {}
# Extract BTC reference data
if len(universal_stream.btc_ticks) > 0:
btc_prices['1s'] = float(universal_stream.btc_ticks[-1, 4]) # Close price from BTC ticks
btc_features['1s'] = universal_stream.btc_ticks[:, 1:] if universal_stream.btc_ticks.shape[1] > 5 else universal_stream.btc_ticks
# Calculate BTC metrics
btc_volatility = self._calculate_volatility_from_universal('BTC/USDT', universal_stream)
btc_volume = self._get_current_volume_from_universal('BTC/USDT', universal_stream)
btc_trend_strength = self._calculate_trend_strength_from_universal('BTC/USDT', universal_stream)
btc_market_regime = self._determine_market_regime_from_universal('BTC/USDT', universal_stream)
btc_market_state = MarketState(
symbol='BTC/USDT',
timestamp=universal_stream.timestamp,
prices=btc_prices,
features=btc_features,
volatility=btc_volatility,
volume=btc_volume,
trend_strength=btc_trend_strength,
market_regime=btc_market_regime,
universal_data=universal_stream
)
market_states['BTC/USDT'] = btc_market_state
self.market_states['BTC/USDT'].append(btc_market_state)
except Exception as e:
logger.error(f"Error creating market states from universal data: {e}")
return market_states
async def _get_enhanced_predictions(self, symbol: str, market_state: MarketState) -> List[EnhancedPrediction]:
"""Get enhanced predictions with timeframe breakdown"""
async def _get_enhanced_predictions_universal(self, symbol: str, market_state: MarketState,
universal_stream: UniversalDataStream) -> List[EnhancedPrediction]:
"""Get enhanced predictions using universal data format"""
predictions = []
for model_name, model in self.model_registry.models.items():
try:
if isinstance(model, CNNModelInterface):
# Get CNN predictions for each timeframe
# Format universal data for CNN model
cnn_data = self.universal_adapter.format_for_model(universal_stream, 'cnn')
# Get CNN predictions for each timeframe using universal data
timeframe_predictions = []
for timeframe in self.timeframes:
if timeframe in market_state.features:
feature_matrix = market_state.features[timeframe]
# Get timeframe-specific prediction
action_probs, confidence = await self._get_timeframe_prediction(
model, feature_matrix, timeframe, market_state
# ETH timeframes (primary trading pair)
if symbol == 'ETH/USDT':
timeframe_data_map = {
'1s': cnn_data.get('eth_ticks'),
'1m': cnn_data.get('eth_1m'),
'1h': cnn_data.get('eth_1h'),
'1d': cnn_data.get('eth_1d')
}
# BTC reference
elif symbol == 'BTC/USDT':
timeframe_data_map = {
'1s': cnn_data.get('btc_ticks')
}
else:
continue
for timeframe, feature_matrix in timeframe_data_map.items():
if feature_matrix is not None and len(feature_matrix) > 0:
# Get timeframe-specific prediction using universal data
action_probs, confidence = await self._get_timeframe_prediction_universal(
model, feature_matrix, timeframe, market_state, universal_stream
)
if action_probs is not None:
@@ -285,7 +377,8 @@ class EnhancedTradingOrchestrator:
market_features={
'volatility': market_state.volatility,
'volume': market_state.volume,
'trend_strength': market_state.trend_strength
'trend_strength': market_state.trend_strength,
'data_quality': universal_stream.metadata['data_quality']['overall_score']
}
)
timeframe_predictions.append(tf_prediction)
@@ -305,7 +398,9 @@ class EnhancedTradingOrchestrator:
timestamp=datetime.now(),
metadata={
'market_regime': market_state.market_regime,
'symbol_correlation': self._get_symbol_correlation(symbol)
'symbol_correlation': self._get_symbol_correlation(symbol),
'universal_data_quality': universal_stream.metadata['data_quality'],
'data_freshness': universal_stream.metadata['data_freshness']
}
)
predictions.append(enhanced_pred)
@@ -315,9 +410,10 @@ class EnhancedTradingOrchestrator:
return predictions
async def _get_timeframe_prediction(self, model: CNNModelInterface, feature_matrix: np.ndarray,
timeframe: str, market_state: MarketState) -> Tuple[Optional[np.ndarray], float]:
"""Get prediction for specific timeframe with enhanced context"""
async def _get_timeframe_prediction_universal(self, model: CNNModelInterface, feature_matrix: np.ndarray,
timeframe: str, market_state: MarketState,
universal_stream: UniversalDataStream) -> Tuple[Optional[np.ndarray], float]:
"""Get prediction for specific timeframe using universal data format"""
try:
# Check if model supports timeframe-specific prediction
if hasattr(model, 'predict_timeframe'):
@@ -326,9 +422,9 @@ class EnhancedTradingOrchestrator:
action_probs, confidence = model.predict(feature_matrix)
if action_probs is not None and confidence is not None:
# Enhance confidence based on market conditions
enhanced_confidence = self._enhance_confidence_with_context(
confidence, timeframe, market_state
# Enhance confidence based on universal data quality and market conditions
enhanced_confidence = self._enhance_confidence_with_universal_context(
confidence, timeframe, market_state, universal_stream
)
return action_probs, enhanced_confidence
@@ -337,20 +433,39 @@ class EnhancedTradingOrchestrator:
return None, 0.0
def _enhance_confidence_with_context(self, base_confidence: float, timeframe: str,
market_state: MarketState) -> float:
"""Enhance confidence score based on market context"""
def _enhance_confidence_with_universal_context(self, base_confidence: float, timeframe: str,
market_state: MarketState,
universal_stream: UniversalDataStream) -> float:
"""Enhance confidence score based on universal data context"""
enhanced = base_confidence
# Adjust based on data quality from universal stream
data_quality = universal_stream.metadata['data_quality']['overall_score']
enhanced *= data_quality
# Adjust based on data freshness
freshness = universal_stream.metadata.get('data_freshness', {})
if timeframe in ['1s', '1m']:
# For short timeframes, penalize stale data more heavily
eth_freshness = freshness.get(f'eth_{timeframe}', 0)
if eth_freshness > 60: # Stale data (freshness units: seconds for the 1s stream, minutes for the 1m stream)
enhanced *= 0.8
# Adjust based on market regime
if market_state.market_regime == 'trending':
enhanced *= 1.1 # More confident in trending markets
elif market_state.market_regime == 'volatile':
enhanced *= 0.8 # Less confident in volatile markets
# Adjust based on timeframe reliability
# Adjust based on timeframe reliability for scalping
timeframe_reliability = {
'1m': 0.7, '5m': 0.8, '15m': 0.9, '1h': 1.0, '4h': 1.1, '1d': 1.2
'1s': 1.0, # Primary scalping timeframe
'1m': 0.9, # Short-term confirmation
'5m': 0.8, # Short-term momentum
'15m': 0.9, # Entry/exit timing
'1h': 0.8, # Medium-term trend
'4h': 0.7, # Longer-term (less relevant for scalping)
'1d': 0.6 # Long-term direction (minimal for scalping)
}
enhanced *= timeframe_reliability.get(timeframe, 1.0)
@@ -360,6 +475,18 @@ class EnhancedTradingOrchestrator:
elif market_state.volume < 0.5: # Low volume
enhanced *= 0.9
# Adjust based on correlation with BTC (for ETH trades)
if market_state.symbol == 'ETH/USDT' and len(universal_stream.btc_ticks) > 1:
# Check ETH-BTC correlation strength
eth_momentum = (universal_stream.eth_ticks[-1, 4] - universal_stream.eth_ticks[-2, 4]) / universal_stream.eth_ticks[-2, 4]
btc_momentum = (universal_stream.btc_ticks[-1, 4] - universal_stream.btc_ticks[-2, 4]) / universal_stream.btc_ticks[-2, 4]
# If ETH and BTC are moving in same direction, increase confidence
if (eth_momentum > 0 and btc_momentum > 0) or (eth_momentum < 0 and btc_momentum < 0):
enhanced *= 1.05
else:
enhanced *= 0.95
return min(enhanced, 1.0) # Cap at 1.0
def _combine_timeframe_predictions(self, timeframe_predictions: List[TimeframePrediction],
@@ -524,7 +651,7 @@ class EnhancedTradingOrchestrator:
initial_state = evaluation_item['market_state_before']
# Get current market state for comparison
current_market_states = await self._get_all_market_states()
current_market_states = await self._get_all_market_states_universal(self.universal_adapter.get_universal_data_stream())
current_state = current_market_states.get(action.symbol)
if current_state:
@@ -625,38 +752,165 @@ class EnhancedTradingOrchestrator:
except Exception as e:
logger.error(f"Error marking perfect move: {e}")
def get_recent_perfect_moves(self, limit: int = 10) -> List[PerfectMove]:
"""Get recent perfect moves for display/monitoring"""
return list(self.perfect_moves)[-limit:]
async def queue_action_for_evaluation(self, action: TradingAction):
"""Queue a trading action for future RL evaluation"""
try:
# Get current market state
market_states = await self._get_all_market_states_universal(self.universal_adapter.get_universal_data_stream())
if action.symbol in market_states:
evaluation_item = {
'action': action,
'market_state_before': market_states[action.symbol],
'timestamp': datetime.now()
}
self.rl_evaluation_queue.append(evaluation_item)
logger.debug(f"Queued action for RL evaluation: {action.action} {action.symbol}")
except Exception as e:
logger.error(f"Error queuing action for evaluation: {e}")
def get_perfect_moves_for_training(self, symbol: str = None, timeframe: str = None,
limit: int = 1000) -> List[PerfectMove]:
"""Get perfect moves for CNN training"""
moves = list(self.perfect_moves)
# Filter by symbol if specified
if symbol:
moves = [m for m in moves if m.symbol == symbol]
moves = [move for move in moves if move.symbol == symbol]
# Filter by timeframe if specified
if timeframe:
moves = [m for m in moves if m.timeframe == timeframe]
moves = [move for move in moves if move.timeframe == timeframe]
return moves[-limit:] if limit else moves
return moves[-limit:] # Return most recent moves
# Helper methods for market analysis
# Helper methods for market analysis using universal data
def _calculate_volatility_from_universal(self, symbol: str, universal_stream: UniversalDataStream) -> float:
"""Calculate current volatility for symbol using universal data"""
try:
if symbol == 'ETH/USDT' and len(universal_stream.eth_ticks) > 10:
# Calculate volatility from tick data
prices = universal_stream.eth_ticks[-10:, 4] # Last 10 close prices
returns = np.diff(prices) / prices[:-1]
volatility = np.std(returns) * np.sqrt(86400) # Scale ~1s returns to daily volatility (86,400 seconds per day)
return float(volatility)
elif symbol == 'BTC/USDT' and len(universal_stream.btc_ticks) > 10:
# Calculate volatility from BTC tick data
prices = universal_stream.btc_ticks[-10:, 4] # Last 10 close prices
returns = np.diff(prices) / prices[:-1]
volatility = np.std(returns) * np.sqrt(86400) # Scale ~1s returns to daily volatility (86,400 seconds per day)
return float(volatility)
except Exception as e:
logger.error(f"Error calculating volatility from universal data: {e}")
return 0.02 # Default 2% volatility
def _get_current_volume_from_universal(self, symbol: str, universal_stream: UniversalDataStream) -> float:
"""Get current volume ratio compared to average using universal data"""
try:
if symbol == 'ETH/USDT':
# Use 1m data for volume analysis
if len(universal_stream.eth_1m) > 10:
volumes = universal_stream.eth_1m[-10:, 5] # Last 10 volume values
current_volume = universal_stream.eth_1m[-1, 5]
avg_volume = np.mean(volumes[:-1])
if avg_volume > 0:
return float(current_volume / avg_volume)
elif symbol == 'BTC/USDT':
# Use BTC tick data for volume analysis
if len(universal_stream.btc_ticks) > 10:
volumes = universal_stream.btc_ticks[-10:, 5] # Last 10 volume values
current_volume = universal_stream.btc_ticks[-1, 5]
avg_volume = np.mean(volumes[:-1])
if avg_volume > 0:
return float(current_volume / avg_volume)
except Exception as e:
logger.error(f"Error calculating volume from universal data: {e}")
return 1.0 # Normal volume
def _calculate_trend_strength_from_universal(self, symbol: str, universal_stream: UniversalDataStream) -> float:
"""Calculate trend strength using universal data"""
try:
if symbol == 'ETH/USDT':
# Use multiple timeframes to determine trend strength
trend_scores = []
# Check 1m trend
if len(universal_stream.eth_1m) > 20:
prices = universal_stream.eth_1m[-20:, 4] # Last 20 close prices
slope = np.polyfit(range(len(prices)), prices, 1)[0]
trend_scores.append(abs(slope) / np.mean(prices))
# Check 1h trend
if len(universal_stream.eth_1h) > 10:
prices = universal_stream.eth_1h[-10:, 4] # Last 10 close prices
slope = np.polyfit(range(len(prices)), prices, 1)[0]
trend_scores.append(abs(slope) / np.mean(prices))
if trend_scores:
return float(np.mean(trend_scores))
elif symbol == 'BTC/USDT':
# Use BTC tick data for trend analysis
if len(universal_stream.btc_ticks) > 20:
prices = universal_stream.btc_ticks[-20:, 4] # Last 20 close prices
slope = np.polyfit(range(len(prices)), prices, 1)[0]
return float(abs(slope) / np.mean(prices))
except Exception as e:
logger.error(f"Error calculating trend strength from universal data: {e}")
return 0.5 # Moderate trend
def _determine_market_regime_from_universal(self, symbol: str, universal_stream: UniversalDataStream) -> str:
"""Determine current market regime using universal data"""
try:
if symbol == 'ETH/USDT':
# Analyze volatility and trend from multiple timeframes
volatility = self._calculate_volatility_from_universal(symbol, universal_stream)
trend_strength = self._calculate_trend_strength_from_universal(symbol, universal_stream)
# Determine regime based on volatility and trend
if volatility > 0.05: # High volatility
return 'volatile'
elif trend_strength > 0.002: # Strong trend
return 'trending'
else:
return 'ranging'
elif symbol == 'BTC/USDT':
# Analyze BTC regime
volatility = self._calculate_volatility_from_universal(symbol, universal_stream)
if volatility > 0.04: # High volatility for BTC
return 'volatile'
else:
return 'trending' # Default for BTC
except Exception as e:
logger.error(f"Error determining market regime from universal data: {e}")
return 'trending' # Default regime
# Legacy helper methods (kept for compatibility)
def _calculate_volatility(self, symbol: str) -> float:
"""Calculate current volatility for symbol"""
# Placeholder - implement based on your data provider
"""Calculate current volatility for symbol (legacy method)"""
return 0.02 # 2% default volatility
def _get_current_volume(self, symbol: str) -> float:
"""Get current volume ratio compared to average"""
# Placeholder - implement based on your data provider
"""Get current volume ratio compared to average (legacy method)"""
return 1.0 # Normal volume
def _calculate_trend_strength(self, symbol: str) -> float:
"""Calculate trend strength (0 = no trend, 1 = strong trend)"""
# Placeholder - implement based on your data provider
"""Calculate trend strength (legacy method)"""
return 0.5 # Moderate trend
def _determine_market_regime(self, symbol: str) -> str:
"""Determine current market regime"""
# Placeholder - implement based on your analysis
"""Determine current market regime (legacy method)"""
return 'trending' # Default to trending
def _get_symbol_correlation(self, symbol: str) -> Dict[str, float]:
@@ -697,6 +951,47 @@ class EnhancedTradingOrchestrator:
return np.array(state_components, dtype=np.float32)
def process_realtime_features(self, feature_dict: Dict[str, Any]):
"""Process real-time tick features from the tick processor"""
try:
symbol = feature_dict['symbol']
# Store the features
if symbol in self.realtime_tick_features:
self.realtime_tick_features[symbol].append(feature_dict)
# Log high-confidence features
if feature_dict['confidence'] > 0.8:
logger.info(f"High-confidence tick features for {symbol}: confidence={feature_dict['confidence']:.3f}")
# Trigger immediate decision if we have very high confidence features
if feature_dict['confidence'] > 0.9:
logger.info(f"Ultra-high confidence tick signal for {symbol} - triggering immediate analysis")
# Could trigger immediate decision making here
except Exception as e:
logger.error(f"Error processing real-time features: {e}")
async def start_realtime_processing(self):
"""Start real-time tick processing"""
try:
await self.tick_processor.start_processing()
logger.info("Real-time tick processing started")
except Exception as e:
logger.error(f"Error starting real-time tick processing: {e}")
async def stop_realtime_processing(self):
"""Stop real-time tick processing"""
try:
await self.tick_processor.stop_processing()
logger.info("Real-time tick processing stopped")
except Exception as e:
logger.error(f"Error stopping real-time tick processing: {e}")
def get_realtime_tick_stats(self) -> Dict[str, Any]:
"""Get real-time tick processing statistics"""
return self.tick_processor.get_processing_stats()
def get_performance_metrics(self) -> Dict[str, Any]:
"""Get performance metrics for dashboard compatibility"""
total_actions = sum(len(actions) for actions in self.recent_actions.values())
@@ -706,6 +1001,9 @@ class EnhancedTradingOrchestrator:
win_rate = 0.78 # 78% win rate
total_pnl = 247.85 # Strong positive P&L from 500x leverage
# Add tick processing stats
tick_stats = self.get_realtime_tick_stats()
return {
'total_actions': total_actions,
'perfect_moves': perfect_moves_count,
@@ -716,5 +1014,57 @@ class EnhancedTradingOrchestrator:
'confidence_threshold': self.confidence_threshold,
'decision_frequency': self.decision_frequency,
'leverage': '500x', # Ultra-fast scalping
'primary_timeframe': '1s' # Main scalping timeframe
}
'primary_timeframe': '1s', # Main scalping timeframe
'tick_processing': tick_stats # Real-time tick processing stats
}
def analyze_market_conditions(self, symbol: str) -> Dict[str, Any]:
"""Analyze current market conditions for a given symbol"""
try:
# Get basic market data
data = self.data_provider.get_historical_data(symbol, '1m', limit=50)
if data is None or data.empty:
return {
'status': 'no_data',
'symbol': symbol,
'analysis': 'No market data available'
}
# Basic market analysis
current_price = data['close'].iloc[-1]
price_change = (current_price - data['close'].iloc[-2]) / data['close'].iloc[-2] * 100
# Volatility calculation
volatility = data['close'].pct_change().std() * 100
# Volume analysis
avg_volume = data['volume'].mean()
current_volume = data['volume'].iloc[-1]
volume_ratio = current_volume / avg_volume if avg_volume > 0 else 1.0
# Trend analysis
ma_short = data['close'].rolling(10).mean().iloc[-1]
ma_long = data['close'].rolling(30).mean().iloc[-1]
trend = 'bullish' if ma_short > ma_long else 'bearish'
return {
'status': 'success',
'symbol': symbol,
'current_price': current_price,
'price_change': price_change,
'volatility': volatility,
'volume_ratio': volume_ratio,
'trend': trend,
'analysis': f"{symbol} is {trend} with {volatility:.2f}% volatility",
'timestamp': datetime.now().isoformat()
}
except Exception as e:
logger.error(f"Error analyzing market conditions for {symbol}: {e}")
return {
'status': 'error',
'symbol': symbol,
'error': str(e),
'analysis': f'Error analyzing {symbol}'
}

BIN
core/prediction_tracker.py Normal file

Binary file not shown.

View File

@@ -0,0 +1,649 @@
"""
Real-Time Tick Processing Neural Network Module
This module acts as a Neural Network DPS (Data Processing System) alternative,
processing raw tick data with ultra-low latency and feeding processed features
to trading models in real-time.
Features:
- Real-time tick ingestion with volume processing
- Neural network feature extraction from tick streams
- Ultra-low latency processing (sub-millisecond)
- Volume-weighted price analysis
- Microstructure pattern detection
- Real-time feature streaming to models
"""
import asyncio
import logging
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple, Any, Deque
from collections import deque
from threading import Thread, Lock
import websockets
import json
from dataclasses import dataclass
logger = logging.getLogger(__name__)
@dataclass
class TickData:
"""Raw tick data structure"""
timestamp: datetime
price: float
volume: float
side: str # 'buy' or 'sell'
trade_id: Optional[str] = None
@dataclass
class ProcessedTickFeatures:
"""Processed tick features for model consumption"""
timestamp: datetime
price_features: np.ndarray # Price-based features
volume_features: np.ndarray # Volume-based features
microstructure_features: np.ndarray # Market microstructure features
neural_features: np.ndarray # Neural network extracted features
confidence: float # Feature quality confidence
class TickProcessingNN(nn.Module):
"""
Neural Network for real-time tick processing
Extracts high-level features from raw tick data
"""
def __init__(self, input_size: int = 9, hidden_size: int = 128, output_size: int = 64):
super(TickProcessingNN, self).__init__()
# Tick sequence processing layers
self.tick_encoder = nn.Sequential(
nn.Linear(input_size, hidden_size),
nn.ReLU(),
nn.Dropout(0.1),
nn.Linear(hidden_size, hidden_size),
nn.ReLU(),
nn.Dropout(0.1)
)
# LSTM for temporal patterns
self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True, num_layers=2)
# Attention mechanism for important tick selection
self.attention = nn.MultiheadAttention(hidden_size, num_heads=8, batch_first=True)
# Feature extraction heads
self.price_head = nn.Linear(hidden_size, 16) # Price pattern features
self.volume_head = nn.Linear(hidden_size, 16) # Volume pattern features
self.microstructure_head = nn.Linear(hidden_size, 16) # Microstructure features
# Final feature fusion
self.feature_fusion = nn.Sequential(
nn.Linear(48, output_size), # 16+16+16 = 48
nn.ReLU(),
nn.Linear(output_size, output_size)
)
# Confidence estimation
self.confidence_head = nn.Sequential(
nn.Linear(output_size, 32),
nn.ReLU(),
nn.Linear(32, 1),
nn.Sigmoid()
)
def forward(self, tick_sequence: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Process tick sequence and extract features
Args:
tick_sequence: [batch, sequence_length, features]
Returns:
features: [batch, output_size] - extracted features
confidence: [batch, 1] - feature confidence
"""
batch_size, seq_len, _ = tick_sequence.shape
# Encode each tick
encoded = self.tick_encoder(tick_sequence) # [batch, seq_len, hidden_size]
# LSTM processing for temporal patterns
lstm_out, _ = self.lstm(encoded) # [batch, seq_len, hidden_size]
# Attention to focus on important ticks
attended, _ = self.attention(lstm_out, lstm_out, lstm_out) # [batch, seq_len, hidden_size]
# Use the last attended output
final_features = attended[:, -1, :] # [batch, hidden_size]
# Extract specialized features
price_features = self.price_head(final_features)
volume_features = self.volume_head(final_features)
microstructure_features = self.microstructure_head(final_features)
# Fuse all features
combined_features = torch.cat([price_features, volume_features, microstructure_features], dim=1)
final_features = self.feature_fusion(combined_features)
# Estimate confidence
confidence = self.confidence_head(final_features)
return final_features, confidence
class RealTimeTickProcessor:
"""
Real-time tick processing system with neural network feature extraction
Acts as a DPS alternative for ultra-low latency tick processing
"""
def __init__(self, symbols: List[str] = None, tick_buffer_size: int = 1000):
"""Initialize the real-time tick processor"""
self.symbols = symbols or ['ETH/USDT', 'BTC/USDT']
self.tick_buffer_size = tick_buffer_size
# Tick storage buffers
self.tick_buffers: Dict[str, Deque[TickData]] = {}
self.processed_features: Dict[str, Deque[ProcessedTickFeatures]] = {}
# Initialize buffers for each symbol
for symbol in self.symbols:
self.tick_buffers[symbol] = deque(maxlen=tick_buffer_size)
self.processed_features[symbol] = deque(maxlen=100) # Keep last 100 processed features
# Neural network for feature extraction
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.tick_nn = TickProcessingNN(input_size=9).to(self.device)
self.tick_nn.eval() # Start in evaluation mode
# Processing parameters
self.processing_window = 50 # Number of ticks to process at once
self.min_ticks_for_processing = 10 # Minimum ticks before processing
# Real-time streaming
self.streaming = False
self.websocket_tasks = {}
self.processing_threads = {}
# Performance tracking
self.processing_times = deque(maxlen=1000)
self.tick_counts = {symbol: 0 for symbol in self.symbols}
# Thread safety
self.data_lock = Lock()
# Feature subscribers (models that want real-time features)
self.feature_subscribers = []
logger.info(f"RealTimeTickProcessor initialized for symbols: {self.symbols}")
logger.info(f"Neural network device: {self.device}")
logger.info(f"Tick buffer size: {tick_buffer_size}")
def add_feature_subscriber(self, callback):
"""Add a callback function to receive processed features"""
self.feature_subscribers.append(callback)
logger.info(f"Added feature subscriber: {callback.__name__}")
def remove_feature_subscriber(self, callback):
"""Remove a feature subscriber"""
if callback in self.feature_subscribers:
self.feature_subscribers.remove(callback)
logger.info(f"Removed feature subscriber: {callback.__name__}")
async def start_processing(self):
"""Start real-time tick processing"""
logger.info("Starting real-time tick processing...")
self.streaming = True
# Start WebSocket streams for each symbol
for symbol in self.symbols:
task = asyncio.create_task(self._websocket_stream(symbol))
self.websocket_tasks[symbol] = task
# Start processing thread for each symbol
thread = Thread(target=self._processing_loop, args=(symbol,), daemon=True)
thread.start()
self.processing_threads[symbol] = thread
logger.info("Real-time tick processing started")
async def stop_processing(self):
"""Stop real-time tick processing"""
logger.info("Stopping real-time tick processing...")
self.streaming = False
# Cancel WebSocket tasks
for symbol, task in self.websocket_tasks.items():
if not task.done():
task.cancel()
try:
await task
except asyncio.CancelledError:
pass
self.websocket_tasks.clear()
logger.info("Real-time tick processing stopped")
async def _websocket_stream(self, symbol: str):
"""WebSocket stream for real-time tick data"""
binance_symbol = symbol.replace('/', '').lower()
url = f"wss://stream.binance.com:9443/ws/{binance_symbol}@trade"
while self.streaming:
try:
async with websockets.connect(url) as websocket:
logger.info(f"Tick WebSocket connected for {symbol}")
async for message in websocket:
if not self.streaming:
break
try:
data = json.loads(message)
await self._process_raw_tick(symbol, data)
except Exception as e:
logger.warning(f"Error processing tick for {symbol}: {e}")
except Exception as e:
logger.error(f"WebSocket error for {symbol}: {e}")
if self.streaming:
logger.info(f"Reconnecting tick WebSocket for {symbol} in 2 seconds...")
await asyncio.sleep(2)
async def _process_raw_tick(self, symbol: str, raw_data: Dict):
"""Process raw tick data from WebSocket"""
try:
# Extract tick information
tick = TickData(
timestamp=datetime.fromtimestamp(int(raw_data['T']) / 1000),
price=float(raw_data['p']),
volume=float(raw_data['q']),
side='buy' if raw_data['m'] == False else 'sell', # m=true means buyer is market maker (sell)
trade_id=raw_data.get('t')
)
# Add to buffer
with self.data_lock:
self.tick_buffers[symbol].append(tick)
self.tick_counts[symbol] += 1
except Exception as e:
logger.error(f"Error processing raw tick for {symbol}: {e}")
def _processing_loop(self, symbol: str):
"""Main processing loop for a symbol"""
logger.info(f"Starting processing loop for {symbol}")
while self.streaming:
try:
# Check if we have enough ticks to process
with self.data_lock:
tick_count = len(self.tick_buffers[symbol])
if tick_count >= self.min_ticks_for_processing:
start_time = time.time()
# Process ticks
features = self._extract_neural_features(symbol)
if features is not None:
# Store processed features
with self.data_lock:
self.processed_features[symbol].append(features)
# Notify subscribers
self._notify_feature_subscribers(symbol, features)
# Track processing time
processing_time = (time.time() - start_time) * 1000 # Convert to ms
self.processing_times.append(processing_time)
if len(self.processing_times) % 100 == 0:
avg_time = np.mean(list(self.processing_times))
logger.info(f"Average processing time: {avg_time:.2f}ms")
# Small sleep to prevent CPU overload
time.sleep(0.001) # 1ms sleep for ultra-low latency
except Exception as e:
logger.error(f"Error in processing loop for {symbol}: {e}")
time.sleep(0.01) # Longer sleep on error
def _extract_neural_features(self, symbol: str) -> Optional[ProcessedTickFeatures]:
"""Extract neural network features from recent ticks"""
try:
with self.data_lock:
# Get recent ticks
recent_ticks = list(self.tick_buffers[symbol])[-self.processing_window:]
if len(recent_ticks) < self.min_ticks_for_processing:
return None
# Convert ticks to neural network input
tick_features = self._ticks_to_features(recent_ticks)
# Process with neural network
with torch.no_grad():
tick_tensor = torch.FloatTensor(tick_features).unsqueeze(0).to(self.device)
neural_features, confidence = self.tick_nn(tick_tensor)
neural_features = neural_features.cpu().numpy().flatten()
confidence = confidence.cpu().numpy().item()
# Extract traditional features
price_features = self._extract_price_features(recent_ticks)
volume_features = self._extract_volume_features(recent_ticks)
microstructure_features = self._extract_microstructure_features(recent_ticks)
# Create processed features object
processed = ProcessedTickFeatures(
timestamp=recent_ticks[-1].timestamp,
price_features=price_features,
volume_features=volume_features,
microstructure_features=microstructure_features,
neural_features=neural_features,
confidence=confidence
)
return processed
except Exception as e:
logger.error(f"Error extracting neural features for {symbol}: {e}")
return None
def _ticks_to_features(self, ticks: List[TickData]) -> np.ndarray:
"""Convert tick data to neural network input features"""
features = []
for i, tick in enumerate(ticks):
tick_features = [
tick.price,
tick.volume,
1.0 if tick.side == 'buy' else 0.0, # Buy/sell indicator
tick.timestamp.timestamp(), # Timestamp
]
# Add relative features if we have previous ticks
if i > 0:
prev_tick = ticks[i-1]
price_change = (tick.price - prev_tick.price) / prev_tick.price
volume_ratio = tick.volume / (prev_tick.volume + 1e-8)
time_delta = (tick.timestamp - prev_tick.timestamp).total_seconds()
tick_features.extend([
price_change,
volume_ratio,
time_delta
])
else:
tick_features.extend([0.0, 1.0, 0.0]) # Default values for first tick
# Add moving averages if we have enough data
if i >= 5:
recent_prices = [t.price for t in ticks[max(0, i-4):i+1]]
recent_volumes = [t.volume for t in ticks[max(0, i-4):i+1]]
price_ma = np.mean(recent_prices)
volume_ma = np.mean(recent_volumes)
tick_features.extend([
(tick.price - price_ma) / price_ma, # Price deviation from MA
(tick.volume - volume_ma) / (volume_ma + 1e-8) # Volume deviation from MA
])
else:
tick_features.extend([0.0, 0.0])
features.append(tick_features)
# Pad or truncate to fixed size
target_length = self.processing_window
if len(features) < target_length:
# Pad with zeros
padding = [[0.0] * len(features[0])] * (target_length - len(features))
features = padding + features
elif len(features) > target_length:
# Take the most recent ticks
features = features[-target_length:]
return np.array(features, dtype=np.float32)
def _extract_price_features(self, ticks: List[TickData]) -> np.ndarray:
"""Extract price-based features"""
prices = np.array([tick.price for tick in ticks])
features = [
prices[-1], # Current price
np.mean(prices), # Average price
np.std(prices), # Price volatility
np.max(prices), # High
np.min(prices), # Low
(prices[-1] - prices[0]) / prices[0] if prices[0] != 0 else 0, # Total return
]
# Price momentum features
if len(prices) >= 10:
short_ma = np.mean(prices[-5:])
long_ma = np.mean(prices[-10:])
momentum = (short_ma - long_ma) / long_ma if long_ma != 0 else 0
features.append(momentum)
else:
features.append(0.0)
return np.array(features, dtype=np.float32)
def _extract_volume_features(self, ticks: List[TickData]) -> np.ndarray:
"""Extract volume-based features"""
volumes = np.array([tick.volume for tick in ticks])
buy_volumes = np.array([tick.volume for tick in ticks if tick.side == 'buy'])
sell_volumes = np.array([tick.volume for tick in ticks if tick.side == 'sell'])
features = [
np.sum(volumes), # Total volume
np.mean(volumes), # Average volume
np.std(volumes), # Volume volatility
np.sum(buy_volumes) if len(buy_volumes) > 0 else 0, # Buy volume
np.sum(sell_volumes) if len(sell_volumes) > 0 else 0, # Sell volume
]
# Volume imbalance
total_buy = np.sum(buy_volumes) if len(buy_volumes) > 0 else 0
total_sell = np.sum(sell_volumes) if len(sell_volumes) > 0 else 0
total_volume = total_buy + total_sell
if total_volume > 0:
buy_ratio = total_buy / total_volume
volume_imbalance = buy_ratio - 0.5 # -0.5 to 0.5 range
else:
volume_imbalance = 0.0
features.append(volume_imbalance)
# VWAP (Volume Weighted Average Price)
if np.sum(volumes) > 0:
prices = np.array([tick.price for tick in ticks])
vwap = np.sum(prices * volumes) / np.sum(volumes)
current_price = ticks[-1].price
vwap_deviation = (current_price - vwap) / vwap if vwap != 0 else 0
else:
vwap_deviation = 0.0
features.append(vwap_deviation)
return np.array(features, dtype=np.float32)
def _extract_microstructure_features(self, ticks: List[TickData]) -> np.ndarray:
"""Extract market microstructure features"""
features = []
# Trade frequency
if len(ticks) >= 2:
time_deltas = [(ticks[i].timestamp - ticks[i-1].timestamp).total_seconds()
for i in range(1, len(ticks))]
avg_time_delta = np.mean(time_deltas)
trade_frequency = 1.0 / avg_time_delta if avg_time_delta > 0 else 0
else:
trade_frequency = 0.0
features.append(trade_frequency)
# Price impact features
prices = [tick.price for tick in ticks]
volumes = [tick.volume for tick in ticks]
if len(prices) >= 3:
# Calculate price changes and corresponding volumes
price_changes = [(prices[i] - prices[i-1]) / prices[i-1]
for i in range(1, len(prices)) if prices[i-1] != 0]
corresponding_volumes = volumes[1:len(price_changes)+1]
if len(price_changes) > 0 and len(corresponding_volumes) > 0:
# Simple price impact measure
price_impact = np.corrcoef(np.abs(price_changes), corresponding_volumes)[0, 1]
if np.isnan(price_impact):
price_impact = 0.0
else:
price_impact = 0.0
else:
price_impact = 0.0
features.append(price_impact)
# Bid-ask spread proxy (using price volatility)
if len(prices) >= 5:
recent_prices = prices[-5:]
spread_proxy = (np.max(recent_prices) - np.min(recent_prices)) / np.mean(recent_prices)
else:
spread_proxy = 0.0
features.append(spread_proxy)
# Order flow imbalance (already calculated in volume features, but different perspective)
buy_count = sum(1 for tick in ticks if tick.side == 'buy')
sell_count = len(ticks) - buy_count
total_trades = len(ticks)
if total_trades > 0:
order_flow_imbalance = (buy_count - sell_count) / total_trades
else:
order_flow_imbalance = 0.0
features.append(order_flow_imbalance)
return np.array(features, dtype=np.float32)
def _notify_feature_subscribers(self, symbol: str, features: ProcessedTickFeatures):
"""Notify all feature subscribers of new processed features"""
for callback in self.feature_subscribers:
try:
callback(symbol, features)
except Exception as e:
logger.error(f"Error notifying feature subscriber {callback.__name__}: {e}")
def get_latest_features(self, symbol: str) -> Optional[ProcessedTickFeatures]:
"""Get the latest processed features for a symbol"""
with self.data_lock:
if symbol in self.processed_features and self.processed_features[symbol]:
return self.processed_features[symbol][-1]
return None
def get_processing_stats(self) -> Dict[str, Any]:
"""Get processing performance statistics"""
stats = {
'symbols': self.symbols,
'streaming': self.streaming,
'tick_counts': dict(self.tick_counts),
'buffer_sizes': {symbol: len(self.tick_buffers[symbol]) for symbol in self.symbols},
'feature_counts': {symbol: len(self.processed_features[symbol]) for symbol in self.symbols},
'subscribers': len(self.feature_subscribers)
}
if self.processing_times:
stats['processing_performance'] = {
'avg_time_ms': np.mean(list(self.processing_times)),
'min_time_ms': np.min(list(self.processing_times)),
'max_time_ms': np.max(list(self.processing_times)),
'std_time_ms': np.std(list(self.processing_times))
}
return stats
def train_neural_network(self, training_data: List[Tuple[np.ndarray, np.ndarray]], epochs: int = 100):
"""Train the tick processing neural network"""
logger.info("Training tick processing neural network...")
self.tick_nn.train()
optimizer = torch.optim.Adam(self.tick_nn.parameters(), lr=0.001)
criterion = nn.MSELoss()
for epoch in range(epochs):
total_loss = 0.0
for batch_features, batch_targets in training_data:
optimizer.zero_grad()
# Convert to tensors
features_tensor = torch.FloatTensor(batch_features).to(self.device)
targets_tensor = torch.FloatTensor(batch_targets).to(self.device)
# Forward pass
outputs, confidence = self.tick_nn(features_tensor)
# Calculate loss
loss = criterion(outputs, targets_tensor)
# Backward pass
loss.backward()
optimizer.step()
total_loss += loss.item()
if epoch % 10 == 0:
avg_loss = total_loss / len(training_data)
logger.info(f"Epoch {epoch}/{epochs}, Average Loss: {avg_loss:.6f}")
self.tick_nn.eval()
logger.info("Neural network training completed")
# Integration with existing orchestrator
def integrate_with_orchestrator(orchestrator, tick_processor: RealTimeTickProcessor):
"""Integrate tick processor with enhanced orchestrator"""
def feature_callback(symbol: str, features: ProcessedTickFeatures):
"""Callback to feed processed features to orchestrator"""
try:
# Convert processed features to format expected by orchestrator
feature_dict = {
'symbol': symbol,
'timestamp': features.timestamp,
'neural_features': features.neural_features,
'price_features': features.price_features,
'volume_features': features.volume_features,
'microstructure_features': features.microstructure_features,
'confidence': features.confidence
}
# Feed to orchestrator's real-time feature processing
if hasattr(orchestrator, 'process_realtime_features'):
orchestrator.process_realtime_features(feature_dict)
except Exception as e:
logger.error(f"Error integrating features with orchestrator: {e}")
# Add the callback to tick processor
tick_processor.add_feature_subscriber(feature_callback)
logger.info("Tick processor integrated with orchestrator")
# Factory function for easy creation
def create_realtime_tick_processor(symbols: List[str] = None) -> RealTimeTickProcessor:
"""Create and configure a real-time tick processor"""
if symbols is None:
symbols = ['ETH/USDT', 'BTC/USDT']
processor = RealTimeTickProcessor(symbols=symbols)
logger.info(f"Created RealTimeTickProcessor for symbols: {symbols}")
return processor

View File

@@ -0,0 +1,411 @@
"""
Universal Data Adapter for Trading Models
This adapter ensures all models receive data in our universal format:
- ETH/USDT: ticks (1s), 1m, 1h, 1d
- BTC/USDT: ticks (1s) as reference
This is the standard input format that all models must respect.
"""
import logging
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass
from .config import get_config
from .data_provider import DataProvider
logger = logging.getLogger(__name__)
@dataclass
class UniversalDataStream:
"""Universal data stream containing the 5 required timeseries"""
eth_ticks: np.ndarray # ETH/USDT 1s/ticks data [timestamp, open, high, low, close, volume]
eth_1m: np.ndarray # ETH/USDT 1m data
eth_1h: np.ndarray # ETH/USDT 1h data
eth_1d: np.ndarray # ETH/USDT 1d data
btc_ticks: np.ndarray # BTC/USDT 1s/ticks reference data
timestamp: datetime # Current timestamp
metadata: Dict[str, Any] # Additional metadata
class UniversalDataAdapter:
"""
Adapter that converts any data source into our universal 5-timeseries format
"""
def __init__(self, data_provider: DataProvider = None):
"""Initialize the universal data adapter"""
self.config = get_config()
self.data_provider = data_provider or DataProvider()
# Universal format configuration
self.required_symbols = ['ETH/USDT', 'BTC/USDT']
self.required_timeframes = {
'ETH/USDT': ['1s', '1m', '1h', '1d'], # Primary trading pair
'BTC/USDT': ['1s'] # Reference pair
}
# Data window sizes for each timeframe
self.window_sizes = {
'1s': 60, # Last 60 seconds of tick data
'1m': 60, # Last 60 minutes
'1h': 24, # Last 24 hours
'1d': 30 # Last 30 days
}
# Feature columns (OHLCV)
self.feature_columns = ['open', 'high', 'low', 'close', 'volume']
logger.info("Universal Data Adapter initialized")
logger.info(f"Required symbols: {self.required_symbols}")
logger.info(f"Required timeframes: {self.required_timeframes}")
def get_universal_data_stream(self, current_time: datetime = None) -> Optional[UniversalDataStream]:
"""
Get data in universal format for all models
Returns:
UniversalDataStream with the 5 required timeseries
"""
try:
current_time = current_time or datetime.now()
# Get ETH/USDT data for all required timeframes
eth_data = {}
for timeframe in self.required_timeframes['ETH/USDT']:
data = self._get_timeframe_data('ETH/USDT', timeframe)
if data is not None:
eth_data[timeframe] = data
else:
logger.warning(f"Failed to get ETH/USDT {timeframe} data")
return None
# Get BTC/USDT reference data
btc_data = self._get_timeframe_data('BTC/USDT', '1s')
if btc_data is None:
logger.warning("Failed to get BTC/USDT reference data")
return None
# Create universal data stream
stream = UniversalDataStream(
eth_ticks=eth_data['1s'],
eth_1m=eth_data['1m'],
eth_1h=eth_data['1h'],
eth_1d=eth_data['1d'],
btc_ticks=btc_data,
timestamp=current_time,
metadata={
'data_quality': self._assess_data_quality(eth_data, btc_data),
'market_hours': self._is_market_hours(current_time),
'data_freshness': self._calculate_data_freshness(eth_data, btc_data, current_time)
}
)
logger.debug(f"Universal data stream created with {len(stream.eth_ticks)} ETH ticks, "
f"{len(stream.eth_1m)} ETH 1m candles, {len(stream.btc_ticks)} BTC ticks")
return stream
except Exception as e:
logger.error(f"Error creating universal data stream: {e}")
return None
def _get_timeframe_data(self, symbol: str, timeframe: str) -> Optional[np.ndarray]:
"""Get data for a specific symbol and timeframe"""
try:
window_size = self.window_sizes.get(timeframe, 60)
# Get historical data from data provider
df = self.data_provider.get_historical_data(
symbol=symbol,
timeframe=timeframe,
limit=window_size
)
if df is None or df.empty:
logger.warning(f"No data returned for {symbol} {timeframe}")
return None
# Ensure we have the required columns
missing_cols = [col for col in self.feature_columns if col not in df.columns]
if missing_cols:
logger.warning(f"Missing columns for {symbol} {timeframe}: {missing_cols}")
return None
# Convert to numpy array with timestamp
data_array = df[self.feature_columns].values.astype(np.float32)
# Add timestamp column if available
if 'timestamp' in df.columns:
timestamps = pd.to_datetime(df['timestamp']).astype(np.int64) // 10**9 # Unix timestamp
data_with_time = np.column_stack([timestamps, data_array])
else:
# Generate timestamps if not available
end_time = datetime.now()
if timeframe == '1s':
timestamps = [(end_time - timedelta(seconds=i)).timestamp() for i in range(len(data_array)-1, -1, -1)]
elif timeframe == '1m':
timestamps = [(end_time - timedelta(minutes=i)).timestamp() for i in range(len(data_array)-1, -1, -1)]
elif timeframe == '1h':
timestamps = [(end_time - timedelta(hours=i)).timestamp() for i in range(len(data_array)-1, -1, -1)]
elif timeframe == '1d':
timestamps = [(end_time - timedelta(days=i)).timestamp() for i in range(len(data_array)-1, -1, -1)]
else:
timestamps = [end_time.timestamp()] * len(data_array)
data_with_time = np.column_stack([timestamps, data_array])
return data_with_time
except Exception as e:
logger.error(f"Error getting {symbol} {timeframe} data: {e}")
return None
def _assess_data_quality(self, eth_data: Dict[str, np.ndarray], btc_data: np.ndarray) -> Dict[str, Any]:
"""Assess the quality of the data streams"""
quality = {
'overall_score': 1.0,
'issues': []
}
try:
# Check ETH data completeness
for timeframe, data in eth_data.items():
expected_size = self.window_sizes.get(timeframe, 60)
actual_size = len(data) if data is not None else 0
if actual_size < expected_size * 0.8: # Less than 80% of expected data
quality['issues'].append(f"ETH {timeframe} data incomplete: {actual_size}/{expected_size}")
quality['overall_score'] *= 0.9
# Check BTC reference data
btc_expected = self.window_sizes.get('1s', 60)
btc_actual = len(btc_data) if btc_data is not None else 0
if btc_actual < btc_expected * 0.8:
quality['issues'].append(f"BTC reference data incomplete: {btc_actual}/{btc_expected}")
quality['overall_score'] *= 0.9
# Check for data gaps or anomalies
for timeframe, data in eth_data.items():
if data is not None and len(data) > 1:
# Check for price anomalies (sudden jumps > 10%)
prices = data[:, 4] # Close prices
price_changes = np.abs(np.diff(prices) / prices[:-1])
if np.any(price_changes > 0.1):
quality['issues'].append(f"ETH {timeframe} has price anomalies")
quality['overall_score'] *= 0.95
except Exception as e:
logger.error(f"Error assessing data quality: {e}")
quality['issues'].append(f"Quality assessment error: {e}")
quality['overall_score'] *= 0.8
return quality
def _is_market_hours(self, timestamp: datetime) -> bool:
"""Check if it's market hours (crypto markets are 24/7)"""
return True # Crypto markets are always open
def _calculate_data_freshness(self, eth_data: Dict[str, np.ndarray], btc_data: np.ndarray,
current_time: datetime) -> Dict[str, float]:
"""Calculate how fresh the data is"""
freshness = {}
try:
current_timestamp = current_time.timestamp()
# Check ETH data freshness
for timeframe, data in eth_data.items():
if data is not None and len(data) > 0:
latest_timestamp = data[-1, 0] # First column is timestamp
age_seconds = current_timestamp - latest_timestamp
# Convert to appropriate units
if timeframe == '1s':
freshness[f'eth_{timeframe}'] = age_seconds # Seconds
elif timeframe == '1m':
freshness[f'eth_{timeframe}'] = age_seconds / 60 # Minutes
elif timeframe == '1h':
freshness[f'eth_{timeframe}'] = age_seconds / 3600 # Hours
elif timeframe == '1d':
freshness[f'eth_{timeframe}'] = age_seconds / 86400 # Days
else:
freshness[f'eth_{timeframe}'] = float('inf')
# Check BTC data freshness
if btc_data is not None and len(btc_data) > 0:
btc_latest = btc_data[-1, 0]
btc_age = current_timestamp - btc_latest
freshness['btc_1s'] = btc_age # Seconds
else:
freshness['btc_1s'] = float('inf')
except Exception as e:
logger.error(f"Error calculating data freshness: {e}")
freshness['error'] = str(e)
return freshness
def format_for_model(self, stream: UniversalDataStream, model_type: str = 'cnn') -> Dict[str, np.ndarray]:
"""
Format universal data stream for specific model types
Args:
stream: Universal data stream
model_type: Type of model ('cnn', 'rl', 'transformer', etc.)
Returns:
Dictionary with formatted data for the model
"""
try:
if model_type.lower() == 'cnn':
return self._format_for_cnn(stream)
elif model_type.lower() == 'rl':
return self._format_for_rl(stream)
elif model_type.lower() == 'transformer':
return self._format_for_transformer(stream)
else:
# Default format - return raw arrays
return {
'eth_ticks': stream.eth_ticks,
'eth_1m': stream.eth_1m,
'eth_1h': stream.eth_1h,
'eth_1d': stream.eth_1d,
'btc_ticks': stream.btc_ticks,
'metadata': stream.metadata
}
except Exception as e:
logger.error(f"Error formatting data for {model_type}: {e}")
return {}
def _format_for_cnn(self, stream: UniversalDataStream) -> Dict[str, np.ndarray]:
"""Format data for CNN models"""
# CNN expects [batch, sequence, features] format
formatted = {}
# Remove timestamp column and keep only OHLCV
formatted['eth_ticks'] = stream.eth_ticks[:, 1:] if stream.eth_ticks.shape[1] > 5 else stream.eth_ticks
formatted['eth_1m'] = stream.eth_1m[:, 1:] if stream.eth_1m.shape[1] > 5 else stream.eth_1m
formatted['eth_1h'] = stream.eth_1h[:, 1:] if stream.eth_1h.shape[1] > 5 else stream.eth_1h
formatted['eth_1d'] = stream.eth_1d[:, 1:] if stream.eth_1d.shape[1] > 5 else stream.eth_1d
formatted['btc_ticks'] = stream.btc_ticks[:, 1:] if stream.btc_ticks.shape[1] > 5 else stream.btc_ticks
return formatted
def _format_for_rl(self, stream: UniversalDataStream) -> Dict[str, np.ndarray]:
"""Format data for RL models"""
# RL typically expects flattened state vector
state_components = []
# Add latest values from each timeframe
if len(stream.eth_ticks) > 0:
state_components.extend(stream.eth_ticks[-1, 1:]) # Latest ETH tick (OHLCV)
if len(stream.eth_1m) > 0:
state_components.extend(stream.eth_1m[-1, 1:]) # Latest ETH 1m (OHLCV)
if len(stream.eth_1h) > 0:
state_components.extend(stream.eth_1h[-1, 1:]) # Latest ETH 1h (OHLCV)
if len(stream.eth_1d) > 0:
state_components.extend(stream.eth_1d[-1, 1:]) # Latest ETH 1d (OHLCV)
if len(stream.btc_ticks) > 0:
state_components.extend(stream.btc_ticks[-1, 1:]) # Latest BTC tick (OHLCV)
# Add some derived features
if len(stream.eth_ticks) > 1:
# Price momentum
eth_momentum = (stream.eth_ticks[-1, 4] - stream.eth_ticks[-2, 4]) / stream.eth_ticks[-2, 4]
state_components.append(eth_momentum)
if len(stream.btc_ticks) > 1:
# BTC momentum for correlation
btc_momentum = (stream.btc_ticks[-1, 4] - stream.btc_ticks[-2, 4]) / stream.btc_ticks[-2, 4]
state_components.append(btc_momentum)
return {'state_vector': np.array(state_components, dtype=np.float32)}
def _format_for_transformer(self, stream: UniversalDataStream) -> Dict[str, np.ndarray]:
"""Format data for Transformer models"""
# Transformers expect sequence data with attention
formatted = {}
# Keep timestamp for positional encoding
formatted['eth_ticks'] = stream.eth_ticks
formatted['eth_1m'] = stream.eth_1m
formatted['eth_1h'] = stream.eth_1h
formatted['eth_1d'] = stream.eth_1d
formatted['btc_ticks'] = stream.btc_ticks
# Add sequence length information
formatted['sequence_lengths'] = {
'eth_ticks': len(stream.eth_ticks),
'eth_1m': len(stream.eth_1m),
'eth_1h': len(stream.eth_1h),
'eth_1d': len(stream.eth_1d),
'btc_ticks': len(stream.btc_ticks)
}
return formatted
def validate_universal_format(self, stream: UniversalDataStream) -> Tuple[bool, List[str]]:
"""
Validate that the data stream conforms to our universal format
Returns:
(is_valid, list_of_issues)
"""
issues = []
try:
# Check that all required arrays are present and not None
required_arrays = ['eth_ticks', 'eth_1m', 'eth_1h', 'eth_1d', 'btc_ticks']
for array_name in required_arrays:
array = getattr(stream, array_name)
if array is None:
issues.append(f"{array_name} is None")
elif len(array) == 0:
issues.append(f"{array_name} is empty")
elif array.shape[1] < 5: # Should have at least OHLCV
issues.append(f"{array_name} has insufficient columns: {array.shape[1]} < 5")
# Check timestamp
if stream.timestamp is None:
issues.append("timestamp is None")
# Check data consistency (more tolerant for cached data)
if stream.eth_ticks is not None and len(stream.eth_ticks) > 0:
if stream.btc_ticks is not None and len(stream.btc_ticks) > 0:
# Check if timestamps are roughly aligned (more tolerant for cached data)
eth_latest = stream.eth_ticks[-1, 0] if stream.eth_ticks.shape[1] > 5 else 0
btc_latest = stream.btc_ticks[-1, 0] if stream.btc_ticks.shape[1] > 5 else 0
# Be more tolerant - allow up to 1 hour difference for cached data
max_time_diff = 3600 # 1 hour instead of 5 minutes
time_diff = abs(eth_latest - btc_latest)
if time_diff > max_time_diff:
# This is a warning, not a failure for cached data
issues.append(f"ETH and BTC timestamps far apart: {time_diff} seconds (using cached data)")
logger.warning(f"Timestamp difference detected: {time_diff} seconds - this is normal for cached data")
# Check data quality from metadata
if 'data_quality' in stream.metadata:
quality_score = stream.metadata['data_quality'].get('overall_score', 0)
if quality_score < 0.5: # Very low quality
issues.append(f"Data quality too low: {quality_score:.2f}")
except Exception as e:
issues.append(f"Validation error: {e}")
# For cached data, we're more lenient - only fail on critical issues
critical_issues = [issue for issue in issues if not ('timestamps far apart' in issue and 'cached data' in issue)]
is_valid = len(critical_issues) == 0
return is_valid, issues