"""
Real-Time Tick Aggregator for Raw Tick Processing

This module processes raw tick data and calculates 1s OHLCV bars in real-time.
It preserves raw tick data for pattern detection while providing aggregated
data for traditional analysis.

Features:
- Raw tick data preservation with time difference analysis
- Real-time 1s OHLCV calculation from ticks
- Tick pattern detection for violent moves
- Volume-weighted price calculations
- Microstructure analysis
"""

import logging
import time
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple, Any, Deque
from collections import deque
from threading import Lock
from dataclasses import dataclass, field

logger = logging.getLogger(__name__)


@dataclass
class RawTick:
    """Raw tick data with timing information"""
    timestamp: datetime
    price: float
    volume: float
    quantity: float
    side: str  # 'buy' or 'sell'
    trade_id: str
    time_since_last: float  # Milliseconds since last tick
    price_change: float  # Price change from last tick
    volume_intensity: float  # Volume relative to recent average


@dataclass
class TickPattern:
    """Detected tick pattern for violent moves"""
    start_time: datetime
    end_time: datetime
    # 'rapid_fire', 'volume_spike', 'price_acceleration' (per tick) or
    # 'high_frequency_bar', 'volume_concentration' (per completed bar)
    pattern_type: str
    tick_count: int
    avg_time_between_ticks: float
    price_change: float
    volume_total: float
    confidence: float


@dataclass
class OHLCVBar:
    """1-second OHLCV bar calculated from ticks"""
    timestamp: datetime
    open: float
    high: float
    low: float
    close: float
    volume: float
    tick_count: int
    buy_volume: float
    sell_volume: float
    vwap: float  # Volume-weighted average price
    patterns: List[TickPattern] = field(default_factory=list)


class RealTimeTickAggregator:
    """
    Real-time tick aggregator that processes raw ticks and calculates 1s OHLCV
    bars while preserving tick-level data for pattern detection.

    All public methods serialize access to the internal buffers through
    ``self.data_lock``. The lock is a plain (non-reentrant) ``threading.Lock``,
    so a method that holds it must never call another locking public method.
    """

    def __init__(self, symbols: Optional[List[str]] = None, tick_buffer_size: int = 10000):
        """
        Initialize the tick aggregator.

        Args:
            symbols: Symbols to track; defaults to ['ETHUSDT', 'BTCUSDT'].
            tick_buffer_size: Maximum number of raw ticks kept per symbol.
        """
        self.symbols = symbols or ['ETHUSDT', 'BTCUSDT']
        self.tick_buffer_size = tick_buffer_size

        # Raw tick storage (preserves all ticks with timing data)
        self.raw_tick_buffers: Dict[str, Deque[RawTick]] = {}
        # 1s OHLCV bars calculated from ticks
        self.ohlcv_bars: Dict[str, Deque[OHLCVBar]] = {}
        # Current incomplete bars being built (None until the first tick)
        self.current_bars: Dict[str, Optional[Dict]] = {}
        # Pattern detection storage
        self.detected_patterns: Dict[str, Deque[TickPattern]] = {}
        # Volume tracking for intensity calculation
        self.volume_history: Dict[str, Deque[float]] = {}

        # Initialize buffers for each symbol
        for symbol in self.symbols:
            self.raw_tick_buffers[symbol] = deque(maxlen=tick_buffer_size)
            self.ohlcv_bars[symbol] = deque(maxlen=3600)  # 1 hour of 1s bars
            self.detected_patterns[symbol] = deque(maxlen=1000)
            self.current_bars[symbol] = None
            self.volume_history[symbol] = deque(maxlen=100)  # Last 100 ticks for volume average

        # Pattern detection parameters
        self.rapid_fire_threshold = 50  # ms between ticks for rapid fire
        self.volume_spike_multiplier = 3.0  # Volume spike threshold
        self.price_acceleration_threshold = 0.001  # 0.1% price change threshold

        # Statistics tracking
        self.stats = {
            'total_ticks_processed': 0,
            'patterns_detected': 0,
            'bars_created': 0,
            'processing_times': deque(maxlen=1000)
        }

        # Thread safety
        self.data_lock = Lock()

        # Last tick tracking for time differences
        self.last_tick_times: Dict[str, datetime] = {}
        self.last_tick_prices: Dict[str, float] = {}

        logger.info("RealTimeTickAggregator initialized for symbols: %s", self.symbols)
        logger.info("Tick buffer size: %s", tick_buffer_size)
        logger.info("Pattern detection enabled for violent moves")

    def process_tick(self, symbol: str, timestamp: datetime, price: float, volume: float,
                     quantity: float, side: str,
                     trade_id: str) -> Tuple[Optional[RawTick], Optional[OHLCVBar]]:
        """
        Process a raw tick and return the tick data plus any completed 1s bar.

        Args:
            symbol: Symbol the tick belongs to; must be one of ``self.symbols``.
            timestamp: Trade time of the tick.
            price: Trade price.
            volume: Trade volume used for OHLCV and VWAP accumulation.
            quantity: Trade quantity, stored on the tick unchanged.
            side: 'buy' or 'sell'.
            trade_id: Identifier stored on the tick unchanged.

        Returns:
            Tuple[Optional[RawTick], Optional[OHLCVBar]]: The processed tick and
            the bar completed by this tick (if any). ``(None, None)`` is returned
            when processing fails, e.g. for a symbol that is not tracked.
        """
        start_time = time.perf_counter()
        try:
            with self.data_lock:
                # Calculate timing information
                time_since_last = 0.0
                price_change = 0.0

                if symbol in self.last_tick_times:
                    time_diff = (timestamp - self.last_tick_times[symbol]).total_seconds() * 1000
                    time_since_last = max(0.0, time_diff)  # Ensure non-negative

                if symbol in self.last_tick_prices:
                    price_change = price - self.last_tick_prices[symbol]

                # Intensity is measured against the history *before* this tick
                volume_intensity = self._calculate_volume_intensity(symbol, volume)

                # Create raw tick
                raw_tick = RawTick(
                    timestamp=timestamp,
                    price=price,
                    volume=volume,
                    quantity=quantity,
                    side=side,
                    trade_id=trade_id,
                    time_since_last=time_since_last,
                    price_change=price_change,
                    volume_intensity=volume_intensity
                )

                # Add to raw tick buffer (raises KeyError for untracked symbols,
                # before any tracking state is touched)
                self.raw_tick_buffers[symbol].append(raw_tick)

                # Update tracking
                self.last_tick_times[symbol] = timestamp
                self.last_tick_prices[symbol] = price
                self.volume_history[symbol].append(volume)

                # Process for 1s OHLCV bar
                completed_bar = self._update_ohlcv_bar(symbol, raw_tick)

                # Detect patterns
                self._detect_tick_patterns(symbol, raw_tick)

                # Update statistics
                self.stats['total_ticks_processed'] += 1
                processing_time = (time.perf_counter() - start_time) * 1000
                self.stats['processing_times'].append(processing_time)

                return raw_tick, completed_bar

        except Exception as e:
            logger.error("Error processing tick for %s: %s", symbol, e)
            return None, None

    def _calculate_volume_intensity(self, symbol: str, volume: float) -> float:
        """
        Calculate volume intensity relative to the recent average.

        Returns 1.0 until at least 10 volumes have been recorded for the symbol,
        when the recent average is not positive, or on error.
        """
        try:
            history = self.volume_history.get(symbol)
            if history is None or len(history) < 10:
                return 1.0  # Default intensity

            avg_volume = float(np.mean(history))
            if avg_volume > 0:
                return volume / avg_volume
            return 1.0

        except Exception as e:
            logger.error("Error calculating volume intensity: %s", e)
            return 1.0

    def _update_ohlcv_bar(self, symbol: str, tick: RawTick) -> Optional[OHLCVBar]:
        """
        Update or create the 1s OHLCV bar for a tick.

        Returns the bar that was finalized because this tick belongs to a
        different second than the bar in progress, otherwise None.
        """
        try:
            # Get the second timestamp (truncate to second)
            bar_timestamp = tick.timestamp.replace(microsecond=0)
            current_bar = self.current_bars[symbol]

            if current_bar is not None and current_bar['timestamp'] == bar_timestamp:
                # Update existing bar
                current_bar['high'] = max(current_bar['high'], tick.price)
                current_bar['low'] = min(current_bar['low'], tick.price)
                current_bar['close'] = tick.price
                current_bar['volume'] += tick.volume
                current_bar['tick_count'] += 1
                current_bar['volume_price_sum'] += tick.volume * tick.price
                current_bar['ticks'].append(tick)

                # Same side handling as the bar-creation branch below: a side
                # that is neither 'buy' nor 'sell' is not attributed to either.
                if tick.side == 'buy':
                    current_bar['buy_volume'] += tick.volume
                elif tick.side == 'sell':
                    current_bar['sell_volume'] += tick.volume
                return None

            # Complete the previous bar if it exists
            completed_bar = None
            if current_bar is not None:
                completed_bar = self._finalize_bar(symbol, current_bar)

            # Start new bar
            self.current_bars[symbol] = {
                'timestamp': bar_timestamp,
                'open': tick.price,
                'high': tick.price,
                'low': tick.price,
                'close': tick.price,
                'volume': tick.volume,
                'tick_count': 1,
                'buy_volume': tick.volume if tick.side == 'buy' else 0,
                'sell_volume': tick.volume if tick.side == 'sell' else 0,
                'volume_price_sum': tick.volume * tick.price,
                'ticks': [tick]
            }
            return completed_bar

        except Exception as e:
            logger.error("Error updating OHLCV bar for %s: %s", symbol, e)
            return None

    def _finalize_bar(self, symbol: str, bar_data: Dict) -> Optional[OHLCVBar]:
        """
        Finalize a 1s OHLCV bar, attach bar-level patterns and store it.

        Returns the stored bar, or None if finalization failed.
        """
        try:
            # VWAP falls back to the close when the bar carries no volume
            if bar_data['volume'] > 0:
                vwap = bar_data['volume_price_sum'] / bar_data['volume']
            else:
                vwap = bar_data['close']

            # Detect patterns in this bar
            bar_patterns = self._detect_bar_patterns(symbol, bar_data['ticks'])

            # Create OHLCV bar
            ohlcv_bar = OHLCVBar(
                timestamp=bar_data['timestamp'],
                open=bar_data['open'],
                high=bar_data['high'],
                low=bar_data['low'],
                close=bar_data['close'],
                volume=bar_data['volume'],
                tick_count=bar_data['tick_count'],
                buy_volume=bar_data['buy_volume'],
                sell_volume=bar_data['sell_volume'],
                vwap=vwap,
                patterns=bar_patterns
            )

            # Add to buffer
            self.ohlcv_bars[symbol].append(ohlcv_bar)
            self.stats['bars_created'] += 1
            return ohlcv_bar

        except Exception as e:
            logger.error("Error finalizing bar for %s: %s", symbol, e)
            return None

    @staticmethod
    def _relative_price_change(tick: RawTick) -> float:
        """
        Return |price_change| as a fraction of the previous tick's price.

        The previous price is recovered from the tick itself because
        ``last_tick_prices`` already holds the current price by the time
        patterns are evaluated. Returns 0.0 when the previous price is not
        positive, which avoids a division by zero.
        """
        previous_price = tick.price - tick.price_change
        if previous_price <= 0:
            return 0.0
        return abs(tick.price_change) / previous_price

    def _detect_tick_patterns(self, symbol: str, tick: RawTick):
        """Detect patterns in tick data for violent moves"""
        try:
            # Rapid fire detection (very fast ticks)
            if 0 < tick.time_since_last < self.rapid_fire_threshold:
                self._check_rapid_fire_pattern(symbol, tick)

            # Volume spike detection
            if tick.volume_intensity > self.volume_spike_multiplier:
                self._check_volume_spike_pattern(symbol, tick)

            # Price acceleration detection
            if (tick.price_change != 0
                    and self._relative_price_change(tick) > self.price_acceleration_threshold):
                self._check_price_acceleration_pattern(symbol, tick)

        except Exception as e:
            logger.error("Error detecting tick patterns for %s: %s", symbol, e)

    def _check_rapid_fire_pattern(self, symbol: str, tick: RawTick):
        """
        Record a 'rapid_fire' pattern when at least 5 of the last 10 ticks
        arrived faster than ``rapid_fire_threshold`` milliseconds apart.
        """
        try:
            # Look at last 10 ticks to see if we have a rapid fire sequence
            recent_ticks = list(self.raw_tick_buffers[symbol])[-10:]
            if len(recent_ticks) < 5:
                return

            rapid_ticks = [t for t in recent_ticks
                           if t.time_since_last < self.rapid_fire_threshold]
            if len(rapid_ticks) < 5:
                return

            first = rapid_ticks[0]
            pattern = TickPattern(
                start_time=first.timestamp,
                end_time=tick.timestamp,
                pattern_type='rapid_fire',
                tick_count=len(rapid_ticks),
                avg_time_between_ticks=float(np.mean([t.time_since_last for t in rapid_ticks])),
                price_change=tick.price - first.price,
                volume_total=sum(t.volume for t in rapid_ticks),
                confidence=min(1.0, len(rapid_ticks) / 10.0)
            )
            self.detected_patterns[symbol].append(pattern)
            self.stats['patterns_detected'] += 1

            logger.debug(
                "RAPID FIRE pattern detected for %s: %d ticks in %.3fs",
                symbol, len(rapid_ticks),
                (tick.timestamp - first.timestamp).total_seconds(),
            )

        except Exception as e:
            logger.error("Error checking rapid fire pattern: %s", e)

    def _check_volume_spike_pattern(self, symbol: str, tick: RawTick):
        """Record a single-tick 'volume_spike' pattern for this tick"""
        try:
            pattern = TickPattern(
                start_time=tick.timestamp,
                end_time=tick.timestamp,
                pattern_type='volume_spike',
                tick_count=1,
                avg_time_between_ticks=tick.time_since_last,
                price_change=tick.price_change,
                volume_total=tick.volume,
                confidence=min(1.0, tick.volume_intensity / self.volume_spike_multiplier)
            )
            self.detected_patterns[symbol].append(pattern)
            self.stats['patterns_detected'] += 1

            logger.debug("VOLUME SPIKE pattern detected for %s: %.2fx normal volume",
                         symbol, tick.volume_intensity)

        except Exception as e:
            logger.error("Error checking volume spike pattern: %s", e)

    def _check_price_acceleration_pattern(self, symbol: str, tick: RawTick):
        """Record a single-tick 'price_acceleration' pattern for this tick"""
        try:
            price_change_pct = self._relative_price_change(tick)

            pattern = TickPattern(
                start_time=tick.timestamp,
                end_time=tick.timestamp,
                pattern_type='price_acceleration',
                tick_count=1,
                avg_time_between_ticks=tick.time_since_last,
                price_change=tick.price_change,
                volume_total=tick.volume,
                confidence=min(1.0, price_change_pct / self.price_acceleration_threshold)
            )
            self.detected_patterns[symbol].append(pattern)
            self.stats['patterns_detected'] += 1

            logger.debug("PRICE ACCELERATION pattern detected for %s: %.3f%% change",
                         symbol, price_change_pct * 100)

        except Exception as e:
            logger.error("Error checking price acceleration pattern: %s", e)

    def _detect_bar_patterns(self, symbol: str, ticks: List[RawTick]) -> List[TickPattern]:
        """
        Detect patterns within a completed 1s bar.

        Emits 'high_frequency_bar' for bars with more than 20 ticks and
        'volume_concentration' when one tick carries more than half of the
        bar's volume. Bars with fewer than 2 ticks yield no patterns.
        """
        patterns: List[TickPattern] = []
        try:
            if len(ticks) < 2:
                return patterns

            first, last = ticks[0], ticks[-1]
            total_volume = sum(t.volume for t in ticks)
            # The first tick's gap belongs to the previous bar, so it is skipped
            avg_gap = float(np.mean([t.time_since_last for t in ticks[1:]]))

            # Check for high-frequency trading within the bar
            if len(ticks) > 20:  # More than 20 ticks in 1 second
                patterns.append(TickPattern(
                    start_time=first.timestamp,
                    end_time=last.timestamp,
                    pattern_type='high_frequency_bar',
                    tick_count=len(ticks),
                    avg_time_between_ticks=avg_gap,
                    price_change=last.price - first.price,
                    volume_total=total_volume,
                    confidence=min(1.0, len(ticks) / 50.0)
                ))

            # Check for volume concentration
            max_tick_volume = max(t.volume for t in ticks)
            if max_tick_volume > total_volume * 0.5:  # Single tick has >50% of bar volume
                patterns.append(TickPattern(
                    start_time=first.timestamp,
                    end_time=last.timestamp,
                    pattern_type='volume_concentration',
                    tick_count=len(ticks),
                    avg_time_between_ticks=avg_gap,
                    price_change=last.price - first.price,
                    volume_total=total_volume,
                    confidence=max_tick_volume / total_volume
                ))

        except Exception as e:
            logger.error("Error detecting bar patterns: %s", e)

        return patterns

    def _latest(self, buffers: Dict[str, Deque], symbol: str, count: int) -> List:
        """
        Return a snapshot of the newest ``count`` items stored for ``symbol``.

        A non-positive ``count`` yields an empty list; slicing with ``[-0:]``
        would otherwise return the entire buffer.
        """
        if count <= 0:
            return []
        with self.data_lock:
            buffer = buffers.get(symbol)
            if buffer is None:
                return []
            return list(buffer)[-count:]

    def get_raw_ticks(self, symbol: str, count: int = 100) -> List[RawTick]:
        """Get up to ``count`` most recent raw ticks for a symbol (oldest first)"""
        return self._latest(self.raw_tick_buffers, symbol, count)

    def get_ohlcv_bars(self, symbol: str, count: int = 100) -> List[OHLCVBar]:
        """Get up to ``count`` most recent 1s OHLCV bars for a symbol (oldest first)"""
        return self._latest(self.ohlcv_bars, symbol, count)

    def get_detected_patterns(self, symbol: str, count: int = 50) -> List[TickPattern]:
        """Get up to ``count`` most recently detected patterns for a symbol"""
        return self._latest(self.detected_patterns, symbol, count)

    def get_tick_features_for_model(self, symbol: str, window_size: int = 50) -> Optional[np.ndarray]:
        """
        Get tick features formatted for model consumption.

        Returns:
            np.ndarray: float32 array of shape (window_size, 6) with columns
            [price, volume, time_since_last, price_change, volume_intensity,
            side_indicator], or None when fewer than ``window_size`` ticks are
            available, ``window_size`` is not positive, or an error occurs.
        """
        try:
            if window_size <= 0:
                return None

            # get_raw_ticks takes data_lock itself and returns a snapshot; the
            # lock is not reentrant, so it must not be held around this call.
            recent_ticks = self.get_raw_ticks(symbol, window_size)
            if len(recent_ticks) < window_size:
                return None

            features = [
                [
                    tick.price,
                    tick.volume,
                    tick.time_since_last,
                    tick.price_change,
                    tick.volume_intensity,
                    1.0 if tick.side == 'buy' else 0.0  # Buy/sell indicator
                ]
                for tick in recent_ticks
            ]
            return np.array(features, dtype=np.float32)

        except Exception as e:
            logger.error("Error getting tick features for model: %s", e)
            return None

    def get_ohlcv_features_for_model(self, symbol: str, window_size: int = 60) -> Optional[np.ndarray]:
        """
        Get 1s OHLCV features formatted for model consumption.

        Returns:
            np.ndarray: float32 array of shape (window_size, 5) in standard
            OHLCV order, or None when fewer than ``window_size`` bars are
            available, ``window_size`` is not positive, or an error occurs.
        """
        try:
            if window_size <= 0:
                return None

            # See get_tick_features_for_model: the getter does its own locking.
            recent_bars = self.get_ohlcv_bars(symbol, window_size)
            if len(recent_bars) < window_size:
                return None

            features = [
                [bar.open, bar.high, bar.low, bar.close, bar.volume]
                for bar in recent_bars
            ]
            return np.array(features, dtype=np.float32)

        except Exception as e:
            logger.error("Error getting OHLCV features for model: %s", e)
            return None

    def get_statistics(self) -> Dict[str, Any]:
        """Get aggregator statistics (counters, mean processing time, buffer fill levels)"""
        with self.data_lock:
            processing_times = self.stats['processing_times']
            avg_processing_time = float(np.mean(processing_times)) if processing_times else 0
            return {
                'total_ticks_processed': self.stats['total_ticks_processed'],
                'patterns_detected': self.stats['patterns_detected'],
                'bars_created': self.stats['bars_created'],
                'avg_processing_time_ms': avg_processing_time,
                'symbols': self.symbols,
                'buffer_sizes': {symbol: len(self.raw_tick_buffers[symbol]) for symbol in self.symbols},
                'bar_counts': {symbol: len(self.ohlcv_bars[symbol]) for symbol in self.symbols},
                'pattern_counts': {symbol: len(self.detected_patterns[symbol]) for symbol in self.symbols}
            }

    @staticmethod
    def _drop_older_than(buffer: Deque, time_attr: str, max_age: timedelta) -> None:
        """
        Pop items from the left of ``buffer`` while they are older than ``max_age``.

        The cutoff is computed with the tzinfo of the stored timestamp, so both
        naive and timezone-aware timestamps can be compared without TypeError.
        """
        while buffer:
            stamp = getattr(buffer[0], time_attr)
            cutoff = datetime.now(stamp.tzinfo) - max_age
            if not stamp < cutoff:
                break
            buffer.popleft()

    def clear_old_data(self, hours_to_keep: int = 1):
        """
        Clear old data to manage memory usage.

        Removes ticks, bars and patterns whose timestamp is more than
        ``hours_to_keep`` hours in the past. Errors are logged, not raised.
        """
        try:
            max_age = timedelta(hours=hours_to_keep)

            with self.data_lock:
                for symbol in self.symbols:
                    # Clear old ticks
                    self._drop_older_than(self.raw_tick_buffers[symbol], 'timestamp', max_age)
                    # Clear old bars
                    self._drop_older_than(self.ohlcv_bars[symbol], 'timestamp', max_age)
                    # Clear old patterns
                    self._drop_older_than(self.detected_patterns[symbol], 'start_time', max_age)

            logger.info("Cleared data older than %s hours", hours_to_keep)

        except Exception as e:
            logger.error("Error clearing old data: %s", e)