gogo2/core/tick_aggregator.py
2025-05-27 01:03:40 +03:00

560 lines
23 KiB
Python

"""
Real-Time Tick Aggregator for Raw Tick Processing
This module processes raw tick data and calculates 1s OHLCV bars in real-time.
It preserves raw tick data for pattern detection while providing aggregated data
for traditional analysis.
Features:
- Raw tick data preservation with time difference analysis
- Real-time 1s OHLCV calculation from ticks
- Tick pattern detection for violent moves
- Volume-weighted price calculations
- Microstructure analysis
"""
import logging
import time
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple, Any, Deque
from collections import deque
from threading import Lock
from dataclasses import dataclass, field
logger = logging.getLogger(__name__)
@dataclass
class RawTick:
    """Raw tick data with timing information.

    One instance is created per incoming trade by
    ``RealTimeTickAggregator.process_tick``; the last three fields are
    derived there from the previous tick of the same symbol.
    """

    timestamp: datetime       # Trade time as supplied by the caller
    price: float              # Trade price
    volume: float             # Amount used for bar volume, VWAP and intensity
    quantity: float           # Stored as supplied; not used by the aggregator itself
    side: str                 # 'buy' or 'sell'
    trade_id: str             # Identifier of the trade as supplied by the caller
    time_since_last: float    # Milliseconds since last tick (0.0 for the first tick)
    price_change: float       # Price change from last tick (0.0 for the first tick)
    volume_intensity: float   # Volume relative to recent average (1.0 = average)
@dataclass
class TickPattern:
    """Detected tick pattern for violent moves.

    Describes either a single-tick event (start_time == end_time,
    tick_count == 1) or a run of ticks spanning start_time..end_time.
    """

    start_time: datetime            # Timestamp of the first tick in the pattern
    end_time: datetime              # Timestamp of the last tick in the pattern
    # One of: 'rapid_fire', 'volume_spike', 'price_acceleration',
    # 'high_frequency_bar', 'volume_concentration'
    pattern_type: str
    tick_count: int                 # Number of ticks that make up the pattern
    avg_time_between_ticks: float   # Mean gap between the ticks, in milliseconds
    price_change: float             # Absolute price move across the pattern
    volume_total: float             # Summed volume of the ticks in the pattern
    confidence: float               # Detector score, capped at 1.0
@dataclass
class OHLCVBar:
    """1-second OHLCV bar calculated from ticks."""

    timestamp: datetime   # Bar start: tick timestamp truncated to the whole second
    open: float           # Price of the first tick in the bar
    high: float           # Highest tick price in the bar
    low: float            # Lowest tick price in the bar
    close: float          # Price of the last tick in the bar
    volume: float         # Sum of tick volumes in the bar
    tick_count: int       # Number of ticks aggregated into the bar
    buy_volume: float     # Volume of ticks whose side is 'buy'
    sell_volume: float    # Volume of ticks whose side is 'sell'
    vwap: float           # Volume-weighted average price
    # Patterns detected inside this bar; default_factory gives each bar its own list
    patterns: List["TickPattern"] = field(default_factory=list)
class RealTimeTickAggregator:
    """
    Real-time tick aggregator that processes raw ticks and calculates 1s OHLCV bars
    while preserving tick-level data for pattern detection.

    Shared state is guarded by ``data_lock``, a plain (non-reentrant)
    ``threading.Lock``: a method that holds it must never call another
    method that acquires it. Private ``_``-prefixed helpers expect the
    caller to already hold the lock.
    """

    def __init__(self, symbols: Optional[List[str]] = None, tick_buffer_size: int = 10000):
        """Initialize the tick aggregator.

        Args:
            symbols: Symbols to track; defaults to ['ETHUSDT', 'BTCUSDT'].
            tick_buffer_size: Maximum number of raw ticks kept per symbol.
        """
        # Copy so later mutation of the caller's list cannot desync the buffers
        self.symbols = list(symbols) if symbols else ['ETHUSDT', 'BTCUSDT']
        self.tick_buffer_size = tick_buffer_size

        # Raw tick storage (preserves all ticks with timing data)
        self.raw_tick_buffers: Dict[str, Deque[RawTick]] = {}
        # 1s OHLCV bars calculated from ticks
        self.ohlcv_bars: Dict[str, Deque[OHLCVBar]] = {}
        # Current incomplete bars being built (None until the first tick arrives)
        self.current_bars: Dict[str, Optional[Dict]] = {}
        # Pattern detection storage
        self.detected_patterns: Dict[str, Deque[TickPattern]] = {}
        # Volume tracking for intensity calculation
        self.volume_history: Dict[str, Deque[float]] = {}

        # Initialize buffers for each symbol
        for symbol in self.symbols:
            self.raw_tick_buffers[symbol] = deque(maxlen=tick_buffer_size)
            self.ohlcv_bars[symbol] = deque(maxlen=3600)  # 1 hour of 1s bars
            self.detected_patterns[symbol] = deque(maxlen=1000)
            self.current_bars[symbol] = None
            self.volume_history[symbol] = deque(maxlen=100)  # Last 100 ticks for volume average

        # Pattern detection parameters
        self.rapid_fire_threshold = 50  # ms between ticks for rapid fire
        self.volume_spike_multiplier = 3.0  # Volume spike threshold
        self.price_acceleration_threshold = 0.001  # 0.1% price change threshold

        # Statistics tracking
        self.stats = {
            'total_ticks_processed': 0,
            'patterns_detected': 0,
            'bars_created': 0,
            'processing_times': deque(maxlen=1000)
        }

        # Thread safety
        self.data_lock = Lock()

        # Last tick tracking for time differences
        self.last_tick_times: Dict[str, datetime] = {}
        self.last_tick_prices: Dict[str, float] = {}

        logger.info(f"RealTimeTickAggregator initialized for symbols: {self.symbols}")
        logger.info(f"Tick buffer size: {tick_buffer_size}")
        logger.info("Pattern detection enabled for violent moves")

    def process_tick(self, symbol: str, timestamp: datetime, price: float,
                     volume: float, quantity: float, side: str,
                     trade_id: str) -> "Tuple[Optional[RawTick], Optional[OHLCVBar]]":
        """
        Process a raw tick and return the tick data plus any completed 1s bar.

        Args:
            symbol: Symbol the trade belongs to; must be one of ``self.symbols``.
            timestamp: Trade time.
            price: Trade price.
            volume: Amount accumulated into bar volume and VWAP.
            quantity: Stored on the tick unchanged.
            side: 'buy' or 'sell'.
            trade_id: Trade identifier, stored on the tick unchanged.

        Returns:
            Tuple[Optional[RawTick], Optional[OHLCVBar]]: The processed tick and the
            bar that this tick closed (if any). ``(None, None)`` when processing
            fails, e.g. for a symbol that was not registered at construction.
        """
        start_time = time.time()
        try:
            with self.data_lock:
                # Calculate timing information
                time_since_last = 0.0
                price_change = 0.0
                if symbol in self.last_tick_times:
                    time_diff = (timestamp - self.last_tick_times[symbol]).total_seconds() * 1000
                    time_since_last = max(0.0, time_diff)  # Ensure non-negative
                if symbol in self.last_tick_prices:
                    price_change = price - self.last_tick_prices[symbol]

                # Calculate volume intensity (against the average *before* this tick)
                volume_intensity = self._calculate_volume_intensity(symbol, volume)

                # Create raw tick
                raw_tick = RawTick(
                    timestamp=timestamp,
                    price=price,
                    volume=volume,
                    quantity=quantity,
                    side=side,
                    trade_id=trade_id,
                    time_since_last=time_since_last,
                    price_change=price_change,
                    volume_intensity=volume_intensity
                )

                # Add to raw tick buffer (raises KeyError for unknown symbols)
                self.raw_tick_buffers[symbol].append(raw_tick)

                # Update tracking
                self.last_tick_times[symbol] = timestamp
                self.last_tick_prices[symbol] = price
                self.volume_history[symbol].append(volume)

                # Process for 1s OHLCV bar
                completed_bar = self._update_ohlcv_bar(symbol, raw_tick)

                # Detect patterns
                self._detect_tick_patterns(symbol, raw_tick)

                # Update statistics
                self.stats['total_ticks_processed'] += 1
                processing_time = (time.time() - start_time) * 1000
                self.stats['processing_times'].append(processing_time)

                return raw_tick, completed_bar
        except Exception as e:
            logger.error(f"Error processing tick for {symbol}: {e}")
            return None, None

    def _calculate_volume_intensity(self, symbol: str, volume: float) -> float:
        """Calculate volume intensity relative to recent average.

        Returns 1.0 (neutral) until at least 10 volumes have been recorded
        for the symbol, or when the recent average is not positive.
        """
        try:
            history = self.volume_history.get(symbol)
            if history is None or len(history) < 10:
                return 1.0  # Default intensity
            avg_volume = float(np.mean(list(history)))
            if avg_volume > 0:
                return volume / avg_volume
            return 1.0
        except Exception as e:
            logger.error(f"Error calculating volume intensity: {e}")
            return 1.0

    def _update_ohlcv_bar(self, symbol: str, tick: "RawTick") -> "Optional[OHLCVBar]":
        """Update or create 1s OHLCV bar from tick data.

        Returns the bar that was just closed when ``tick`` falls into a
        different second than the bar under construction, otherwise None.
        """
        try:
            # Get the second timestamp (truncate to second)
            bar_timestamp = tick.timestamp.replace(microsecond=0)
            current_bar = self.current_bars[symbol]

            if current_bar is not None and current_bar['timestamp'] == bar_timestamp:
                # Update existing bar
                current_bar['high'] = max(current_bar['high'], tick.price)
                current_bar['low'] = min(current_bar['low'], tick.price)
                current_bar['close'] = tick.price
                current_bar['volume'] += tick.volume
                current_bar['tick_count'] += 1
                current_bar['volume_price_sum'] += tick.volume * tick.price
                current_bar['ticks'].append(tick)
                # Same strict side matching as when the bar is created below, so
                # a tick with an unrecognised side never inflates sell_volume.
                if tick.side == 'buy':
                    current_bar['buy_volume'] += tick.volume
                elif tick.side == 'sell':
                    current_bar['sell_volume'] += tick.volume
                return None

            # Complete the previous bar if it exists
            completed_bar = None
            if current_bar is not None:
                completed_bar = self._finalize_bar(symbol, current_bar)

            # Start new bar
            self.current_bars[symbol] = {
                'timestamp': bar_timestamp,
                'open': tick.price,
                'high': tick.price,
                'low': tick.price,
                'close': tick.price,
                'volume': tick.volume,
                'tick_count': 1,
                'buy_volume': tick.volume if tick.side == 'buy' else 0,
                'sell_volume': tick.volume if tick.side == 'sell' else 0,
                'volume_price_sum': tick.volume * tick.price,
                'ticks': [tick]
            }
            return completed_bar
        except Exception as e:
            logger.error(f"Error updating OHLCV bar for {symbol}: {e}")
            return None

    def _finalize_bar(self, symbol: str, bar_data: Dict) -> "Optional[OHLCVBar]":
        """Finalize a 1s OHLCV bar and detect patterns.

        Stores the finished bar in ``ohlcv_bars`` and returns it; returns
        None if finalization fails.
        """
        try:
            # Calculate VWAP (fall back to close for a zero-volume bar)
            if bar_data['volume'] > 0:
                vwap = bar_data['volume_price_sum'] / bar_data['volume']
            else:
                vwap = bar_data['close']

            # Detect patterns in this bar
            bar_patterns = self._detect_bar_patterns(symbol, bar_data['ticks'])

            # Create OHLCV bar
            ohlcv_bar = OHLCVBar(
                timestamp=bar_data['timestamp'],
                open=bar_data['open'],
                high=bar_data['high'],
                low=bar_data['low'],
                close=bar_data['close'],
                volume=bar_data['volume'],
                tick_count=bar_data['tick_count'],
                buy_volume=bar_data['buy_volume'],
                sell_volume=bar_data['sell_volume'],
                vwap=vwap,
                patterns=bar_patterns
            )

            # Add to buffer
            self.ohlcv_bars[symbol].append(ohlcv_bar)
            self.stats['bars_created'] += 1
            return ohlcv_bar
        except Exception as e:
            logger.error(f"Error finalizing bar for {symbol}: {e}")
            return None

    @staticmethod
    def _relative_price_change(tick) -> float:
        """Return |price change| as a fraction of the price *before* this tick.

        The previous price is rebuilt from the tick itself because
        ``last_tick_prices`` already holds the current price by the time
        pattern detection runs. Returns 0.0 if that price is not positive.
        """
        previous_price = tick.price - tick.price_change
        if previous_price <= 0:
            return 0.0
        return abs(tick.price_change) / previous_price

    @staticmethod
    def _tail(buffer, count: int) -> list:
        """Return the last ``count`` items of ``buffer`` (oldest first).

        Walks backwards from the end so the cost depends on ``count`` and not
        on the buffer length. A non-positive ``count`` yields an empty list.
        """
        if count <= 0:
            return []
        newest_first = []
        for item in reversed(buffer):
            newest_first.append(item)
            if len(newest_first) == count:
                break
        newest_first.reverse()
        return newest_first

    def _detect_tick_patterns(self, symbol: str, tick: "RawTick"):
        """Detect patterns in tick data for violent moves."""
        try:
            # Rapid fire detection (very fast ticks)
            if 0 < tick.time_since_last < self.rapid_fire_threshold:
                self._check_rapid_fire_pattern(symbol, tick)

            # Volume spike detection
            if tick.volume_intensity > self.volume_spike_multiplier:
                self._check_volume_spike_pattern(symbol, tick)

            # Price acceleration detection
            if abs(tick.price_change) > 0:
                price_change_pct = self._relative_price_change(tick)
                if price_change_pct > self.price_acceleration_threshold:
                    self._check_price_acceleration_pattern(symbol, tick)
        except Exception as e:
            logger.error(f"Error detecting tick patterns for {symbol}: {e}")

    def _check_rapid_fire_pattern(self, symbol: str, tick: "RawTick"):
        """Check for rapid fire tick pattern.

        Records a 'rapid_fire' pattern when at least 5 of the last 10 ticks
        arrived faster than ``rapid_fire_threshold`` milliseconds apart.
        """
        try:
            # Look at last 10 ticks to see if we have a rapid fire sequence
            recent_ticks = self._tail(self.raw_tick_buffers[symbol], 10)
            if len(recent_ticks) < 5:
                return
            rapid_ticks = [t for t in recent_ticks if t.time_since_last < self.rapid_fire_threshold]
            if len(rapid_ticks) < 5:
                return

            # We have a rapid fire pattern
            pattern = TickPattern(
                start_time=rapid_ticks[0].timestamp,
                end_time=tick.timestamp,
                pattern_type='rapid_fire',
                tick_count=len(rapid_ticks),
                avg_time_between_ticks=np.mean([t.time_since_last for t in rapid_ticks]),
                price_change=tick.price - rapid_ticks[0].price,
                volume_total=sum(t.volume for t in rapid_ticks),
                confidence=min(1.0, len(rapid_ticks) / 10.0)
            )
            self.detected_patterns[symbol].append(pattern)
            self.stats['patterns_detected'] += 1
            logger.debug(f"RAPID FIRE pattern detected for {symbol}: {len(rapid_ticks)} ticks in {(tick.timestamp - rapid_ticks[0].timestamp).total_seconds():.3f}s")
        except Exception as e:
            logger.error(f"Error checking rapid fire pattern: {e}")

    def _check_volume_spike_pattern(self, symbol: str, tick: "RawTick"):
        """Check for volume spike pattern (records a single-tick 'volume_spike')."""
        try:
            pattern = TickPattern(
                start_time=tick.timestamp,
                end_time=tick.timestamp,
                pattern_type='volume_spike',
                tick_count=1,
                avg_time_between_ticks=tick.time_since_last,
                price_change=tick.price_change,
                volume_total=tick.volume,
                confidence=min(1.0, tick.volume_intensity / self.volume_spike_multiplier)
            )
            self.detected_patterns[symbol].append(pattern)
            self.stats['patterns_detected'] += 1
            logger.debug(f"VOLUME SPIKE pattern detected for {symbol}: {tick.volume_intensity:.2f}x normal volume")
        except Exception as e:
            logger.error(f"Error checking volume spike pattern: {e}")

    def _check_price_acceleration_pattern(self, symbol: str, tick: "RawTick"):
        """Check for price acceleration pattern (records a single-tick 'price_acceleration')."""
        try:
            price_change_pct = self._relative_price_change(tick)
            pattern = TickPattern(
                start_time=tick.timestamp,
                end_time=tick.timestamp,
                pattern_type='price_acceleration',
                tick_count=1,
                avg_time_between_ticks=tick.time_since_last,
                price_change=tick.price_change,
                volume_total=tick.volume,
                confidence=min(1.0, price_change_pct / self.price_acceleration_threshold)
            )
            self.detected_patterns[symbol].append(pattern)
            self.stats['patterns_detected'] += 1
            logger.debug(f"PRICE ACCELERATION pattern detected for {symbol}: {price_change_pct*100:.3f}% change")
        except Exception as e:
            logger.error(f"Error checking price acceleration pattern: {e}")

    def _detect_bar_patterns(self, symbol: str, ticks: "List[RawTick]") -> "List[TickPattern]":
        """Detect patterns within a completed 1s bar.

        Returns the patterns found (possibly empty); they are attached to the
        bar by the caller and are not added to ``detected_patterns`` here.
        """
        patterns = []
        try:
            if len(ticks) < 2:
                return patterns

            # Gaps are measured from the second tick on: the first tick's gap
            # reaches back into the previous bar.
            avg_time = np.mean([t.time_since_last for t in ticks[1:]])
            bar_price_change = ticks[-1].price - ticks[0].price
            total_volume = sum(t.volume for t in ticks)

            # Check for high-frequency trading within the bar
            if len(ticks) > 20:  # More than 20 ticks in 1 second
                patterns.append(TickPattern(
                    start_time=ticks[0].timestamp,
                    end_time=ticks[-1].timestamp,
                    pattern_type='high_frequency_bar',
                    tick_count=len(ticks),
                    avg_time_between_ticks=avg_time,
                    price_change=bar_price_change,
                    volume_total=total_volume,
                    confidence=min(1.0, len(ticks) / 50.0)
                ))

            # Check for volume concentration
            max_tick_volume = max(t.volume for t in ticks)
            if max_tick_volume > total_volume * 0.5:  # Single tick has >50% of bar volume
                patterns.append(TickPattern(
                    start_time=ticks[0].timestamp,
                    end_time=ticks[-1].timestamp,
                    pattern_type='volume_concentration',
                    tick_count=len(ticks),
                    avg_time_between_ticks=avg_time,
                    price_change=bar_price_change,
                    volume_total=total_volume,
                    confidence=max_tick_volume / total_volume
                ))
        except Exception as e:
            logger.error(f"Error detecting bar patterns: {e}")
        return patterns

    def get_raw_ticks(self, symbol: str, count: int = 100) -> "List[RawTick]":
        """Get recent raw ticks for a symbol (oldest first).

        Returns an empty list for an unknown symbol or a non-positive count.
        """
        with self.data_lock:
            buffer = self.raw_tick_buffers.get(symbol)
            if buffer is None:
                return []
            return self._tail(buffer, count)

    def get_ohlcv_bars(self, symbol: str, count: int = 100) -> "List[OHLCVBar]":
        """Get recent 1s OHLCV bars for a symbol (oldest first).

        Returns an empty list for an unknown symbol or a non-positive count.
        """
        with self.data_lock:
            buffer = self.ohlcv_bars.get(symbol)
            if buffer is None:
                return []
            return self._tail(buffer, count)

    def get_detected_patterns(self, symbol: str, count: int = 50) -> "List[TickPattern]":
        """Get recently detected patterns for a symbol (oldest first).

        Returns an empty list for an unknown symbol or a non-positive count.
        """
        with self.data_lock:
            buffer = self.detected_patterns.get(symbol)
            if buffer is None:
                return []
            return self._tail(buffer, count)

    def get_tick_features_for_model(self, symbol: str, window_size: int = 50) -> Optional[np.ndarray]:
        """
        Get tick features formatted for model consumption.

        Returns:
            np.ndarray: Shape (window_size, features) where features include:
            [price, volume, time_since_last, price_change, volume_intensity, side_indicator]
            None when fewer than ``window_size`` ticks are buffered, when
            ``window_size`` is not positive, or on error.
        """
        try:
            if window_size <= 0:
                return None
            # get_raw_ticks acquires data_lock and returns a snapshot; it must not
            # be called with the lock already held because Lock is not reentrant.
            recent_ticks = self.get_raw_ticks(symbol, window_size)
            if len(recent_ticks) < window_size:
                return None
            features = [
                [
                    tick.price,
                    tick.volume,
                    tick.time_since_last,
                    tick.price_change,
                    tick.volume_intensity,
                    1.0 if tick.side == 'buy' else 0.0  # Buy/sell indicator
                ]
                for tick in recent_ticks
            ]
            return np.array(features, dtype=np.float32)
        except Exception as e:
            logger.error(f"Error getting tick features for model: {e}")
            return None

    def get_ohlcv_features_for_model(self, symbol: str, window_size: int = 60) -> Optional[np.ndarray]:
        """
        Get 1s OHLCV features formatted for model consumption.

        Returns:
            np.ndarray: Shape (window_size, 5) - standard OHLCV format.
            None when fewer than ``window_size`` bars are available, when
            ``window_size`` is not positive, or on error.
        """
        try:
            if window_size <= 0:
                return None
            # See get_tick_features_for_model: the getter does its own locking.
            recent_bars = self.get_ohlcv_bars(symbol, window_size)
            if len(recent_bars) < window_size:
                return None
            features = [
                [bar.open, bar.high, bar.low, bar.close, bar.volume]
                for bar in recent_bars
            ]
            return np.array(features, dtype=np.float32)
        except Exception as e:
            logger.error(f"Error getting OHLCV features for model: {e}")
            return None

    def get_statistics(self) -> Dict[str, Any]:
        """Get aggregator statistics (counters, mean processing time in ms, buffer fill levels)."""
        with self.data_lock:
            processing_times = self.stats['processing_times']
            avg_processing_time = float(np.mean(list(processing_times))) if processing_times else 0
            return {
                'total_ticks_processed': self.stats['total_ticks_processed'],
                'patterns_detected': self.stats['patterns_detected'],
                'bars_created': self.stats['bars_created'],
                'avg_processing_time_ms': avg_processing_time,
                'symbols': self.symbols,
                'buffer_sizes': {symbol: len(self.raw_tick_buffers[symbol]) for symbol in self.symbols},
                'bar_counts': {symbol: len(self.ohlcv_bars[symbol]) for symbol in self.symbols},
                'pattern_counts': {symbol: len(self.detected_patterns[symbol]) for symbol in self.symbols}
            }

    @staticmethod
    def _drop_older_than(buffer, max_age: timedelta, time_attr: str) -> None:
        """Pop items from the left of ``buffer`` while they are older than ``max_age``.

        The cutoff is built with the same tzinfo as the item being examined,
        so naive and timezone-aware timestamps are both comparable.
        """
        cutoff = None
        while buffer:
            oldest = getattr(buffer[0], time_attr)
            if cutoff is None or cutoff.tzinfo is not oldest.tzinfo:
                cutoff = datetime.now(tz=oldest.tzinfo) - max_age
            if oldest >= cutoff:
                break
            buffer.popleft()

    def clear_old_data(self, hours_to_keep: int = 1):
        """Clear old data to manage memory usage.

        Args:
            hours_to_keep: Ticks, bars and patterns older than this many hours
                (relative to the current time) are discarded.
        """
        try:
            max_age = timedelta(hours=hours_to_keep)
            with self.data_lock:
                for symbol in self.symbols:
                    # Clear old ticks
                    self._drop_older_than(self.raw_tick_buffers[symbol], max_age, 'timestamp')
                    # Clear old bars
                    self._drop_older_than(self.ohlcv_bars[symbol], max_age, 'timestamp')
                    # Clear old patterns
                    self._drop_older_than(self.detected_patterns[symbol], max_age, 'start_time')
            logger.info(f"Cleared data older than {hours_to_keep} hours")
        except Exception as e:
            logger.error(f"Error clearing old data: {e}")