560 lines
23 KiB
Python
560 lines
23 KiB
Python
"""
|
|
Real-Time Tick Aggregator for Raw Tick Processing
|
|
|
|
This module processes raw tick data and calculates 1s OHLCV bars in real-time.
|
|
It preserves raw tick data for pattern detection while providing aggregated data
|
|
for traditional analysis.
|
|
|
|
Features:
|
|
- Raw tick data preservation with time difference analysis
|
|
- Real-time 1s OHLCV calculation from ticks
|
|
- Tick pattern detection for violent moves
|
|
- Volume-weighted price calculations
|
|
- Microstructure analysis
|
|
"""
|
|
|
|
import logging
|
|
import time
|
|
import numpy as np
|
|
import pandas as pd
|
|
from datetime import datetime, timedelta
|
|
from typing import Dict, List, Optional, Tuple, Any, Deque
|
|
from collections import deque
|
|
from threading import Lock
|
|
from dataclasses import dataclass, field
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
@dataclass
class RawTick:
    """One trade tick together with timing and relative-size context.

    The first six fields describe the trade itself; the last three are
    derived values that relate the tick to the ticks that preceded it.
    """

    # --- the trade as reported ---
    timestamp: datetime
    price: float
    volume: float
    quantity: float
    side: str  # 'buy' or 'sell'
    trade_id: str

    # --- context relative to earlier ticks ---
    time_since_last: float  # milliseconds elapsed since the previous tick
    price_change: float  # price difference versus the previous tick
    volume_intensity: float  # volume relative to the recent average volume
|
|
|
|
@dataclass
class TickPattern:
    """A tick-level pattern flagged as a possible violent move.

    A pattern either describes a single tick (start and end time coincide)
    or summarizes a run of ticks.
    """

    # Time span covered by the pattern
    start_time: datetime
    end_time: datetime

    # Detector label: 'rapid_fire', 'volume_spike', 'price_acceleration',
    # 'high_frequency_bar' or 'volume_concentration'
    pattern_type: str

    tick_count: int  # number of ticks contributing to the pattern
    avg_time_between_ticks: float  # mean gap between contributing ticks
    price_change: float  # price movement across the pattern
    volume_total: float  # summed volume of the contributing ticks
    confidence: float  # detector score
|
|
|
|
@dataclass
class OHLCVBar:
    """A 1-second OHLCV bar built from raw ticks."""

    timestamp: datetime  # bar time

    # Price envelope of the bar
    open: float
    high: float
    low: float
    close: float

    # Activity within the bar
    volume: float
    tick_count: int
    buy_volume: float
    sell_volume: float

    vwap: float  # volume-weighted average price

    # Patterns attached to this bar; each instance gets its own list
    patterns: List[TickPattern] = field(default_factory=list)
|
|
|
|
class RealTimeTickAggregator:
    """
    Real-time tick aggregator that processes raw ticks and calculates 1s OHLCV
    bars while preserving tick-level data for pattern detection.
    """

    def __init__(self, symbols: Optional[List[str]] = None, tick_buffer_size: int = 10000):
        """Initialize per-symbol buffers, detection thresholds and statistics.

        Args:
            symbols: Symbols to track. Defaults to ``['ETHUSDT', 'BTCUSDT']``
                when omitted or empty.
            tick_buffer_size: Maximum number of raw ticks retained per symbol.
        """
        # Copy the list so that later mutation of the caller's object cannot
        # leave self.symbols out of step with the per-symbol buffers below.
        self.symbols = list(symbols) if symbols else ['ETHUSDT', 'BTCUSDT']
        self.tick_buffer_size = tick_buffer_size

        # Raw tick storage (preserves all ticks with timing data)
        self.raw_tick_buffers: Dict[str, Deque[RawTick]] = {}

        # 1s OHLCV bars calculated from ticks
        self.ohlcv_bars: Dict[str, Deque[OHLCVBar]] = {}

        # Current incomplete bars being built (None until a bar is started)
        self.current_bars: Dict[str, Optional[Dict]] = {}

        # Pattern detection storage
        self.detected_patterns: Dict[str, Deque[TickPattern]] = {}

        # Volume tracking for intensity calculation
        self.volume_history: Dict[str, Deque[float]] = {}

        # Initialize every per-symbol container in a single pass
        for symbol in self.symbols:
            self.raw_tick_buffers[symbol] = deque(maxlen=tick_buffer_size)
            self.ohlcv_bars[symbol] = deque(maxlen=3600)  # 1 hour of 1s bars
            self.detected_patterns[symbol] = deque(maxlen=1000)
            self.current_bars[symbol] = None
            self.volume_history[symbol] = deque(maxlen=100)  # Last 100 ticks for volume average

        # Pattern detection parameters
        self.rapid_fire_threshold = 50  # ms between ticks for rapid fire
        self.volume_spike_multiplier = 3.0  # Volume spike threshold
        self.price_acceleration_threshold = 0.001  # 0.1% price change threshold

        # Statistics tracking
        self.stats = {
            'total_ticks_processed': 0,
            'patterns_detected': 0,
            'bars_created': 0,
            'processing_times': deque(maxlen=1000),
        }

        # Thread safety. NOTE: Lock is not reentrant, so a method that holds
        # it must not call another method that acquires it.
        self.data_lock = Lock()

        # Last tick tracking for time differences
        self.last_tick_times: Dict[str, datetime] = {}
        self.last_tick_prices: Dict[str, float] = {}

        logger.info(f"RealTimeTickAggregator initialized for symbols: {self.symbols}")
        logger.info(f"Tick buffer size: {tick_buffer_size}")
        logger.info("Pattern detection enabled for violent moves")
|
|
|
|
def process_tick(self, symbol: str, timestamp: datetime, price: float,
                 volume: float, quantity: float, side: str,
                 trade_id: str) -> Tuple[Optional[RawTick], Optional[OHLCVBar]]:
    """
    Process a raw tick and return the tick data plus any completed 1s bar.

    The tick is enriched with the time and price difference to the previous
    tick of the same symbol and with its volume intensity, stored in the raw
    tick buffer, folded into the current 1s bar and passed to the pattern
    detectors.

    Args:
        symbol: Symbol the tick belongs to; must be one of the tracked symbols.
        timestamp: Trade time of the tick.
        price: Trade price.
        volume: Trade volume; drives bar volume, VWAP and volume intensity.
        quantity: Stored on the tick unchanged.
        side: 'buy' or 'sell'.
        trade_id: Stored on the tick unchanged.

    Returns:
        Tuple[Optional[RawTick], Optional[OHLCVBar]]: The processed tick and
        the bar this tick completed (if any). ``(None, None)`` is returned
        when the symbol is not tracked or processing fails.
    """
    # perf_counter is monotonic, so the measured duration is not distorted
    # by wall-clock adjustments.
    started = time.perf_counter()

    try:
        with self.data_lock:
            # Reject untracked symbols explicitly instead of surfacing an
            # opaque KeyError from the buffer lookup further down.
            if symbol not in self.raw_tick_buffers:
                logger.error(f"Error processing tick for {symbol}: symbol is not tracked")
                return None, None

            # Calculate timing information
            time_since_last = 0.0
            price_change = 0.0

            if symbol in self.last_tick_times:
                elapsed_ms = (timestamp - self.last_tick_times[symbol]).total_seconds() * 1000
                time_since_last = max(0.0, elapsed_ms)  # Ensure non-negative

            if symbol in self.last_tick_prices:
                price_change = price - self.last_tick_prices[symbol]

            # Intensity is measured against the history *before* this tick
            volume_intensity = self._calculate_volume_intensity(symbol, volume)

            # Create raw tick
            raw_tick = RawTick(
                timestamp=timestamp,
                price=price,
                volume=volume,
                quantity=quantity,
                side=side,
                trade_id=trade_id,
                time_since_last=time_since_last,
                price_change=price_change,
                volume_intensity=volume_intensity,
            )

            # Add to raw tick buffer
            self.raw_tick_buffers[symbol].append(raw_tick)

            # Update tracking
            self.last_tick_times[symbol] = timestamp
            self.last_tick_prices[symbol] = price
            self.volume_history[symbol].append(volume)

            # Process for 1s OHLCV bar
            completed_bar = self._update_ohlcv_bar(symbol, raw_tick)

            # Detect patterns
            self._detect_tick_patterns(symbol, raw_tick)

            # Update statistics (processing time is recorded in milliseconds)
            self.stats['total_ticks_processed'] += 1
            self.stats['processing_times'].append((time.perf_counter() - started) * 1000)

            return raw_tick, completed_bar

    except Exception as e:
        logger.error(f"Error processing tick for {symbol}: {e}")
        return None, None
|
|
|
|
def _calculate_volume_intensity(self, symbol: str, volume: float) -> float:
|
|
"""Calculate volume intensity relative to recent average"""
|
|
try:
|
|
if symbol not in self.volume_history or len(self.volume_history[symbol]) < 10:
|
|
return 1.0 # Default intensity
|
|
|
|
recent_volumes = list(self.volume_history[symbol])
|
|
avg_volume = np.mean(recent_volumes)
|
|
|
|
if avg_volume > 0:
|
|
return volume / avg_volume
|
|
return 1.0
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error calculating volume intensity: {e}")
|
|
return 1.0
|
|
|
|
def _update_ohlcv_bar(self, symbol: str, tick: RawTick) -> Optional[OHLCVBar]:
|
|
"""Update or create 1s OHLCV bar from tick data"""
|
|
try:
|
|
# Get the second timestamp (truncate to second)
|
|
bar_timestamp = tick.timestamp.replace(microsecond=0)
|
|
|
|
# Check if we need a new bar
|
|
current_bar = self.current_bars[symbol]
|
|
|
|
if current_bar is None or current_bar['timestamp'] != bar_timestamp:
|
|
# Complete the previous bar if it exists
|
|
completed_bar = None
|
|
if current_bar is not None:
|
|
completed_bar = self._finalize_bar(symbol, current_bar)
|
|
|
|
# Start new bar
|
|
self.current_bars[symbol] = {
|
|
'timestamp': bar_timestamp,
|
|
'open': tick.price,
|
|
'high': tick.price,
|
|
'low': tick.price,
|
|
'close': tick.price,
|
|
'volume': tick.volume,
|
|
'tick_count': 1,
|
|
'buy_volume': tick.volume if tick.side == 'buy' else 0,
|
|
'sell_volume': tick.volume if tick.side == 'sell' else 0,
|
|
'volume_price_sum': tick.volume * tick.price,
|
|
'ticks': [tick]
|
|
}
|
|
|
|
return completed_bar
|
|
else:
|
|
# Update existing bar
|
|
current_bar['high'] = max(current_bar['high'], tick.price)
|
|
current_bar['low'] = min(current_bar['low'], tick.price)
|
|
current_bar['close'] = tick.price
|
|
current_bar['volume'] += tick.volume
|
|
current_bar['tick_count'] += 1
|
|
current_bar['volume_price_sum'] += tick.volume * tick.price
|
|
current_bar['ticks'].append(tick)
|
|
|
|
if tick.side == 'buy':
|
|
current_bar['buy_volume'] += tick.volume
|
|
else:
|
|
current_bar['sell_volume'] += tick.volume
|
|
|
|
return None
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error updating OHLCV bar for {symbol}: {e}")
|
|
return None
|
|
|
|
def _finalize_bar(self, symbol: str, bar_data: Dict) -> OHLCVBar:
    """Turn an in-progress bar dict into an OHLCVBar and store it.

    Computes the bar's VWAP (falling back to the close price when the bar has
    no positive volume), runs bar-level pattern detection over the bar's
    ticks, appends the finished bar to the symbol's bar buffer and bumps the
    'bars_created' counter.

    Returns:
        The finished bar, or None if finalization fails.
    """
    try:
        # Calculate VWAP
        traded = bar_data['volume']
        if traded > 0:
            vwap = bar_data['volume_price_sum'] / traded
        else:
            vwap = bar_data['close']

        # Detect patterns in this bar
        bar_patterns = self._detect_bar_patterns(symbol, bar_data['ticks'])

        # Fields copied verbatim from the working dict into the bar
        carried = (
            'timestamp', 'open', 'high', 'low', 'close',
            'volume', 'tick_count', 'buy_volume', 'sell_volume',
        )
        finished = OHLCVBar(
            **{name: bar_data[name] for name in carried},
            vwap=vwap,
            patterns=bar_patterns,
        )

        # Add to buffer
        self.ohlcv_bars[symbol].append(finished)
        self.stats['bars_created'] += 1
        return finished

    except Exception as e:
        logger.error(f"Error finalizing bar for {symbol}: {e}")
        return None
|
|
|
|
def _detect_tick_patterns(self, symbol: str, tick: RawTick):
|
|
"""Detect patterns in tick data for violent moves"""
|
|
try:
|
|
# Rapid fire detection (very fast ticks)
|
|
if tick.time_since_last > 0 and tick.time_since_last < self.rapid_fire_threshold:
|
|
self._check_rapid_fire_pattern(symbol, tick)
|
|
|
|
# Volume spike detection
|
|
if tick.volume_intensity > self.volume_spike_multiplier:
|
|
self._check_volume_spike_pattern(symbol, tick)
|
|
|
|
# Price acceleration detection
|
|
if abs(tick.price_change) > 0 and symbol in self.last_tick_prices:
|
|
price_change_pct = abs(tick.price_change) / self.last_tick_prices[symbol]
|
|
if price_change_pct > self.price_acceleration_threshold:
|
|
self._check_price_acceleration_pattern(symbol, tick)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error detecting tick patterns for {symbol}: {e}")
|
|
|
|
def _check_rapid_fire_pattern(self, symbol: str, tick: RawTick):
|
|
"""Check for rapid fire tick pattern"""
|
|
try:
|
|
# Look at last 10 ticks to see if we have a rapid fire sequence
|
|
recent_ticks = list(self.raw_tick_buffers[symbol])[-10:]
|
|
|
|
if len(recent_ticks) >= 5:
|
|
rapid_ticks = [t for t in recent_ticks if t.time_since_last < self.rapid_fire_threshold]
|
|
|
|
if len(rapid_ticks) >= 5:
|
|
# We have a rapid fire pattern
|
|
pattern = TickPattern(
|
|
start_time=rapid_ticks[0].timestamp,
|
|
end_time=tick.timestamp,
|
|
pattern_type='rapid_fire',
|
|
tick_count=len(rapid_ticks),
|
|
avg_time_between_ticks=np.mean([t.time_since_last for t in rapid_ticks]),
|
|
price_change=tick.price - rapid_ticks[0].price,
|
|
volume_total=sum(t.volume for t in rapid_ticks),
|
|
confidence=min(1.0, len(rapid_ticks) / 10.0)
|
|
)
|
|
|
|
self.detected_patterns[symbol].append(pattern)
|
|
self.stats['patterns_detected'] += 1
|
|
|
|
logger.debug(f"RAPID FIRE pattern detected for {symbol}: {len(rapid_ticks)} ticks in {(tick.timestamp - rapid_ticks[0].timestamp).total_seconds():.3f}s")
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error checking rapid fire pattern: {e}")
|
|
|
|
def _check_volume_spike_pattern(self, symbol: str, tick: RawTick):
    """Store a single-tick 'volume_spike' pattern describing *tick*.

    The pattern's confidence is the tick's volume intensity divided by
    ``volume_spike_multiplier``, capped at 1.0. Errors are logged and
    swallowed.
    """
    try:
        # Pull the tick's values out first; the pattern starts and ends on
        # the same instant because it describes this one tick.
        moment = tick.timestamp
        gap = tick.time_since_last
        move = tick.price_change
        size = tick.volume
        intensity = tick.volume_intensity

        spike = TickPattern(
            start_time=moment,
            end_time=moment,
            pattern_type='volume_spike',
            tick_count=1,
            avg_time_between_ticks=gap,
            price_change=move,
            volume_total=size,
            confidence=min(1.0, intensity / self.volume_spike_multiplier),
        )

        self.detected_patterns[symbol].append(spike)
        self.stats['patterns_detected'] += 1

        logger.debug(f"VOLUME SPIKE pattern detected for {symbol}: {tick.volume_intensity:.2f}x normal volume")

    except Exception as e:
        logger.error(f"Error checking volume spike pattern: {e}")
|
|
|
|
def _check_price_acceleration_pattern(self, symbol: str, tick: RawTick):
    """Store a single-tick 'price_acceleration' pattern describing *tick*.

    The relative price change is measured against the price of the previous
    tick, and the pattern's confidence is that change divided by
    ``price_acceleration_threshold``, capped at 1.0. Nothing is recorded when
    the previous price is not positive. Errors are logged and swallowed.
    """
    try:
        # The previous price is reconstructed from the tick itself.
        # process_tick overwrites last_tick_prices[symbol] with the current
        # price before the detectors run, so dividing by that entry would
        # measure the move against the new price instead of the old one.
        previous_price = tick.price - tick.price_change
        if previous_price <= 0:
            return  # no meaningful percentage can be formed

        price_change_pct = abs(tick.price_change) / previous_price

        pattern = TickPattern(
            start_time=tick.timestamp,
            end_time=tick.timestamp,
            pattern_type='price_acceleration',
            tick_count=1,
            avg_time_between_ticks=tick.time_since_last,
            price_change=tick.price_change,
            volume_total=tick.volume,
            confidence=min(1.0, price_change_pct / self.price_acceleration_threshold),
        )

        self.detected_patterns[symbol].append(pattern)
        self.stats['patterns_detected'] += 1

        logger.debug(f"PRICE ACCELERATION pattern detected for {symbol}: {price_change_pct*100:.3f}% change")

    except Exception as e:
        logger.error(f"Error checking price acceleration pattern: {e}")
|
|
|
|
def _detect_bar_patterns(self, symbol: str, ticks: List[RawTick]) -> List[TickPattern]:
|
|
"""Detect patterns within a completed 1s bar"""
|
|
patterns = []
|
|
|
|
try:
|
|
if len(ticks) < 2:
|
|
return patterns
|
|
|
|
# Check for high-frequency trading within the bar
|
|
if len(ticks) > 20: # More than 20 ticks in 1 second
|
|
avg_time = np.mean([t.time_since_last for t in ticks[1:]])
|
|
pattern = TickPattern(
|
|
start_time=ticks[0].timestamp,
|
|
end_time=ticks[-1].timestamp,
|
|
pattern_type='high_frequency_bar',
|
|
tick_count=len(ticks),
|
|
avg_time_between_ticks=avg_time,
|
|
price_change=ticks[-1].price - ticks[0].price,
|
|
volume_total=sum(t.volume for t in ticks),
|
|
confidence=min(1.0, len(ticks) / 50.0)
|
|
)
|
|
patterns.append(pattern)
|
|
|
|
# Check for volume concentration
|
|
total_volume = sum(t.volume for t in ticks)
|
|
max_tick_volume = max(t.volume for t in ticks)
|
|
if max_tick_volume > total_volume * 0.5: # Single tick has >50% of bar volume
|
|
pattern = TickPattern(
|
|
start_time=ticks[0].timestamp,
|
|
end_time=ticks[-1].timestamp,
|
|
pattern_type='volume_concentration',
|
|
tick_count=len(ticks),
|
|
avg_time_between_ticks=np.mean([t.time_since_last for t in ticks[1:]]),
|
|
price_change=ticks[-1].price - ticks[0].price,
|
|
volume_total=total_volume,
|
|
confidence=max_tick_volume / total_volume
|
|
)
|
|
patterns.append(pattern)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error detecting bar patterns: {e}")
|
|
|
|
return patterns
|
|
|
|
def get_raw_ticks(self, symbol: str, count: int = 100) -> List[RawTick]:
|
|
"""Get recent raw ticks for a symbol"""
|
|
with self.data_lock:
|
|
if symbol in self.raw_tick_buffers:
|
|
return list(self.raw_tick_buffers[symbol])[-count:]
|
|
return []
|
|
|
|
def get_ohlcv_bars(self, symbol: str, count: int = 100) -> List[OHLCVBar]:
|
|
"""Get recent 1s OHLCV bars for a symbol"""
|
|
with self.data_lock:
|
|
if symbol in self.ohlcv_bars:
|
|
return list(self.ohlcv_bars[symbol])[-count:]
|
|
return []
|
|
|
|
def get_detected_patterns(self, symbol: str, count: int = 50) -> List[TickPattern]:
|
|
"""Get recently detected patterns for a symbol"""
|
|
with self.data_lock:
|
|
if symbol in self.detected_patterns:
|
|
return list(self.detected_patterns[symbol])[-count:]
|
|
return []
|
|
|
|
def get_tick_features_for_model(self, symbol: str, window_size: int = 50) -> Optional[np.ndarray]:
|
|
"""
|
|
Get tick features formatted for model consumption
|
|
|
|
Returns:
|
|
np.ndarray: Shape (window_size, features) where features include:
|
|
[price, volume, time_since_last, price_change, volume_intensity, side_indicator]
|
|
"""
|
|
try:
|
|
with self.data_lock:
|
|
recent_ticks = self.get_raw_ticks(symbol, window_size)
|
|
|
|
if len(recent_ticks) < window_size:
|
|
return None
|
|
|
|
features = []
|
|
for tick in recent_ticks:
|
|
tick_features = [
|
|
tick.price,
|
|
tick.volume,
|
|
tick.time_since_last,
|
|
tick.price_change,
|
|
tick.volume_intensity,
|
|
1.0 if tick.side == 'buy' else 0.0 # Buy/sell indicator
|
|
]
|
|
features.append(tick_features)
|
|
|
|
return np.array(features, dtype=np.float32)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting tick features for model: {e}")
|
|
return None
|
|
|
|
def get_ohlcv_features_for_model(self, symbol: str, window_size: int = 60) -> Optional[np.ndarray]:
|
|
"""
|
|
Get 1s OHLCV features formatted for model consumption
|
|
|
|
Returns:
|
|
np.ndarray: Shape (window_size, 5) - standard OHLCV format
|
|
"""
|
|
try:
|
|
with self.data_lock:
|
|
recent_bars = self.get_ohlcv_bars(symbol, window_size)
|
|
|
|
if len(recent_bars) < window_size:
|
|
return None
|
|
|
|
features = []
|
|
for bar in recent_bars:
|
|
ohlcv_features = [
|
|
bar.open,
|
|
bar.high,
|
|
bar.low,
|
|
bar.close,
|
|
bar.volume
|
|
]
|
|
features.append(ohlcv_features)
|
|
|
|
return np.array(features, dtype=np.float32)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting OHLCV features for model: {e}")
|
|
return None
|
|
|
|
def get_statistics(self) -> Dict[str, Any]:
|
|
"""Get aggregator statistics"""
|
|
with self.data_lock:
|
|
avg_processing_time = np.mean(list(self.stats['processing_times'])) if self.stats['processing_times'] else 0
|
|
|
|
return {
|
|
'total_ticks_processed': self.stats['total_ticks_processed'],
|
|
'patterns_detected': self.stats['patterns_detected'],
|
|
'bars_created': self.stats['bars_created'],
|
|
'avg_processing_time_ms': avg_processing_time,
|
|
'symbols': self.symbols,
|
|
'buffer_sizes': {symbol: len(self.raw_tick_buffers[symbol]) for symbol in self.symbols},
|
|
'bar_counts': {symbol: len(self.ohlcv_bars[symbol]) for symbol in self.symbols},
|
|
'pattern_counts': {symbol: len(self.detected_patterns[symbol]) for symbol in self.symbols}
|
|
}
|
|
|
|
def clear_old_data(self, hours_to_keep: int = 1):
    """Clear old data to manage memory usage.

    For every tracked symbol, entries older than *hours_to_keep* hours are
    dropped from the front of the raw tick, OHLCV bar and detected pattern
    buffers. Ticks and bars are aged by ``timestamp``, patterns by
    ``start_time``. Pruning stops at the first entry that is recent enough,
    so it relies on the buffers being in chronological order. Errors are
    logged and swallowed.

    Args:
        hours_to_keep: Age limit in hours.
    """
    try:
        retention = timedelta(hours=hours_to_keep)

        def prune(buffer, time_attr):
            """Pop entries from the left of *buffer* while they are too old."""
            if not buffer:
                return
            # Build the cutoff with the same tzinfo as the stored timestamps.
            # Naive timestamps get a naive local "now" (as before), while
            # timezone-aware timestamps get an aware "now", avoiding the
            # TypeError raised when naive and aware datetimes are compared.
            tzinfo = getattr(buffer[0], time_attr).tzinfo
            cutoff = datetime.now(tz=tzinfo) - retention
            while buffer and getattr(buffer[0], time_attr) < cutoff:
                buffer.popleft()

        with self.data_lock:
            for symbol in self.symbols:
                prune(self.raw_tick_buffers[symbol], 'timestamp')   # old ticks
                prune(self.ohlcv_bars[symbol], 'timestamp')         # old bars
                prune(self.detected_patterns[symbol], 'start_time')  # old patterns

        logger.info(f"Cleared data older than {hours_to_keep} hours")

    except Exception as e:
        logger.error(f"Error clearing old data: {e}")