Added 5 min BOM data to CNN; respecting port
@@ -142,6 +142,16 @@ class DataProvider:
            binance_symbol = symbol.replace('/', '').upper()
            self.tick_buffers[binance_symbol] = deque(maxlen=self.buffer_size)

        # BOM (Book of Market) data caching - 1s resolution for last 5 minutes
        self.bom_cache_duration = 300  # 5 minutes in seconds
        self.bom_feature_count = 120  # Number of BOM features per timestamp
        self.bom_data_cache: Dict[str, deque] = {}  # {symbol: deque of (timestamp, bom_features)}

        # Initialize BOM cache for each symbol
        for symbol in self.symbols:
            # Store 300 seconds worth of 1s BOM data
            self.bom_data_cache[symbol] = deque(maxlen=self.bom_cache_duration)

        # Initialize tick aggregator for raw tick processing
        binance_symbols = [symbol.replace('/', '').upper() for symbol in self.symbols]
        self.tick_aggregator = RealTimeTickAggregator(symbols=binance_symbols)
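Each symbol's BOM cache is a fixed-size ring buffer: roughly 5 minutes of 1-second snapshots, each a 120-value feature vector. A minimal standalone sketch of the intended layout (hypothetical values, not part of the commit):

from collections import deque
from datetime import datetime
import numpy as np

bom_data_cache = {'ETH/USDT': deque(maxlen=300)}   # ~5 minutes of 1s snapshots
features = np.zeros(120, dtype=np.float32)         # one BOM feature vector
bom_data_cache['ETH/USDT'].append((datetime.now(), features))
# Once 300 entries exist, each new append drops the oldest 1s snapshot automatically.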
@@ -2083,4 +2093,363 @@ class DataProvider:
                'distribution_stats': self.distribution_stats.copy(),
                'buffer_sizes': {symbol: len(buffer) for symbol, buffer in self.tick_buffers.items()},
                'tick_aggregator': aggregator_stats
            }
        }

    def update_bom_cache(self, symbol: str, bom_features: List[float], cob_integration=None):
        """
        Update BOM cache with latest features for a symbol

        Args:
            symbol: Trading symbol (e.g., 'ETH/USDT')
            bom_features: List of BOM features (should be 120 features)
            cob_integration: Optional COB integration instance for real BOM data
        """
        try:
            current_time = datetime.now()

            # Ensure we have exactly 120 features
            if len(bom_features) != self.bom_feature_count:
                if len(bom_features) > self.bom_feature_count:
                    bom_features = bom_features[:self.bom_feature_count]
                else:
                    bom_features.extend([0.0] * (self.bom_feature_count - len(bom_features)))

            # Convert to numpy array for efficient storage
            bom_array = np.array(bom_features, dtype=np.float32)

            # Add timestamp and features to cache
            with self.data_lock:
                self.bom_data_cache[symbol].append((current_time, bom_array))

            logger.debug(f"Updated BOM cache for {symbol}: {len(self.bom_data_cache[symbol])} timestamps cached")

        except Exception as e:
            logger.error(f"Error updating BOM cache for {symbol}: {e}")

    def get_bom_matrix_for_cnn(self, symbol: str, sequence_length: int = 50) -> Optional[np.ndarray]:
        """
        Get BOM matrix for CNN input from cached 1s data

        Args:
            symbol: Trading symbol (e.g., 'ETH/USDT')
            sequence_length: Required sequence length (default 50)

        Returns:
            np.ndarray: BOM matrix of shape (sequence_length, 120) or None if insufficient data
        """
        try:
            with self.data_lock:
                if symbol not in self.bom_data_cache or len(self.bom_data_cache[symbol]) == 0:
                    logger.warning(f"No BOM data cached for {symbol}")
                    return None

                # Get recent data
                cached_data = list(self.bom_data_cache[symbol])

                if len(cached_data) < sequence_length:
                    logger.warning(f"Insufficient BOM data for {symbol}: {len(cached_data)} < {sequence_length}")
                    # Pad with zeros if we don't have enough data
                    bom_matrix = np.zeros((sequence_length, self.bom_feature_count), dtype=np.float32)

                    # Fill available data at the end
                    for i, (timestamp, features) in enumerate(cached_data):
                        if i < sequence_length:
                            bom_matrix[sequence_length - len(cached_data) + i] = features

                    return bom_matrix

                # Take the most recent sequence_length samples
                recent_data = cached_data[-sequence_length:]

                # Create matrix
                bom_matrix = np.zeros((sequence_length, self.bom_feature_count), dtype=np.float32)
                for i, (timestamp, features) in enumerate(recent_data):
                    bom_matrix[i] = features

                logger.debug(f"Retrieved BOM matrix for {symbol}: shape={bom_matrix.shape}")
                return bom_matrix

        except Exception as e:
            logger.error(f"Error getting BOM matrix for {symbol}: {e}")
            return None
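A hypothetical consumer of this method, assuming an instantiated provider named data_provider (illustrative only, not part of the commit):

bom_matrix = data_provider.get_bom_matrix_for_cnn('ETH/USDT', sequence_length=50)
if bom_matrix is not None:
    assert bom_matrix.shape == (50, 120)  # 50 one-second steps x 120 BOM features
    # e.g. concatenate with other per-timestep features before feeding the CNN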

    def generate_synthetic_bom_features(self, symbol: str) -> List[float]:
        """
        Generate synthetic BOM features when real COB data is not available

        This creates realistic-looking order book features based on current market data
        """
        try:
            features = []

            # Get current price for context
            current_price = self.get_current_price(symbol)
            if current_price is None:
                current_price = 3000.0  # Fallback price

            # === 1. CONSOLIDATED ORDER BOOK DATA (40 features) ===
            # Top 10 bid levels (price offset + volume)
            for i in range(10):
                price_offset = -0.001 * (i + 1) * (1 + np.random.normal(0, 0.1))  # Negative for bids
                volume_normalized = np.random.exponential(0.5) * (1.0 - i * 0.1)  # Decreasing with depth
                features.extend([price_offset, volume_normalized])

            # Top 10 ask levels (price offset + volume)
            for i in range(10):
                price_offset = 0.001 * (i + 1) * (1 + np.random.normal(0, 0.1))  # Positive for asks
                volume_normalized = np.random.exponential(0.5) * (1.0 - i * 0.1)  # Decreasing with depth
                features.extend([price_offset, volume_normalized])

            # === 2. VOLUME PROFILE FEATURES (30 features) ===
            # Top 10 volume levels (buy%, sell%, total volume)
            for i in range(10):
                buy_percent = 0.3 + np.random.normal(0, 0.2)  # Around 30-70% buy
                buy_percent = max(0.0, min(1.0, buy_percent))
                sell_percent = 1.0 - buy_percent
                total_volume = np.random.exponential(1.0) * (1.0 - i * 0.05)
                features.extend([buy_percent, sell_percent, total_volume])

            # === 3. ORDER FLOW INTENSITY (25 features) ===
            # Aggressive order flow
            features.extend([
                0.5 + np.random.normal(0, 0.1),  # Aggressive buy ratio
                0.5 + np.random.normal(0, 0.1),  # Aggressive sell ratio
                0.4 + np.random.normal(0, 0.1),  # Buy volume ratio
                0.4 + np.random.normal(0, 0.1),  # Sell volume ratio
                np.random.exponential(100),  # Avg aggressive buy size
                np.random.exponential(100),  # Avg aggressive sell size
            ])

            # Block trade detection
            features.extend([
                0.1 + np.random.exponential(0.05),  # Large trade ratio
                0.2 + np.random.exponential(0.1),  # Large trade volume ratio
                np.random.exponential(1000),  # Avg large trade size
            ])

            # Flow velocity metrics
            features.extend([
                1.0 + np.random.normal(0, 0.2),  # Avg time delta
                0.1 + np.random.exponential(0.05),  # Time velocity variance
                0.5 + np.random.normal(0, 0.1),  # Trade clustering
            ])

            # Institutional activity indicators
            features.extend([
                0.05 + np.random.exponential(0.02),  # Iceberg detection
                0.3 + np.random.normal(0, 0.1),  # Hidden order ratio
                0.2 + np.random.normal(0, 0.05),  # Smart money flow
                0.1 + np.random.exponential(0.03),  # Algorithmic activity
            ])

            # Market maker behavior
            features.extend([
                0.6 + np.random.normal(0, 0.1),  # MM provision ratio
                0.4 + np.random.normal(0, 0.1),  # MM take ratio
                0.02 + np.random.normal(0, 0.005),  # Spread tightening
                1.0 + np.random.normal(0, 0.2),  # Quote update frequency
                0.8 + np.random.normal(0, 0.1),  # Quote stability
            ])

            # === 4. MARKET MICROSTRUCTURE SIGNALS (25 features) ===
            # Order book pressure
            features.extend([
                0.5 + np.random.normal(0, 0.1),  # Bid pressure
                0.5 + np.random.normal(0, 0.1),  # Ask pressure
                0.0 + np.random.normal(0, 0.05),  # Pressure imbalance
                1.0 + np.random.normal(0, 0.2),  # Pressure intensity
                0.5 + np.random.normal(0, 0.1),  # Depth stability
            ])

            # Price level concentration
            features.extend([
                0.3 + np.random.normal(0, 0.1),  # Bid concentration
                0.3 + np.random.normal(0, 0.1),  # Ask concentration
                0.8 + np.random.normal(0, 0.1),  # Top level dominance
                0.2 + np.random.normal(0, 0.05),  # Fragmentation index
                0.6 + np.random.normal(0, 0.1),  # Liquidity clustering
            ])

            # Temporal dynamics
            features.extend([
                0.1 + np.random.normal(0, 0.02),  # Volatility factor
                1.0 + np.random.normal(0, 0.1),  # Momentum factor
                0.0 + np.random.normal(0, 0.05),  # Mean reversion
                0.5 + np.random.normal(0, 0.1),  # Trend alignment
                0.8 + np.random.normal(0, 0.1),  # Pattern consistency
            ])

            # Exchange-specific patterns
            features.extend([
                0.4 + np.random.normal(0, 0.1),  # Cross-exchange correlation
                0.3 + np.random.normal(0, 0.1),  # Exchange arbitrage
                0.2 + np.random.normal(0, 0.05),  # Latency patterns
                0.8 + np.random.normal(0, 0.1),  # Sync quality
                0.6 + np.random.normal(0, 0.1),  # Data freshness
            ])

            # Ensure exactly 120 features
            if len(features) > 120:
                features = features[:120]
            elif len(features) < 120:
                features.extend([0.0] * (120 - len(features)))

            # Clamp all values to reasonable ranges
            features = [max(-5.0, min(5.0, f)) for f in features]

            return features

        except Exception as e:
            logger.error(f"Error generating synthetic BOM features for {symbol}: {e}")
            return [0.0] * 120

    def start_bom_cache_updates(self, cob_integration=None):
        """
        Start background updates of BOM cache every second

        Args:
            cob_integration: Optional COB integration instance for real data
        """
        try:
            def update_loop():
                while self.is_streaming:
                    try:
                        for symbol in self.symbols:
                            if cob_integration:
                                # Try to get real BOM features from COB integration
                                try:
                                    bom_features = self._extract_real_bom_features(symbol, cob_integration)
                                    if bom_features:
                                        self.update_bom_cache(symbol, bom_features, cob_integration)
                                    else:
                                        # Fallback to synthetic
                                        synthetic_features = self.generate_synthetic_bom_features(symbol)
                                        self.update_bom_cache(symbol, synthetic_features)
                                except Exception as e:
                                    logger.warning(f"Error getting real BOM features for {symbol}: {e}")
                                    synthetic_features = self.generate_synthetic_bom_features(symbol)
                                    self.update_bom_cache(symbol, synthetic_features)
                            else:
                                # Generate synthetic BOM features
                                synthetic_features = self.generate_synthetic_bom_features(symbol)
                                self.update_bom_cache(symbol, synthetic_features)

                        time.sleep(1.0)  # Update every second

                    except Exception as e:
                        logger.error(f"Error in BOM cache update loop: {e}")
                        time.sleep(5.0)  # Wait longer on error

            # Start background thread
            bom_thread = Thread(target=update_loop, daemon=True)
            bom_thread.start()

            logger.info("Started BOM cache updates (1s resolution)")

        except Exception as e:
            logger.error(f"Error starting BOM cache updates: {e}")

    def _extract_real_bom_features(self, symbol: str, cob_integration) -> Optional[List[float]]:
        """Extract real BOM features from COB integration"""
        try:
            features = []

            # Get consolidated order book
            if hasattr(cob_integration, 'get_consolidated_orderbook'):
                cob_snapshot = cob_integration.get_consolidated_orderbook(symbol)
                if cob_snapshot:
                    # Extract order book features (40 features)
                    features.extend(self._extract_orderbook_features(cob_snapshot))
                else:
                    features.extend([0.0] * 40)
            else:
                features.extend([0.0] * 40)

            # Get volume profile features (30 features)
            if hasattr(cob_integration, 'get_session_volume_profile'):
                volume_profile = cob_integration.get_session_volume_profile(symbol)
                if volume_profile:
                    features.extend(self._extract_volume_profile_features(volume_profile))
                else:
                    features.extend([0.0] * 30)
            else:
                features.extend([0.0] * 30)

            # Add flow and microstructure features (50 features)
            features.extend(self._extract_flow_microstructure_features(symbol, cob_integration))

            # Ensure exactly 120 features
            if len(features) > 120:
                features = features[:120]
            elif len(features) < 120:
                features.extend([0.0] * (120 - len(features)))

            return features

        except Exception as e:
            logger.warning(f"Error extracting real BOM features for {symbol}: {e}")
            return None

    def _extract_orderbook_features(self, cob_snapshot) -> List[float]:
        """Extract order book features from COB snapshot"""
        features = []

        try:
            # Top 10 bid levels
            for i in range(10):
                if i < len(cob_snapshot.consolidated_bids):
                    level = cob_snapshot.consolidated_bids[i]
                    price_offset = (level.price - cob_snapshot.volume_weighted_mid) / cob_snapshot.volume_weighted_mid
                    volume_normalized = level.total_volume_usd / 1000000
                    features.extend([price_offset, volume_normalized])
                else:
                    features.extend([0.0, 0.0])

            # Top 10 ask levels
            for i in range(10):
                if i < len(cob_snapshot.consolidated_asks):
                    level = cob_snapshot.consolidated_asks[i]
                    price_offset = (level.price - cob_snapshot.volume_weighted_mid) / cob_snapshot.volume_weighted_mid
                    volume_normalized = level.total_volume_usd / 1000000
                    features.extend([price_offset, volume_normalized])
                else:
                    features.extend([0.0, 0.0])

        except Exception as e:
            logger.warning(f"Error extracting order book features: {e}")
            features = [0.0] * 40

        return features[:40]

    def _extract_volume_profile_features(self, volume_profile) -> List[float]:
        """Extract volume profile features"""
        features = []

        try:
            if 'data' in volume_profile:
                svp_data = volume_profile['data']
                top_levels = sorted(svp_data, key=lambda x: x.get('total_volume', 0), reverse=True)[:10]

                for level in top_levels:
                    buy_percent = level.get('buy_percent', 50.0) / 100.0
                    sell_percent = level.get('sell_percent', 50.0) / 100.0
                    total_volume = level.get('total_volume', 0.0) / 1000000
                    features.extend([buy_percent, sell_percent, total_volume])

            # Pad to 30 features
            while len(features) < 30:
                features.extend([0.5, 0.5, 0.0])

        except Exception as e:
            logger.warning(f"Error extracting volume profile features: {e}")
            features = [0.0] * 30

        return features[:30]

    def _extract_flow_microstructure_features(self, symbol: str, cob_integration) -> List[float]:
        """Extract flow and microstructure features"""
        try:
            # For now, return synthetic features since the full implementation would be complex
            return self.generate_synthetic_bom_features(symbol)[70:]  # Last 50 features
        except Exception:
            return [0.0] * 50
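How the cached BOM matrix reaches the CNN is not shown in this diff. One plausible wiring, with a hypothetical other_features array of shape (50, N) produced elsewhere, would be:

bom = data_provider.get_bom_matrix_for_cnn(symbol, sequence_length=50)   # (50, 120)
if bom is not None:
    cnn_input = np.concatenate([other_features, bom], axis=1)            # (50, N + 120)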