""" Enhanced CNN Model with Bookmap Order Book Integration This module extends the enhanced CNN to incorporate: - Traditional market data (OHLCV, indicators) - Order book depth features (COB) - Volume profile features (SVP) - Order flow signals (sweeps, absorptions, momentum) - Market microstructure metrics The integrated model provides comprehensive market awareness for superior trading decisions. """ import torch import torch.nn as nn import torch.nn.functional as F import numpy as np import logging from typing import Dict, List, Optional, Tuple, Any logger = logging.getLogger(__name__) class ResidualBlock(nn.Module): """Enhanced residual block with skip connections""" def __init__(self, in_channels, out_channels, stride=1): super(ResidualBlock, self).__init__() self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1) self.bn1 = nn.BatchNorm1d(out_channels) self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, stride=1, padding=1) self.bn2 = nn.BatchNorm1d(out_channels) # Shortcut connection self.shortcut = nn.Sequential() if stride != 1 or in_channels != out_channels: self.shortcut = nn.Sequential( nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=stride), nn.BatchNorm1d(out_channels) ) def forward(self, x): out = F.relu(self.bn1(self.conv1(x))) out = self.bn2(self.conv2(out)) # Avoid in-place operation out = out + self.shortcut(x) out = F.relu(out) return out class MultiHeadAttention(nn.Module): """Multi-head attention mechanism""" def __init__(self, dim, num_heads=8, dropout=0.1): super(MultiHeadAttention, self).__init__() self.dim = dim self.num_heads = num_heads self.head_dim = dim // num_heads self.q_linear = nn.Linear(dim, dim) self.k_linear = nn.Linear(dim, dim) self.v_linear = nn.Linear(dim, dim) self.dropout = nn.Dropout(dropout) self.out = nn.Linear(dim, dim) def forward(self, x): batch_size, seq_len, dim = x.size() # Linear transformations q = self.q_linear(x).view(batch_size, seq_len, self.num_heads, self.head_dim) k = self.k_linear(x).view(batch_size, seq_len, self.num_heads, self.head_dim) v = self.v_linear(x).view(batch_size, seq_len, self.num_heads, self.head_dim) # Transpose for attention q = q.transpose(1, 2) k = k.transpose(1, 2) v = v.transpose(1, 2) # Scaled dot-product attention scores = torch.matmul(q, k.transpose(-2, -1)) / np.sqrt(self.head_dim) attn_weights = F.softmax(scores, dim=-1) attn_weights = self.dropout(attn_weights) attn_output = torch.matmul(attn_weights, v) attn_output = attn_output.transpose(1, 2).contiguous().view(batch_size, seq_len, dim) return self.out(attn_output), attn_weights class OrderBookEncoder(nn.Module): """Specialized encoder for order book data""" def __init__(self, input_dim=100, hidden_dim=512): super(OrderBookEncoder, self).__init__() # Order book feature processing self.bid_encoder = nn.Sequential( nn.Linear(40, 128), # 20 levels x 2 features nn.ReLU(), nn.Dropout(0.2), nn.Linear(128, 256), nn.ReLU(), nn.Dropout(0.2) ) self.ask_encoder = nn.Sequential( nn.Linear(40, 128), # 20 levels x 2 features nn.ReLU(), nn.Dropout(0.2), nn.Linear(128, 256), nn.ReLU(), nn.Dropout(0.2) ) # Microstructure features self.microstructure_encoder = nn.Sequential( nn.Linear(15, 64), # Liquidity + imbalance + flow features nn.ReLU(), nn.Dropout(0.2), nn.Linear(64, 128), nn.ReLU(), nn.Dropout(0.2) ) # Cross-attention between bids and asks self.cross_attention = MultiHeadAttention(256, num_heads=8) # Output projection self.output_projection = nn.Sequential( nn.Linear(256 + 256 + 128, hidden_dim), # 
class OrderBookEncoder(nn.Module):
    """Specialized encoder for order book data"""

    def __init__(self, input_dim=100, hidden_dim=512):
        super(OrderBookEncoder, self).__init__()
        self.input_dim = input_dim

        # Bid-side features: 20 levels x 2 features
        self.bid_encoder = nn.Sequential(
            nn.Linear(40, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Dropout(0.2)
        )

        # Ask-side features: 20 levels x 2 features
        self.ask_encoder = nn.Sequential(
            nn.Linear(40, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Dropout(0.2)
        )

        # Microstructure features: liquidity + imbalance + flow
        self.microstructure_encoder = nn.Sequential(
            nn.Linear(15, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Dropout(0.2)
        )

        # Cross-attention between bids and asks
        self.cross_attention = MultiHeadAttention(256, num_heads=8)

        # Output projection over the combined feature vector
        self.output_projection = nn.Sequential(
            nn.Linear(256 + 256 + 128, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, hidden_dim)
        )

    def forward(self, orderbook_features):
        """
        Process order book features

        Args:
            orderbook_features: Tensor of shape [batch, 100] containing:
                - 40 bid features (20 levels x 2)
                - 40 ask features (20 levels x 2)
                - 15 microstructure features
                - 5 flow signal features
        """
        # Split features
        bid_features = orderbook_features[:, :40]      # First 40 features
        ask_features = orderbook_features[:, 40:80]    # Next 40 features
        micro_features = orderbook_features[:, 80:95]  # Next 15 features
        # flow_features = orderbook_features[:, 95:100]  # Last 5 features, currently unused

        # Encode each component
        bid_encoded = self.bid_encoder(bid_features)                 # [batch, 256]
        ask_encoded = self.ask_encoder(ask_features)                 # [batch, 256]
        micro_encoded = self.microstructure_encoder(micro_features)  # [batch, 128]

        # Add a sequence dimension for attention
        bid_seq = bid_encoded.unsqueeze(1)  # [batch, 1, 256]
        ask_seq = ask_encoded.unsqueeze(1)  # [batch, 1, 256]

        # Cross-attention between bids and asks
        combined_seq = torch.cat([bid_seq, ask_seq], dim=1)  # [batch, 2, 256]
        attended_features, attention_weights = self.cross_attention(combined_seq)

        # Flatten attended features
        attended_flat = attended_features.view(attended_features.size(0), -1)  # [batch, 512]

        # Combine with microstructure features
        combined_features = torch.cat([attended_flat, micro_encoded], dim=1)  # [batch, 640]

        # Final projection
        return self.output_projection(combined_features)
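
# Example (illustrative): assembling the flat 100-dim vector OrderBookEncoder
# expects. The layout follows the forward() docstring: 40 bid features, then
# 40 ask features, then 15 microstructure and 5 flow-signal features. The
# per-level pair is assumed here to be (price, size); `bid_levels`,
# `ask_levels`, `micro_feats`, and `flow_feats` are hypothetical inputs.
#
#   bids = np.asarray(bid_levels[:20], dtype=np.float32).reshape(-1)  # 20 x 2 -> 40
#   asks = np.asarray(ask_levels[:20], dtype=np.float32).reshape(-1)  # 20 x 2 -> 40
#   vec = np.concatenate([bids, asks, micro_feats, flow_feats])       # (100,)
#   encoder = OrderBookEncoder(input_dim=100, hidden_dim=512)
#   out = encoder(torch.from_numpy(vec).unsqueeze(0))                 # [1, 512]
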
class VolumeProfileEncoder(nn.Module):
    """Encoder for volume profile data"""

    def __init__(self, max_levels=50, hidden_dim=256):
        super(VolumeProfileEncoder, self).__init__()
        self.max_levels = max_levels

        # Per-level features: price, volume, buy_vol, sell_vol, trades, vwap, net_vol
        self.level_encoder = nn.Sequential(
            nn.Linear(7, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 64),
            nn.ReLU()
        )

        # Attention over price levels
        self.level_attention = MultiHeadAttention(64, num_heads=4)

        # Final aggregation
        self.aggregator = nn.Sequential(
            nn.Linear(64, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, hidden_dim)
        )

    def forward(self, volume_profile_data):
        """
        Process volume profile data

        Args:
            volume_profile_data: List of dicts or tensor with volume profile levels
        """
        device = next(self.parameters()).device

        # If input is a list of dicts, convert it to a tensor
        if isinstance(volume_profile_data, list):
            if not volume_profile_data:
                # Return zero features on the model's device if no data
                return torch.zeros(1, self.aggregator[-1].out_features, device=device)

            features = []
            for level in volume_profile_data[:self.max_levels]:
                features.append([
                    level.get('price', 0.0),
                    level.get('volume', 0.0),
                    level.get('buy_volume', 0.0),
                    level.get('sell_volume', 0.0),
                    level.get('trades_count', 0.0),
                    level.get('vwap', 0.0),
                    level.get('net_volume', 0.0)
                ])

            # Pad to a fixed number of levels
            while len(features) < self.max_levels:
                features.append([0.0] * 7)

            volume_tensor = torch.tensor(features, dtype=torch.float32, device=device).unsqueeze(0)
        else:
            volume_tensor = volume_profile_data

        batch_size, num_levels, feature_dim = volume_tensor.shape

        # Encode each level independently
        level_features = self.level_encoder(volume_tensor.view(-1, feature_dim))
        level_features = level_features.view(batch_size, num_levels, -1)

        # Attention across price levels
        attended_levels, _ = self.level_attention(level_features)

        # Global average pooling over levels
        aggregated = torch.mean(attended_levels, dim=1)

        # Final processing
        return self.aggregator(aggregated)
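
# Example (illustrative): VolumeProfileEncoder accepts either a padded tensor
# of shape [batch, max_levels, 7] or a raw list of level dicts, which it pads
# to max_levels internally.
#
#   vp = VolumeProfileEncoder(max_levels=50, hidden_dim=256)
#   levels = [{'price': 50000.0, 'volume': 12.5, 'buy_volume': 7.0,
#              'sell_volume': 5.5, 'trades_count': 42, 'vwap': 50001.2,
#              'net_volume': 1.5}]
#   out = vp(levels)    # [1, 256]
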
class EnhancedCNNWithOrderBook(nn.Module):
    """
    Enhanced CNN model integrating traditional market data with order book analysis

    Features:
    - Multi-scale convolutional processing for time series data
    - Specialized order book feature extraction
    - Volume profile analysis
    - Order flow signal integration
    - Multi-head attention mechanisms
    - Dueling architecture for value and advantage estimation
    """

    def __init__(self,
                 market_input_shape=(60, 50),  # Traditional market data (seq_len, features)
                 orderbook_features=100,       # Order book feature dimension
                 n_actions=2,
                 confidence_threshold=0.5):
        super(EnhancedCNNWithOrderBook, self).__init__()

        self.market_input_shape = market_input_shape
        self.orderbook_features = orderbook_features
        self.n_actions = n_actions
        self.confidence_threshold = confidence_threshold

        # Traditional market data processing
        self.market_encoder = self._build_market_encoder()

        # Order book data processing
        self.orderbook_encoder = OrderBookEncoder(
            input_dim=orderbook_features,
            hidden_dim=512
        )

        # Volume profile processing
        self.volume_encoder = VolumeProfileEncoder(
            max_levels=50,
            hidden_dim=256
        )

        # Feature fusion: market (1024) + orderbook (512) + volume (256) = 1792
        total_features = 1024 + 512 + 256
        self.feature_fusion = nn.Sequential(
            nn.Linear(total_features, 1536),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1536, 1024),
            nn.ReLU(),
            nn.Dropout(0.3)
        )

        # Multi-head attention over the fused features
        self.integrated_attention = MultiHeadAttention(1024, num_heads=16)

        # Dueling architecture
        self.advantage_stream = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, n_actions)
        )

        self.value_stream = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 1)
        )

        # Auxiliary heads for multi-task learning
        self.extrema_head = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 3)  # bottom, top, neither
        )

        self.market_regime_head = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 8)  # trending, ranging, volatile, etc.
        )

        self.confidence_head = nn.Sequential(
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )

        # Initialize weights
        self._initialize_weights()

        # Device management
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

        logger.info("Enhanced CNN with Order Book initialized")
        logger.info(f"Market input shape: {market_input_shape}")
        logger.info(f"Order book features: {orderbook_features}")
        logger.info(f"Output actions: {n_actions}")

    def _build_market_encoder(self):
        """Build traditional market data encoder"""
        seq_len, feature_dim = self.market_input_shape

        return nn.Sequential(
            # Input projection: a 1x1 convolution acts as a per-timestep linear
            # layer on [batch, features, seq_len] input (a plain nn.Linear here
            # would incorrectly mix along the time axis after the transpose)
            nn.Conv1d(feature_dim, 128, kernel_size=1),
            nn.ReLU(),
            nn.Dropout(0.2),

            # Convolutional layers for temporal patterns
            nn.Conv1d(128, 256, kernel_size=5, padding=2),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.2),

            ResidualBlock(256, 512),
            ResidualBlock(512, 512),
            ResidualBlock(512, 768),
            ResidualBlock(768, 768),

            # Global pooling over the time dimension
            nn.AdaptiveAvgPool1d(1),
            nn.Flatten(),

            # Final projection
            nn.Linear(768, 1024),
            nn.ReLU(),
            nn.Dropout(0.3)
        )

    def _initialize_weights(self):
        """Initialize model weights"""
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, market_data, orderbook_data, volume_profile_data=None):
        """
        Forward pass through the integrated model

        Args:
            market_data: Traditional market data [batch, seq_len, features]
            orderbook_data: Order book features [batch, orderbook_features]
            volume_profile_data: Volume profile data (optional)

        Returns:
            Dictionary with Q-values, confidence, regime, and auxiliary predictions
        """
        # Ensure a batch dimension before reading the batch size
        if len(market_data.shape) == 2:
            market_data = market_data.unsqueeze(0)
        batch_size = market_data.size(0)

        # Conv1d expects [batch, channels, seq_len], so move features to channels
        market_features = self.market_encoder(market_data.transpose(1, 2))

        # Process order book data
        orderbook_encoded = self.orderbook_encoder(orderbook_data)

        # Process volume profile data (zeros when unavailable)
        if volume_profile_data is not None:
            volume_features = self.volume_encoder(volume_profile_data)
        else:
            volume_features = torch.zeros(batch_size, 256, device=market_features.device)

        # Fuse all features
        combined_features = torch.cat([
            market_features,
            orderbook_encoded,
            volume_features
        ], dim=1)

        fused_features = self.feature_fusion(combined_features)

        # Attention over the fused representation (sequence length 1)
        attended_output, attention_weights = self.integrated_attention(fused_features.unsqueeze(1))
        final_features = attended_output.squeeze(1)

        # Dueling architecture: combine state value and action advantages
        advantage = self.advantage_stream(final_features)
        value = self.value_stream(final_features)
        q_values = value + advantage - advantage.mean(dim=1, keepdim=True)

        # Auxiliary predictions
        extrema_pred = self.extrema_head(final_features)
        regime_pred = self.market_regime_head(final_features)
        confidence = self.confidence_head(final_features)

        return {
            'q_values': q_values,
            'confidence': confidence,
            'extrema_prediction': extrema_pred,
            'market_regime': regime_pred,
            'attention_weights': attention_weights,
            'integrated_features': final_features
        }
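
    # Note: the dueling head in forward() computes
    #   Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a')
    # Subtracting the mean advantage removes the ambiguity between the value
    # and advantage decomposition (as in dueling DQN, Wang et al., 2016), so
    # gradients split cleanly between the two streams.
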
    def predict(self, market_data, orderbook_data, volume_profile_data=None):
        """Make a prediction with confidence thresholding"""
        self.eval()

        with torch.no_grad():
            # Convert inputs to tensors if needed
            if isinstance(market_data, np.ndarray):
                market_data = torch.FloatTensor(market_data).to(self.device)
            if isinstance(orderbook_data, np.ndarray):
                orderbook_data = torch.FloatTensor(orderbook_data).to(self.device)

            # Ensure batch dimensions
            if len(market_data.shape) == 2:
                market_data = market_data.unsqueeze(0)
            if len(orderbook_data.shape) == 1:
                orderbook_data = orderbook_data.unsqueeze(0)

            # Forward pass
            outputs = self.forward(market_data, orderbook_data, volume_profile_data)

            # Action probabilities
            q_values = outputs['q_values']
            probs = F.softmax(q_values, dim=1)

            # Handle confidence shape carefully to avoid scalar conversion errors
            confidence_tensor = outputs['confidence']
            if isinstance(confidence_tensor, torch.Tensor):
                if confidence_tensor.numel() == 1:
                    confidence = confidence_tensor.item()
                else:
                    confidence = confidence_tensor.flatten()[0].item()
            else:
                confidence = float(confidence_tensor)

            # Action selection with confidence thresholding
            if confidence >= self.confidence_threshold:
                action = torch.argmax(q_values, dim=1).item()
            else:
                action = None  # No action due to low confidence

            return {
                'action': action,
                'probabilities': probs.cpu().numpy()[0],
                'confidence': confidence,
                'q_values': q_values.cpu().numpy()[0],
                'extrema_prediction': F.softmax(outputs['extrema_prediction'], dim=1).cpu().numpy()[0],
                'market_regime': F.softmax(outputs['market_regime'], dim=1).cpu().numpy()[0]
            }

    def get_feature_importance(self, market_data, orderbook_data, volume_profile_data=None):
        """Analyze feature importance using input gradients"""
        self.eval()

        # Work on detached leaf copies so requires_grad_ is always legal,
        # even when callers pass tensors that are part of another graph
        market_data = market_data.detach().clone().requires_grad_(True)
        orderbook_data = orderbook_data.detach().clone().requires_grad_(True)

        # Forward pass
        outputs = self.forward(market_data, orderbook_data, volume_profile_data)

        # Back-propagate the summed Q-values to the inputs
        outputs['q_values'].sum().backward()

        # Gradient magnitudes as an importance proxy
        market_importance = torch.abs(market_data.grad).mean().item()
        orderbook_importance = torch.abs(orderbook_data.grad).mean().item()

        return {
            'market_importance': market_importance,
            'orderbook_importance': orderbook_importance,
            'total_importance': market_importance + orderbook_importance
        }

    def save(self, path):
        """Save model state and constructor hyperparameters"""
        torch.save({
            'model_state_dict': self.state_dict(),
            'market_input_shape': self.market_input_shape,
            'orderbook_features': self.orderbook_features,
            'n_actions': self.n_actions,
            'confidence_threshold': self.confidence_threshold
        }, path)
        logger.info(f"Enhanced CNN with Order Book saved to {path}")

    def load(self, path):
        """Load model state"""
        checkpoint = torch.load(path, map_location=self.device)
        self.load_state_dict(checkpoint['model_state_dict'])
        logger.info(f"Enhanced CNN with Order Book loaded from {path}")

    def get_memory_usage(self):
        """Get model memory usage statistics"""
        total_params = sum(p.numel() for p in self.parameters())
        trainable_params = sum(p.numel() for p in self.parameters() if p.requires_grad)

        return {
            'total_parameters': total_params,
            'trainable_parameters': trainable_params,
            'model_size_mb': total_params * 4 / (1024 * 1024),  # assuming float32
        }
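
# Example (illustrative): save/load round trip. Note that load() restores
# weights only; the checkpoint's stored hyperparameters are not applied, so
# the receiving model must be constructed with matching arguments. The path
# is hypothetical.
#
#   model = EnhancedCNNWithOrderBook()
#   model.save('enhanced_cnn_orderbook.pt')
#   restored = EnhancedCNNWithOrderBook()
#   restored.load('enhanced_cnn_orderbook.pt')
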
def create_enhanced_cnn_with_orderbook(
    market_input_shape=(60, 50),
    orderbook_features=100,
    n_actions=2,
    device='cuda'
):
    """Create and initialize enhanced CNN with order book integration"""
    model = EnhancedCNNWithOrderBook(
        market_input_shape=market_input_shape,
        orderbook_features=orderbook_features,
        n_actions=n_actions
    )

    # Honor the requested device, falling back to CPU when CUDA is unavailable,
    # and keep model.device in sync so forward() allocates on the right device
    if device == 'cuda' and not torch.cuda.is_available():
        device = 'cpu'
    if device:
        model = model.to(device)
        model.device = torch.device(device)

    memory_usage = model.get_memory_usage()
    logger.info(f"Created Enhanced CNN with Order Book: {memory_usage['total_parameters']:,} parameters")
    logger.info(f"Model size: {memory_usage['model_size_mb']:.1f} MB")

    return model
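
# Minimal smoke test (illustrative): build the model with default shapes and
# run one prediction on synthetic inputs. Shapes follow the docstrings above;
# the data itself is random.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    model = create_enhanced_cnn_with_orderbook(device='cpu')

    market = np.random.randn(60, 50).astype(np.float32)    # [seq_len, features]
    orderbook = np.random.randn(100).astype(np.float32)    # [orderbook_features]

    result = model.predict(market, orderbook)
    print(f"action={result['action']} confidence={result['confidence']:.3f}")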