"""
|
|
Enhanced CNN Model with Bookmap Order Book Integration
|
|
|
|
This module extends the enhanced CNN to incorporate:
|
|
- Traditional market data (OHLCV, indicators)
|
|
- Order book depth features (COB)
|
|
- Volume profile features (SVP)
|
|
- Order flow signals (sweeps, absorptions, momentum)
|
|
- Market microstructure metrics
|
|
|
|
The integrated model provides comprehensive market awareness for superior trading decisions.
|
|
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import logging
from typing import Dict, List, Optional, Tuple, Any

logger = logging.getLogger(__name__)


class ResidualBlock(nn.Module):
    """Enhanced residual block with skip connections"""

    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm1d(out_channels)

        # Shortcut connection; projects when shape changes
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm1d(out_channels)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Avoid in-place addition so autograd keeps the shortcut activations
        out = out + self.shortcut(x)
        out = F.relu(out)
        return out


class MultiHeadAttention(nn.Module):
    """Multi-head attention mechanism"""

    def __init__(self, dim, num_heads=8, dropout=0.1):
        super(MultiHeadAttention, self).__init__()
        assert dim % num_heads == 0, "dim must be divisible by num_heads"
        self.dim = dim
        self.num_heads = num_heads
        self.head_dim = dim // num_heads

        self.q_linear = nn.Linear(dim, dim)
        self.k_linear = nn.Linear(dim, dim)
        self.v_linear = nn.Linear(dim, dim)
        self.dropout = nn.Dropout(dropout)
        self.out = nn.Linear(dim, dim)

    def forward(self, x):
        batch_size, seq_len, dim = x.size()

        # Linear projections, split into heads
        q = self.q_linear(x).view(batch_size, seq_len, self.num_heads, self.head_dim)
        k = self.k_linear(x).view(batch_size, seq_len, self.num_heads, self.head_dim)
        v = self.v_linear(x).view(batch_size, seq_len, self.num_heads, self.head_dim)

        # Transpose to [batch, heads, seq_len, head_dim]
        q = q.transpose(1, 2)
        k = k.transpose(1, 2)
        v = v.transpose(1, 2)

        # Scaled dot-product attention
        scores = torch.matmul(q, k.transpose(-2, -1)) / np.sqrt(self.head_dim)
        attn_weights = F.softmax(scores, dim=-1)
        attn_weights = self.dropout(attn_weights)

        attn_output = torch.matmul(attn_weights, v)
        attn_output = attn_output.transpose(1, 2).contiguous().view(batch_size, seq_len, dim)

        return self.out(attn_output), attn_weights
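
# Shape sketch (illustrative, not part of the module's API): attention over a
# [batch, seq_len, dim] tensor returns the attended tensor and the per-head
# attention weights.
#
#   mha = MultiHeadAttention(dim=256, num_heads=8)
#   out, weights = mha(torch.randn(4, 2, 256))
#   # out: [4, 2, 256], weights: [4, 8, 2, 2]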


class OrderBookEncoder(nn.Module):
    """Specialized encoder for order book data"""

    def __init__(self, input_dim=100, hidden_dim=512):
        super(OrderBookEncoder, self).__init__()

        # Order book feature processing
        self.bid_encoder = nn.Sequential(
            nn.Linear(40, 128),  # 20 levels x 2 features
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Dropout(0.2)
        )

        self.ask_encoder = nn.Sequential(
            nn.Linear(40, 128),  # 20 levels x 2 features
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Dropout(0.2)
        )

        # Microstructure features
        self.microstructure_encoder = nn.Sequential(
            nn.Linear(15, 64),  # Liquidity + imbalance + flow features
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Dropout(0.2)
        )

        # Cross-attention between bids and asks
        self.cross_attention = MultiHeadAttention(256, num_heads=8)

        # Output projection
        self.output_projection = nn.Sequential(
            nn.Linear(256 + 256 + 128, hidden_dim),  # Combine all features
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, hidden_dim)
        )

    def forward(self, orderbook_features):
        """
        Process order book features

        Args:
            orderbook_features: Tensor of shape [batch, 100] containing:
                - 40 bid features (20 levels x 2)
                - 40 ask features (20 levels x 2)
                - 15 microstructure features
                - 5 flow signal features
        """
        # Split features
        bid_features = orderbook_features[:, :40]      # First 40 features
        ask_features = orderbook_features[:, 40:80]    # Next 40 features
        micro_features = orderbook_features[:, 80:95]  # Next 15 features
        # flow_features = orderbook_features[:, 95:100]  # Last 5 features (currently unused)

        # Encode each component
        bid_encoded = self.bid_encoder(bid_features)                 # [batch, 256]
        ask_encoded = self.ask_encoder(ask_features)                 # [batch, 256]
        micro_encoded = self.microstructure_encoder(micro_features)  # [batch, 128]

        # Add sequence dimension for attention
        bid_seq = bid_encoded.unsqueeze(1)  # [batch, 1, 256]
        ask_seq = ask_encoded.unsqueeze(1)  # [batch, 1, 256]

        # Cross-attention between bids and asks
        combined_seq = torch.cat([bid_seq, ask_seq], dim=1)  # [batch, 2, 256]
        attended_features, attention_weights = self.cross_attention(combined_seq)

        # Flatten attended features
        attended_flat = attended_features.view(attended_features.size(0), -1)  # [batch, 512]

        # Combine with microstructure features
        combined_features = torch.cat([attended_flat, micro_encoded], dim=1)  # [batch, 640]

        # Final projection
        output = self.output_projection(combined_features)

        return output
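
# Shape sketch (illustrative, assuming the default hidden_dim=512): a flat
# [batch, 100] order book feature vector is encoded to a [batch, 512] embedding.
#
#   encoder = OrderBookEncoder()
#   embedding = encoder(torch.randn(8, 100))  # -> [8, 512]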


class VolumeProfileEncoder(nn.Module):
    """Encoder for volume profile data"""

    def __init__(self, max_levels=50, hidden_dim=256):
        super(VolumeProfileEncoder, self).__init__()

        self.max_levels = max_levels

        # Process volume profile levels
        self.level_encoder = nn.Sequential(
            nn.Linear(7, 32),  # price, volume, buy_vol, sell_vol, trades, vwap, net_vol
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 64),
            nn.ReLU()
        )

        # Attention over price levels
        self.level_attention = MultiHeadAttention(64, num_heads=4)

        # Final aggregation
        self.aggregator = nn.Sequential(
            nn.Linear(64, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, hidden_dim)
        )

    def forward(self, volume_profile_data):
        """
        Process volume profile data

        Args:
            volume_profile_data: List of level dicts or a [batch, levels, 7] tensor
        """
        # If input is a list of dicts, convert to a tensor on the model's device
        if isinstance(volume_profile_data, list):
            device = next(self.parameters()).device
            if not volume_profile_data:
                # Return zero features if no data
                return torch.zeros(1, self.aggregator[-1].out_features, device=device)

            features = []
            for level in volume_profile_data[:self.max_levels]:
                level_features = [
                    level.get('price', 0.0),
                    level.get('volume', 0.0),
                    level.get('buy_volume', 0.0),
                    level.get('sell_volume', 0.0),
                    level.get('trades_count', 0.0),
                    level.get('vwap', 0.0),
                    level.get('net_volume', 0.0)
                ]
                features.append(level_features)

            # Pad to max_levels if needed
            while len(features) < self.max_levels:
                features.append([0.0] * 7)

            volume_tensor = torch.tensor(features, dtype=torch.float32, device=device).unsqueeze(0)
        else:
            volume_tensor = volume_profile_data

        batch_size, num_levels, feature_dim = volume_tensor.shape

        # Encode each level independently
        level_features = self.level_encoder(volume_tensor.view(-1, feature_dim))
        level_features = level_features.view(batch_size, num_levels, -1)

        # Apply attention across levels
        attended_levels, _ = self.level_attention(level_features)

        # Global average pooling over levels
        aggregated = torch.mean(attended_levels, dim=1)

        # Final processing
        output = self.aggregator(aggregated)

        return output
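
# Usage sketch (the level dict below is hypothetical, for illustration):
# list-of-dict input is truncated/padded to max_levels and encoded to
# [1, hidden_dim].
#
#   vp = VolumeProfileEncoder()
#   levels = [{'price': 50000.0, 'volume': 12.5, 'buy_volume': 7.0,
#              'sell_volume': 5.5, 'trades_count': 42, 'vwap': 50001.2,
#              'net_volume': 1.5}]
#   out = vp(levels)  # -> [1, 256]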


class EnhancedCNNWithOrderBook(nn.Module):
    """
    Enhanced CNN model integrating traditional market data with order book analysis

    Features:
    - Multi-scale convolutional processing for time series data
    - Specialized order book feature extraction
    - Volume profile analysis
    - Order flow signal integration
    - Multi-head attention mechanisms
    - Dueling architecture for value and advantage estimation
    """

    def __init__(self,
                 market_input_shape=(60, 50),  # Traditional market data (seq_len, features)
                 orderbook_features=100,       # Order book feature dimension
                 n_actions=2,
                 confidence_threshold=0.5):
        super(EnhancedCNNWithOrderBook, self).__init__()

        self.market_input_shape = market_input_shape
        self.orderbook_features = orderbook_features
        self.n_actions = n_actions
        self.confidence_threshold = confidence_threshold

        # Traditional market data processing
        self.market_encoder = self._build_market_encoder()

        # Order book data processing
        self.orderbook_encoder = OrderBookEncoder(
            input_dim=orderbook_features,
            hidden_dim=512
        )

        # Volume profile processing
        self.volume_encoder = VolumeProfileEncoder(
            max_levels=50,
            hidden_dim=256
        )

        # Feature fusion
        total_features = 1024 + 512 + 256  # market + orderbook + volume
        self.feature_fusion = nn.Sequential(
            nn.Linear(total_features, 1536),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1536, 1024),
            nn.ReLU(),
            nn.Dropout(0.3)
        )

        # Multi-head attention for integrated features
        self.integrated_attention = MultiHeadAttention(1024, num_heads=16)

        # Dueling architecture
        self.advantage_stream = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, n_actions)
        )

        self.value_stream = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 1)
        )

        # Auxiliary heads for multi-task learning
        self.extrema_head = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 3)  # bottom, top, neither
        )

        self.market_regime_head = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 8)  # trending, ranging, volatile, etc.
        )

        self.confidence_head = nn.Sequential(
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )

        # Initialize weights
        self._initialize_weights()

        # Device management
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

        logger.info("Enhanced CNN with Order Book initialized")
        logger.info(f"Market input shape: {market_input_shape}")
        logger.info(f"Order book features: {orderbook_features}")
        logger.info(f"Output actions: {n_actions}")

    def _build_market_encoder(self):
        """Build the traditional market data encoder.

        Expects channel-first input of shape [batch, features, seq_len],
        as produced by the transpose in forward().
        """
        seq_len, feature_dim = self.market_input_shape

        return nn.Sequential(
            # Temporal convolution; input channels are the per-timestep features
            nn.Conv1d(feature_dim, 256, kernel_size=5, padding=2),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.2),

            ResidualBlock(256, 512),
            ResidualBlock(512, 512),
            ResidualBlock(512, 768),
            ResidualBlock(768, 768),

            # Global pooling over the time dimension
            nn.AdaptiveAvgPool1d(1),
            nn.Flatten(),

            # Final projection
            nn.Linear(768, 1024),
            nn.ReLU(),
            nn.Dropout(0.3)
        )

    def _initialize_weights(self):
        """Initialize model weights"""
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, market_data, orderbook_data, volume_profile_data=None):
        """
        Forward pass through the integrated model

        Args:
            market_data: Traditional market data [batch, seq_len, features]
            orderbook_data: Order book features [batch, orderbook_features]
            volume_profile_data: Volume profile data (optional)

        Returns:
            Dictionary with Q-values, confidence, regime, and auxiliary predictions
        """
        # Ensure a batch dimension before reading the batch size
        if len(market_data.shape) == 2:
            market_data = market_data.unsqueeze(0)
        batch_size = market_data.size(0)

        # Conv1d expects channel-first input: [batch, features, seq_len]
        market_features = self.market_encoder(market_data.transpose(1, 2))

        # Process order book data
        orderbook_features = self.orderbook_encoder(orderbook_data)

        # Process volume profile data
        if volume_profile_data is not None:
            volume_features = self.volume_encoder(volume_profile_data)
        else:
            volume_features = torch.zeros(batch_size, 256, device=self.device)

        # Fuse all features
        combined_features = torch.cat([
            market_features,
            orderbook_features,
            volume_features
        ], dim=1)

        # Feature fusion
        fused_features = self.feature_fusion(combined_features)

        # Apply attention over a single-step sequence
        attended_features = fused_features.unsqueeze(1)  # Add sequence dimension
        attended_output, attention_weights = self.integrated_attention(attended_features)
        final_features = attended_output.squeeze(1)  # Remove sequence dimension

        # Dueling architecture: combine state value and action advantages
        advantage = self.advantage_stream(final_features)
        value = self.value_stream(final_features)
        q_values = value + advantage - advantage.mean(dim=1, keepdim=True)

        # Auxiliary predictions
        extrema_pred = self.extrema_head(final_features)
        regime_pred = self.market_regime_head(final_features)
        confidence = self.confidence_head(final_features)

        return {
            'q_values': q_values,
            'confidence': confidence,
            'extrema_prediction': extrema_pred,
            'market_regime': regime_pred,
            'attention_weights': attention_weights,
            'integrated_features': final_features
        }

    def predict(self, market_data, orderbook_data, volume_profile_data=None):
        """Make a single-sample prediction with confidence thresholding"""
        self.eval()

        with torch.no_grad():
            # Convert inputs to tensors if needed
            if isinstance(market_data, np.ndarray):
                market_data = torch.FloatTensor(market_data).to(self.device)
            if isinstance(orderbook_data, np.ndarray):
                orderbook_data = torch.FloatTensor(orderbook_data).to(self.device)

            # Ensure batch dimension
            if len(market_data.shape) == 2:
                market_data = market_data.unsqueeze(0)
            if len(orderbook_data.shape) == 1:
                orderbook_data = orderbook_data.unsqueeze(0)

            # Forward pass
            outputs = self.forward(market_data, orderbook_data, volume_profile_data)

            # Get probabilities (.item() assumes a batch of one)
            q_values = outputs['q_values']
            probs = F.softmax(q_values, dim=1)
            confidence = outputs['confidence'].item()

            # Action selection with confidence thresholding
            if confidence >= self.confidence_threshold:
                action = torch.argmax(q_values, dim=1).item()
            else:
                action = None  # No action due to low confidence

            return {
                'action': action,
                'probabilities': probs.cpu().numpy()[0],
                'confidence': confidence,
                'q_values': q_values.cpu().numpy()[0],
                'extrema_prediction': F.softmax(outputs['extrema_prediction'], dim=1).cpu().numpy()[0],
                'market_regime': F.softmax(outputs['market_regime'], dim=1).cpu().numpy()[0]
            }
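
    # Usage sketch (shapes assume the default constructor; random inputs are
    # for illustration only): predict() accepts numpy arrays and returns
    # action=None when confidence falls below the threshold.
    #
    #   result = model.predict(np.random.randn(60, 50).astype(np.float32),
    #                          np.random.randn(100).astype(np.float32))
    #   if result['action'] is not None:
    #       execute(result['action'])  # hypothetical execution hook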

    def get_feature_importance(self, market_data, orderbook_data, volume_profile_data=None):
        """Analyze input importance via gradient magnitudes"""
        self.eval()

        # Enable gradient computation for the inputs (expects float tensors on self.device)
        market_data.requires_grad_(True)
        orderbook_data.requires_grad_(True)

        # Forward pass
        outputs = self.forward(market_data, orderbook_data, volume_profile_data)

        # Backpropagate from the summed Q-values
        q_values = outputs['q_values']
        q_values.sum().backward()

        # Mean absolute gradient serves as a rough importance score
        market_importance = torch.abs(market_data.grad).mean().item()
        orderbook_importance = torch.abs(orderbook_data.grad).mean().item()

        return {
            'market_importance': market_importance,
            'orderbook_importance': orderbook_importance,
            'total_importance': market_importance + orderbook_importance
        }

    def save(self, path):
        """Save model state"""
        torch.save({
            'model_state_dict': self.state_dict(),
            'market_input_shape': self.market_input_shape,
            'orderbook_features': self.orderbook_features,
            'n_actions': self.n_actions,
            'confidence_threshold': self.confidence_threshold
        }, path)
        logger.info(f"Enhanced CNN with Order Book saved to {path}")

    def load(self, path):
        """Load model state"""
        checkpoint = torch.load(path, map_location=self.device)
        self.load_state_dict(checkpoint['model_state_dict'])
        logger.info(f"Enhanced CNN with Order Book loaded from {path}")

    def get_memory_usage(self):
        """Get model size statistics"""
        total_params = sum(p.numel() for p in self.parameters())
        trainable_params = sum(p.numel() for p in self.parameters() if p.requires_grad)

        return {
            'total_parameters': total_params,
            'trainable_parameters': trainable_params,
            'model_size_mb': total_params * 4 / (1024 * 1024),  # Assuming float32
        }


def create_enhanced_cnn_with_orderbook(
        market_input_shape=(60, 50),
        orderbook_features=100,
        n_actions=2,
        device='cuda'):
    """Create and initialize the enhanced CNN with order book integration"""
    model = EnhancedCNNWithOrderBook(
        market_input_shape=market_input_shape,
        orderbook_features=orderbook_features,
        n_actions=n_actions
    )

    # Respect the requested device, falling back to CPU when CUDA is unavailable
    if device == 'cuda' and not torch.cuda.is_available():
        device = 'cpu'
    model = model.to(device)
    model.device = torch.device(device)  # keep the internal device marker in sync

    memory_usage = model.get_memory_usage()
    logger.info(f"Created Enhanced CNN with Order Book: {memory_usage['total_parameters']:,} parameters")
    logger.info(f"Model size: {memory_usage['model_size_mb']:.1f} MB")

    return model
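

if __name__ == "__main__":
    # Smoke-test sketch with random data. The shapes mirror the defaults
    # (60 timesteps x 50 market features, 100 order book features) and are
    # assumptions for illustration, not a real data pipeline.
    logging.basicConfig(level=logging.INFO)

    model = create_enhanced_cnn_with_orderbook(device='cpu')
    market = np.random.randn(60, 50).astype(np.float32)
    orderbook = np.random.randn(100).astype(np.float32)

    result = model.predict(market, orderbook)
    print("action:", result['action'])
    print("confidence:", round(result['confidence'], 3))
    print("q_values:", result['q_values'])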