COB integration and refactoring
371	NN/models/cob_rl_model.py	Normal file
@@ -0,0 +1,371 @@
"""
COB RL Model - 1B Parameter Reinforcement Learning Network for COB Trading

This module contains the massive 1B+ parameter RL network optimized for real-time
Consolidated Order Book (COB) trading. The model processes COB features and performs
inference every 200ms for ultra-low latency trading decisions.

Architecture:
- Input: 2000-dimensional COB features
- Core: 12-layer transformer with 4096 hidden size (32 attention heads)
- Output: Price direction (DOWN/SIDEWAYS/UP), value estimation, confidence
- Parameters: 1B+ total parameters for maximum market understanding
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import logging
from typing import Dict, List, Optional, Tuple, Any

logger = logging.getLogger(__name__)

class MassiveRLNetwork(nn.Module):
    """
    Massive 1B+ parameter RL network optimized for real-time COB trading

    This network processes consolidated order book data and makes predictions about
    future price movements with high confidence. Designed for 200ms inference cycles.
    """

    def __init__(self, input_size: int = 2000, hidden_size: int = 4096, num_layers: int = 12):
        super(MassiveRLNetwork, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Massive input processing layers
        self.input_projection = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.GELU(),
            nn.Dropout(0.1)
        )

        # Massive transformer-style encoder layers
        self.encoder_layers = nn.ModuleList([
            nn.TransformerEncoderLayer(
                d_model=hidden_size,
                nhead=32,  # Large number of attention heads
                dim_feedforward=hidden_size * 4,  # 16K feedforward
                dropout=0.1,
                activation='gelu',
                batch_first=True
            ) for _ in range(num_layers)
        ])

        # Market regime understanding layers
        self.regime_encoder = nn.Sequential(
            nn.Linear(hidden_size, hidden_size * 2),
            nn.LayerNorm(hidden_size * 2),
            nn.GELU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_size * 2, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.GELU()
        )

        # Price prediction head (main RL objective)
        self.price_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.LayerNorm(hidden_size // 2),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_size // 2, hidden_size // 4),
            nn.LayerNorm(hidden_size // 4),
            nn.GELU(),
            nn.Linear(hidden_size // 4, 3)  # DOWN, SIDEWAYS, UP
        )

        # Value estimation head for RL
        self.value_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.LayerNorm(hidden_size // 2),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_size // 2, hidden_size // 4),
            nn.LayerNorm(hidden_size // 4),
            nn.GELU(),
            nn.Linear(hidden_size // 4, 1)
        )

        # Confidence head
        self.confidence_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 4),
            nn.LayerNorm(hidden_size // 4),
            nn.GELU(),
            nn.Linear(hidden_size // 4, 1),
            nn.Sigmoid()
        )

        # Initialize weights
        self.apply(self._init_weights)

        # Calculate total parameters
        total_params = sum(p.numel() for p in self.parameters())
        logger.info(f"COB RL Network initialized with {total_params:,} parameters")

    def _init_weights(self, module):
        """Initialize weights with proper scaling for large models"""
        if isinstance(module, nn.Linear):
            torch.nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.LayerNorm):
            torch.nn.init.ones_(module.weight)
            torch.nn.init.zeros_(module.bias)

    def forward(self, x):
        """
        Forward pass through massive network

        Args:
            x: Input tensor of shape [batch_size, input_size] containing COB features

        Returns:
            Dict containing:
            - price_logits: Logits for price direction (DOWN/SIDEWAYS/UP)
            - value: Value estimation for RL
            - confidence: Confidence score [0, 1]
            - features: Hidden features for analysis
        """
        batch_size = x.size(0)

        # Project input
        x = self.input_projection(x)  # [batch, hidden_size]

        # Add sequence dimension for transformer
        x = x.unsqueeze(1)  # [batch, 1, hidden_size]

        # Pass through transformer layers
        for layer in self.encoder_layers:
            x = layer(x)

        # Remove sequence dimension
        x = x.squeeze(1)  # [batch, hidden_size]

        # Apply regime encoding
        x = self.regime_encoder(x)

        # Generate predictions
        price_logits = self.price_head(x)
        value = self.value_head(x)
        confidence = self.confidence_head(x)

        return {
            'price_logits': price_logits,
            'value': value,
            'confidence': confidence,
            'features': x  # Hidden features for analysis
        }
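
    # Output shape sketch (illustrative note, assuming the default hidden_size=4096):
    # for an input batch of shape [B, 2000], forward() returns
    #   price_logits: [B, 3]     raw scores for DOWN/SIDEWAYS/UP
    #   value:        [B, 1]     state-value estimate for RL
    #   confidence:   [B, 1]     sigmoid output in [0, 1]
    #   features:     [B, 4096]  regime-encoded hidden features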

    def predict(self, cob_features: np.ndarray) -> Dict[str, Any]:
        """
        High-level prediction method for COB features

        Args:
            cob_features: COB features as numpy array [input_size]

        Returns:
            Dict containing prediction results
        """
        self.eval()
        with torch.no_grad():
            # Convert to tensor and add batch dimension
            if isinstance(cob_features, np.ndarray):
                x = torch.from_numpy(cob_features).float()
            else:
                x = cob_features.float()

            if x.dim() == 1:
                x = x.unsqueeze(0)  # Add batch dimension

            # Move to device
            device = next(self.parameters()).device
            x = x.to(device)

            # Forward pass
            outputs = self.forward(x)

            # Process outputs
            price_probs = F.softmax(outputs['price_logits'], dim=1)
            predicted_direction = torch.argmax(price_probs, dim=1).item()
            confidence = outputs['confidence'].item()
            value = outputs['value'].item()

            return {
                'predicted_direction': predicted_direction,  # 0=DOWN, 1=SIDEWAYS, 2=UP
                'confidence': confidence,
                'value': value,
                'probabilities': price_probs.cpu().numpy()[0],
                'direction_text': ['DOWN', 'SIDEWAYS', 'UP'][predicted_direction]
            }

    def get_model_info(self) -> Dict[str, Any]:
        """Get model architecture information"""
        total_params = sum(p.numel() for p in self.parameters())
        trainable_params = sum(p.numel() for p in self.parameters() if p.requires_grad)

        return {
            'model_name': 'MassiveRLNetwork',
            'total_parameters': total_params,
            'trainable_parameters': trainable_params,
            'input_size': self.input_size,
            'hidden_size': self.hidden_size,
            'num_layers': self.num_layers,
            'architecture': 'Transformer-based RL Network',
            'designed_for': 'Real-time COB trading (200ms inference)',
            'output_classes': ['DOWN', 'SIDEWAYS', 'UP']
        }

class COBRLModelInterface:
    """
    Interface for the COB RL model that handles model management, training, and inference
    """

    def __init__(self, model_checkpoint_dir: str = "models/realtime_rl_cob", device: Optional[str] = None):
        self.model_checkpoint_dir = model_checkpoint_dir
        self.device = torch.device(device if device else ('cuda' if torch.cuda.is_available() else 'cpu'))

        # Initialize model
        self.model = MassiveRLNetwork().to(self.device)

        # Initialize optimizer
        self.optimizer = torch.optim.AdamW(
            self.model.parameters(),
            lr=1e-5,  # Low learning rate for stability
            weight_decay=1e-6,
            betas=(0.9, 0.999)
        )

        # Initialize scaler for mixed precision training
        self.scaler = torch.cuda.amp.GradScaler() if self.device.type == 'cuda' else None

        logger.info(f"COB RL Model Interface initialized on {self.device}")

    def predict(self, cob_features: np.ndarray) -> Dict[str, Any]:
        """Make prediction using the model"""
        self.model.eval()
        with torch.no_grad():
            # Convert to tensor and add batch dimension
            if isinstance(cob_features, np.ndarray):
                x = torch.from_numpy(cob_features).float()
            else:
                x = cob_features.float()

            if x.dim() == 1:
                x = x.unsqueeze(0)  # Add batch dimension

            # Move to device
            x = x.to(self.device)

            # Forward pass
            outputs = self.model(x)

            # Process outputs
            price_probs = F.softmax(outputs['price_logits'], dim=1)
            predicted_direction = torch.argmax(price_probs, dim=1).item()
            confidence = outputs['confidence'].item()
            value = outputs['value'].item()

            return {
                'predicted_direction': predicted_direction,  # 0=DOWN, 1=SIDEWAYS, 2=UP
                'confidence': confidence,
                'value': value,
                'probabilities': price_probs.cpu().numpy()[0],
                'direction_text': ['DOWN', 'SIDEWAYS', 'UP'][predicted_direction]
            }

    def train_step(self, features: torch.Tensor, targets: Dict[str, torch.Tensor]) -> float:
        """
        Perform one training step

        Args:
            features: Input COB features [batch_size, input_size]
            targets: Dict containing 'direction', 'value', 'confidence' targets

        Returns:
            Training loss value
        """
        self.model.train()
        self.optimizer.zero_grad()

        if self.scaler:
            with torch.cuda.amp.autocast():
                outputs = self.model(features)
                loss = self._calculate_loss(outputs, targets)

            self.scaler.scale(loss).backward()
            self.scaler.unscale_(self.optimizer)
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
            self.scaler.step(self.optimizer)
            self.scaler.update()
        else:
            outputs = self.model(features)
            loss = self._calculate_loss(outputs, targets)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
            self.optimizer.step()

        return loss.item()

    def _calculate_loss(self, outputs: Dict[str, torch.Tensor], targets: Dict[str, torch.Tensor]) -> torch.Tensor:
        """Calculate combined loss for RL training"""
        # Direction prediction loss (cross-entropy)
        direction_loss = F.cross_entropy(outputs['price_logits'], targets['direction'])

        # Value estimation loss (MSE)
        value_loss = F.mse_loss(outputs['value'].squeeze(), targets['value'])

        # Confidence loss (BCE)
        confidence_loss = F.binary_cross_entropy(outputs['confidence'].squeeze(), targets['confidence'])

        # Combined loss with weights
        total_loss = direction_loss + 0.5 * value_loss + 0.3 * confidence_loss

        return total_loss
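
    # Illustrative target construction for train_step()/_calculate_loss()
    # (a sketch, not part of the training pipeline in this commit): 'direction'
    # must be class indices for cross_entropy, while 'value' and 'confidence'
    # are floats matching the squeezed model outputs.
    #
    #     targets = {
    #         'direction': torch.tensor([2], dtype=torch.long),        # 0=DOWN, 1=SIDEWAYS, 2=UP
    #         'value': torch.tensor([0.75], dtype=torch.float32),      # expected return estimate
    #         'confidence': torch.tensor([0.9], dtype=torch.float32),  # target confidence in [0, 1]
    #     }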

    def save_model(self, filepath: Optional[str] = None):
        """Save model checkpoint"""
        if filepath is None:
            import os
            os.makedirs(self.model_checkpoint_dir, exist_ok=True)
            filepath = f"{self.model_checkpoint_dir}/cob_rl_model_latest.pt"

        checkpoint = {
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'model_info': self.model.get_model_info()
        }

        if self.scaler:
            checkpoint['scaler_state_dict'] = self.scaler.state_dict()

        torch.save(checkpoint, filepath)
        logger.info(f"COB RL model saved to {filepath}")

    def load_model(self, filepath: Optional[str] = None):
        """Load model checkpoint"""
        if filepath is None:
            filepath = f"{self.model_checkpoint_dir}/cob_rl_model_latest.pt"

        try:
            checkpoint = torch.load(filepath, map_location=self.device)
            self.model.load_state_dict(checkpoint['model_state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

            if self.scaler and 'scaler_state_dict' in checkpoint:
                self.scaler.load_state_dict(checkpoint['scaler_state_dict'])

            logger.info(f"COB RL model loaded from {filepath}")
            return True
        except Exception as e:
            logger.warning(f"Failed to load COB RL model from {filepath}: {e}")
            return False

    def get_model_stats(self) -> Dict[str, Any]:
        """Get model statistics"""
        return self.model.get_model_info()
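

# Minimal smoke-test sketch (illustrative only, not part of the committed module).
# It uses a deliberately small MassiveRLNetwork so the check runs quickly on CPU;
# the full default configuration (hidden_size=4096, num_layers=12) is far larger.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    # Tiny configuration for a quick shape/interface check
    net = MassiveRLNetwork(input_size=2000, hidden_size=256, num_layers=2)
    dummy_features = np.random.randn(2000).astype(np.float32)
    result = net.predict(dummy_features)
    print(result['direction_text'], result['confidence'], result['probabilities'])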