"""
|
|
COB RL Model - 1B Parameter Reinforcement Learning Network for COB Trading
|
|
|
|
This module contains the massive 1B+ parameter RL network optimized for real-time
|
|
Consolidated Order Book (COB) trading. The model processes COB features and performs
|
|
inference every 200ms for ultra-low latency trading decisions.
|
|
|
|
Architecture:
|
|
- Input: 2000-dimensional COB features
|
|
- Core: 12-layer transformer with 4096 hidden size (32 attention heads)
|
|
- Output: Price direction (DOWN/SIDEWAYS/UP), value estimation, confidence
|
|
- Parameters: ~1B total parameters for maximum market understanding
|
|
"""

import logging
import os
from typing import Any, Dict

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from models import ModelInterface

logger = logging.getLogger(__name__)


class MassiveRLNetwork(nn.Module):
    """
    Large (~400M parameter) RL network optimized for real-time COB trading.

    This network processes consolidated order book data and predicts future
    price movements, a value estimate, and a confidence score for each
    prediction. Designed for 200ms inference cycles.
    """

    def __init__(self, input_size: int = 2000, hidden_size: int = 2048, num_layers: int = 8):
        super(MassiveRLNetwork, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Input projection: map raw COB features into the transformer width
        self.input_projection = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.GELU(),
            nn.Dropout(0.1)
        )

        # Transformer encoder layers (sized for the ~400M parameter target)
        self.encoder_layers = nn.ModuleList([
            nn.TransformerEncoderLayer(
                d_model=hidden_size,
                nhead=16,  # Reduced attention head count for efficiency
                dim_feedforward=hidden_size * 3,  # 6144-dim feedforward (down from 16K)
                dropout=0.1,
                activation='gelu',
                batch_first=True
            ) for _ in range(num_layers)
        ])

        # Market regime understanding layers
        self.regime_encoder = nn.Sequential(
            nn.Linear(hidden_size, hidden_size + 512),  # Modest expansion
            nn.LayerNorm(hidden_size + 512),
            nn.GELU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_size + 512, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.GELU()
        )

        # Price prediction head (main RL objective)
        self.price_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.LayerNorm(hidden_size // 2),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_size // 2, hidden_size // 4),
            nn.LayerNorm(hidden_size // 4),
            nn.GELU(),
            nn.Linear(hidden_size // 4, 3)  # DOWN, SIDEWAYS, UP
        )

        # Value estimation head for RL
        self.value_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.LayerNorm(hidden_size // 2),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_size // 2, hidden_size // 4),
            nn.LayerNorm(hidden_size // 4),
            nn.GELU(),
            nn.Linear(hidden_size // 4, 1)
        )

        # Confidence head
        self.confidence_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 4),
            nn.LayerNorm(hidden_size // 4),
            nn.GELU(),
            nn.Linear(hidden_size // 4, 1),
            nn.Sigmoid()
        )

        # Initialize weights
        self.apply(self._init_weights)

        # Log total parameter count
        total_params = sum(p.numel() for p in self.parameters())
        logger.info(f"COB RL Network initialized with {total_params:,} parameters")
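
        # Rough parameter budget at the defaults (hidden_size=2048, num_layers=8),
        # given here as an estimate only: ~4M input projection + ~42M per encoder
        # layer (self-attention + 6144-dim feedforward) x 8 layers + ~10M regime
        # encoder + prediction heads, for roughly 350-400M parameters in total.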

    def _init_weights(self, module):
        """Initialize weights with proper scaling for large models"""
        if isinstance(module, nn.Linear):
            torch.nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.LayerNorm):
            torch.nn.init.ones_(module.weight)
            torch.nn.init.zeros_(module.bias)

    def forward(self, x):
        """
        Forward pass through the network.

        Args:
            x: Input tensor of shape [batch_size, input_size] containing COB features

        Returns:
            Dict containing:
            - price_logits: Logits for price direction (DOWN/SIDEWAYS/UP)
            - value: Value estimation for RL
            - confidence: Confidence score in [0, 1]
            - features: Hidden features for analysis
        """
        # Project input
        x = self.input_projection(x)  # [batch, hidden_size]

        # Add sequence dimension for the transformer
        x = x.unsqueeze(1)  # [batch, 1, hidden_size]

        # Pass through transformer layers
        for layer in self.encoder_layers:
            x = layer(x)

        # Remove sequence dimension
        x = x.squeeze(1)  # [batch, hidden_size]

        # Apply regime encoding
        x = self.regime_encoder(x)

        # Generate predictions
        price_logits = self.price_head(x)
        value = self.value_head(x)
        confidence = self.confidence_head(x)

        return {
            'price_logits': price_logits,
            'value': value,
            'confidence': confidence,
            'features': x  # Hidden features for analysis
        }

    def predict(self, cob_features: np.ndarray) -> Dict[str, Any]:
        """
        High-level prediction method for COB features.

        Args:
            cob_features: COB features as a numpy array of shape [input_size]

        Returns:
            Dict containing prediction results
        """
        self.eval()
        with torch.no_grad():
            # Convert to tensor and add batch dimension
            if isinstance(cob_features, np.ndarray):
                x = torch.from_numpy(cob_features).float()
            else:
                x = cob_features.float()

            if x.dim() == 1:
                x = x.unsqueeze(0)  # Add batch dimension

            # Move to the model's device
            device = next(self.parameters()).device
            x = x.to(device)

            # Forward pass
            outputs = self.forward(x)

            # Process outputs
            price_probs = F.softmax(outputs['price_logits'], dim=1)
            predicted_direction = torch.argmax(price_probs, dim=1).item()
            confidence = outputs['confidence'].item()
            value = outputs['value'].item()

            return {
                'predicted_direction': predicted_direction,  # 0=DOWN, 1=SIDEWAYS, 2=UP
                'confidence': confidence,
                'value': value,
                'probabilities': price_probs.cpu().numpy()[0],
                'direction_text': ['DOWN', 'SIDEWAYS', 'UP'][predicted_direction]
            }

    def get_model_info(self) -> Dict[str, Any]:
        """Get model architecture information"""
        total_params = sum(p.numel() for p in self.parameters())
        trainable_params = sum(p.numel() for p in self.parameters() if p.requires_grad)

        return {
            'model_name': 'MassiveRLNetwork',
            'total_parameters': total_params,
            'trainable_parameters': trainable_params,
            'input_size': self.input_size,
            'hidden_size': self.hidden_size,
            'num_layers': self.num_layers,
            'architecture': 'Transformer-based RL Network',
            'designed_for': 'Real-time COB trading (200ms inference)',
            'output_classes': ['DOWN', 'SIDEWAYS', 'UP']
        }
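

# A minimal shape-check sketch (a hypothetical helper, not part of the original
# API): it pushes random data through MassiveRLNetwork and asserts the output
# shapes documented in forward(). Note that instantiating the network allocates
# the full ~400M parameters.
def _forward_shape_check() -> None:
    net = MassiveRLNetwork()
    out = net(torch.randn(2, 2000))             # [batch=2, input_size=2000]
    assert out['price_logits'].shape == (2, 3)  # DOWN / SIDEWAYS / UP logits
    assert out['value'].shape == (2, 1)         # scalar value estimate per sample
    assert out['confidence'].shape == (2, 1)    # sigmoid confidence in [0, 1]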


class COBRLModelInterface(ModelInterface):
    """
    Interface for the COB RL model that handles model management, training, and inference.
    """

    def __init__(self, model_checkpoint_dir: str = "models/realtime_rl_cob", device: str = None):
        super().__init__(name="cob_rl_model")  # Initialize ModelInterface with a name
        self.model_checkpoint_dir = model_checkpoint_dir
        self.device = torch.device(device if device else ('cuda' if torch.cuda.is_available() else 'cpu'))

        # Initialize model
        self.model = MassiveRLNetwork().to(self.device)

        # Initialize optimizer
        self.optimizer = torch.optim.AdamW(
            self.model.parameters(),
            lr=1e-5,  # Low learning rate for stability
            weight_decay=1e-6,
            betas=(0.9, 0.999)
        )

        # Gradient scaler for mixed-precision training (CUDA only)
        self.scaler = torch.cuda.amp.GradScaler() if self.device.type == 'cuda' else None

        logger.info(f"COB RL Model Interface initialized on {self.device}")

    def to(self, device):
        """PyTorch-style device movement method"""
        self.device = device
        self.model = self.model.to(device)
        return self

    def predict(self, cob_features: np.ndarray) -> Dict[str, Any]:
        """Make a prediction using the model"""
        self.model.eval()
        with torch.no_grad():
            # Convert to tensor and add batch dimension
            if isinstance(cob_features, np.ndarray):
                x = torch.from_numpy(cob_features).float()
            else:
                x = cob_features.float()

            if x.dim() == 1:
                x = x.unsqueeze(0)  # Add batch dimension

            # Move to device
            x = x.to(self.device)

            # Forward pass
            outputs = self.model(x)

            # Process outputs
            price_probs = F.softmax(outputs['price_logits'], dim=1)
            predicted_direction = torch.argmax(price_probs, dim=1).item()
            confidence = outputs['confidence'].item()
            value = outputs['value'].item()

            return {
                'predicted_direction': predicted_direction,  # 0=DOWN, 1=SIDEWAYS, 2=UP
                'confidence': confidence,
                'value': value,
                'probabilities': price_probs.cpu().numpy()[0],
                'direction_text': ['DOWN', 'SIDEWAYS', 'UP'][predicted_direction]
            }

    def train_step(self, features: torch.Tensor, targets: Dict[str, torch.Tensor]) -> float:
        """
        Perform one training step.

        Args:
            features: Input COB features [batch_size, input_size]
            targets: Dict containing 'direction', 'value', and 'confidence' targets
                     (see the shape note after _calculate_loss below)

        Returns:
            Training loss value
        """
        self.model.train()
        self.optimizer.zero_grad()

        if self.scaler:
            # Mixed-precision path: scale the loss, then unscale before clipping
            with torch.cuda.amp.autocast():
                outputs = self.model(features)
                loss = self._calculate_loss(outputs, targets)

            self.scaler.scale(loss).backward()
            self.scaler.unscale_(self.optimizer)
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
            self.scaler.step(self.optimizer)
            self.scaler.update()
        else:
            outputs = self.model(features)
            loss = self._calculate_loss(outputs, targets)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
            self.optimizer.step()

        return loss.item()

    def _calculate_loss(self, outputs: Dict[str, torch.Tensor], targets: Dict[str, torch.Tensor]) -> torch.Tensor:
        """Calculate combined loss for RL training"""
        # Direction prediction loss (cross-entropy)
        direction_loss = F.cross_entropy(outputs['price_logits'], targets['direction'])

        # Value estimation loss (MSE); squeeze only the last dim so a
        # batch of size 1 keeps its batch dimension
        value_loss = F.mse_loss(outputs['value'].squeeze(-1), targets['value'])

        # Confidence loss (BCE)
        confidence_loss = F.binary_cross_entropy(outputs['confidence'].squeeze(-1), targets['confidence'])

        # Combined loss: direction is primary, value and confidence are auxiliary
        total_loss = direction_loss + 0.5 * value_loss + 0.3 * confidence_loss

        return total_loss
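
    # Shape note for train_step/_calculate_loss targets (inferred from the loss
    # terms above; the original module does not document these explicitly):
    #   targets['direction']:  LongTensor  [batch]  - class index (0=DOWN, 1=SIDEWAYS, 2=UP)
    #   targets['value']:      FloatTensor [batch]  - value target for the MSE term
    #   targets['confidence']: FloatTensor [batch]  - target confidence in [0, 1] for BCE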

    def save_model(self, filepath: str = None):
        """Save model checkpoint"""
        if filepath is None:
            os.makedirs(self.model_checkpoint_dir, exist_ok=True)
            filepath = f"{self.model_checkpoint_dir}/cob_rl_model_latest.pt"

        checkpoint = {
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'model_info': self.model.get_model_info()
        }

        if self.scaler:
            checkpoint['scaler_state_dict'] = self.scaler.state_dict()

        torch.save(checkpoint, filepath)
        logger.info(f"COB RL model saved to {filepath}")

    def load_model(self, filepath: str = None):
        """Load model checkpoint"""
        if filepath is None:
            filepath = f"{self.model_checkpoint_dir}/cob_rl_model_latest.pt"

        try:
            checkpoint = torch.load(filepath, map_location=self.device)
            self.model.load_state_dict(checkpoint['model_state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

            if self.scaler and 'scaler_state_dict' in checkpoint:
                self.scaler.load_state_dict(checkpoint['scaler_state_dict'])

            logger.info(f"COB RL model loaded from {filepath}")
            return True
        except Exception as e:
            logger.warning(f"Failed to load COB RL model from {filepath}: {e}")
            return False

    def get_model_stats(self) -> Dict[str, Any]:
        """Get model statistics"""
        return self.model.get_model_info()

    def get_memory_usage(self) -> float:
        """Estimate model memory usage in MB (parameters only, assuming float32)"""
        try:
            # 4 bytes per float32 parameter. This excludes activations, gradients,
            # and optimizer state, so actual usage during training is several
            # times higher; a ~400M parameter model is ~1.6 GB of weights alone.
            total_params = sum(p.numel() for p in self.model.parameters())
            memory_bytes = total_params * 4
            return memory_bytes / (1024 * 1024)
        except Exception as e:
            logger.debug(f"Could not estimate COBRLModel memory usage: {e}")
            return 1600.0  # Fallback: ~1.6 GB, the float32 weight size of a 400M-parameter model
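

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module): runs one prediction
# and one training step, with random tensors standing in for real COB
# order-book snapshots. Assumes `models.ModelInterface` is importable.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    interface = COBRLModelInterface()

    # Inference: a single 2000-dim feature vector -> direction / confidence / value
    features = np.random.randn(2000).astype(np.float32)
    prediction = interface.predict(features)
    print(f"direction={prediction['direction_text']} "
          f"confidence={prediction['confidence']:.3f} value={prediction['value']:.3f}")

    # Training: one step on a random batch, using the target schema noted above
    batch = torch.randn(4, 2000, device=interface.device)
    targets = {
        'direction': torch.randint(0, 3, (4,), device=interface.device),
        'value': torch.randn(4, device=interface.device),
        'confidence': torch.rand(4, device=interface.device),
    }
    loss = interface.train_step(batch, targets)
    print(f"train_step loss={loss:.4f}, "
          f"approx parameter memory: {interface.get_memory_usage():.0f} MB")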