removed COB 400M Model, text data stream wip
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -52,3 +52,4 @@ wandb/
|
||||
*.wandb
|
||||
*__pycache__/*
|
||||
NN/__pycache__/__init__.cpython-312.pyc
|
||||
data_snapshot_*.json
|
||||
|
251
COB_MODEL_ARCHITECTURE_DOCUMENTATION.md
Normal file
251
COB_MODEL_ARCHITECTURE_DOCUMENTATION.md
Normal file
@@ -0,0 +1,251 @@
|
||||
# COB RL Model Architecture Documentation
|
||||
|
||||
**Status**: REMOVED (Preserved for Future Recreation)
|
||||
**Date**: 2025-01-03
|
||||
**Reason**: Clean up code while preserving architecture for future improvement when quality COB data is available
|
||||
|
||||
## Overview
|
||||
|
||||
The COB (Consolidated Order Book) RL Model was a massive 356M+ parameter neural network specifically designed for real-time market microstructure analysis and trading decisions based on order book data.
|
||||
|
||||
## Architecture Details
|
||||
|
||||
### Core Network: `MassiveRLNetwork`
|
||||
|
||||
**Input**: 2000-dimensional COB features
|
||||
**Target Parameters**: ~356M (optimized from initial 1B target)
|
||||
**Inference Target**: 200ms cycles for ultra-low latency trading
|
||||
|
||||
#### Layer Structure:
|
||||
|
||||
```python
|
||||
class MassiveRLNetwork(nn.Module):
|
||||
def __init__(self, input_size=2000, hidden_size=2048, num_layers=8):
|
||||
# Input projection layer
|
||||
self.input_projection = nn.Sequential(
|
||||
nn.Linear(input_size, hidden_size), # 2000 -> 2048
|
||||
nn.LayerNorm(hidden_size),
|
||||
nn.GELU(),
|
||||
nn.Dropout(0.1)
|
||||
)
|
||||
|
||||
# 8 Transformer encoder layers (main parameter bulk)
|
||||
self.encoder_layers = nn.ModuleList([
|
||||
nn.TransformerEncoderLayer(
|
||||
d_model=2048, # Hidden dimension
|
||||
nhead=16, # 16 attention heads
|
||||
dim_feedforward=6144, # 3x hidden (6K feedforward)
|
||||
dropout=0.1,
|
||||
activation='gelu',
|
||||
batch_first=True
|
||||
) for _ in range(8) # 8 layers
|
||||
])
|
||||
|
||||
# Market regime understanding
|
||||
self.regime_encoder = nn.Sequential(
|
||||
nn.Linear(2048, 2560), # Expansion layer
|
||||
nn.LayerNorm(2560),
|
||||
nn.GELU(),
|
||||
nn.Dropout(0.1),
|
||||
nn.Linear(2560, 2048), # Back to hidden size
|
||||
nn.LayerNorm(2048),
|
||||
nn.GELU()
|
||||
)
|
||||
|
||||
# Output heads
|
||||
self.price_head = ... # 3-class: DOWN/SIDEWAYS/UP
|
||||
self.value_head = ... # RL value estimation
|
||||
self.confidence_head = ... # Confidence [0,1]
|
||||
```
|
||||
|
||||
#### Parameter Breakdown:
|
||||
- **Input Projection**: ~4M parameters (2000×2048 + bias)
|
||||
- **Transformer Layers**: ~320M parameters (8 layers × ~40M each)
|
||||
- **Regime Encoder**: ~10M parameters
|
||||
- **Output Heads**: ~15M parameters
|
||||
- **Total**: ~356M parameters
|
||||
|
||||
### Model Interface: `COBRLModelInterface`
|
||||
|
||||
Wrapper class providing:
|
||||
- Model management and lifecycle
|
||||
- Training step functionality with mixed precision
|
||||
- Checkpoint saving/loading
|
||||
- Prediction interface
|
||||
- Memory usage estimation
|
||||
|
||||
#### Key Features:
|
||||
```python
|
||||
class COBRLModelInterface(ModelInterface):
|
||||
def __init__(self):
|
||||
self.model = MassiveRLNetwork().to(device)
|
||||
self.optimizer = torch.optim.AdamW(lr=1e-5, weight_decay=1e-6)
|
||||
self.scaler = torch.cuda.amp.GradScaler() # Mixed precision
|
||||
|
||||
def predict(self, cob_features) -> Dict[str, Any]:
|
||||
# Returns: predicted_direction, confidence, value, probabilities
|
||||
|
||||
def train_step(self, features, targets) -> float:
|
||||
# Combined loss: direction + value + confidence
|
||||
# Uses gradient clipping and mixed precision
|
||||
```
|
||||
|
||||
## Input Data Format
|
||||
|
||||
### COB Features (2000-dimensional):
|
||||
The model expected structured COB features containing:
|
||||
- **Order Book Levels**: Bid/ask prices and volumes at multiple levels
|
||||
- **Market Microstructure**: Spread, depth, imbalance ratios
|
||||
- **Temporal Features**: Order flow dynamics, recent changes
|
||||
- **Aggregated Metrics**: Volume-weighted averages, momentum indicators
|
||||
|
||||
### Target Training Data:
|
||||
```python
|
||||
targets = {
|
||||
'direction': torch.tensor([0, 1, 2]), # 0=DOWN, 1=SIDEWAYS, 2=UP
|
||||
'value': torch.tensor([reward_value]), # RL value estimation
|
||||
'confidence': torch.tensor([0.0, 1.0]) # Confidence in prediction
|
||||
}
|
||||
```
|
||||
|
||||
## Training Methodology
|
||||
|
||||
### Loss Function:
|
||||
```python
|
||||
def _calculate_loss(outputs, targets):
|
||||
direction_loss = F.cross_entropy(outputs['price_logits'], targets['direction'])
|
||||
value_loss = F.mse_loss(outputs['value'], targets['value'])
|
||||
confidence_loss = F.binary_cross_entropy(outputs['confidence'], targets['confidence'])
|
||||
|
||||
total_loss = direction_loss + 0.5 * value_loss + 0.3 * confidence_loss
|
||||
return total_loss
|
||||
```
|
||||
|
||||
### Optimization:
|
||||
- **Optimizer**: AdamW with low learning rate (1e-5)
|
||||
- **Weight Decay**: 1e-6 for regularization
|
||||
- **Gradient Clipping**: Max norm 1.0
|
||||
- **Mixed Precision**: CUDA AMP for efficiency
|
||||
- **Batch Processing**: Designed for mini-batch training
|
||||
|
||||
## Integration Points
|
||||
|
||||
### In Trading Orchestrator:
|
||||
```python
|
||||
# Model initialization
|
||||
self.cob_rl_agent = COBRLModelInterface()
|
||||
|
||||
# During prediction
|
||||
cob_features = self._extract_cob_features(symbol) # 2000-dim array
|
||||
prediction = self.cob_rl_agent.predict(cob_features)
|
||||
```
|
||||
|
||||
### COB Data Flow:
|
||||
```
|
||||
COB Integration -> Feature Extraction -> MassiveRLNetwork -> Trading Decision
|
||||
^ ^ ^ ^
|
||||
COB Provider (2000 features) (356M params) (BUY/SELL/HOLD)
|
||||
```
|
||||
|
||||
## Performance Characteristics
|
||||
|
||||
### Memory Usage:
|
||||
- **Model Parameters**: ~1.4GB (356M × 4 bytes)
|
||||
- **Activations**: ~100MB (during inference)
|
||||
- **Total GPU Memory**: ~2GB for inference, ~4GB for training
|
||||
|
||||
### Computational Complexity:
|
||||
- **FLOPs per Inference**: ~700M operations
|
||||
- **Target Latency**: 200ms per prediction
|
||||
- **Hardware Requirements**: GPU with 4GB+ VRAM
|
||||
|
||||
## Issues Identified
|
||||
|
||||
### Data Quality Problems:
|
||||
1. **COB Data Inconsistency**: Raw COB data had quality issues
|
||||
2. **Feature Engineering**: 2000-dimensional features needed better preprocessing
|
||||
3. **Missing Market Context**: Isolated COB analysis without broader market view
|
||||
4. **Temporal Alignment**: COB timestamps not properly synchronized
|
||||
|
||||
### Architecture Limitations:
|
||||
1. **Massive Parameter Count**: 356M params for specialized task may be overkill
|
||||
2. **Context Isolation**: No integration with price/volume patterns from other models
|
||||
3. **Training Data**: Insufficient quality labeled data for RL training
|
||||
4. **Real-time Performance**: 200ms latency target challenging for 356M model
|
||||
|
||||
## Future Improvement Strategy
|
||||
|
||||
### When COB Data Quality is Resolved:
|
||||
|
||||
#### Phase 1: Data Infrastructure
|
||||
```python
|
||||
# Improved COB data pipeline
|
||||
class HighQualityCOBProvider:
|
||||
def __init__(self):
|
||||
self.quality_validators = [...]
|
||||
self.feature_normalizers = [...]
|
||||
self.temporal_aligners = [...]
|
||||
|
||||
def get_quality_cob_features(self, symbol: str) -> np.ndarray:
|
||||
# Return validated, normalized, properly timestamped COB features
|
||||
pass
|
||||
```
|
||||
|
||||
#### Phase 2: Architecture Optimization
|
||||
```python
|
||||
# More efficient architecture
|
||||
class OptimizedCOBNetwork(nn.Module):
|
||||
def __init__(self, input_size=1000, hidden_size=1024, num_layers=6):
|
||||
# Reduced parameter count: ~100M instead of 356M
|
||||
# Better efficiency while maintaining capability
|
||||
pass
|
||||
```
|
||||
|
||||
#### Phase 3: Integration Enhancement
|
||||
```python
|
||||
# Hybrid approach: COB + Market Context
|
||||
class HybridCOBCNNModel(nn.Module):
|
||||
def __init__(self):
|
||||
self.cob_encoder = OptimizedCOBNetwork()
|
||||
self.market_encoder = EnhancedCNN()
|
||||
self.fusion_layer = AttentionFusion()
|
||||
|
||||
def forward(self, cob_features, market_features):
|
||||
# Combine COB microstructure with broader market patterns
|
||||
pass
|
||||
```
|
||||
|
||||
## Removal Justification
|
||||
|
||||
### Why Removed Now:
|
||||
1. **COB Data Quality**: Current COB data pipeline has quality issues
|
||||
2. **Parameter Efficiency**: 356M params not justified without quality data
|
||||
3. **Development Focus**: Better to fix data pipeline first
|
||||
4. **Code Cleanliness**: Remove complexity while preserving knowledge
|
||||
|
||||
### Preservation Strategy:
|
||||
1. **Complete Documentation**: This document preserves full architecture
|
||||
2. **Interface Compatibility**: Easy to recreate interface when needed
|
||||
3. **Test Framework**: Existing tests can validate future recreation
|
||||
4. **Integration Points**: Clear documentation of how to reintegrate
|
||||
|
||||
## Recreation Checklist
|
||||
|
||||
When ready to recreate an improved COB model:
|
||||
|
||||
- [ ] Verify COB data quality and consistency
|
||||
- [ ] Implement proper feature engineering pipeline
|
||||
- [ ] Design architecture with appropriate parameter count
|
||||
- [ ] Create comprehensive training dataset
|
||||
- [ ] Implement proper integration with other models
|
||||
- [ ] Validate real-time performance requirements
|
||||
- [ ] Test extensively before production deployment
|
||||
|
||||
## Code Preservation
|
||||
|
||||
Original files preserved in git history:
|
||||
- `NN/models/cob_rl_model.py` (full implementation)
|
||||
- Integration code in `core/orchestrator.py`
|
||||
- Related test files
|
||||
|
||||
**Note**: This documentation ensures the COB model can be accurately recreated when COB data quality issues are resolved and the massive parameter advantage can be properly evaluated.
|
71
check_data_stream_status.py
Normal file
71
check_data_stream_status.py
Normal file
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Data Stream Status Checker
|
||||
|
||||
This script provides better information about the data stream status
|
||||
when the dashboard is running.
|
||||
"""
|
||||
|
||||
import requests
|
||||
import json
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
def check_dashboard_status():
|
||||
"""Check if dashboard is running and get basic status"""
|
||||
try:
|
||||
response = requests.get('http://127.0.0.1:8050', timeout=3)
|
||||
if response.status_code == 200:
|
||||
return True, "Dashboard is running"
|
||||
else:
|
||||
return False, f"Dashboard responded with status {response.status_code}"
|
||||
except requests.exceptions.ConnectionError:
|
||||
return False, "Dashboard not running (connection refused)"
|
||||
except Exception as e:
|
||||
return False, f"Error checking dashboard: {e}"
|
||||
|
||||
def main():
|
||||
print("🔍 Data Stream Status Check")
|
||||
print("=" * 50)
|
||||
|
||||
# Check if dashboard is running
|
||||
dashboard_running, dashboard_msg = check_dashboard_status()
|
||||
|
||||
if dashboard_running:
|
||||
print("✅ Dashboard Status: RUNNING")
|
||||
print(f" URL: http://127.0.0.1:8050")
|
||||
print(f" Message: {dashboard_msg}")
|
||||
print()
|
||||
print("📊 Data Stream Information:")
|
||||
print(" The data stream monitor is running inside the dashboard process.")
|
||||
print(" You should see data stream output in the dashboard console.")
|
||||
print()
|
||||
print("🔧 How to Access Data Stream:")
|
||||
print(" 1. Check the dashboard console output for data stream samples")
|
||||
print(" 2. The dashboard automatically starts data streaming")
|
||||
print(" 3. Data is being collected and displayed in real-time")
|
||||
print()
|
||||
print("📝 Expected Console Output (in dashboard terminal):")
|
||||
print(" =================================================")
|
||||
print(" DATA STREAM SAMPLE - 16:10:30")
|
||||
print(" =================================================")
|
||||
print(" OHLCV (1m): ETH/USDT | O:4335.67 H:4338.92 L:4334.21 C:4336.67 V:125.8")
|
||||
print(" TICK: ETH/USDT | Price:4336.67 Vol:0.0456 Side:buy")
|
||||
print(" MODEL: DQN | Conf:0.78 Pred:BUY Loss:0.0234")
|
||||
print(" =================================================")
|
||||
print()
|
||||
print("💡 Note: The data_stream_control.py script cannot access the")
|
||||
print(" dashboard's data stream due to process isolation.")
|
||||
print(" The data stream is active and working within the dashboard.")
|
||||
|
||||
else:
|
||||
print("❌ Dashboard Status: NOT RUNNING")
|
||||
print(f" Error: {dashboard_msg}")
|
||||
print()
|
||||
print("🔧 To start the dashboard:")
|
||||
print(" python run_clean_dashboard.py")
|
||||
print()
|
||||
print(" Then check this status again.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -341,11 +341,11 @@ class TradingOrchestrator:
|
||||
logger.warning("Extrema trainer not available")
|
||||
self.extrema_trainer = None
|
||||
|
||||
# COB RL functionality is now integrated into the Enhanced CNN model
|
||||
# The Enhanced CNN already receives COB data and has microstructure attention
|
||||
# This eliminates redundancy and improves context integration
|
||||
logger.info("COB RL functionality integrated into Enhanced CNN - no separate model needed")
|
||||
self.cob_rl_agent = None # Deprecated in favor of Enhanced CNN integration
|
||||
# COB RL Model REMOVED - See COB_MODEL_ARCHITECTURE_DOCUMENTATION.md
|
||||
# Reason: Need quality COB data first before evaluating massive parameter benefit
|
||||
# Will recreate improved version when COB data pipeline is fixed
|
||||
logger.info("COB RL model removed - focusing on COB data quality first")
|
||||
self.cob_rl_agent = None
|
||||
|
||||
# Initialize TRANSFORMER Model
|
||||
try:
|
||||
@@ -488,9 +488,9 @@ class TradingOrchestrator:
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to register Extrema Trainer: {e}")
|
||||
|
||||
# COB RL functionality is now integrated into Enhanced CNN
|
||||
# No separate registration needed - COB analysis is part of CNN microstructure attention
|
||||
logger.info("COB RL functionality integrated into Enhanced CNN - no separate registration needed")
|
||||
# COB RL Model registration removed - model was removed for cleanup
|
||||
# See COB_MODEL_ARCHITECTURE_DOCUMENTATION.md for recreation details
|
||||
logger.info("COB RL model registration skipped - model removed pending COB data quality improvements")
|
||||
|
||||
# Register Transformer Model
|
||||
if hasattr(self, 'transformer_model') and self.transformer_model:
|
||||
@@ -548,7 +548,7 @@ class TradingOrchestrator:
|
||||
# Normalize weights after all registrations
|
||||
self._normalize_weights()
|
||||
logger.info(f"Current model weights: {self.model_weights}")
|
||||
logger.info("COB_RL consolidation completed - Enhanced CNN now handles microstructure analysis")
|
||||
logger.info("COB_RL model removed - cleaner architecture pending COB data quality fixes")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error initializing ML models: {e}")
|
||||
|
@@ -25,6 +25,15 @@ from data_stream_monitor import get_data_stream_monitor
|
||||
logger = logging.getLogger(__name__)
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
|
||||
def check_dashboard_running():
|
||||
"""Check if the dashboard is currently running"""
|
||||
try:
|
||||
import requests
|
||||
response = requests.get('http://127.0.0.1:8050', timeout=2)
|
||||
return response.status_code == 200
|
||||
except:
|
||||
return False
|
||||
|
||||
def main():
|
||||
if len(sys.argv) < 2:
|
||||
print("Usage: python data_stream_control.py <command>")
|
||||
@@ -39,13 +48,28 @@ def main():
|
||||
|
||||
command = sys.argv[1].lower()
|
||||
|
||||
# Check if dashboard is running first
|
||||
if not check_dashboard_running():
|
||||
print("❌ Dashboard not running!")
|
||||
print(" The data stream requires the dashboard to be active.")
|
||||
print(" Please start the dashboard first:")
|
||||
print(" python run_clean_dashboard.py")
|
||||
print()
|
||||
print(" Then use this control script to manage streaming.")
|
||||
return
|
||||
|
||||
try:
|
||||
# Get the monitor instance (will be None if not initialized)
|
||||
monitor = get_data_stream_monitor()
|
||||
|
||||
if command == 'start':
|
||||
if monitor is None:
|
||||
print("ERROR: Data stream monitor not initialized. Run the dashboard first.")
|
||||
if monitor.orchestrator is None or monitor.data_provider is None:
|
||||
print("❌ ERROR: Data stream monitor not properly initialized.")
|
||||
print(" The data stream requires active orchestrator and data provider.")
|
||||
print(" Please start the dashboard first:")
|
||||
print(" python run_clean_dashboard.py")
|
||||
print()
|
||||
print(" Then use this control script to manage streaming.")
|
||||
return
|
||||
|
||||
if not hasattr(monitor, 'is_streaming') or not monitor.is_streaming:
|
||||
@@ -89,9 +113,27 @@ def main():
|
||||
|
||||
elif command == 'status':
|
||||
if monitor:
|
||||
if monitor.orchestrator is None or monitor.data_provider is None:
|
||||
print("✅ Dashboard is running at http://127.0.0.1:8050")
|
||||
print("❌ Data Stream Status: NOT CONNECTED")
|
||||
print(" Orchestrator:", "Missing" if monitor.orchestrator is None else "Connected")
|
||||
print(" Data Provider:", "Missing" if monitor.data_provider is None else "Connected")
|
||||
print()
|
||||
print(" The dashboard is running but the data stream monitor")
|
||||
print(" is not properly connected to the trading system.")
|
||||
print()
|
||||
print(" 💡 SOLUTION: The data stream is actually running inside")
|
||||
print(" the dashboard process! Check the dashboard console output")
|
||||
print(" for live data stream samples.")
|
||||
print()
|
||||
print(" For detailed status, run:")
|
||||
print(" python check_data_stream_status.py")
|
||||
else:
|
||||
status = "ACTIVE" if monitor.is_streaming else "INACTIVE"
|
||||
format_type = "compact" if monitor.stream_config.get('compact_format', False) else "detailed"
|
||||
print(f"Data Stream Status: {status}")
|
||||
print(f"✅ Data Stream Status: {status}")
|
||||
print(f" Orchestrator: Connected")
|
||||
print(f" Data Provider: Connected")
|
||||
print(f" Output Format: {format_type}")
|
||||
print(f" Sampling Rate: {monitor.stream_config.get('sampling_rate', 1.0)} seconds")
|
||||
|
||||
@@ -100,7 +142,7 @@ def main():
|
||||
for stream_name, buffer in monitor.data_streams.items():
|
||||
print(f" {stream_name}: {len(buffer)} entries")
|
||||
else:
|
||||
print("Data stream monitor not initialized.")
|
||||
print("ERROR: Data stream monitor not initialized.")
|
||||
|
||||
else:
|
||||
print(f"Unknown command: {command}")
|
||||
|
@@ -480,5 +480,14 @@ def get_data_stream_monitor(orchestrator=None, data_provider=None, training_syst
|
||||
global _data_stream_monitor
|
||||
if _data_stream_monitor is None:
|
||||
_data_stream_monitor = DataStreamMonitor(orchestrator, data_provider, training_system)
|
||||
elif orchestrator is not None or data_provider is not None or training_system is not None:
|
||||
# Update existing instance with new connections if provided
|
||||
if orchestrator is not None:
|
||||
_data_stream_monitor.orchestrator = orchestrator
|
||||
if data_provider is not None:
|
||||
_data_stream_monitor.data_provider = data_provider
|
||||
if training_system is not None:
|
||||
_data_stream_monitor.training_system = training_system
|
||||
logger.info("Updated existing DataStreamMonitor with new connections")
|
||||
return _data_stream_monitor
|
||||
|
||||
|
@@ -36,8 +36,8 @@ class ModelCheckpointSaver:
|
||||
if hasattr(self.orchestrator, 'extrema_trainer') and self.orchestrator.extrema_trainer:
|
||||
results['extrema_trainer'] = self._save_extrema_checkpoint(force)
|
||||
|
||||
# COB RL functionality is now integrated into Enhanced CNN
|
||||
# No separate checkpoint needed
|
||||
# COB RL model removed - see COB_MODEL_ARCHITECTURE_DOCUMENTATION.md
|
||||
# Will recreate when COB data quality is improved
|
||||
|
||||
# Save Transformer
|
||||
if hasattr(self.orchestrator, 'transformer_trainer') and self.orchestrator.transformer_trainer:
|
||||
@@ -219,7 +219,7 @@ class ModelCheckpointSaver:
|
||||
model_exists = True
|
||||
elif model_name == 'extrema_trainer' and hasattr(self.orchestrator, 'extrema_trainer') and self.orchestrator.extrema_trainer:
|
||||
model_exists = True
|
||||
# COB RL functionality integrated into Enhanced CNN - no separate model
|
||||
# COB RL model removed - focusing on COB data quality first
|
||||
elif model_name == 'transformer' and hasattr(self.orchestrator, 'transformer_model') and self.orchestrator.transformer_model:
|
||||
model_exists = True
|
||||
elif model_name == 'decision' and hasattr(self.orchestrator, 'decision_model') and self.orchestrator.decision_model:
|
||||
|
Reference in New Issue
Block a user