#!/usr/bin/env python3
"""
Test Script for Real-time RL COB Trader

This script tests the real-time reinforcement learning system to ensure:
1. Proper model initialization and parameter count (~1B parameters)
2. COB data integration and feature extraction
3. Real-time inference pipeline
4. Signal accumulation and consensus
5. Training loop functionality
6. Trade execution integration

Run this before deploying the live system.
"""

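# Usage (assumed invocation; run from the repo root so the `core` package is importable):
#   python test_realtime_rl_cob_trader.py
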
import asyncio
import logging
import numpy as np
import torch
import time
import json
import os
from datetime import datetime
from typing import Dict, Any

# Local imports
from core.realtime_rl_cob_trader import RealtimeRLCOBTrader, MassiveRLNetwork, PredictionResult
from core.trading_executor import TradingExecutor

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

logger = logging.getLogger(__name__)

class RealtimeRLTester:
    """
    Comprehensive tester for Real-time RL COB Trader
    """

    def __init__(self):
        self.test_results = {}
        self.trader = None
        self.trading_executor = None

    async def run_all_tests(self):
        """Run all tests and generate report"""
        logger.info("=" * 60)
        logger.info("REAL-TIME RL COB TRADER TESTING SUITE")
        logger.info("=" * 60)

        tests = [
            self.test_model_initialization,
            self.test_model_parameter_count,
            self.test_feature_extraction,
            self.test_inference_performance,
            self.test_signal_accumulation,
            self.test_training_pipeline,
            self.test_trading_integration,
            self.test_performance_monitoring
        ]

        for test in tests:
            try:
                await test()
            except Exception as e:
                logger.error(f"Test {test.__name__} failed: {e}")
                self.test_results[test.__name__] = {'status': 'FAILED', 'error': str(e)}

        await self.generate_test_report()

    async def test_model_initialization(self):
        """Test model initialization and architecture"""
        logger.info("🧠 Testing Model Initialization...")

        try:
            # Test model creation
            model = MassiveRLNetwork(input_size=2000, hidden_size=4096, num_layers=12)

            # Check if CUDA is available
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
            model = model.to(device)

            # Test forward pass
            batch_size = 4
            test_input = torch.randn(batch_size, 2000).to(device)

            with torch.no_grad():
                outputs = model(test_input)

            # Verify outputs
            assert 'price_logits' in outputs
            assert 'value' in outputs
            assert 'confidence' in outputs
            assert 'features' in outputs

            assert outputs['price_logits'].shape == (batch_size, 3)  # DOWN, SIDEWAYS, UP
            assert outputs['value'].shape == (batch_size, 1)
            assert outputs['confidence'].shape == (batch_size, 1)

            self.test_results['test_model_initialization'] = {
                'status': 'PASSED',
                'device': str(device),
                'output_shapes': {k: list(v.shape) for k, v in outputs.items()}
            }

            logger.info("✅ Model initialization test PASSED")

        except Exception as e:
            self.test_results['test_model_initialization'] = {'status': 'FAILED', 'error': str(e)}
            raise

    async def test_model_parameter_count(self):
        """Test that model has approximately 1B parameters"""
        logger.info("🔢 Testing Model Parameter Count...")

        try:
            model = MassiveRLNetwork(input_size=2000, hidden_size=4096, num_layers=12)

            total_params = sum(p.numel() for p in model.parameters())
            trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

            logger.info(f"Total parameters: {total_params:,}")
            logger.info(f"Trainable parameters: {trainable_params:,}")

            self.test_results['test_model_parameter_count'] = {
                'status': 'PASSED',
                'total_parameters': total_params,
                'trainable_parameters': trainable_params,
                'parameter_size_gb': (total_params * 4) / (1024**3),  # 4 bytes per float32
                'is_massive': total_params > 100_000_000  # At least 100M parameters
            }

            logger.info(f"✅ Model has {total_params:,} parameters ({total_params/1e9:.2f}B)")

        except Exception as e:
            self.test_results['test_model_parameter_count'] = {'status': 'FAILED', 'error': str(e)}
            raise

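    # A minimal sketch of the memory arithmetic used in the test above, kept as a
    # reference helper; it is not called by the test suite itself.
    @staticmethod
    def estimate_model_memory_gb(total_params: int, bytes_per_param: int = 4) -> float:
        """Rough in-memory size of the weights alone: 4 bytes/param for float32,
        2 for float16. Optimizer state (e.g. Adam's two moment buffers) roughly
        triples the float32 figure during training."""
        return (total_params * bytes_per_param) / (1024 ** 3)
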
    async def test_feature_extraction(self):
        """Test feature extraction from COB data"""
        logger.info("🔍 Testing Feature Extraction...")

        try:
            # Initialize trader
            self.trading_executor = TradingExecutor(simulation_mode=True)
            self.trader = RealtimeRLCOBTrader(
                symbols=['BTC/USDT'],
                trading_executor=self.trading_executor,
                inference_interval_ms=1000  # Slower for testing
            )

            # Create mock COB data
            mock_cob_data = {
                'state': np.random.randn(1500),  # Mock state features
                'timestamp': datetime.now(),
                'type': 'cob_state'
            }

            # Test feature extraction
            features = self.trader._extract_features('BTC/USDT', mock_cob_data)

            assert features is not None
            assert len(features) == 2000  # Target feature size
            assert features.dtype == np.float32
            assert not np.any(np.isnan(features))
            assert not np.any(np.isinf(features))

            # Test normalization
            assert np.abs(np.mean(features)) < 1.0  # Roughly normalized
            assert np.std(features) < 10.0  # Not too spread out

            self.test_results['test_feature_extraction'] = {
                'status': 'PASSED',
                'feature_size': len(features),
                'feature_range': [float(np.min(features)), float(np.max(features))],
                'feature_stats': {
                    'mean': float(np.mean(features)),
                    'std': float(np.std(features)),
                    'median': float(np.median(features))
                }
            }

            logger.info("✅ Feature extraction test PASSED")

        except Exception as e:
            self.test_results['test_feature_extraction'] = {'status': 'FAILED', 'error': str(e)}
            raise

    async def test_inference_performance(self):
        """Test inference speed and quality"""
        logger.info("⚡ Testing Inference Performance...")

        try:
            if not self.trader:
                self.trading_executor = TradingExecutor(simulation_mode=True)
                self.trader = RealtimeRLCOBTrader(
                    symbols=['BTC/USDT'],
                    trading_executor=self.trading_executor
                )

            # Test multiple inferences
            num_tests = 10
            inference_times = []

            for i in range(num_tests):
                # Create test features
                test_features = np.random.randn(2000).astype(np.float32)
                test_features = self.trader._normalize_features(test_features)

                # Time inference
                start_time = time.time()
                prediction = self.trader._predict('BTC/USDT', test_features)
                inference_time = (time.time() - start_time) * 1000

                inference_times.append(inference_time)

                # Verify prediction structure
                assert 'direction' in prediction
                assert 'confidence' in prediction
                assert 'change' in prediction
                assert 'value' in prediction

                assert 0 <= prediction['direction'] <= 2
                assert 0.0 <= prediction['confidence'] <= 1.0
                assert isinstance(prediction['change'], float)
                assert isinstance(prediction['value'], float)

            avg_inference_time = np.mean(inference_times)
            max_inference_time = np.max(inference_times)

            # Check if inference is fast enough (target: <50ms per inference)
            inference_target_ms = 50.0

            self.test_results['test_inference_performance'] = {
                'status': 'PASSED' if avg_inference_time < inference_target_ms else 'WARNING',
                'average_inference_time_ms': float(avg_inference_time),
                'max_inference_time_ms': float(max_inference_time),
                'target_time_ms': inference_target_ms,
                'meets_target': avg_inference_time < inference_target_ms,
                'inferences_per_second': 1000.0 / avg_inference_time
            }

            logger.info(f"✅ Average inference time: {avg_inference_time:.2f}ms")
            logger.info(f"✅ Max inference time: {max_inference_time:.2f}ms")
            logger.info(f"✅ Inferences per second: {1000.0/avg_inference_time:.1f}")

        except Exception as e:
            self.test_results['test_inference_performance'] = {'status': 'FAILED', 'error': str(e)}
            raise

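    # Illustrative alternative (not used above): time.perf_counter() has far finer
    # resolution than time.time() on some platforms (notably Windows, ~15 ms ticks),
    # which matters when asserting sub-50ms latencies.
    @staticmethod
    def _time_call_ms(fn, *args, **kwargs) -> float:
        """Return the wall-clock duration of a single call in milliseconds."""
        start = time.perf_counter()
        fn(*args, **kwargs)
        return (time.perf_counter() - start) * 1000.0
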
    async def test_signal_accumulation(self):
        """Test signal accumulation and consensus logic"""
        logger.info("🎯 Testing Signal Accumulation...")

        try:
            if not self.trader:
                self.trading_executor = TradingExecutor(simulation_mode=True)
                self.trader = RealtimeRLCOBTrader(
                    symbols=['BTC/USDT'],
                    trading_executor=self.trading_executor,
                    required_confident_predictions=3
                )

            symbol = 'BTC/USDT'
            accumulator = self.trader.signal_accumulators[symbol]

            # Test adding signals
            test_predictions = []
            for i in range(5):
                prediction = PredictionResult(
                    timestamp=datetime.now(),
                    symbol=symbol,
                    predicted_direction=2,  # UP
                    confidence=0.8,
                    predicted_change=0.001,
                    features=np.random.randn(2000).astype(np.float32)
                )
                test_predictions.append(prediction)
                self.trader._add_signal(symbol, prediction)

            # Check accumulator state
            assert len(accumulator.signals) == 5
            # Compare with a tolerance: repeated float addition of 0.8 does not
            # produce exactly 5 * 0.8, so strict equality can fail spuriously.
            assert abs(accumulator.confidence_sum - 5 * 0.8) < 1e-9
            assert accumulator.total_predictions == 5

            # Test consensus logic (simulate processing)
            recent_signals = list(accumulator.signals)[-3:]
            directions = [signal.predicted_direction for signal in recent_signals]

            # All should be direction 2 (UP)
            direction_counts = {0: 0, 1: 0, 2: 0}
            for direction in directions:
                direction_counts[direction] += 1

            dominant_direction = max(direction_counts, key=direction_counts.get)
            consensus_count = direction_counts[dominant_direction]

            assert dominant_direction == 2
            assert consensus_count == 3

            self.test_results['test_signal_accumulation'] = {
                'status': 'PASSED',
                'signals_added': len(accumulator.signals),
                'confidence_sum': accumulator.confidence_sum,
                'consensus_direction': dominant_direction,
                'consensus_count': consensus_count
            }

            logger.info("✅ Signal accumulation test PASSED")

        except Exception as e:
            self.test_results['test_signal_accumulation'] = {'status': 'FAILED', 'error': str(e)}
            raise

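    # Illustrative sketch (not wired into the tests): the same majority-vote
    # consensus as above, expressed with collections.Counter. Assumes directions
    # is a sequence of ints in {0: DOWN, 1: SIDEWAYS, 2: UP}.
    @staticmethod
    def _consensus_sketch(directions):
        from collections import Counter
        dominant_direction, consensus_count = Counter(directions).most_common(1)[0]
        return dominant_direction, consensus_count
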
    async def test_training_pipeline(self):
        """Test training pipeline functionality"""
        logger.info("🧠 Testing Training Pipeline...")

        try:
            if not self.trader:
                self.trading_executor = TradingExecutor(simulation_mode=True)
                self.trader = RealtimeRLCOBTrader(
                    symbols=['BTC/USDT'],
                    trading_executor=self.trading_executor
                )

            symbol = 'BTC/USDT'

            # Create mock training data
            test_predictions = []
            for i in range(10):
                prediction = PredictionResult(
                    timestamp=datetime.now(),
                    symbol=symbol,
                    predicted_direction=np.random.randint(0, 3),
                    confidence=np.random.uniform(0.5, 1.0),
                    predicted_change=np.random.uniform(-0.001, 0.001),
                    features=np.random.randn(2000).astype(np.float32),
                    actual_direction=np.random.randint(0, 3),
                    actual_change=np.random.uniform(-0.001, 0.001),
                    reward=np.random.uniform(-1.0, 1.0)
                )
                test_predictions.append(prediction)

            # Test training batch
            loss = await self.trader._train_batch(symbol, test_predictions)

            assert isinstance(loss, float)
            assert not np.isnan(loss)
            assert not np.isinf(loss)
            assert loss >= 0.0  # Loss should be non-negative

            self.test_results['test_training_pipeline'] = {
                'status': 'PASSED',
                'training_loss': float(loss),
                'batch_size': len(test_predictions),
                'training_successful': True
            }

            logger.info(f"✅ Training pipeline test PASSED (loss: {loss:.6f})")

        except Exception as e:
            self.test_results['test_training_pipeline'] = {'status': 'FAILED', 'error': str(e)}
            raise

    async def test_trading_integration(self):
        """Test integration with trading executor"""
        logger.info("💰 Testing Trading Integration...")

        try:
            # Initialize with simulation mode
            trading_executor = TradingExecutor(simulation_mode=True)

            # Test signal execution
            success = trading_executor.execute_signal(
                symbol='BTC/USDT',
                action='BUY',
                confidence=0.8,
                current_price=50000.0
            )

            # In simulation mode, this should always succeed
            assert success

            # Check positions
            positions = trading_executor.get_positions()
            assert 'BTC/USDT' in positions

            # Test sell signal
            success = trading_executor.execute_signal(
                symbol='BTC/USDT',
                action='SELL',
                confidence=0.8,
                current_price=50100.0
            )

            assert success

            # Check trade history
            trade_history = trading_executor.get_trade_history()
            assert len(trade_history) > 0

            last_trade = trade_history[-1]
            assert last_trade.symbol == 'BTC/USDT'
            assert last_trade.pnl != 0  # Should have some P&L

            self.test_results['test_trading_integration'] = {
                'status': 'PASSED',
                'simulation_mode': True,
                'trades_executed': len(trade_history),
                'last_trade_pnl': float(last_trade.pnl)
            }

            logger.info("✅ Trading integration test PASSED")

        except Exception as e:
            self.test_results['test_trading_integration'] = {'status': 'FAILED', 'error': str(e)}
            raise

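    # Illustrative sketch of the round-trip arithmetic the P&L assertion above
    # relies on (assumed semantics, not TradingExecutor's actual accounting):
    # buying at 50000.0 and selling at 50100.0 should yield positive gross P&L
    # before fees.
    @staticmethod
    def _gross_round_trip_pnl(entry_price: float, exit_price: float, quantity: float = 1.0) -> float:
        return (exit_price - entry_price) * quantity
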
    async def test_performance_monitoring(self):
        """Test performance monitoring and statistics"""
        logger.info("📊 Testing Performance Monitoring...")

        try:
            # Always build a fresh two-symbol trader here: earlier tests may have
            # created self.trader with only BTC/USDT, which would fail the
            # multi-symbol assertions below.
            self.trading_executor = TradingExecutor(simulation_mode=True)
            self.trader = RealtimeRLCOBTrader(
                symbols=['BTC/USDT', 'ETH/USDT'],
                trading_executor=self.trading_executor
            )

            # Get performance stats
            stats = self.trader.get_performance_stats()

            # Verify structure
            assert 'symbols' in stats
            assert 'training_stats' in stats
            assert 'inference_stats' in stats
            assert 'signal_stats' in stats
            assert 'model_info' in stats

            # Check symbols
            assert 'BTC/USDT' in stats['symbols']
            assert 'ETH/USDT' in stats['symbols']

            # Check model info
            for symbol in stats['symbols']:
                assert symbol in stats['model_info']
                model_info = stats['model_info'][symbol]
                assert 'total_parameters' in model_info
                assert 'trainable_parameters' in model_info
                assert model_info['total_parameters'] > 0

            self.test_results['test_performance_monitoring'] = {
                'status': 'PASSED',
                'stats_structure_valid': True,
                'symbols_tracked': len(stats['symbols']),
                'model_info_available': len(stats['model_info'])
            }

            logger.info("✅ Performance monitoring test PASSED")

        except Exception as e:
            self.test_results['test_performance_monitoring'] = {'status': 'FAILED', 'error': str(e)}
            raise

    async def generate_test_report(self):
        """Generate comprehensive test report"""
        logger.info("=" * 60)
        logger.info("REAL-TIME RL COB TRADER TEST REPORT")
        logger.info("=" * 60)

        total_tests = len(self.test_results)
        passed_tests = sum(1 for result in self.test_results.values() if result['status'] == 'PASSED')
        failed_tests = sum(1 for result in self.test_results.values() if result['status'] == 'FAILED')
        warning_tests = sum(1 for result in self.test_results.values() if result['status'] == 'WARNING')

        logger.info("📊 Test Summary:")
        logger.info(f"   Total Tests: {total_tests}")
        logger.info(f"   ✅ Passed: {passed_tests}")
        logger.info(f"   ⚠️ Warnings: {warning_tests}")
        logger.info(f"   ❌ Failed: {failed_tests}")

        success_rate = (passed_tests / total_tests) * 100 if total_tests > 0 else 0
        logger.info(f"   Success Rate: {success_rate:.1f}%")

        logger.info("\n📋 Detailed Results:")
        for test_name, result in self.test_results.items():
            status_icon = "✅" if result['status'] == 'PASSED' else "⚠️" if result['status'] == 'WARNING' else "❌"
            logger.info(f"   {status_icon} {test_name}: {result['status']}")

            if result['status'] == 'FAILED':
                logger.error(f"      Error: {result.get('error', 'Unknown error')}")

        # System readiness assessment
        logger.info("\n🎯 System Readiness Assessment:")
        if failed_tests == 0:
            if warning_tests == 0:
                logger.info("   🟢 SYSTEM READY FOR DEPLOYMENT")
                logger.info("   All tests passed. The real-time RL COB trader is ready for live operation.")
            else:
                logger.info("   🟡 SYSTEM READY WITH WARNINGS")
                logger.info("   System is functional but some performance warnings exist.")
        else:
            logger.info("   🔴 SYSTEM NOT READY")
            logger.info("   Critical issues found. Fix errors before deployment.")

        # Save detailed report
        report_data = {
            'timestamp': datetime.now().isoformat(),
            'test_summary': {
                'total_tests': total_tests,
                'passed_tests': passed_tests,
                'warning_tests': warning_tests,
                'failed_tests': failed_tests,
                'success_rate': success_rate
            },
            'test_results': self.test_results,
            'system_readiness': 'READY' if failed_tests == 0 else 'NOT_READY'
        }

        report_file = f"test_reports/realtime_rl_test_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        os.makedirs('test_reports', exist_ok=True)

        with open(report_file, 'w') as f:
            json.dump(report_data, f, indent=2, default=str)

        logger.info(f"\n📄 Detailed report saved to: {report_file}")
        logger.info("=" * 60)

async def main():
    """Main test entry point"""
    logger.info("Starting Real-time RL COB Trader Test Suite...")

    tester = RealtimeRLTester()
    await tester.run_all_tests()


if __name__ == "__main__":
    # Set event loop policy for Windows compatibility
    if hasattr(asyncio, 'WindowsProactorEventLoopPolicy'):
        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

    asyncio.run(main())