""" Enhanced Reward System Integration Example This example demonstrates how to integrate the new MSE-based reward system with the existing trading orchestrator and models. Usage: python examples/enhanced_reward_system_example.py This example shows: 1. How to integrate the enhanced reward system with TradingOrchestrator 2. How to add predictions from existing models 3. How to monitor accuracy and training statistics 4. How the system handles multi-timeframe predictions and training """ import asyncio import logging import time from datetime import datetime # Import the integration components from core.enhanced_reward_system_integration import ( integrate_enhanced_rewards, start_enhanced_rewards_for_orchestrator, add_prediction_to_enhanced_rewards ) # Configure logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) logger = logging.getLogger(__name__) async def demonstrate_enhanced_reward_integration(): """Demonstrate the enhanced reward system integration""" print("=" * 80) print("ENHANCED REWARD SYSTEM INTEGRATION DEMONSTRATION") print("=" * 80) # Note: This is a demonstration - in real usage, you would use your actual orchestrator # For this example, we'll create a mock orchestrator print("\n1. Setting up mock orchestrator...") mock_orchestrator = create_mock_orchestrator() print("\n2. Integrating enhanced reward system...") # This is the main integration step - just one line! enhanced_rewards = integrate_enhanced_rewards(mock_orchestrator, ['ETH/USDT', 'BTC/USDT']) print("\n3. Starting enhanced reward system...") await start_enhanced_rewards_for_orchestrator(mock_orchestrator) print("\n4. System is now running with enhanced rewards!") print(" - CNN predictions every 10 seconds (current rate)") print(" - Continuous inference every 5 seconds") print(" - Hourly multi-timeframe inference (4 predictions per hour)") print(" - Real-time MSE-based reward calculation") print(" - Automatic training when predictions are evaluated") # Demonstrate adding predictions from existing models await demonstrate_prediction_tracking(mock_orchestrator) # Demonstrate monitoring and statistics await demonstrate_monitoring(mock_orchestrator) # Demonstrate force evaluation for testing await demonstrate_force_evaluation(mock_orchestrator) print("\n8. Stopping enhanced reward system...") await mock_orchestrator.enhanced_reward_system.stop_integration() print("\nāœ… Enhanced Reward System demonstration completed successfully!") print("\nTo integrate with your actual system:") print("1. Add these imports to your orchestrator file") print("2. Call integrate_enhanced_rewards(your_orchestrator) in __init__") print("3. Call await start_enhanced_rewards_for_orchestrator(your_orchestrator) in run()") print("4. Use add_prediction_to_enhanced_rewards() in your model inference code") async def demonstrate_prediction_tracking(orchestrator): """Demonstrate how to track predictions from existing models""" print("\n5. 

async def demonstrate_prediction_tracking(orchestrator):
    """Demonstrate how to track predictions from existing models"""
    print("\n5. Demonstrating prediction tracking...")

    # Simulate predictions from different models and timeframes
    predictions = [
        # CNN predictions for multiple timeframes
        ('ETH/USDT', '1s', 3150.50, 1, 0.85, 3150.00, 'enhanced_cnn'),
        ('ETH/USDT', '1m', 3155.00, 1, 0.78, 3150.00, 'enhanced_cnn'),
        ('ETH/USDT', '1h', 3200.00, 1, 0.72, 3150.00, 'enhanced_cnn'),
        ('ETH/USDT', '1d', 3300.00, 1, 0.65, 3150.00, 'enhanced_cnn'),

        # DQN predictions
        ('ETH/USDT', '1s', 3149.00, -1, 0.70, 3150.00, 'dqn_agent'),
        ('BTC/USDT', '1s', 51200.00, 1, 0.75, 51150.00, 'dqn_agent'),

        # COB RL predictions
        ('ETH/USDT', '1s', 3151.20, 1, 0.88, 3150.00, 'cob_rl'),
        ('BTC/USDT', '1s', 51180.00, 1, 0.82, 51150.00, 'cob_rl'),
    ]

    prediction_ids = []
    for symbol, timeframe, pred_price, direction, confidence, curr_price, model in predictions:
        prediction_id = add_prediction_to_enhanced_rewards(
            orchestrator, symbol, timeframe, pred_price, direction,
            confidence, curr_price, model
        )
        prediction_ids.append(prediction_id)

        print(f"   āœ“ Added prediction: {model} predicts {symbol} {timeframe} "
              f"direction={direction} confidence={confidence:.2f}")

    print(f"   šŸ“Š Total predictions added: {len(prediction_ids)}")


async def demonstrate_monitoring(orchestrator):
    """Demonstrate monitoring and statistics"""
    print("\n6. Demonstrating monitoring and statistics...")

    # Wait a bit for some processing
    await asyncio.sleep(2)

    # Get integration statistics
    stats = orchestrator.enhanced_reward_system.get_integration_statistics()

    print("   šŸ“ˆ Integration Statistics:")
    print(f"   - System running: {stats.get('is_running', False)}")
    print(f"   - Start time: {stats.get('start_time', 'N/A')}")
    print(f"   - Predictions tracked: {stats.get('total_predictions_tracked', 0)}")

    # Get accuracy summary
    accuracy = orchestrator.enhanced_reward_system.get_model_accuracy()

    print("\n   šŸŽÆ Accuracy Summary by Symbol and Timeframe:")
    for symbol, timeframes in accuracy.items():
        print(f"   - {symbol}:")
        for timeframe, metrics in timeframes.items():
            print(f"     - {timeframe}: {metrics['total_predictions']} predictions, "
                  f"{metrics['direction_accuracy']:.1f}% accuracy")
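
# demonstrate_monitoring() above reads a per-timeframe 'direction_accuracy'
# percentage from get_model_accuracy(). The helper below is a minimal sketch of
# how such a figure could be derived once predictions have been evaluated. The
# (predicted_direction, realized_price_change) pair format and the helper name
# are assumptions made for this example, not the integration module's actual
# bookkeeping.
def compute_direction_accuracy_sketch(evaluated_predictions) -> float:
    """Illustrative direction accuracy in percent.

    `evaluated_predictions` is assumed to be a sequence of
    (predicted_direction, realized_price_change) pairs, where the direction is
    -1 (down), 0 (neutral), or 1 (up).
    """
    if not evaluated_predictions:
        return 0.0
    hits = 0
    for predicted_direction, realized_change in evaluated_predictions:
        # Sign of the realized price change: -1, 0, or 1
        realized_direction = (realized_change > 0) - (realized_change < 0)
        if predicted_direction == realized_direction:
            hits += 1
    return 100.0 * hits / len(evaluated_predictions)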

async def demonstrate_force_evaluation(orchestrator):
    """Demonstrate force evaluation for testing"""
    print("\n7. Demonstrating force evaluation for testing...")

    # Simulate some price changes by updating prices
    print("   šŸ’° Simulating price changes...")
    orchestrator.enhanced_reward_system.reward_calculator.update_price('ETH/USDT', 3152.50)
    orchestrator.enhanced_reward_system.reward_calculator.update_price('BTC/USDT', 51175.00)

    # Force evaluation of all predictions
    print("   ⚔ Force evaluating all predictions...")
    orchestrator.enhanced_reward_system.force_evaluation_and_training()

    # Get updated statistics
    await asyncio.sleep(1)
    stats = orchestrator.enhanced_reward_system.get_integration_statistics()

    print("   šŸ“Š Updated statistics after evaluation:")
    accuracy = orchestrator.enhanced_reward_system.get_model_accuracy()
    total_evaluated = sum(
        sum(tf_data['total_predictions'] for tf_data in symbol_data.values())
        for symbol_data in accuracy.values()
    )
    print(f"   - Total predictions evaluated: {total_evaluated}")


def create_mock_orchestrator():
    """Create a mock orchestrator for demonstration purposes"""

    class MockDataProvider:
        def __init__(self):
            self.current_prices = {
                'ETH/USDT': 3150.00,
                'BTC/USDT': 51150.00
            }

    class MockOrchestrator:
        def __init__(self):
            self.data_provider = MockDataProvider()
            # Add other mock attributes as needed

    return MockOrchestrator()
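
# The demo repeatedly mentions "hourly multi-timeframe inference (4 predictions
# per hour)". The coroutine below sketches that pattern: once per hour, one
# prediction per timeframe is registered through add_prediction_to_enhanced_rewards.
# The model.predict(symbol, timeframe) interface and the price lookup via
# data_provider.current_prices are hypothetical placeholders for this example;
# the real scheduling is handled inside core.enhanced_reward_system_integration,
# and this sketch is not invoked by the demo.
async def hourly_multi_timeframe_inference_sketch(orchestrator, model, symbol: str):
    """Illustrative hourly cycle producing one prediction per timeframe."""
    timeframes = ['1s', '1m', '1h', '1d']
    while True:
        # Hypothetical current-price lookup (matches the mock data provider above)
        current_price = orchestrator.data_provider.current_prices[symbol]
        for timeframe in timeframes:
            # Hypothetical model interface returning (price, direction, confidence)
            predicted_price, direction, confidence = model.predict(symbol, timeframe)
            add_prediction_to_enhanced_rewards(
                orchestrator, symbol, timeframe,
                predicted_price, direction, confidence,
                current_price, 'enhanced_cnn'
            )
        await asyncio.sleep(3600)  # wait one hour before the next cycle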

def show_integration_instructions():
    """Show step-by-step integration instructions"""
    print("\n" + "=" * 80)
    print("INTEGRATION INSTRUCTIONS FOR YOUR ACTUAL SYSTEM")
    print("=" * 80)

    print("""
To integrate the enhanced reward system with your actual TradingOrchestrator:

1. ADD IMPORTS to your orchestrator.py:

   ```python
   from core.enhanced_reward_system_integration import (
       integrate_enhanced_rewards,
       add_prediction_to_enhanced_rewards
   )
   ```

2. INTEGRATE in TradingOrchestrator.__init__():

   ```python
   # Add this line in your __init__ method
   integrate_enhanced_rewards(self, symbols=['ETH/USDT', 'BTC/USDT'])
   ```

3. START in TradingOrchestrator.run():

   ```python
   # Add this line in your run() method, after initialization
   await self.enhanced_reward_system.start_integration()
   ```

4. ADD PREDICTIONS in your model inference code:

   ```python
   # In your CNN/DQN/COB model inference methods, add:
   prediction_id = add_prediction_to_enhanced_rewards(
       self,              # orchestrator instance
       symbol,            # e.g., 'ETH/USDT'
       timeframe,         # e.g., '1s', '1m', '1h', '1d'
       predicted_price,   # model's price prediction
       direction,         # -1 (down), 0 (neutral), 1 (up)
       confidence,        # 0.0 to 1.0
       current_price,     # current market price
       model_name         # e.g., 'enhanced_cnn', 'dqn_agent'
   )
   ```

5. MONITOR with:

   ```python
   # Get statistics anytime
   stats = self.enhanced_reward_system.get_integration_statistics()
   accuracy = self.enhanced_reward_system.get_model_accuracy()
   ```

The system will automatically:
- Track predictions for multiple timeframes separately
- Calculate MSE-based rewards when outcomes are available
- Trigger real-time training with enhanced rewards
- Maintain accuracy statistics for each model and timeframe
- Handle hourly multi-timeframe inference scheduling

Key Benefits:
āœ… MSE-based accuracy measurement (better than simple directional accuracy)
āœ… Separate tracking of up to the last 6 predictions per timeframe
āœ… Real-time training at each inference when outcomes are available
āœ… Multi-timeframe prediction support (1s, 1m, 1h, 1d)
āœ… Hourly inference on all timeframes (4 predictions per hour)
āœ… Models know which timeframe they're predicting on
āœ… Backward compatible with existing code
""")


if __name__ == "__main__":
    # Run the demonstration
    asyncio.run(demonstrate_enhanced_reward_integration())

    # Show integration instructions
    show_integration_instructions()