# Source: gogo2/examples/enhanced_reward_system_example.py
# Snapshot date: 2025-08-23 01:07:05 +03:00
# (266 lines, 9.6 KiB, Python)
"""
Enhanced Reward System Integration Example
This example demonstrates how to integrate the new MSE-based reward system
with the existing trading orchestrator and models.
Usage:
python examples/enhanced_reward_system_example.py
This example shows:
1. How to integrate the enhanced reward system with TradingOrchestrator
2. How to add predictions from existing models
3. How to monitor accuracy and training statistics
4. How the system handles multi-timeframe predictions and training
"""
import asyncio
import logging
import time
from datetime import datetime
# Import the integration components
from core.enhanced_reward_system_integration import (
integrate_enhanced_rewards,
start_enhanced_rewards_for_orchestrator,
add_prediction_to_enhanced_rewards
)
# Configure module-wide logging: timestamped INFO-level records so the
# demo's progress (and the integration's own log output) is visible.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Conventional module-level logger for this example script.
logger = logging.getLogger(__name__)
async def demonstrate_enhanced_reward_integration():
    """Demonstrate the enhanced reward system integration end-to-end.

    Walks through the full lifecycle against a mock orchestrator:
    integration, startup, prediction tracking, monitoring, force
    evaluation, and shutdown. Finishes by printing the four steps
    needed to wire the system into a real orchestrator.
    """
    print("=" * 80)
    print("ENHANCED REWARD SYSTEM INTEGRATION DEMONSTRATION")
    print("=" * 80)

    # Note: This is a demonstration - in real usage, you would use your
    # actual orchestrator. For this example, we'll create a mock one.
    print("\n1. Setting up mock orchestrator...")
    mock_orchestrator = create_mock_orchestrator()

    print("\n2. Integrating enhanced reward system...")
    # This is the main integration step - just one line! The call also
    # attaches itself to the orchestrator as `enhanced_reward_system`.
    enhanced_rewards = integrate_enhanced_rewards(mock_orchestrator, ['ETH/USDT', 'BTC/USDT'])

    print("\n3. Starting enhanced reward system...")
    await start_enhanced_rewards_for_orchestrator(mock_orchestrator)

    print("\n4. System is now running with enhanced rewards!")
    print(" - CNN predictions every 10 seconds (current rate)")
    print(" - Continuous inference every 5 seconds")
    print(" - Hourly multi-timeframe inference (4 predictions per hour)")
    print(" - Real-time MSE-based reward calculation")
    print(" - Automatic training when predictions are evaluated")

    # Demonstrate adding predictions from existing models
    await demonstrate_prediction_tracking(mock_orchestrator)

    # Demonstrate monitoring and statistics
    await demonstrate_monitoring(mock_orchestrator)

    # Demonstrate force evaluation for testing
    await demonstrate_force_evaluation(mock_orchestrator)

    print("\n8. Stopping enhanced reward system...")
    await mock_orchestrator.enhanced_reward_system.stop_integration()

    print("\n✅ Enhanced Reward System demonstration completed successfully!")
    print("\nTo integrate with your actual system:")
    print("1. Add these imports to your orchestrator file")
    print("2. Call integrate_enhanced_rewards(your_orchestrator) in __init__")
    print("3. Call await start_enhanced_rewards_for_orchestrator(your_orchestrator) in run()")
    print("4. Use add_prediction_to_enhanced_rewards() in your model inference code")
async def demonstrate_prediction_tracking(orchestrator):
    """Demonstrate how to track predictions from existing models.

    Feeds a batch of sample predictions (CNN, DQN, COB RL across several
    timeframes) into the enhanced reward system via
    ``add_prediction_to_enhanced_rewards`` and reports how many were added.

    Args:
        orchestrator: An orchestrator instance that has already been
            integrated with the enhanced reward system.
    """
    print("\n5. Demonstrating prediction tracking...")

    # Simulate predictions from different models and timeframes.
    # Tuple layout: (symbol, timeframe, predicted_price, direction,
    #                confidence, current_price, model_name)
    predictions = [
        # CNN predictions for multiple timeframes
        ('ETH/USDT', '1s', 3150.50, 1, 0.85, 3150.00, 'enhanced_cnn'),
        ('ETH/USDT', '1m', 3155.00, 1, 0.78, 3150.00, 'enhanced_cnn'),
        ('ETH/USDT', '1h', 3200.00, 1, 0.72, 3150.00, 'enhanced_cnn'),
        ('ETH/USDT', '1d', 3300.00, 1, 0.65, 3150.00, 'enhanced_cnn'),
        # DQN predictions
        ('ETH/USDT', '1s', 3149.00, -1, 0.70, 3150.00, 'dqn_agent'),
        ('BTC/USDT', '1s', 51200.00, 1, 0.75, 51150.00, 'dqn_agent'),
        # COB RL predictions
        ('ETH/USDT', '1s', 3151.20, 1, 0.88, 3150.00, 'cob_rl'),
        ('BTC/USDT', '1s', 51180.00, 1, 0.82, 51150.00, 'cob_rl'),
    ]

    prediction_ids = []
    for symbol, timeframe, pred_price, direction, confidence, curr_price, model in predictions:
        prediction_id = add_prediction_to_enhanced_rewards(
            orchestrator, symbol, timeframe, pred_price, direction, confidence, curr_price, model
        )
        prediction_ids.append(prediction_id)
        print(f" ✓ Added prediction: {model} predicts {symbol} {timeframe} "
              f"direction={direction} confidence={confidence:.2f}")

    print(f" 📊 Total predictions added: {len(prediction_ids)}")
async def demonstrate_monitoring(orchestrator):
    """Demonstrate monitoring and statistics.

    Pauses briefly to let the background integration process, then prints
    the integration statistics and a per-symbol / per-timeframe accuracy
    summary.

    Args:
        orchestrator: An orchestrator with an attached
            ``enhanced_reward_system``.
    """
    print("\n6. Demonstrating monitoring and statistics...")

    # Wait a bit for some background processing to occur
    await asyncio.sleep(2)

    # Get integration statistics
    stats = orchestrator.enhanced_reward_system.get_integration_statistics()
    print(" 📈 Integration Statistics:")
    print(f" - System running: {stats.get('is_running', False)}")
    print(f" - Start time: {stats.get('start_time', 'N/A')}")
    print(f" - Predictions tracked: {stats.get('total_predictions_tracked', 0)}")

    # Get accuracy summary, keyed by symbol then timeframe
    accuracy = orchestrator.enhanced_reward_system.get_model_accuracy()
    print("\n 🎯 Accuracy Summary by Symbol and Timeframe:")
    for symbol, timeframes in accuracy.items():
        print(f" - {symbol}:")
        for timeframe, metrics in timeframes.items():
            print(f" - {timeframe}: {metrics['total_predictions']} predictions, "
                  f"{metrics['direction_accuracy']:.1f}% accuracy")
async def demonstrate_force_evaluation(orchestrator):
    """Demonstrate force evaluation for testing.

    Simulates price moves on both symbols, forces the reward system to
    evaluate all outstanding predictions, then prints how many were
    evaluated in total.

    Args:
        orchestrator: An orchestrator with an attached
            ``enhanced_reward_system``.
    """
    print("\n7. Demonstrating force evaluation for testing...")

    # Simulate some price changes by pushing fresh prices into the
    # reward calculator so the pending predictions have outcomes.
    print(" 💰 Simulating price changes...")
    orchestrator.enhanced_reward_system.reward_calculator.update_price('ETH/USDT', 3152.50)
    orchestrator.enhanced_reward_system.reward_calculator.update_price('BTC/USDT', 51175.00)

    # Force evaluation of all predictions (normally time-triggered)
    print(" ⚡ Force evaluating all predictions...")
    orchestrator.enhanced_reward_system.force_evaluation_and_training()

    # Give the evaluation a moment, then read back updated statistics
    await asyncio.sleep(1)
    stats = orchestrator.enhanced_reward_system.get_integration_statistics()
    print(" 📊 Updated statistics after evaluation:")

    accuracy = orchestrator.enhanced_reward_system.get_model_accuracy()
    total_evaluated = sum(
        sum(tf_data['total_predictions'] for tf_data in symbol_data.values())
        for symbol_data in accuracy.values()
    )
    print(f" - Total predictions evaluated: {total_evaluated}")
def create_mock_orchestrator():
    """Create a mock orchestrator for demonstration purposes.

    Returns:
        An object exposing a ``data_provider`` attribute whose
        ``current_prices`` dict holds static prices for ETH/USDT and
        BTC/USDT — the minimal surface the integration touches.
    """
    class MockDataProvider:
        # Stands in for the real data provider's price lookup table.
        def __init__(self):
            self.current_prices = {
                'ETH/USDT': 3150.00,
                'BTC/USDT': 51150.00
            }

    class MockOrchestrator:
        # Bare orchestrator: only the attributes the demo needs.
        def __init__(self):
            self.data_provider = MockDataProvider()
            # Add other mock attributes as needed

    return MockOrchestrator()
def show_integration_instructions():
    """Print step-by-step instructions for integrating the enhanced
    reward system with a real TradingOrchestrator."""
    print("\n" + "=" * 80)
    print("INTEGRATION INSTRUCTIONS FOR YOUR ACTUAL SYSTEM")
    print("=" * 80)
    print("""
To integrate the enhanced reward system with your actual TradingOrchestrator:

1. ADD IMPORTS to your orchestrator.py:
```python
from core.enhanced_reward_system_integration import (
    integrate_enhanced_rewards,
    add_prediction_to_enhanced_rewards
)
```

2. INTEGRATE in TradingOrchestrator.__init__():
```python
# Add this line in your __init__ method
integrate_enhanced_rewards(self, symbols=['ETH/USDT', 'BTC/USDT'])
```

3. START in TradingOrchestrator.run():
```python
# Add this line in your run() method, after initialization
await self.enhanced_reward_system.start_integration()
```

4. ADD PREDICTIONS in your model inference code:
```python
# In your CNN/DQN/COB model inference methods, add:
prediction_id = add_prediction_to_enhanced_rewards(
    self,             # orchestrator instance
    symbol,           # e.g., 'ETH/USDT'
    timeframe,        # e.g., '1s', '1m', '1h', '1d'
    predicted_price,  # model's price prediction
    direction,        # -1 (down), 0 (neutral), 1 (up)
    confidence,       # 0.0 to 1.0
    current_price,    # current market price
    model_name        # e.g., 'enhanced_cnn', 'dqn_agent'
)
```

5. MONITOR with:
```python
# Get statistics anytime
stats = self.enhanced_reward_system.get_integration_statistics()
accuracy = self.enhanced_reward_system.get_model_accuracy()
```

The system will automatically:
- Track predictions for multiple timeframes separately
- Calculate MSE-based rewards when outcomes are available
- Trigger real-time training with enhanced rewards
- Maintain accuracy statistics for each model and timeframe
- Handle hourly multi-timeframe inference scheduling

Key Benefits:
✅ MSE-based accuracy measurement (better than simple directional accuracy)
✅ Separate tracking for up to 6 last predictions per timeframe
✅ Real-time training at each inference when outcomes available
✅ Multi-timeframe prediction support (1s, 1m, 1h, 1d)
✅ Hourly inference on all timeframes (4 predictions per hour)
✅ Models know which timeframe they're predicting on
✅ Backward compatible with existing code
""")
if __name__ == "__main__":
    # Run the demonstration (async entry point)
    asyncio.run(demonstrate_enhanced_reward_integration())
    # Show integration instructions for a real deployment
    show_integration_instructions()