"""
Enhanced Reward System Integration Example

This example demonstrates how to integrate the new MSE-based reward system
with the existing trading orchestrator and models.

Usage:
    python examples/enhanced_reward_system_example.py

This example shows:
1. How to integrate the enhanced reward system with TradingOrchestrator
2. How to add predictions from existing models
3. How to monitor accuracy and training statistics
4. How the system handles multi-timeframe predictions and training
"""

import asyncio
import logging
import time
from datetime import datetime

# Import the integration components
from core.enhanced_reward_system_integration import (
    integrate_enhanced_rewards,
    start_enhanced_rewards_for_orchestrator,
    add_prediction_to_enhanced_rewards
)

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


async def demonstrate_enhanced_reward_integration():
    """Walk through a complete integration of the enhanced reward system.

    Builds a mock orchestrator, wires in the reward system, runs the
    prediction-tracking / monitoring / force-evaluation demos, then shuts
    the system down and prints the real-world integration checklist.
    """
    banner = "=" * 80
    print(banner)
    print("ENHANCED REWARD SYSTEM INTEGRATION DEMONSTRATION")
    print(banner)

    # Note: This is a demonstration - in real usage, you would use your
    # actual orchestrator. Here we stand up a mock one instead.
    print("\n1. Setting up mock orchestrator...")
    orchestrator = create_mock_orchestrator()

    print("\n2. Integrating enhanced reward system...")
    # This is the main integration step - just one line!
    integrate_enhanced_rewards(orchestrator, ['ETH/USDT', 'BTC/USDT'])

    print("\n3. Starting enhanced reward system...")
    await start_enhanced_rewards_for_orchestrator(orchestrator)

    print("\n4. System is now running with enhanced rewards!")
    for feature in (
        "   - CNN predictions every 10 seconds (current rate)",
        "   - Continuous inference every 5 seconds",
        "   - Hourly multi-timeframe inference (4 predictions per hour)",
        "   - Real-time MSE-based reward calculation",
        "   - Automatic training when predictions are evaluated",
    ):
        print(feature)

    # Demonstrate adding predictions from existing models
    await demonstrate_prediction_tracking(orchestrator)

    # Demonstrate monitoring and statistics
    await demonstrate_monitoring(orchestrator)

    # Demonstrate force evaluation for testing
    await demonstrate_force_evaluation(orchestrator)

    print("\n8. Stopping enhanced reward system...")
    await orchestrator.enhanced_reward_system.stop_integration()

    print("\n✅ Enhanced Reward System demonstration completed successfully!")
    print("\nTo integrate with your actual system:")
    print("1. Add these imports to your orchestrator file")
    print("2. Call integrate_enhanced_rewards(your_orchestrator) in __init__")
    print("3. Call await start_enhanced_rewards_for_orchestrator(your_orchestrator) in run()")
    print("4. Use add_prediction_to_enhanced_rewards() in your model inference code")


async def demonstrate_prediction_tracking(orchestrator):
    """Register sample predictions from several models for reward tracking.

    Each spec is (symbol, timeframe, predicted_price, direction, confidence,
    current_price, model_name), mirroring the signature of
    add_prediction_to_enhanced_rewards().
    """
    print("\n5. Demonstrating prediction tracking...")

    # Simulated predictions from different models and timeframes
    sample_predictions = (
        # CNN predictions for multiple timeframes
        ('ETH/USDT', '1s', 3150.50, 1, 0.85, 3150.00, 'enhanced_cnn'),
        ('ETH/USDT', '1m', 3155.00, 1, 0.78, 3150.00, 'enhanced_cnn'),
        ('ETH/USDT', '1h', 3200.00, 1, 0.72, 3150.00, 'enhanced_cnn'),
        ('ETH/USDT', '1d', 3300.00, 1, 0.65, 3150.00, 'enhanced_cnn'),

        # DQN predictions
        ('ETH/USDT', '1s', 3149.00, -1, 0.70, 3150.00, 'dqn_agent'),
        ('BTC/USDT', '1s', 51200.00, 1, 0.75, 51150.00, 'dqn_agent'),

        # COB RL predictions
        ('ETH/USDT', '1s', 3151.20, 1, 0.88, 3150.00, 'cob_rl'),
        ('BTC/USDT', '1s', 51180.00, 1, 0.82, 51150.00, 'cob_rl'),
    )

    tracked_ids = []
    for spec in sample_predictions:
        symbol, timeframe, predicted, direction, confidence, current, model = spec
        tracked_ids.append(
            add_prediction_to_enhanced_rewards(
                orchestrator, symbol, timeframe, predicted,
                direction, confidence, current, model
            )
        )
        print(f"   ✓ Added prediction: {model} predicts {symbol} {timeframe} "
              f"direction={direction} confidence={confidence:.2f}")

    print(f"   📊 Total predictions added: {len(tracked_ids)}")


async def demonstrate_monitoring(orchestrator):
    """Query and display integration statistics and per-timeframe accuracy."""
    print("\n6. Demonstrating monitoring and statistics...")

    # Give the background system a moment to process
    await asyncio.sleep(2)

    # Overall integration statistics
    stats = orchestrator.enhanced_reward_system.get_integration_statistics()

    print("   📈 Integration Statistics:")
    print(f"   - System running: {stats.get('is_running', False)}")
    print(f"   - Start time: {stats.get('start_time', 'N/A')}")
    print(f"   - Predictions tracked: {stats.get('total_predictions_tracked', 0)}")

    # Per-symbol / per-timeframe accuracy breakdown
    accuracy = orchestrator.enhanced_reward_system.get_model_accuracy()
    print("\n   🎯 Accuracy Summary by Symbol and Timeframe:")
    for symbol, per_timeframe in accuracy.items():
        print(f"   - {symbol}:")
        for timeframe, metrics in per_timeframe.items():
            print(f"     - {timeframe}: {metrics['total_predictions']} predictions, "
                  f"{metrics['direction_accuracy']:.1f}% accuracy")


async def demonstrate_force_evaluation(orchestrator):
    """Push fresh prices and force-evaluate all outstanding predictions."""
    print("\n7. Demonstrating force evaluation for testing...")

    reward_system = orchestrator.enhanced_reward_system

    # Feed new prices so tracked predictions have outcomes to score against
    print("   💰 Simulating price changes...")
    reward_system.reward_calculator.update_price('ETH/USDT', 3152.50)
    reward_system.reward_calculator.update_price('BTC/USDT', 51175.00)

    # Evaluate every outstanding prediction immediately
    print("   ⚡ Force evaluating all predictions...")
    reward_system.force_evaluation_and_training()

    # Let the evaluation settle, then re-query the statistics
    await asyncio.sleep(1)
    # NOTE(review): stats is fetched but not otherwise used below — kept to
    # preserve the original call; confirm whether it has side effects.
    stats = reward_system.get_integration_statistics()

    print("   📊 Updated statistics after evaluation:")
    accuracy = reward_system.get_model_accuracy()
    total_evaluated = sum(
        timeframe_data['total_predictions']
        for symbol_data in accuracy.values()
        for timeframe_data in symbol_data.values()
    )
    print(f"   - Total predictions evaluated: {total_evaluated}")


def create_mock_orchestrator():
    """Build a minimal stand-in orchestrator for the demonstration.

    Returns an object exposing ``data_provider.current_prices`` with fixed
    demo prices — the only attribute the demo code reads from it.
    """

    class MockDataProvider:
        """Serves fixed current prices for the demo symbols."""

        def __init__(self):
            self.current_prices = {
                'ETH/USDT': 3150.00,
                'BTC/USDT': 51150.00,
            }

    class MockOrchestrator:
        """Bare orchestrator shell; add other mock attributes as needed."""

        def __init__(self):
            self.data_provider = MockDataProvider()

    return MockOrchestrator()


def show_integration_instructions():
    """Print step-by-step instructions for integrating with a real orchestrator."""
    divider = "=" * 80
    print("\n" + divider)
    print("INTEGRATION INSTRUCTIONS FOR YOUR ACTUAL SYSTEM")
    print(divider)

    print("""
To integrate the enhanced reward system with your actual TradingOrchestrator:

1. ADD IMPORTS to your orchestrator.py:
   ```python
   from core.enhanced_reward_system_integration import (
       integrate_enhanced_rewards,
       add_prediction_to_enhanced_rewards
   )
   ```

2. INTEGRATE in TradingOrchestrator.__init__():
   ```python
   # Add this line in your __init__ method
   integrate_enhanced_rewards(self, symbols=['ETH/USDT', 'BTC/USDT'])
   ```

3. START in TradingOrchestrator.run():
   ```python
   # Add this line in your run() method, after initialization
   await self.enhanced_reward_system.start_integration()
   ```

4. ADD PREDICTIONS in your model inference code:
   ```python
   # In your CNN/DQN/COB model inference methods, add:
   prediction_id = add_prediction_to_enhanced_rewards(
       self,              # orchestrator instance
       symbol,            # e.g., 'ETH/USDT'
       timeframe,         # e.g., '1s', '1m', '1h', '1d'
       predicted_price,   # model's price prediction
       direction,         # -1 (down), 0 (neutral), 1 (up)
       confidence,        # 0.0 to 1.0
       current_price,     # current market price
       model_name         # e.g., 'enhanced_cnn', 'dqn_agent'
   )
   ```

5. MONITOR with:
   ```python
   # Get statistics anytime
   stats = self.enhanced_reward_system.get_integration_statistics()
   accuracy = self.enhanced_reward_system.get_model_accuracy()
   ```

The system will automatically:
- Track predictions for multiple timeframes separately
- Calculate MSE-based rewards when outcomes are available
- Trigger real-time training with enhanced rewards
- Maintain accuracy statistics for each model and timeframe
- Handle hourly multi-timeframe inference scheduling

Key Benefits:
✅ MSE-based accuracy measurement (better than simple directional accuracy)
✅ Separate tracking for up to 6 last predictions per timeframe
✅ Real-time training at each inference when outcomes available
✅ Multi-timeframe prediction support (1s, 1m, 1h, 1d)
✅ Hourly inference on all timeframes (4 predictions per hour)
✅ Models know which timeframe they're predicting on
✅ Backward compatible with existing code
""")


if __name__ == "__main__":
    # Run the live demonstration first...
    asyncio.run(demonstrate_enhanced_reward_integration())

    # ...then print the manual integration checklist.
    show_integration_instructions()