# Source: gogo2/examples/enhanced_reward_system_example.py
# Snapshot date: 2025-08-23 01:07:05 +03:00
# (266 lines, 9.6 KiB, Python)
"""
Enhanced Reward System Integration Example
This example demonstrates how to integrate the new MSE-based reward system
with the existing trading orchestrator and models.
Usage:
python examples/enhanced_reward_system_example.py
This example shows:
1. How to integrate the enhanced reward system with TradingOrchestrator
2. How to add predictions from existing models
3. How to monitor accuracy and training statistics
4. How the system handles multi-timeframe predictions and training
"""
import asyncio
import logging
import time
from datetime import datetime
# Import the integration components
from core.enhanced_reward_system_integration import (
integrate_enhanced_rewards,
start_enhanced_rewards_for_orchestrator,
add_prediction_to_enhanced_rewards
)
# Configure module-wide logging: timestamped INFO-level records so the
# demo's progress (and the integration's own log output) is visible.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Conventional module-level logger for this example script.
logger = logging.getLogger(__name__)
async def demonstrate_enhanced_reward_integration():
    """Demonstrate the enhanced reward system integration end-to-end.

    Walks through the full lifecycle against a mock orchestrator:
    integration, startup, prediction tracking, monitoring, force
    evaluation, and shutdown. Finishes by printing the four steps
    needed to wire the system into a real orchestrator.
    """
    print("=" * 80)
    print("ENHANCED REWARD SYSTEM INTEGRATION DEMONSTRATION")
    print("=" * 80)

    # Note: This is a demonstration - in real usage, you would use your
    # actual orchestrator. For this example, we'll create a mock one.
    print("\n1. Setting up mock orchestrator...")
    mock_orchestrator = create_mock_orchestrator()

    print("\n2. Integrating enhanced reward system...")
    # This is the main integration step - just one line! The call also
    # attaches itself to the orchestrator as `enhanced_reward_system`.
    enhanced_rewards = integrate_enhanced_rewards(mock_orchestrator, ['ETH/USDT', 'BTC/USDT'])

    print("\n3. Starting enhanced reward system...")
    await start_enhanced_rewards_for_orchestrator(mock_orchestrator)

    print("\n4. System is now running with enhanced rewards!")
    print(" - CNN predictions every 10 seconds (current rate)")
    print(" - Continuous inference every 5 seconds")
    print(" - Hourly multi-timeframe inference (4 predictions per hour)")
    print(" - Real-time MSE-based reward calculation")
    print(" - Automatic training when predictions are evaluated")

    # Demonstrate adding predictions from existing models
    await demonstrate_prediction_tracking(mock_orchestrator)

    # Demonstrate monitoring and statistics
    await demonstrate_monitoring(mock_orchestrator)

    # Demonstrate force evaluation for testing
    await demonstrate_force_evaluation(mock_orchestrator)

    print("\n8. Stopping enhanced reward system...")
    await mock_orchestrator.enhanced_reward_system.stop_integration()

    print("\n✅ Enhanced Reward System demonstration completed successfully!")
    print("\nTo integrate with your actual system:")
    print("1. Add these imports to your orchestrator file")
    print("2. Call integrate_enhanced_rewards(your_orchestrator) in __init__")
    print("3. Call await start_enhanced_rewards_for_orchestrator(your_orchestrator) in run()")
    print("4. Use add_prediction_to_enhanced_rewards() in your model inference code")
async def demonstrate_prediction_tracking(orchestrator):
    """Demonstrate how to track predictions from existing models.

    Feeds a batch of sample predictions (CNN, DQN, COB RL across several
    timeframes) into the enhanced reward system via
    ``add_prediction_to_enhanced_rewards`` and reports how many were added.

    Args:
        orchestrator: An orchestrator instance that has already been
            integrated with the enhanced reward system.
    """
    print("\n5. Demonstrating prediction tracking...")

    # Simulate predictions from different models and timeframes.
    # Tuple layout: (symbol, timeframe, predicted_price, direction,
    #                confidence, current_price, model_name)
    predictions = [
        # CNN predictions for multiple timeframes
        ('ETH/USDT', '1s', 3150.50, 1, 0.85, 3150.00, 'enhanced_cnn'),
        ('ETH/USDT', '1m', 3155.00, 1, 0.78, 3150.00, 'enhanced_cnn'),
        ('ETH/USDT', '1h', 3200.00, 1, 0.72, 3150.00, 'enhanced_cnn'),
        ('ETH/USDT', '1d', 3300.00, 1, 0.65, 3150.00, 'enhanced_cnn'),
        # DQN predictions
        ('ETH/USDT', '1s', 3149.00, -1, 0.70, 3150.00, 'dqn_agent'),
        ('BTC/USDT', '1s', 51200.00, 1, 0.75, 51150.00, 'dqn_agent'),
        # COB RL predictions
        ('ETH/USDT', '1s', 3151.20, 1, 0.88, 3150.00, 'cob_rl'),
        ('BTC/USDT', '1s', 51180.00, 1, 0.82, 51150.00, 'cob_rl'),
    ]

    prediction_ids = []
    for symbol, timeframe, pred_price, direction, confidence, curr_price, model in predictions:
        prediction_id = add_prediction_to_enhanced_rewards(
            orchestrator, symbol, timeframe, pred_price, direction, confidence, curr_price, model
        )
        prediction_ids.append(prediction_id)
        print(f" ✓ Added prediction: {model} predicts {symbol} {timeframe} "
              f"direction={direction} confidence={confidence:.2f}")

    print(f" 📊 Total predictions added: {len(prediction_ids)}")
async def demonstrate_monitoring(orchestrator):
    """Demonstrate monitoring and statistics.

    Pauses briefly to let the background integration process, then prints
    the integration statistics and a per-symbol / per-timeframe accuracy
    summary.

    Args:
        orchestrator: An orchestrator with an attached
            ``enhanced_reward_system``.
    """
    print("\n6. Demonstrating monitoring and statistics...")

    # Wait a bit for some background processing to occur
    await asyncio.sleep(2)

    # Get integration statistics
    stats = orchestrator.enhanced_reward_system.get_integration_statistics()
    print(" 📈 Integration Statistics:")
    print(f" - System running: {stats.get('is_running', False)}")
    print(f" - Start time: {stats.get('start_time', 'N/A')}")
    print(f" - Predictions tracked: {stats.get('total_predictions_tracked', 0)}")

    # Get accuracy summary, keyed by symbol then timeframe
    accuracy = orchestrator.enhanced_reward_system.get_model_accuracy()
    print("\n 🎯 Accuracy Summary by Symbol and Timeframe:")
    for symbol, timeframes in accuracy.items():
        print(f" - {symbol}:")
        for timeframe, metrics in timeframes.items():
            print(f" - {timeframe}: {metrics['total_predictions']} predictions, "
                  f"{metrics['direction_accuracy']:.1f}% accuracy")
async def demonstrate_force_evaluation(orchestrator):
    """Demonstrate force evaluation for testing.

    Simulates price moves on both symbols, forces the reward system to
    evaluate all outstanding predictions, then prints how many were
    evaluated in total.

    Args:
        orchestrator: An orchestrator with an attached
            ``enhanced_reward_system``.
    """
    print("\n7. Demonstrating force evaluation for testing...")

    # Simulate some price changes by pushing fresh prices into the
    # reward calculator so the pending predictions have outcomes.
    print(" 💰 Simulating price changes...")
    orchestrator.enhanced_reward_system.reward_calculator.update_price('ETH/USDT', 3152.50)
    orchestrator.enhanced_reward_system.reward_calculator.update_price('BTC/USDT', 51175.00)

    # Force evaluation of all predictions (normally time-triggered)
    print(" ⚡ Force evaluating all predictions...")
    orchestrator.enhanced_reward_system.force_evaluation_and_training()

    # Give the evaluation a moment, then read back updated statistics
    await asyncio.sleep(1)
    stats = orchestrator.enhanced_reward_system.get_integration_statistics()
    print(" 📊 Updated statistics after evaluation:")

    accuracy = orchestrator.enhanced_reward_system.get_model_accuracy()
    total_evaluated = sum(
        sum(tf_data['total_predictions'] for tf_data in symbol_data.values())
        for symbol_data in accuracy.values()
    )
    print(f" - Total predictions evaluated: {total_evaluated}")
def create_mock_orchestrator():
    """Create a mock orchestrator for demonstration purposes.

    Returns:
        An object exposing a ``data_provider`` attribute whose
        ``current_prices`` dict holds static prices for ETH/USDT and
        BTC/USDT — the minimal surface the integration touches.
    """
    class MockDataProvider:
        # Stands in for the real data provider's price lookup table.
        def __init__(self):
            self.current_prices = {
                'ETH/USDT': 3150.00,
                'BTC/USDT': 51150.00
            }

    class MockOrchestrator:
        # Bare orchestrator: only the attributes the demo needs.
        def __init__(self):
            self.data_provider = MockDataProvider()
            # Add other mock attributes as needed

    return MockOrchestrator()
def show_integration_instructions():
    """Print step-by-step instructions for integrating the enhanced
    reward system with a real TradingOrchestrator."""
    print("\n" + "=" * 80)
    print("INTEGRATION INSTRUCTIONS FOR YOUR ACTUAL SYSTEM")
    print("=" * 80)
    print("""
To integrate the enhanced reward system with your actual TradingOrchestrator:

1. ADD IMPORTS to your orchestrator.py:
```python
from core.enhanced_reward_system_integration import (
    integrate_enhanced_rewards,
    add_prediction_to_enhanced_rewards
)
```

2. INTEGRATE in TradingOrchestrator.__init__():
```python
# Add this line in your __init__ method
integrate_enhanced_rewards(self, symbols=['ETH/USDT', 'BTC/USDT'])
```

3. START in TradingOrchestrator.run():
```python
# Add this line in your run() method, after initialization
await self.enhanced_reward_system.start_integration()
```

4. ADD PREDICTIONS in your model inference code:
```python
# In your CNN/DQN/COB model inference methods, add:
prediction_id = add_prediction_to_enhanced_rewards(
    self,             # orchestrator instance
    symbol,           # e.g., 'ETH/USDT'
    timeframe,        # e.g., '1s', '1m', '1h', '1d'
    predicted_price,  # model's price prediction
    direction,        # -1 (down), 0 (neutral), 1 (up)
    confidence,       # 0.0 to 1.0
    current_price,    # current market price
    model_name        # e.g., 'enhanced_cnn', 'dqn_agent'
)
```

5. MONITOR with:
```python
# Get statistics anytime
stats = self.enhanced_reward_system.get_integration_statistics()
accuracy = self.enhanced_reward_system.get_model_accuracy()
```

The system will automatically:
- Track predictions for multiple timeframes separately
- Calculate MSE-based rewards when outcomes are available
- Trigger real-time training with enhanced rewards
- Maintain accuracy statistics for each model and timeframe
- Handle hourly multi-timeframe inference scheduling

Key Benefits:
✅ MSE-based accuracy measurement (better than simple directional accuracy)
✅ Separate tracking for up to 6 last predictions per timeframe
✅ Real-time training at each inference when outcomes available
✅ Multi-timeframe prediction support (1s, 1m, 1h, 1d)
✅ Hourly inference on all timeframes (4 predictions per hour)
✅ Models know which timeframe they're predicting on
✅ Backward compatible with existing code
""")
if __name__ == "__main__":
    # Run the demonstration (async entry point)
    asyncio.run(demonstrate_enhanced_reward_integration())
    # Show integration instructions for a real deployment
    show_integration_instructions()