# gogo2/run_clean_dashboard.py

#!/usr/bin/env python3
"""
Clean Trading Dashboard Runner with Enhanced Stability and Error Handling
"""

import sys
import logging
import traceback
import gc
import time
import psutil
import torch
from pathlib import Path

# Configure root logging once at import time; every record carries a
# timestamp, the emitting logger's name, the level and the message.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger shared by all functions in this script
logger = logging.getLogger(__name__)

def clear_gpu_memory():
    """Empty PyTorch's CUDA cache and synchronize; no-op when CUDA is unavailable."""
    if not torch.cuda.is_available():
        return

    cuda = torch.cuda
    cuda.empty_cache()
    cuda.synchronize()

def check_system_resources():
    """Report whether enough RAM is currently available to start the dashboard.

    The threshold is 2.0, measured in units of 1024**3 bytes. When less than
    that is available, a warning is logged and a garbage collection pass plus
    a GPU cache clear are triggered before returning.

    Returns:
        bool: True when at least the threshold is available, False otherwise.
    """
    bytes_per_gb = 1024**3
    free_gb = psutil.virtual_memory().available / bytes_per_gb

    # Happy path first: plenty of memory, nothing to clean up
    if free_gb >= 2.0:
        return True

    logger.warning(f"Low RAM: {free_gb:.1f} GB available")
    gc.collect()
    clear_gpu_memory()
    return False

def _build_dashboard():
    """Create the data provider, orchestrator, executor and dashboard objects.

    Returns:
        The object returned by ``create_clean_dashboard``.
    """
    # Import here to avoid memory issues on restart
    from core.data_provider import DataProvider
    from core.orchestrator import TradingOrchestrator
    from core.trading_executor import TradingExecutor
    from web.clean_dashboard import create_clean_dashboard

    logger.info("Creating data provider...")
    data_provider = DataProvider()

    logger.info("Creating trading orchestrator...")
    orchestrator = TradingOrchestrator(
        data_provider=data_provider,
        enhanced_rl_training=True
    )

    logger.info("Creating trading executor...")
    trading_executor = TradingExecutor()

    logger.info("Creating clean dashboard...")
    dashboard = create_clean_dashboard(data_provider, orchestrator, trading_executor)

    logger.info("Dashboard created successfully")
    logger.info("=== Clean Trading Dashboard Status ===")
    logger.info("- Data Provider: Active")
    logger.info("- Trading Orchestrator: Active")
    logger.info("- Trading Executor: Active")
    logger.info("- Enhanced Training: Active")
    logger.info("- Dashboard: Ready")
    logger.info("=======================================")
    return dashboard


def run_dashboard_with_recovery():
    """Run dashboard with automatic error recovery.

    Makes up to three attempts to build and serve the dashboard on
    http://127.0.0.1:8050. An attempt is consumed both when an exception is
    raised and when the system-resource check fails, so the loop always
    terminates. Between failed attempts memory is cleaned up and the function
    sleeps (30 s after a low-resource check, 30 s * attempt number after an
    exception).

    Returns:
        None, when the server stops normally or is interrupted by the user
        while serving.

    Raises:
        SystemExit: with exit code 1 once all attempts have been used.
    """
    max_retries = 3
    retry_count = 0

    while retry_count < max_retries:
        try:
            logger.info(f"Starting Clean Trading Dashboard (attempt {retry_count + 1}/{max_retries})")

            # Check system resources. A failed check counts as an attempt;
            # otherwise persistently low RAM would loop here forever.
            if not check_system_resources():
                retry_count += 1
                if retry_count >= max_retries:
                    logger.error("Max retries reached. Exiting.")
                    sys.exit(1)
                logger.warning("System resources low, waiting 30 seconds...")
                time.sleep(30)
                continue

            dashboard = _build_dashboard()

            # Start the dashboard server with error handling
            try:
                logger.info("Starting dashboard server on http://127.0.0.1:8050")
                dashboard.run_server(host='127.0.0.1', port=8050, debug=False)
            except KeyboardInterrupt:
                logger.info("Dashboard stopped by user")
                return
            except Exception as e:
                # Tag the failure as a server error; the traceback is logged
                # once by the outer handler.
                logger.error(f"Dashboard server error: {e}")
                raise

            # run_server returned without raising: the server shut down
            # normally, so do not rebuild and restart it.
            logger.info("Dashboard server stopped")
            return

        except Exception as e:
            # logger.exception appends the active traceback to the record
            logger.exception(f"Critical error in dashboard: {e}")

            retry_count += 1
            if retry_count < max_retries:
                logger.info(f"Attempting recovery... ({retry_count}/{max_retries})")

                # Cleanup
                gc.collect()
                clear_gpu_memory()

                # Wait before retry
                wait_time = 30 * retry_count  # Linear backoff: 30s, 60s, ...
                logger.info(f"Waiting {wait_time} seconds before retry...")
                time.sleep(wait_time)
            else:
                logger.error("Max retries reached. Exiting.")
                sys.exit(1)

if __name__ == "__main__":
    # Script entry point: map the outcome of the runner to a process exit
    # code (0 for a user interrupt, 1 for any unexpected exception).
    exit_code = None
    try:
        run_dashboard_with_recovery()
    except KeyboardInterrupt:
        logger.info("Application stopped by user")
        exit_code = 0
    except Exception as exc:
        logger.error(f"Fatal error: {exc}")
        logger.error(traceback.format_exc())
        exit_code = 1

    # A normal return leaves exit_code unset and the script ends naturally
    if exit_code is not None:
        sys.exit(exit_code)