#!/usr/bin/env python3
"""
Clean Trading Dashboard Runner with Enhanced Stability and Error Handling

On start-up the script re-executes itself under ``<script dir>/venv`` when that
virtual environment exists and is not already active, frees the dashboard port
from stale processes, and then runs the dashboard with a bounded number of
crash-recovery retries.
"""

# Ensure we run with the project's virtual environment Python
import os
import platform
import sys
from pathlib import Path

# Set in the environment right before re-exec so a broken venv cannot cause
# an endless exec loop.
_VENV_REEXEC_GUARD = 'CLEAN_DASHBOARD_VENV_REEXEC'


def _ensure_project_venv():
    """Re-exec this script with the project's venv interpreter if needed.

    Best effort: if anything goes wrong, execution continues with the current
    interpreter.
    """
    try:
        if os.environ.get(_VENV_REEXEC_GUARD):
            return  # we already re-executed once; never loop

        project_root = Path(__file__).resolve().parent
        venv_dir = project_root / 'venv'
        if platform.system().lower().startswith('win'):
            venv_python = venv_dir / 'Scripts' / 'python.exe'
        else:
            venv_python = venv_dir / 'bin' / 'python'

        if not venv_python.exists():
            return

        # Inside a venv, sys.prefix is the venv directory. Comparing resolved
        # interpreter paths is unreliable because venv/bin/python is usually a
        # symlink to the base interpreter.
        if Path(sys.prefix).resolve() == venv_dir.resolve():
            return

        os.environ[_VENV_REEXEC_GUARD] = '1'
        # Exec the *unresolved* venv path: the interpreter locates pyvenv.cfg
        # relative to the path it was started from.
        os.execv(str(venv_python), [str(venv_python), *sys.argv])
    except Exception:
        # If anything goes wrong, continue with current interpreter
        pass


_ensure_project_venv()

# These imports intentionally follow the venv bootstrap so that third-party
# packages are resolved by the interpreter that is actually going to run.
import gc
import logging
import time
import traceback

import psutil
import torch

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)


def clear_gpu_memory():
    """Clear GPU memory cache (no-op when CUDA is not available)."""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()


def check_system_resources(min_available_gb=2.0):
    """Check if system has enough free RAM to start the dashboard.

    Args:
        min_available_gb: Minimum available RAM, in GiB, considered sufficient.

    Returns:
        True when at least ``min_available_gb`` GiB are available. Otherwise a
        warning is logged, garbage collection and a GPU cache clear are
        triggered, and False is returned.
    """
    available_ram = psutil.virtual_memory().available / 1024**3
    if available_ram < min_available_gb:
        logger.warning(f"Low RAM: {available_ram:.1f} GB available")
        gc.collect()
        clear_gpu_memory()
        return False
    return True


def _run_tool(command, timeout):
    """Run an external tool; return its CompletedProcess, or None if it is
    missing or timed out."""
    import subprocess

    try:
        return subprocess.run(command, capture_output=True, text=True, timeout=timeout)
    except (subprocess.TimeoutExpired, FileNotFoundError):
        logger.debug(f"{command[0]} not available or timed out")
        return None


def _lsof_listener_pids(port, timeout):
    """Return PIDs (as strings) listening on TCP *port* according to lsof.

    Returns None when lsof is unavailable or timed out, and an empty list when
    lsof ran but reported nothing.
    """
    # Restrict to LISTEN sockets: a bare ``-i :port`` also matches clients
    # connected to the port (e.g. a browser showing the dashboard).
    result = _run_tool(['lsof', '-t', f'-iTCP:{port}', '-sTCP:LISTEN'], timeout)
    if result is None:
        return None
    if result.returncode != 0:
        return []
    return [line.strip() for line in result.stdout.splitlines() if line.strip()]


def _terminate_pid(pid_text, announce, handled):
    """Send SIGTERM, then SIGKILL one second later, to a single process.

    Args:
        pid_text: PID as text, as parsed from tool output.
        announce: Log template containing ``{pid}``, emitted before signalling.
        handled: Set of PIDs already signalled in this cleanup pass; updated
            in place so each process is signalled at most once.
    """
    import signal

    pid_text = pid_text.strip()
    try:
        pid = int(pid_text)
    except ValueError:
        logger.debug(f"Ignoring non-numeric PID {pid_text!r}")
        return

    # PIDs <= 0 address process groups in os.kill and must never be signalled.
    if pid <= 0 or pid in handled:
        return
    # The command-line scan matches this runner too; never kill ourselves or
    # the process that launched us.
    if pid in (os.getpid(), os.getppid()):
        logger.debug(f"Skipping own process tree (PID {pid})")
        return

    handled.add(pid)
    try:
        logger.info(announce.format(pid=pid))
        os.kill(pid, signal.SIGTERM)
        time.sleep(1)
        # Force kill; raises ProcessLookupError if SIGTERM already worked.
        # SIGKILL does not exist on Windows, so fall back to SIGTERM there.
        os.kill(pid, getattr(signal, 'SIGKILL', signal.SIGTERM))
    except ProcessLookupError as e:
        logger.debug(f"Process {pid} already terminated: {e}")
    except Exception as e:
        logger.warning(f"Error killing process {pid}: {e}")


def kill_existing_dashboard_processes(port=8050):
    """Kill any existing dashboard processes and free the dashboard port.

    Three discovery methods are tried in order (lsof, ``ps aux`` command-line
    scan, netstat); tools that are missing or time out are skipped. The
    current process and its parent are never signalled.

    Args:
        port: TCP port the dashboard listens on.

    Returns:
        True if the port is verified free afterwards, or if verification had
        to be skipped because lsof is unavailable. False if the port is still
        in use or an unexpected error occurred.
    """
    try:
        handled = set()

        # Find processes using the port
        logger.info(f"Checking for processes using port {port}...")

        # Method 1: Use lsof to find processes listening on the port
        listener_pids = _lsof_listener_pids(port, timeout=10)
        if listener_pids:
            logger.info(f"Found processes using port {port}: {listener_pids}")
            for pid_text in listener_pids:
                _terminate_pid(pid_text, "Killing process {pid}", handled)

        # Method 2: Scan ps output for dashboard command lines.
        # 'clean_dashboard' also covers 'run_clean_dashboard'.
        ps_result = _run_tool(['ps', 'aux'], timeout=10)
        if ps_result is not None and ps_result.returncode == 0:
            for line in ps_result.stdout.splitlines():
                if 'clean_dashboard' not in line:
                    continue
                columns = line.split()
                if len(columns) > 1:
                    # Second column of `ps aux` is the PID.
                    _terminate_pid(columns[1], "Killing dashboard process {pid}", handled)

        # Method 3: Use netstat to find processes listening on the port
        netstat_result = _run_tool(['netstat', '-tlnp'], timeout=10)
        if netstat_result is not None and netstat_result.returncode == 0:
            port_suffix = f":{port}"
            for line in netstat_result.stdout.splitlines():
                if 'LISTEN' not in line:
                    continue
                columns = line.split()
                # Column 3 is the local address, column 6 is "PID/Program".
                if len(columns) > 6 and columns[3].endswith(port_suffix) and '/' in columns[6]:
                    _terminate_pid(
                        columns[6].split('/')[0],
                        f"Killing process {{pid}} using port {port}",
                        handled,
                    )

        # Wait a bit for processes to fully terminate
        time.sleep(2)

        # Verify port is free
        remaining = _lsof_listener_pids(port, timeout=5)
        if remaining is None:
            logger.info(f"Port {port} cleanup verification skipped")
            return True
        if remaining:
            logger.warning(f"Port {port} still in use after cleanup")
            return False
        logger.info(f"Port {port} is now free")
        return True

    except Exception as e:
        logger.error(f"Error during process cleanup: {e}")
        return False


def check_port_availability(port=8050):
    """Check if a port is available by trying to bind it on 127.0.0.1.

    Returns:
        True if the bind succeeds, False if it raises OSError.
    """
    import socket

    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind(('127.0.0.1', port))
            return True
    except OSError:
        return False


def run_dashboard_with_recovery(max_retries=3, host='127.0.0.1', port=8050):
    """Run dashboard with automatic error recovery.

    Each failure while building or serving the dashboard consumes one retry;
    after ``max_retries`` failures the process exits with status 1. Waiting
    for the port to be freed or for RAM to become available does NOT consume
    a retry, so those waits repeat until the condition clears.

    Args:
        max_retries: Maximum number of failed start attempts before exiting.
        host: Interface the dashboard server binds to.
        port: TCP port the dashboard server listens on.
    """
    retry_count = 0

    while retry_count < max_retries:
        try:
            logger.info(f"Starting Clean Trading Dashboard (attempt {retry_count + 1}/{max_retries})")

            # Clean up existing processes and free the port
            if not check_port_availability(port):
                logger.info(f"Port {port} is in use, cleaning up existing processes...")
                if not kill_existing_dashboard_processes(port):
                    logger.warning(f"Failed to free port {port}, waiting 10 seconds...")
                    time.sleep(10)
                    continue

            # Check system resources
            if not check_system_resources():
                logger.warning("System resources low, waiting 30 seconds...")
                time.sleep(30)
                continue

            # Import here to avoid memory issues on restart
            from core.data_provider import DataProvider
            from core.orchestrator import TradingOrchestrator
            from core.trading_executor import TradingExecutor
            from web.clean_dashboard import create_clean_dashboard
            # NOTE(review): not referenced below; kept in case the import has
            # side effects — confirm before removing.
            from data_stream_monitor import get_data_stream_monitor  # noqa: F401

            logger.info("Creating data provider...")
            data_provider = DataProvider()

            logger.info("Creating trading orchestrator...")
            orchestrator = TradingOrchestrator(
                data_provider=data_provider,
                enhanced_rl_training=True
            )

            logger.info("Creating trading executor...")
            trading_executor = TradingExecutor()

            logger.info("Creating clean dashboard...")
            dashboard = create_clean_dashboard(data_provider, orchestrator, trading_executor)

            # Data stream monitor for model input capture is managed by the orchestrator
            logger.info("Data stream is managed by orchestrator; no separate control needed")
            try:
                status = orchestrator.get_data_stream_status()
                logger.info(f"Data Stream: connected={status.get('connected')} streaming={status.get('streaming')}")
            except Exception as e:
                # Status reporting is informational only; never block start-up on it.
                logger.debug(f"Data stream status unavailable: {e}")

            logger.info("Dashboard created successfully")
            logger.info("=== Clean Trading Dashboard Status ===")
            logger.info("- Data Provider: Active")
            logger.info("- Trading Orchestrator: Active")
            logger.info("- Trading Executor: Active")
            logger.info("- Enhanced Training: Active")
            logger.info("- Data Stream Monitor: Active")
            logger.info("- Dashboard: Ready")
            logger.info("=======================================")

            # Start the dashboard server with error handling.
            # If run_server returns without raising, the loop starts the
            # dashboard again without consuming a retry.
            try:
                logger.info(f"Starting dashboard server on http://{host}:{port}")
                dashboard.run_server(host=host, port=port, debug=False)
            except KeyboardInterrupt:
                logger.info("Dashboard stopped by user")
                break
            except Exception as e:
                logger.error(f"Dashboard server error: {e}")
                logger.error(traceback.format_exc())
                raise

        except Exception as e:
            logger.error(f"Critical error in dashboard: {e}")
            logger.error(traceback.format_exc())

            retry_count += 1
            if retry_count < max_retries:
                logger.info(f"Attempting recovery... ({retry_count}/{max_retries})")

                # Cleanup
                gc.collect()
                clear_gpu_memory()

                # Wait before retry: linear backoff (30s, 60s, ...)
                wait_time = 30 * retry_count
                logger.info(f"Waiting {wait_time} seconds before retry...")
                time.sleep(wait_time)
            else:
                logger.error("Max retries reached. Exiting.")
                sys.exit(1)


if __name__ == "__main__":
    try:
        run_dashboard_with_recovery()
    except KeyboardInterrupt:
        logger.info("Application stopped by user")
        sys.exit(0)
    except Exception as e:
        logger.error(f"Fatal error: {e}")
        logger.error(traceback.format_exc())
        sys.exit(1)