#!/usr/bin/env python3
"""
Port Management Utility

This script provides utilities to:
1. Find available ports in a specified range
2. Kill stale processes running on specific ports
3. Kill all debug/training instances

Usage:
- As a module: import port_manager and use its functions
- Directly: python port_manager.py --kill-stale --min-port 6000 --max-port 7000
"""

import os
import sys
import socket
import argparse
import logging
import time
import signal
from typing import List, Tuple, Optional, Set

import psutil

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('port_manager')

# Substrings matched against a process command line (or its name when the
# command line is empty) to decide whether it is a stale debug/training instance.
DEBUG_PROCESS_KEYWORDS = [
    'tensorboard',
    'python train_',
    'realtime.py',
    'train_rl_with_realtime.py'
]


def is_port_in_use(port: int) -> bool:
    """
    Check if a port is in use

    The check attempts a TCP connection to ``localhost`` on the given port,
    so a port counts as "in use" only when something accepts the connection.

    Args:
        port (int): Port number to check

    Returns:
        bool: True if port is in use, False otherwise
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        # connect_ex returns 0 on success instead of raising.
        return s.connect_ex(('localhost', port)) == 0


def find_available_port(start_port: int, end_port: int) -> Optional[int]:
    """
    Find an available port in the specified range

    Ports are probed in ascending order and the first free one is returned.

    Args:
        start_port (int): Lower bound of port range (inclusive)
        end_port (int): Upper bound of port range (inclusive)

    Returns:
        Optional[int]: Available port number or None if no ports available
    """
    for port in range(start_port, end_port + 1):
        if not is_port_in_use(port):
            return port
    return None


def get_process_by_port(port: int) -> List[psutil.Process]:
    """
    Get processes using a specific port

    A process is reported at most once, even when it holds several inet
    connections whose local port matches.

    Args:
        port (int): Port number to check

    Returns:
        List[psutil.Process]: List of processes using the port
    """
    processes = []
    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            # Newer psutil releases deprecate Process.connections() in favour
            # of Process.net_connections(); use whichever is available.
            list_connections = getattr(proc, 'net_connections', None) or proc.connections
            connections = list_connections(kind='inet')
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # Process vanished or cannot be inspected: best-effort skip.
            continue

        # laddr may be an empty tuple, which has no .port attribute.
        if any(conn.laddr and conn.laddr.port == port for conn in connections):
            processes.append(proc)
    return processes
def _describe_process(proc) -> str:
    """Return the process's full command line, or its name when that is empty."""
    cmdline = proc.cmdline()
    return " ".join(cmdline) if cmdline else proc.name()


def kill_process_by_port(port: int) -> Tuple[int, List[str]]:
    """
    Kill processes using a specific port

    Each process found on the port is asked to terminate; any that are still
    alive after a short grace period are force killed. The calling process
    itself is never targeted.

    Args:
        port (int): Port number to check

    Returns:
        Tuple[int, List[str]]: Count of killed processes and their names
    """
    own_pid = os.getpid()
    killed = []
    terminated = []

    for proc in get_process_by_port(port):
        if proc.pid == own_pid:
            # Never terminate the process that is running this utility.
            continue
        try:
            proc_name = _describe_process(proc)
            logger.info(f"Terminating process {proc.pid}: {proc_name}")
            proc.terminate()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
        killed.append(proc_name)
        terminated.append(proc)

    if terminated:
        # Give processes time to terminate gracefully; returns as soon as
        # all of them have exited instead of always sleeping.
        _, alive = psutil.wait_procs(terminated, timeout=0.5)

        # Force kill any remaining processes
        for proc in alive:
            try:
                logger.info(f"Force killing process {proc.pid}")
                proc.kill()
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                pass
        if alive:
            # Let the force-killed processes exit so the port is released
            # before the caller re-checks it.
            psutil.wait_procs(alive, timeout=0.5)

    return len(killed), killed


def kill_stale_debug_instances() -> Tuple[int, Set[str]]:
    """
    Kill all stale debug and training instances based on process names

    A process is targeted when its command line (or name) contains any entry
    of DEBUG_PROCESS_KEYWORDS. The calling process is skipped so that a
    training script importing this module does not terminate itself. Only
    processes that were asked to terminate are force killed afterwards.

    Returns:
        Tuple[int, Set[str]]: Count of killed processes and their names
    """
    own_pid = os.getpid()
    killed_count = 0
    killed_procs = set()
    terminated = []

    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        if proc.pid == own_pid:
            continue
        try:
            cmd = _describe_process(proc)
            # Check if this is a debug/training process we should kill
            if not any(keyword in cmd for keyword in DEBUG_PROCESS_KEYWORDS):
                continue
            logger.info(f"Terminating stale process {proc.pid}: {cmd}")
            proc.terminate()
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            continue
        killed_count += 1
        killed_procs.add(cmd)
        terminated.append(proc)

    if terminated:
        # Give processes time to terminate
        _, alive = psutil.wait_procs(terminated, timeout=1)

        # Force kill any remaining processes
        for proc in alive:
            try:
                logger.info(f"Force killing stale process {proc.pid}")
                proc.kill()
            except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                pass

    return killed_count, killed_procs


def get_port_with_fallback(preferred_port: int, min_port: int, max_port: int) -> int:
    """
    Try to use preferred port, fall back to any available port in range

    Order of attempts:
    1. Use the preferred port if it is free.
    2. Kill whatever holds the preferred port and use it if that frees it.
    3. Use the first free port in [min_port, max_port].
    4. Kill the processes on every port of the range and search once more.

    Args:
        preferred_port (int): Preferred port to use
        min_port (int): Minimum port in fallback range
        max_port (int): Maximum port in fallback range

    Returns:
        int: Available port number

    Raises:
        RuntimeError: If no port is available even after freeing the range
    """
    # First try the preferred port
    if not is_port_in_use(preferred_port):
        return preferred_port

    # If preferred port is in use, try to free it
    logger.info(f"Preferred port {preferred_port} is in use, attempting to free it")
    kill_count, _ = kill_process_by_port(preferred_port)
    if kill_count > 0 and not is_port_in_use(preferred_port):
        logger.info(f"Successfully freed port {preferred_port}")
        return preferred_port

    # If we couldn't free the preferred port, find another available port
    logger.info(f"Looking for available port in range {min_port}-{max_port}")
    available_port = find_available_port(min_port, max_port)
    # Compare against None: a falsy port number must not be mistaken for
    # "nothing found", which would trigger killing the whole range below.
    if available_port is not None:
        logger.info(f"Using alternative port: {available_port}")
        return available_port

    # If no ports are available, force kill processes in the entire range
    logger.warning(f"No available ports in range {min_port}-{max_port}, freeing ports")
    for port in range(min_port, max_port + 1):
        kill_process_by_port(port)

    # Try again
    available_port = find_available_port(min_port, max_port)
    if available_port is not None:
        logger.info(f"Using port {available_port} after freeing")
        return available_port

    logger.error(f"Could not find available port even after freeing range {min_port}-{max_port}")
    raise RuntimeError(f"No available ports in range {min_port}-{max_port}")


def main() -> None:
    """Command-line entry point: parse the flags and run the requested actions."""
    parser = argparse.ArgumentParser(description='Port management utility')
    parser.add_argument('--kill-stale', action='store_true', help='Kill all stale debug instances')
    parser.add_argument('--free-port', type=int, help='Free a specific port')
    parser.add_argument('--find-port', action='store_true', help='Find an available port')
    parser.add_argument('--min-port', type=int, default=6000, help='Minimum port in range')
    parser.add_argument('--max-port', type=int, default=7000, help='Maximum port in range')
    parser.add_argument('--preferred-port', type=int, help='Preferred port to use')

    args = parser.parse_args()

    if args.kill_stale:
        count, procs = kill_stale_debug_instances()
        logger.info(f"Killed {count} stale processes")
        for proc in procs:
            logger.info(f"  - {proc}")

    if args.free_port:
        count, killed = kill_process_by_port(args.free_port)
        logger.info(f"Killed {count} processes using port {args.free_port}")
        for proc in killed:
            logger.info(f"  - {proc}")

    if args.find_port or args.preferred_port:
        # Without an explicit preference, start from the bottom of the range.
        preferred = args.preferred_port if args.preferred_port else args.min_port
        port = get_port_with_fallback(preferred, args.min_port, args.max_port)
        print(port)  # Print only the port number for easy capture in scripts


if __name__ == '__main__':
    main()