238 lines
8.1 KiB
Python
238 lines
8.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Port Management Utility
|
|
|
|
This script provides utilities to:
|
|
1. Find available ports in a specified range
|
|
2. Kill stale processes running on specific ports
|
|
3. Kill all debug/training instances
|
|
|
|
Usage:
|
|
- As a module: import port_manager and use its functions
|
|
- Directly: python port_manager.py --kill-stale (kill stale instances), or python port_manager.py --find-port --min-port 6000 --max-port 7000 (print a usable port)
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import socket
|
|
import argparse
|
|
import psutil
|
|
import logging
|
|
import time
|
|
import signal
|
|
from typing import List, Tuple, Optional, Set
|
|
|
|
# Root logging setup: INFO and above, each record stamped with time,
# logger name and level. The module's own logger is named 'port_manager'.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger('port_manager')
|
|
|
|
# Substrings searched for in a process command line (or its name when the
# command line is empty); a match marks the process as a stale
# debug/training instance to be killed.
DEBUG_PROCESS_KEYWORDS = [
    'tensorboard',
    'python train_',
    'realtime.py',
    'train_rl_with_realtime.py',
]
|
|
|
|
def is_port_in_use(port: int) -> bool:
    """
    Report whether something accepts TCP connections on a local port

    The check opens an IPv4 TCP socket and attempts to connect to
    'localhost' on the given port; a successful connect means the port
    is in use.

    Args:
        port (int): Port number to probe

    Returns:
        bool: True when the connection attempt succeeds, False otherwise
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # connect_ex reports failure as an error code instead of raising;
        # 0 means the connection was established.
        status = probe.connect_ex(('localhost', port))
    finally:
        probe.close()
    return status == 0
|
|
|
|
def find_available_port(start_port: int, end_port: int) -> Optional[int]:
    """
    Return the lowest port in an inclusive range that is not in use

    Ports are probed in ascending order with is_port_in_use() and the
    search stops at the first free one.

    Args:
        start_port (int): First port to try (inclusive)
        end_port (int): Last port to try (inclusive)

    Returns:
        Optional[int]: The first free port, or None when every port in the
            range is in use (or the range is empty)
    """
    candidates = range(start_port, end_port + 1)
    free_ports = (candidate for candidate in candidates if not is_port_in_use(candidate))
    return next(free_ports, None)
|
|
|
|
def get_process_by_port(port: int) -> List[psutil.Process]:
    """
    Get processes using a specific port

    A process is reported when at least one of its inet sockets has a
    local address on ``port``. Each process appears at most once in the
    result, even if it holds several sockets on that port (for example a
    listener plus accepted connections).

    Args:
        port (int): Port number to check

    Returns:
        List[psutil.Process]: List of processes using the port. Processes
            that vanish during the scan or cannot be inspected are skipped.
    """
    processes = []
    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            # psutil 6.0 renamed Process.connections() to net_connections()
            # and deprecated the old name; use the new one when it exists.
            list_connections = getattr(proc, 'net_connections', None) or proc.connections
            # laddr is an empty tuple for sockets without a local address,
            # so check it before reading .port. any() stops at the first
            # match, which also keeps the process from being listed twice.
            if any(conn.laddr and conn.laddr.port == port
                   for conn in list_connections(kind='inet')):
                processes.append(proc)
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # Best effort: the process exited mid-scan or we lack the
            # privileges to inspect it.
            continue
    return processes
|
|
|
|
def kill_process_by_port(port: int) -> Tuple[int, List[str]]:
    """
    Kill processes using a specific port

    Every process returned by get_process_by_port() is asked to terminate.
    Processes still alive after a 0.5 second grace period are force
    killed. The calling process itself is never targeted.

    Args:
        port (int): Port number to check

    Returns:
        Tuple[int, List[str]]: Count of processes a terminate request was
            sent to, and their command lines (or names when the command
            line is empty)
    """
    own_pid = os.getpid()
    killed = []
    signalled = []

    for proc in get_process_by_port(port):
        if proc.pid == own_pid:
            # Freeing a port must not take down the caller.
            logger.warning(f"Port {port} is held by the current process {own_pid}, not terminating it")
            continue
        try:
            cmdline = proc.cmdline()
            proc_name = " ".join(cmdline) if cmdline else proc.name()
            logger.info(f"Terminating process {proc.pid}: {proc_name}")
            proc.terminate()
        except psutil.NoSuchProcess:
            # Already gone (covers zombies too); nothing left to do.
            continue
        except psutil.AccessDenied:
            logger.warning(f"Access denied while terminating process {proc.pid} on port {port}")
            continue
        killed.append(proc_name)
        signalled.append(proc)

    if signalled:
        # wait_procs returns as soon as every process has exited, so the
        # grace period is only spent in full when something is stuck.
        _, alive = psutil.wait_procs(signalled, timeout=0.5)
        for proc in alive:
            try:
                logger.info(f"Force killing process {proc.pid}")
                proc.kill()
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                pass
        if alive:
            # Let the force-killed processes exit so the port is released
            # before the caller re-checks it.
            psutil.wait_procs(alive, timeout=0.5)

    return len(killed), killed
|
|
|
|
def kill_stale_debug_instances() -> Tuple[int, Set[str]]:
    """
    Kill all stale debug and training instances based on process names

    A process is a target when its command line (or its name, when the
    command line is empty) contains any entry of DEBUG_PROCESS_KEYWORDS.
    Targets are asked to terminate; those still alive after one second
    are force killed. The calling process is always skipped, so a training
    script that imports this module does not kill itself.

    Returns:
        Tuple[int, Set[str]]: Count of processes a terminate request was
            sent to, and the set of their command lines
    """
    own_pid = os.getpid()
    killed_procs = set()
    signalled = []

    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        if proc.pid == own_pid:
            continue
        try:
            cmdline = proc.cmdline()
            cmd = " ".join(cmdline) if cmdline else proc.name()

            # Check if this is a debug/training process we should kill
            if not any(keyword in cmd for keyword in DEBUG_PROCESS_KEYWORDS):
                continue

            logger.info(f"Terminating stale process {proc.pid}: {cmd}")
            proc.terminate()
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # Best effort: process vanished or is not ours to inspect/kill.
            continue
        signalled.append(proc)
        killed_procs.add(cmd)

    if signalled:
        # Only the processes we actually signalled are force killed; a
        # fresh scan could hit instances started after the first pass.
        _, alive = psutil.wait_procs(signalled, timeout=1)
        for proc in alive:
            try:
                logger.info(f"Force killing stale process {proc.pid}")
                proc.kill()
            except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                pass

    return len(signalled), killed_procs
|
|
|
|
def get_port_with_fallback(preferred_port: int, min_port: int, max_port: int) -> int:
    """
    Try to use preferred port, fall back to any available port in range

    The steps are tried in order:
    1. Return the preferred port if it is not in use.
    2. Kill the processes holding the preferred port and return it if that
       freed it.
    3. Return the first free port in [min_port, max_port].
    4. Kill the processes holding every port in the range and search the
       range again.

    Note that steps 2 and 4 terminate other processes.

    Args:
        preferred_port (int): Preferred port to use
        min_port (int): Minimum port in fallback range
        max_port (int): Maximum port in fallback range

    Returns:
        int: Available port number

    Raises:
        RuntimeError: If no port in the range is available even after
            killing the processes that hold them
    """
    # First try the preferred port
    if not is_port_in_use(preferred_port):
        return preferred_port

    # If preferred port is in use, try to free it
    logger.info(f"Preferred port {preferred_port} is in use, attempting to free it")
    kill_count, _ = kill_process_by_port(preferred_port)
    if kill_count > 0 and not is_port_in_use(preferred_port):
        logger.info(f"Successfully freed port {preferred_port}")
        return preferred_port

    # If we couldn't free the preferred port, find another available port
    logger.info(f"Looking for available port in range {min_port}-{max_port}")
    available_port = find_available_port(min_port, max_port)
    # Compare with None explicitly: a found port of 0 is falsy and must not
    # be mistaken for "nothing found", which would trigger the mass kill.
    if available_port is not None:
        logger.info(f"Using alternative port: {available_port}")
        return available_port

    # If no ports are available, force kill processes in the entire range
    logger.warning(f"No available ports in range {min_port}-{max_port}, freeing ports")
    for port in range(min_port, max_port + 1):
        kill_process_by_port(port)

    # Try again
    available_port = find_available_port(min_port, max_port)
    if available_port is None:
        logger.error(f"Could not find available port even after freeing range {min_port}-{max_port}")
        raise RuntimeError(f"No available ports in range {min_port}-{max_port}")

    logger.info(f"Using port {available_port} after freeing")
    return available_port
|
|
|
|
def main(argv: Optional[List[str]] = None) -> int:
    """
    Command-line entry point for the port management utility

    The requested actions run in a fixed order: --kill-stale, then
    --free-port, then --find-port/--preferred-port. When a port is
    looked up, only the port number is printed to stdout so shell scripts
    can capture it; everything else goes through the logger.

    Args:
        argv (Optional[List[str]]): Arguments to parse; defaults to the
            process command line when None

    Returns:
        int: Process exit code, 0 on success and 1 when no port could be
            obtained
    """
    parser = argparse.ArgumentParser(description='Port management utility')
    parser.add_argument('--kill-stale', action='store_true', help='Kill all stale debug instances')
    parser.add_argument('--free-port', type=int, help='Free a specific port')
    parser.add_argument('--find-port', action='store_true', help='Find an available port')
    parser.add_argument('--min-port', type=int, default=6000, help='Minimum port in range')
    parser.add_argument('--max-port', type=int, default=7000, help='Maximum port in range')
    parser.add_argument('--preferred-port', type=int, help='Preferred port to use')

    args = parser.parse_args(argv)

    if args.kill_stale:
        count, procs = kill_stale_debug_instances()
        logger.info(f"Killed {count} stale processes")
        for proc in procs:
            logger.info(f"  - {proc}")

    if args.free_port:
        count, killed = kill_process_by_port(args.free_port)
        logger.info(f"Killed {count} processes using port {args.free_port}")
        for proc in killed:
            logger.info(f"  - {proc}")

    if args.find_port or args.preferred_port:
        # Without an explicit preference the lower bound of the range is
        # tried first.
        preferred = args.preferred_port if args.preferred_port else args.min_port
        try:
            port = get_port_with_fallback(preferred, args.min_port, args.max_port)
        except RuntimeError:
            # get_port_with_fallback has already logged the failure; exit
            # non-zero without a traceback so calling scripts can react.
            return 1
        print(port)  # Print only the port number for easy capture in scripts

    return 0


if __name__ == '__main__':
    sys.exit(main())