gogo2/utils/port_manager.py
Dobromir Popov c0872248ab misc
2025-05-13 17:19:52 +03:00

238 lines
8.1 KiB
Python

#!/usr/bin/env python3
"""
Port Management Utility
This script provides utilities to:
1. Find available ports in a specified range
2. Kill stale processes running on specific ports
3. Kill all debug/training instances
Usage:
- As a module: import port_manager and use its functions
- Directly: python port_manager.py --kill-stale --min-port 6000 --max-port 7000
"""
import os
import sys
import socket
import argparse
import psutil
import logging
import time
import signal
from typing import List, Tuple, Optional, Set
# Root logging setup: INFO level, timestamped records that include the
# logger name and severity.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

# Logger shared by every helper in this module.
logger = logging.getLogger('port_manager')

# Substrings searched for in a process command line (or process name) to
# decide whether it is a debug/training instance that should be killed.
DEBUG_PROCESS_KEYWORDS = [
    'tensorboard',
    'python train_',
    'realtime.py',
    'train_rl_with_realtime.py',
]
def is_port_in_use(port: int) -> bool:
    """
    Report whether a TCP connection to a local port succeeds

    A throwaway IPv4 TCP socket is opened and connected to ``localhost`` on
    the given port; the port counts as "in use" when that connect succeeds.

    Args:
        port (int): Port number to probe
    Returns:
        bool: True if the connection attempt succeeded, False otherwise
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with probe:
        # connect_ex returns an error code instead of raising; 0 means connected.
        status = probe.connect_ex(('localhost', port))
    return status == 0
def find_available_port(start_port: int, end_port: int) -> Optional[int]:
    """
    Return the lowest free port in an inclusive range

    Ports are probed in ascending order with ``is_port_in_use`` and the
    first one that is not in use is returned.

    Args:
        start_port (int): First port to try (inclusive)
        end_port (int): Last port to try (inclusive)
    Returns:
        Optional[int]: The first free port, or None when every port in the
        range is in use (or the range is empty)
    """
    candidates = range(start_port, end_port + 1)
    # The generator is lazy, so probing stops at the first free port.
    return next((port for port in candidates if not is_port_in_use(port)), None)
def get_process_by_port(port: int) -> List[psutil.Process]:
    """
    Get processes using a specific port

    Every process is inspected for inet sockets whose local port equals
    ``port``. A process is reported at most once, even when it holds several
    sockets on that port (for example a listener plus accepted connections),
    so callers do not signal or count the same process repeatedly.

    Args:
        port (int): Port number to check
    Returns:
        List[psutil.Process]: List of processes using the port (no duplicates)
    """
    processes = []
    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            # NOTE(review): psutil >= 6.0 deprecates connections() in favour of
            # net_connections(); confirm against the psutil version in use.
            connections = proc.connections(kind='inet')
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # The process vanished or cannot be inspected; skip it.
            continue
        # Defensive: ignore sockets that report no local address.
        if any(conn.laddr and conn.laddr.port == port for conn in connections):
            processes.append(proc)
    return processes
def kill_process_by_port(port: int) -> Tuple[int, List[str]]:
    """
    Kill processes using a specific port

    Each process found on the port is first asked to exit with
    ``terminate()``. The function then waits up to 0.5 seconds for them to
    exit, returning early once all are gone. Survivors are force-killed with
    ``kill()`` and given another 0.5 seconds, so that a port check made
    right after this call is less likely to still see the port as held.

    Args:
        port (int): Port number to check
    Returns:
        Tuple[int, List[str]]: Count of processes that were sent a terminate
        request, and their command lines (or process names when the command
        line is empty)
    """
    processes = get_process_by_port(port)
    killed = []
    for proc in processes:
        try:
            cmdline = proc.cmdline()
            proc_name = " ".join(cmdline) if cmdline else proc.name()
            logger.info(f"Terminating process {proc.pid}: {proc_name}")
            proc.terminate()
            killed.append(proc_name)
        except psutil.NoSuchProcess:
            # Already gone; nothing left to do for this process.
            pass
        except psutil.AccessDenied:
            logger.warning(f"Access denied while terminating process {proc.pid}")

    if not processes:
        return len(killed), killed

    # Give processes time to terminate gracefully; returns early if all exit.
    _, alive = psutil.wait_procs(processes, timeout=0.5)

    # Force kill any remaining processes
    for proc in alive:
        try:
            logger.info(f"Force killing process {proc.pid}")
            proc.kill()
        except psutil.NoSuchProcess:
            pass
        except psutil.AccessDenied:
            logger.warning(f"Access denied while force killing process {proc.pid}")

    if alive:
        # Let the forced kills take effect before the caller re-checks the port.
        psutil.wait_procs(alive, timeout=0.5)

    return len(killed), killed
def kill_stale_debug_instances() -> Tuple[int, Set[str]]:
    """
    Kill all stale debug and training instances based on process names

    A process is considered stale when its command line (or its name, when
    the command line is empty) contains any entry of
    ``DEBUG_PROCESS_KEYWORDS``. The calling process itself is always skipped,
    so a training script that calls this helper does not terminate itself.

    Matching processes are sent ``terminate()``. Those still alive after up
    to one second are force-killed with ``kill()``. Only processes that were
    terminated in the first pass are force-killed; processes started in the
    meantime are left alone.

    Returns:
        Tuple[int, Set[str]]: Count of processes that were sent a terminate
        request, and the set of their command lines
    """
    own_pid = os.getpid()
    killed_count = 0
    killed_procs = set()
    terminated = []

    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        # Never signal ourselves: the caller may match the keywords too.
        if proc.pid == own_pid:
            continue
        try:
            cmdline = proc.cmdline()
            cmd = " ".join(cmdline) if cmdline else proc.name()
            # Check if this is a debug/training process we should kill
            if not any(keyword in cmd for keyword in DEBUG_PROCESS_KEYWORDS):
                continue
            logger.info(f"Terminating stale process {proc.pid}: {cmd}")
            proc.terminate()
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # Vanished or not permitted; best-effort, move on.
            continue
        killed_count += 1
        killed_procs.add(cmd)
        terminated.append(proc)

    if terminated:
        # Give processes time to terminate; returns early once all have exited.
        _, alive = psutil.wait_procs(terminated, timeout=1)
        # Force kill any remaining processes
        for proc in alive:
            try:
                logger.info(f"Force killing stale process {proc.pid}")
                proc.kill()
            except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                pass

    return killed_count, killed_procs
def get_port_with_fallback(preferred_port: int, min_port: int, max_port: int) -> int:
    """
    Pick a usable port, preferring ``preferred_port``

    Strategy, in order:
    1. Use the preferred port if it is free.
    2. Otherwise kill whatever holds the preferred port and use it if that
       freed it.
    3. Otherwise take the first free port in ``min_port``-``max_port``.
    4. Otherwise kill the processes on every port of that range and retry.

    Args:
        preferred_port (int): Port to use when possible
        min_port (int): Lowest port of the fallback range (inclusive)
        max_port (int): Highest port of the fallback range (inclusive)
    Returns:
        int: Available port number
    Raises:
        RuntimeError: If no port is available even after freeing the range
    """
    # Step 1: the preferred port is already free.
    if not is_port_in_use(preferred_port):
        return preferred_port

    # Step 2: try to evict whoever holds the preferred port.
    logger.info(f"Preferred port {preferred_port} is in use, attempting to free it")
    kill_count = kill_process_by_port(preferred_port)[0]
    if kill_count > 0 and not is_port_in_use(preferred_port):
        logger.info(f"Successfully freed port {preferred_port}")
        return preferred_port

    # Step 3: scan the fallback range for a free port.
    logger.info(f"Looking for available port in range {min_port}-{max_port}")
    available_port = find_available_port(min_port, max_port)
    if available_port:
        logger.info(f"Using alternative port: {available_port}")
        return available_port

    # Step 4: nothing free, so clear the whole range and scan once more.
    logger.warning(f"No available ports in range {min_port}-{max_port}, freeing ports")
    for port in range(min_port, max_port + 1):
        kill_process_by_port(port)

    available_port = find_available_port(min_port, max_port)
    if not available_port:
        logger.error(f"Could not find available port even after freeing range {min_port}-{max_port}")
        raise RuntimeError(f"No available ports in range {min_port}-{max_port}")

    logger.info(f"Using port {available_port} after freeing")
    return available_port
if __name__ == '__main__':
    # Command-line entry point. Options are registered in this order, which
    # is also the order they appear in --help.
    option_specs = [
        ('--kill-stale', {'action': 'store_true', 'help': 'Kill all stale debug instances'}),
        ('--free-port', {'type': int, 'help': 'Free a specific port'}),
        ('--find-port', {'action': 'store_true', 'help': 'Find an available port'}),
        ('--min-port', {'type': int, 'default': 6000, 'help': 'Minimum port in range'}),
        ('--max-port', {'type': int, 'default': 7000, 'help': 'Maximum port in range'}),
        ('--preferred-port', {'type': int, 'help': 'Preferred port to use'}),
    ]
    parser = argparse.ArgumentParser(description='Port management utility')
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    args = parser.parse_args()

    # Action 1: kill debug/training processes matched by keyword.
    if args.kill_stale:
        count, procs = kill_stale_debug_instances()
        logger.info(f"Killed {count} stale processes")
        for proc in procs:
            logger.info(f" - {proc}")

    # Action 2: kill whatever is using one specific port.
    if args.free_port:
        count, killed = kill_process_by_port(args.free_port)
        logger.info(f"Killed {count} processes using port {args.free_port}")
        for proc in killed:
            logger.info(f" - {proc}")

    # Action 3: resolve a usable port; with no preferred port, start from
    # the low end of the range.
    if args.find_port or args.preferred_port:
        preferred = args.preferred_port or args.min_port
        # Print only the port number for easy capture in scripts
        print(get_port_with_fallback(preferred, args.min_port, args.max_port))