misc
This commit is contained in:
238
utils/port_manager.py
Normal file
238
utils/port_manager.py
Normal file
@@ -0,0 +1,238 @@
|
||||
#!/usr/bin/env python3
"""
Port Management Utility

This script provides utilities to:
1. Find available ports in a specified range
2. Kill stale processes running on specific ports
3. Kill all debug/training instances

Usage:
- As a module: import port_manager and use its functions
- Directly: python port_manager.py --kill-stale --min-port 6000 --max-port 7000
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import socket
|
||||
import argparse
|
||||
import psutil
|
||||
import logging
|
||||
import time
|
||||
import signal
|
||||
from typing import List, Tuple, Optional, Set
|
||||
|
||||
# Root logging setup: INFO and above, each record rendered as
# "timestamp - logger name - level - message".
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Logger shared by every helper in this module.
logger = logging.getLogger('port_manager')

# Substrings that mark a process as a debug/training instance.  They are
# matched with a plain ``in`` test against the process command line (or the
# process name when the command line is empty).
# NOTE(review): 'train_rl_with_realtime.py' already contains 'realtime.py',
# so the last entry never changes the outcome of the match.
DEBUG_PROCESS_KEYWORDS = [
    'tensorboard',
    'python train_',
    'realtime.py',
    'train_rl_with_realtime.py',
]
|
||||
|
||||
def is_port_in_use(port: int, host: str = 'localhost', timeout: Optional[float] = 1.0) -> bool:
    """
    Check if a port is in use

    The check is a TCP connect probe over IPv4: the port counts as "in use"
    only when a connection to ``host:port`` succeeds.

    Args:
        port (int): Port number to check
        host (str): Host to probe (defaults to 'localhost')
        timeout (Optional[float]): Seconds to wait for the connection attempt;
            None waits without a limit

    Returns:
        bool: True if port is in use, False otherwise
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        # Bound the probe so an unanswered connection attempt cannot stall a
        # scan over a whole port range.
        s.settimeout(timeout)
        # connect_ex reports failure as a non-zero error code instead of
        # raising, so refused and timed-out attempts both yield False.
        return s.connect_ex((host, port)) == 0
|
||||
|
||||
def find_available_port(start_port: int, end_port: int) -> Optional[int]:
    """
    Return the first free port in an inclusive range

    Ports are probed in ascending order with ``is_port_in_use`` and the scan
    stops at the first port that is not in use.

    Args:
        start_port (int): Lower bound of port range (inclusive)
        end_port (int): Upper bound of port range (inclusive)

    Returns:
        Optional[int]: Available port number or None if no ports available
            (including when the range is empty)
    """
    candidates = range(start_port, end_port + 1)
    # The generator is lazy, so probing stops as soon as a free port is found.
    return next((candidate for candidate in candidates if not is_port_in_use(candidate)), None)
|
||||
|
||||
def get_process_by_port(port: int) -> List[psutil.Process]:
    """
    Get processes using a specific port

    A process is considered to be using the port when any of its inet
    connections has ``port`` as its local port.  Each matching process is
    returned once, even when it holds several sockets on that port (for
    example a listener plus accepted connections).

    Args:
        port (int): Port number to check

    Returns:
        List[psutil.Process]: List of processes using the port, without
            duplicates
    """
    processes = []
    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            # NOTE(review): newer psutil releases appear to prefer
            # Process.net_connections() over connections() - confirm against
            # the pinned psutil version before switching.
            connections = proc.connections(kind='inet')
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # Best-effort scan: skip processes that vanished or that we are
            # not allowed to inspect.
            continue

        # any() stops at the first matching socket, so a process is appended
        # at most once.  The laddr truthiness check guards against
        # connections that report no local address.
        if any(conn.laddr and conn.laddr.port == port for conn in connections):
            processes.append(proc)
    return processes
|
||||
|
||||
def kill_process_by_port(port: int) -> Tuple[int, List[str]]:
    """
    Kill processes using a specific port

    Every process returned by ``get_process_by_port`` is first asked to
    terminate; processes still running after a grace period of up to 0.5
    seconds are force killed.  The calling process itself is never targeted.

    Args:
        port (int): Port number to check

    Returns:
        Tuple[int, List[str]]: Count of processes that were sent a terminate
            request and their command lines (or process names when the
            command line is empty)
    """
    own_pid = os.getpid()

    # Key by pid so a process is handled once even if it is listed repeatedly,
    # and leave ourselves out so that freeing a port cannot kill the caller.
    targets = {}
    for proc in get_process_by_port(port):
        if proc.pid == own_pid:
            logger.warning(f"Port {port} is held by the current process {own_pid}; not terminating it")
            continue
        targets.setdefault(proc.pid, proc)

    killed = []
    for proc in targets.values():
        try:
            cmdline = proc.cmdline()
            proc_name = " ".join(cmdline) if cmdline else proc.name()
            logger.info(f"Terminating process {proc.pid}: {proc_name}")
            proc.terminate()
            killed.append(proc_name)
        except psutil.NoSuchProcess:
            # Already gone; nothing left to do for this process.
            pass
        except psutil.AccessDenied:
            logger.warning(f"Access denied while terminating process {proc.pid}")

    if not targets:
        return len(killed), killed

    # Give processes time to terminate gracefully, returning early once they
    # have all exited instead of always sleeping for the full grace period.
    candidates = list(targets.values())
    try:
        _, survivors = psutil.wait_procs(candidates, timeout=0.5)
    except psutil.Error:
        # Waiting is best effort; fall back to checking every target below.
        survivors = candidates

    # Force kill any remaining processes
    for proc in survivors:
        try:
            if proc.is_running():
                logger.info(f"Force killing process {proc.pid}")
                proc.kill()
        except psutil.NoSuchProcess:
            pass
        except psutil.AccessDenied:
            logger.warning(f"Access denied while force killing process {proc.pid}")

    return len(killed), killed
|
||||
|
||||
def kill_stale_debug_instances() -> Tuple[int, Set[str]]:
    """
    Kill all stale debug and training instances based on process names

    A process matches when its command line (or its name, when the command
    line is empty) contains any entry of ``DEBUG_PROCESS_KEYWORDS``.  Matching
    processes are asked to terminate, and those still running after a grace
    period of up to 1 second are force killed.  The calling process is always
    skipped, so a training script that imports this module does not kill
    itself when its own command line matches a keyword.

    Returns:
        Tuple[int, Set[str]]: Count of processes that were sent a terminate
            request and the set of their command lines
    """
    own_pid = os.getpid()
    killed_count = 0
    killed_procs = set()
    # Exactly the processes matched in this pass.  Only these are escalated to
    # a force kill, so instances started during the grace period are left
    # untouched.
    matched = []

    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        if proc.pid == own_pid:
            continue
        try:
            cmdline = proc.cmdline()
            cmd = " ".join(cmdline) if cmdline else proc.name()

            # Check if this is a debug/training process we should kill
            if not any(keyword in cmd for keyword in DEBUG_PROCESS_KEYWORDS):
                continue

            # Record the match before signalling so a failed terminate() still
            # gets a force-kill attempt below.
            matched.append(proc)
            logger.info(f"Terminating stale process {proc.pid}: {cmd}")
            proc.terminate()
            killed_count += 1
            killed_procs.add(cmd)
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # Best effort: skip processes that vanished or cannot be signalled.
            pass

    if not matched:
        return killed_count, killed_procs

    # Give processes time to terminate, returning early once all have exited.
    try:
        _, survivors = psutil.wait_procs(matched, timeout=1)
    except psutil.Error:
        # Waiting is best effort; fall back to checking every match below.
        survivors = matched

    # Force kill any remaining processes
    for proc in survivors:
        try:
            if proc.is_running():
                logger.info(f"Force killing stale process {proc.pid}")
                proc.kill()
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            pass

    return killed_count, killed_procs
|
||||
|
||||
def get_port_with_fallback(preferred_port: int, min_port: int, max_port: int) -> int:
    """
    Try to use preferred port, fall back to any available port in range

    Escalation order:
    1. Use the preferred port if it is not in use.
    2. Otherwise kill the processes holding the preferred port and re-check.
    3. Otherwise take the first free port in ``min_port``-``max_port``.
    4. As a last resort, kill the processes on every port in the range and
       scan the range once more.

    Args:
        preferred_port (int): Preferred port to use
        min_port (int): Minimum port in fallback range
        max_port (int): Maximum port in fallback range

    Returns:
        int: Available port number

    Raises:
        RuntimeError: If no port in the range is available even after the
            processes using the range have been killed
    """
    # First try the preferred port
    if not is_port_in_use(preferred_port):
        return preferred_port

    # If preferred port is in use, try to free it
    logger.info(f"Preferred port {preferred_port} is in use, attempting to free it")
    kill_count, _ = kill_process_by_port(preferred_port)

    if kill_count > 0 and not is_port_in_use(preferred_port):
        logger.info(f"Successfully freed port {preferred_port}")
        return preferred_port

    # If we couldn't free the preferred port, find another available port
    logger.info(f"Looking for available port in range {min_port}-{max_port}")
    available_port = find_available_port(min_port, max_port)

    # Compare against None rather than truthiness: port 0 is falsy, and a
    # successful scan must not escalate to killing the whole range.
    if available_port is not None:
        logger.info(f"Using alternative port: {available_port}")
        return available_port

    # If no ports are available, force kill processes in the entire range
    logger.warning(f"No available ports in range {min_port}-{max_port}, freeing ports")
    for port in range(min_port, max_port + 1):
        kill_process_by_port(port)

    # Try again
    available_port = find_available_port(min_port, max_port)
    if available_port is not None:
        logger.info(f"Using port {available_port} after freeing")
        return available_port

    logger.error(f"Could not find available port even after freeing range {min_port}-{max_port}")
    raise RuntimeError(f"No available ports in range {min_port}-{max_port}")
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point.  The three actions (--kill-stale, --free-port,
    # --find-port/--preferred-port) are independent and run in this order
    # when combined.
    cli = argparse.ArgumentParser(description='Port management utility')
    cli.add_argument('--kill-stale', action='store_true', help='Kill all stale debug instances')
    cli.add_argument('--free-port', type=int, help='Free a specific port')
    cli.add_argument('--find-port', action='store_true', help='Find an available port')
    cli.add_argument('--min-port', type=int, default=6000, help='Minimum port in range')
    cli.add_argument('--max-port', type=int, default=7000, help='Maximum port in range')
    cli.add_argument('--preferred-port', type=int, help='Preferred port to use')

    args = cli.parse_args()

    if args.kill_stale:
        stale_count, stale_cmds = kill_stale_debug_instances()
        logger.info(f"Killed {stale_count} stale processes")
        for stale_cmd in stale_cmds:
            logger.info(f" - {stale_cmd}")

    if args.free_port:
        freed_count, freed_cmds = kill_process_by_port(args.free_port)
        logger.info(f"Killed {freed_count} processes using port {args.free_port}")
        for freed_cmd in freed_cmds:
            logger.info(f" - {freed_cmd}")

    if args.find_port or args.preferred_port:
        # Without --preferred-port the lower bound of the range is used as the
        # preferred port.
        # NOTE(review): get_port_with_fallback kills whatever holds the
        # preferred port, so a bare --find-port can terminate the process on
        # --min-port - confirm this is intended.
        preferred = args.preferred_port or args.min_port
        chosen_port = get_port_with_fallback(preferred, args.min_port, args.max_port)
        print(chosen_port)  # Print only the port number for easy capture in scripts
|
||||
Reference in New Issue
Block a user