misc
This commit is contained in:
49
utils/README.md
Normal file
49
utils/README.md
Normal file
@ -0,0 +1,49 @@
|
||||
# Utility Scripts
|
||||
|
||||
This directory contains utility scripts for managing the trading bot development environment.
|
||||
|
||||
## Port Management Utility
|
||||
|
||||
The `port_manager.py` script provides utilities for managing ports and killing stale processes:
|
||||
|
||||
```bash
|
||||
# Kill all stale debug instances (processes whose command line contains a known keyword such as "tensorboard" or "realtime.py")
|
||||
python utils/port_manager.py --kill-stale
|
||||
|
||||
# Free a specific port
|
||||
python utils/port_manager.py --free-port 6007
|
||||
|
||||
# Find an available port in a range
|
||||
python utils/port_manager.py --find-port --min-port 6000 --max-port 7000
|
||||
|
||||
# Try to use a preferred port (terminating any process already bound to it), falling back to another free port in the range
|
||||
python utils/port_manager.py --preferred-port 6007 --min-port 6000 --max-port 7000
|
||||
```
|
||||
|
||||
## TensorBoard Launcher
|
||||
|
||||
The `launch_tensorboard.py` script launches TensorBoard with automatic port management:
|
||||
|
||||
```bash
|
||||
# Launch TensorBoard with default settings
|
||||
python utils/launch_tensorboard.py
|
||||
|
||||
# Launch with custom log directory and port range
|
||||
python utils/launch_tensorboard.py --logdir=path/to/logs --preferred-port=6007 --port-range=6000-7000
|
||||
|
||||
# Launch and kill stale processes first
|
||||
python utils/launch_tensorboard.py --kill-stale
|
||||
```
|
||||
|
||||
## Integration with VSCode Tasks
|
||||
|
||||
These utilities are integrated with VSCode tasks in `.vscode/tasks.json`:
|
||||
|
||||
1. **Start TensorBoard**: Launches TensorBoard with automatic port management
|
||||
2. **Kill Stale Processes**: Kills all stale debug instances
|
||||
|
||||
You can run these tasks from the VSCode command palette (Ctrl+Shift+P) by typing "Tasks: Run Task" and selecting the task.
|
||||
|
||||
## Requirements
|
||||
|
||||
These utilities require the `psutil` package, which is included in the project's `requirements.txt` file.
|
19
utils/__init__.py
Normal file
19
utils/__init__.py
Normal file
@ -0,0 +1,19 @@
|
||||
"""
|
||||
Utility functions for port management, launching services, and debug tools.
|
||||
"""
|
||||
|
||||
# Re-export the port-management helpers so callers can write
# ``from utils import find_available_port`` instead of reaching into the
# ``utils.port_manager`` submodule.
from utils.port_manager import (
    find_available_port,
    get_port_with_fallback,
    is_port_in_use,
    kill_process_by_port,
    kill_stale_debug_instances,
)

# Names exposed by ``from utils import *``.
__all__ = [
    "is_port_in_use",
    "find_available_port",
    "kill_process_by_port",
    "kill_stale_debug_instances",
    "get_port_with_fallback",
]
|
164
utils/launch_tensorboard.py
Normal file
164
utils/launch_tensorboard.py
Normal file
@ -0,0 +1,164 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
TensorBoard Launcher with Automatic Port Management
|
||||
|
||||
This script launches TensorBoard with automatic port fallback if the preferred port is in use.
|
||||
It also kills any stale debug instances that might be running.
|
||||
|
||||
Usage:
|
||||
python launch_tensorboard.py --logdir=path/to/logs --preferred-port=6007 --port-range=6000-7000
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
# Make the directory that contains the ``utils`` package importable, so the
# ``utils.port_manager`` import below also works when this file is executed
# directly as a script.
_script_path = os.path.abspath(__file__)
project_root = os.path.dirname(os.path.dirname(_script_path))
if project_root not in sys.path:
    sys.path.append(project_root)

from utils.port_manager import get_port_with_fallback, kill_stale_debug_instances

# Root logging setup for this script plus a dedicated named logger.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger('tensorboard_launcher')
|
||||
|
||||
def launch_tensorboard(logdir, port, host='localhost', open_browser=True):
    """
    Launch TensorBoard on the specified port.

    The child is started with stderr merged into stdout. A daemon thread logs
    the first few lines of that output (so startup problems are visible) and
    then keeps draining the pipe so the child can never stall on a full pipe
    buffer. The function returns as soon as the process has been spawned; it
    does not wait for TensorBoard to print anything.

    Args:
        logdir (str): Path to log directory
        port (int): Port to use
        host (str): Host to bind to
        open_browser (bool): Only affects the command line: when False,
            ``--window_title=TensorBoard`` is appended. This function does
            not open a browser in either case.

    Returns:
        subprocess.Popen: Process object
    """
    import threading

    # Only this many leading output lines are forwarded to the logger.
    max_logged_lines = 10

    cmd = [
        sys.executable, "-m", "tensorboard.main",
        f"--logdir={logdir}",
        f"--port={port}",
        f"--host={host}",
        # NOTE(review): the original author's comment says this flag is passed
        # "to improve startup times" - confirm against the TensorBoard docs.
        "--load_fast=false",
    ]

    # NOTE(review): despite the parameter name, this merely sets the page
    # title when the browser is *not* wanted; nothing here opens a browser.
    if not open_browser:
        cmd.append("--window_title=TensorBoard")

    logger.info(f"Launching TensorBoard: {' '.join(cmd)}")

    # Start TensorBoard without waiting for it to finish.
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
        bufsize=1,
    )

    def pump_output():
        """Log the first lines of child output, then drain the rest silently."""
        for line_number, line in enumerate(process.stdout, start=1):
            if line_number > max_logged_lines:
                # Keep consuming so the pipe never fills up.
                continue

            logger.info(f"TensorBoard: {line.strip()}")

            # TensorBoard announces its URL on a line mentioning its own name.
            if "TensorBoard" in line and "http://" in line:
                url = line.strip().split("http://")[1].split(" ")[0]
                logger.info(f"TensorBoard available at: http://{url}")

    # Reading happens off the calling thread: TensorBoard usually prints fewer
    # than ``max_logged_lines`` lines, so reading here would block until the
    # child exits and the caller would never get the Popen object back.
    threading.Thread(target=pump_output, daemon=True).start()

    return process
|
||||
|
||||
def main():
    """
    Parse command-line options, pick a port and run TensorBoard until it exits.

    Returns:
        int: 0 when TensorBoard exits cleanly or is stopped with Ctrl-C,
        1 when the port range is invalid or no port could be obtained,
        otherwise TensorBoard's own non-zero exit code.
    """
    parser = argparse.ArgumentParser(description='Launch TensorBoard with automatic port management')
    parser.add_argument('--logdir', type=str, default='NN/models/saved/logs',
                        help='Directory containing TensorBoard event files')
    parser.add_argument('--preferred-port', type=int, default=6007,
                        help='Preferred port to use')
    parser.add_argument('--port-range', type=str, default='6000-7000',
                        help='Port range to try if preferred port is unavailable (format: min-max)')
    parser.add_argument('--host', type=str, default='localhost',
                        help='Host to bind to')
    parser.add_argument('--no-browser', action='store_true',
                        help='Do not open browser automatically')
    parser.add_argument('--kill-stale', action='store_true',
                        help='Kill stale debug instances before starting')

    args = parser.parse_args()

    # Parse port range. A wrong number of parts also raises ValueError
    # (tuple unpacking), so one handler covers every malformed input.
    try:
        min_port, max_port = map(int, args.port_range.split('-'))
    except ValueError:
        logger.error(f"Invalid port range format: {args.port_range}. Use format: min-max")
        return 1

    # A reversed range would otherwise surface later as a misleading
    # "no available ports" error.
    if min_port > max_port:
        logger.error(f"Invalid port range format: {args.port_range}. Use format: min-max")
        return 1

    # Kill stale instances if requested
    if args.kill_stale:
        logger.info("Killing stale debug instances...")
        count, _ = kill_stale_debug_instances()
        logger.info(f"Killed {count} stale instances")

    # Get an available port
    try:
        port = get_port_with_fallback(args.preferred_port, min_port, max_port)
    except RuntimeError as e:
        logger.error(str(e))
        return 1
    logger.info(f"Using port {port} for TensorBoard")

    # Ensure log directory exists
    logdir = os.path.abspath(args.logdir)
    os.makedirs(logdir, exist_ok=True)

    # Launch TensorBoard
    process = launch_tensorboard(
        logdir=logdir,
        port=port,
        host=args.host,
        open_browser=not args.no_browser,
    )

    # Wait for process to end (it shouldn't unless there's an error or user kills it)
    try:
        return_code = process.wait()
    except KeyboardInterrupt:
        logger.info("Received keyboard interrupt, shutting down TensorBoard...")
        process.terminate()
        try:
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            logger.warning("TensorBoard didn't terminate gracefully, forcing kill")
            process.kill()
            # Reap the child so it does not linger as a zombie.
            process.wait()
        return 0

    if return_code != 0:
        logger.error(f"TensorBoard exited with code {return_code}")
        return return_code

    return 0


if __name__ == "__main__":
    sys.exit(main())
|
238
utils/port_manager.py
Normal file
238
utils/port_manager.py
Normal file
@ -0,0 +1,238 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Port Management Utility
|
||||
|
||||
This script provides utilities to:
|
||||
1. Find available ports in a specified range
|
||||
2. Kill stale processes running on specific ports
|
||||
3. Kill all debug/training instances
|
||||
|
||||
Usage:
|
||||
- As a module: import port_manager and use its functions
|
||||
- Directly: python port_manager.py --kill-stale --min-port 6000 --max-port 7000
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import socket
|
||||
import argparse
|
||||
import psutil
|
||||
import logging
|
||||
import time
|
||||
import signal
|
||||
from typing import List, Tuple, Optional, Set
|
||||
|
||||
# Root logging setup plus the named logger used throughout this module.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger('port_manager')

# Substrings looked for in a process's command line (or its name when the
# command line is empty) to decide whether it is a stale debug/training
# instance that should be killed.
DEBUG_PROCESS_KEYWORDS = [
    'tensorboard',
    'python train_',
    'realtime.py',
    'train_rl_with_realtime.py',
]
|
||||
|
||||
def is_port_in_use(port: int, host: str = 'localhost', timeout: float = 1.0) -> bool:
    """
    Check if a port is in use

    "In use" means that an IPv4 TCP connection attempt to ``host:port``
    succeeds, i.e. something is accepting connections there.

    Args:
        port (int): Port number to check
        host (str): Host to probe (defaults to ``'localhost'``)
        timeout (float): Maximum number of seconds to wait for the connection
            attempt; an attempt that times out is reported as "not in use"

    Returns:
        bool: True if port is in use, False otherwise
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        # Bound the probe so one unresponsive port cannot stall a range scan.
        s.settimeout(timeout)
        # connect_ex returns 0 on success and an errno value otherwise.
        return s.connect_ex((host, port)) == 0
|
||||
|
||||
def find_available_port(start_port: int, end_port: int) -> Optional[int]:
    """
    Return the lowest port in ``[start_port, end_port]`` that is not in use.

    Args:
        start_port (int): Lower bound of port range (inclusive)
        end_port (int): Upper bound of port range (inclusive)

    Returns:
        Optional[int]: First free port found, or None when every port in the
        range is in use (or the range is empty)
    """
    candidates = range(start_port, end_port + 1)
    # Ports are probed lazily in ascending order; stop at the first free one.
    return next((port for port in candidates if not is_port_in_use(port)), None)
|
||||
|
||||
def get_process_by_port(port: int) -> List[psutil.Process]:
    """
    Get processes using a specific port

    A process matches when at least one of its inet connections has ``port``
    as its *local* port. Each matching process appears once in the result,
    no matter how many of its connections use the port. Processes that
    disappear or cannot be inspected are skipped.

    Args:
        port (int): Port number to check

    Returns:
        List[psutil.Process]: List of processes using the port
    """
    processes = []
    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        # Prefer net_connections() where psutil provides it and fall back to
        # connections() otherwise.
        list_connections = getattr(proc, 'net_connections', None) or proc.connections
        try:
            connections = list_connections(kind='inet')
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            continue

        # ``conn.laddr`` is checked for truthiness first so that a connection
        # entry without a local address cannot raise AttributeError.
        # any() stops at the first hit, so a server with many accepted
        # connections on the port is still listed only once.
        if any(conn.laddr and conn.laddr.port == port for conn in connections):
            processes.append(proc)
    return processes
|
||||
|
||||
def kill_process_by_port(port: int) -> Tuple[int, List[str]]:
    """
    Kill processes using a specific port

    Every process found on the port is first asked to terminate. Processes
    still alive after a short grace period are force-killed, and the function
    waits briefly for those as well, so the port is more likely to be free by
    the time it returns.

    Args:
        port (int): Port number to check

    Returns:
        Tuple[int, List[str]]: Count of processes that were sent a terminate
        request and their command lines (or names when the command line is
        empty)
    """
    # How long to wait after terminate(), and again after kill().
    grace_seconds = 0.5

    # De-duplicate by pid so no process is signalled or counted twice.
    targets = list({proc.pid: proc for proc in get_process_by_port(port)}.values())
    killed = []

    for proc in targets:
        try:
            cmdline = proc.cmdline()
            proc_name = " ".join(cmdline) if cmdline else proc.name()
            logger.info(f"Terminating process {proc.pid}: {proc_name}")
            proc.terminate()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
        killed.append(proc_name)

    if targets:
        # wait_procs returns as soon as everything is gone, so the common
        # case is faster than an unconditional sleep.
        _, survivors = psutil.wait_procs(targets, timeout=grace_seconds)

        # Force kill any remaining processes
        for proc in survivors:
            try:
                logger.info(f"Force killing process {proc.pid}")
                proc.kill()
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                pass

        if survivors:
            # Give the kernel a moment to tear the killed processes down so
            # their sockets are released before the caller re-checks the port.
            psutil.wait_procs(survivors, timeout=grace_seconds)

    return len(killed), killed
|
||||
|
||||
def kill_stale_debug_instances() -> Tuple[int, Set[str]]:
    """
    Kill all stale debug and training instances based on process names

    A process is a target when its command line (or its name, when the
    command line is empty) contains any entry of ``DEBUG_PROCESS_KEYWORDS``.
    The current process and its ancestors are never targeted: a launcher
    whose own command line contains a keyword (for example a script named
    ``launch_tensorboard.py``) would otherwise terminate itself.

    Targets are asked to terminate first; those still alive after a short
    grace period are force-killed. Only processes that this call asked to
    terminate are force-killed.

    Returns:
        Tuple[int, Set[str]]: Count of processes that were sent a terminate
        request and the set of their command lines/names
    """
    # Collect the pids that must never be signalled: this process and every
    # ancestor (shell, task runner, ...).
    protected_pids = {os.getpid()}
    try:
        ancestor = psutil.Process().parent()
        while ancestor is not None and ancestor.pid not in protected_pids:
            protected_pids.add(ancestor.pid)
            ancestor = ancestor.parent()
    except psutil.Error:
        # Best effort: an unreadable ancestor chain must not abort the cleanup.
        pass

    killed_count = 0
    killed_procs = set()
    terminated = []

    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            if proc.pid in protected_pids:
                continue

            cmdline = proc.cmdline()
            cmd = " ".join(cmdline) if cmdline else proc.name()

            # Check if this is a debug/training process we should kill
            if not any(keyword in cmd for keyword in DEBUG_PROCESS_KEYWORDS):
                continue

            logger.info(f"Terminating stale process {proc.pid}: {cmd}")
            proc.terminate()
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            continue

        killed_count += 1
        killed_procs.add(cmd)
        terminated.append(proc)

    if terminated:
        # Give processes time to terminate; returns early once all are gone.
        _, survivors = psutil.wait_procs(terminated, timeout=1)

        # Force kill any remaining processes
        for proc in survivors:
            try:
                logger.info(f"Force killing stale process {proc.pid}")
                proc.kill()
            except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                pass

    return killed_count, killed_procs
|
||||
|
||||
def get_port_with_fallback(preferred_port: int, min_port: int, max_port: int) -> int:
    """
    Try to use preferred port, fall back to any available port in range

    Strategy, in order:
      1. Return ``preferred_port`` if it is free.
      2. Otherwise terminate whatever process holds ``preferred_port`` and
         return it if that freed the port.
      3. Otherwise return the first free port in ``[min_port, max_port]``.
      4. Otherwise terminate the processes on *every* port in the range and
         search the range once more.

    Note that steps 2 and 4 kill other processes as a side effect.

    Args:
        preferred_port (int): Preferred port to use
        min_port (int): Minimum port in fallback range
        max_port (int): Maximum port in fallback range

    Returns:
        int: Available port number

    Raises:
        RuntimeError: If no port in the range is free even after step 4
    """
    # First try the preferred port
    if not is_port_in_use(preferred_port):
        return preferred_port

    # If preferred port is in use, try to free it
    logger.info(f"Preferred port {preferred_port} is in use, attempting to free it")
    kill_count, _ = kill_process_by_port(preferred_port)

    if kill_count > 0 and not is_port_in_use(preferred_port):
        logger.info(f"Successfully freed port {preferred_port}")
        return preferred_port

    # If we couldn't free the preferred port, find another available port
    logger.info(f"Looking for available port in range {min_port}-{max_port}")
    available_port = find_available_port(min_port, max_port)

    # Compare against None explicitly: a plain truthiness test would treat a
    # returned port number of 0 as "nothing found".
    if available_port is not None:
        logger.info(f"Using alternative port: {available_port}")
        return available_port

    # If no ports are available, force kill processes in the entire range
    logger.warning(f"No available ports in range {min_port}-{max_port}, freeing ports")
    for port in range(min_port, max_port + 1):
        kill_process_by_port(port)

    # Try again
    available_port = find_available_port(min_port, max_port)
    if available_port is not None:
        logger.info(f"Using port {available_port} after freeing")
        return available_port

    logger.error(f"Could not find available port even after freeing range {min_port}-{max_port}")
    raise RuntimeError(f"No available ports in range {min_port}-{max_port}")
|
||||
|
||||
if __name__ == '__main__':
    # Command-line front end. The actions are independent and run in this
    # order when combined: kill stale instances, free a port, find a port.
    parser = argparse.ArgumentParser(description='Port management utility')
    parser.add_argument('--kill-stale', action='store_true', help='Kill all stale debug instances')
    parser.add_argument('--free-port', type=int, help='Free a specific port')
    parser.add_argument('--find-port', action='store_true', help='Find an available port')
    parser.add_argument('--min-port', type=int, default=6000, help='Minimum port in range')
    parser.add_argument('--max-port', type=int, default=7000, help='Maximum port in range')
    parser.add_argument('--preferred-port', type=int, help='Preferred port to use')

    args = parser.parse_args()

    if args.kill_stale:
        count, procs = kill_stale_debug_instances()
        logger.info(f"Killed {count} stale processes")
        for proc in procs:
            logger.info(f"  - {proc}")

    # "is not None" rather than truthiness, so an explicitly supplied option
    # is never silently ignored.
    if args.free_port is not None:
        count, killed = kill_process_by_port(args.free_port)
        logger.info(f"Killed {count} processes using port {args.free_port}")
        for proc in killed:
            logger.info(f"  - {proc}")

    if args.find_port or args.preferred_port is not None:
        # NOTE(review): with --find-port alone, min_port is used as the
        # preferred port, so get_port_with_fallback may terminate the process
        # that holds min_port. Confirm this is intended for a "find" action.
        preferred = args.preferred_port if args.preferred_port is not None else args.min_port
        try:
            port = get_port_with_fallback(preferred, args.min_port, args.max_port)
        except RuntimeError:
            # get_port_with_fallback has already logged the failure; exit with
            # a non-zero status instead of a traceback so scripts can detect it.
            sys.exit(1)
        print(port)  # Print only the port number for easy capture in scripts
|
Reference in New Issue
Block a user