This commit is contained in:
Dobromir Popov
2025-05-13 17:19:52 +03:00
parent 7dda00b64a
commit c0872248ab
60 changed files with 42085 additions and 6885 deletions

49
utils/README.md Normal file
View File

@ -0,0 +1,49 @@
# Utility Scripts
This directory contains utility scripts for managing the trading bot development environment.
## Port Management Utility
The `port_manager.py` script provides utilities for managing ports and killing stale processes:
```bash
# Kill all stale debug instances
python utils/port_manager.py --kill-stale
# Free a specific port
python utils/port_manager.py --free-port 6007
# Find an available port in a range
python utils/port_manager.py --find-port --min-port 6000 --max-port 7000
# Try to use a preferred port (any process currently holding it is terminated first); fall back to a free port in the range otherwise
python utils/port_manager.py --preferred-port 6007 --min-port 6000 --max-port 7000
```
## TensorBoard Launcher
The `launch_tensorboard.py` script launches TensorBoard with automatic port management:
```bash
# Launch TensorBoard with default settings
python utils/launch_tensorboard.py
# Launch with custom log directory and port range
python utils/launch_tensorboard.py --logdir=path/to/logs --preferred-port=6007 --port-range=6000-7000
# Launch and kill stale processes first
python utils/launch_tensorboard.py --kill-stale
```
## Integration with VSCode Tasks
These utilities are integrated with VSCode tasks in `.vscode/tasks.json`:
1. **Start TensorBoard**: Launches TensorBoard with automatic port management
2. **Kill Stale Processes**: Kills all stale debug instances
You can run these tasks from the VSCode command palette (Ctrl+Shift+P) by typing "Tasks: Run Task" and selecting the task.
## Requirements
These utilities require the `psutil` package, which is included in the project's `requirements.txt` file.

19
utils/__init__.py Normal file
View File

@ -0,0 +1,19 @@
"""
Utility functions for port management, launching services, and debug tools.
"""
from utils.port_manager import (
is_port_in_use,
find_available_port,
kill_process_by_port,
kill_stale_debug_instances,
get_port_with_fallback
)
# Names exported by `from utils import *`; each one is imported from
# utils.port_manager above.
__all__ = [
'is_port_in_use',
'find_available_port',
'kill_process_by_port',
'kill_stale_debug_instances',
'get_port_with_fallback'
]

164
utils/launch_tensorboard.py Normal file
View File

@ -0,0 +1,164 @@
#!/usr/bin/env python3
"""
TensorBoard Launcher with Automatic Port Management
This script launches TensorBoard with automatic port fallback if the preferred port is in use.
When started with --kill-stale, it first kills any stale debug instances that might be running.
Usage:
python launch_tensorboard.py --logdir=path/to/logs --preferred-port=6007 --port-range=6000-7000
"""
import os
import sys
import subprocess
import argparse
import logging
from pathlib import Path
# Make the directory two levels above this file importable, so that the
# `utils` package resolves even when this file is executed directly.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if project_root not in sys.path:
    sys.path.append(project_root)
from utils.port_manager import get_port_with_fallback, kill_stale_debug_instances
# Root logging: timestamped INFO-level records; `logger` is this script's
# own named logger.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger('tensorboard_launcher')
def launch_tensorboard(logdir, port, host='localhost', open_browser=True):
    """
    Launch TensorBoard as a child process and return once it has started

    The child's combined stdout/stderr is echoed to the log until TensorBoard
    announces its URL (or 10 lines have been seen, or the stream ends). The
    remaining output is then drained by a daemon thread so the pipe can never
    fill up and stall the child.

    Args:
        logdir (str): Path to log directory
        port (int): Port to use
        host (str): Host to bind to
        open_browser (bool): When False, ``--window_title=TensorBoard`` is
            appended to the command line. NOTE(review): nothing in this
            function opens a browser in either case, and the flag name
            suggests it only sets the page title - confirm the intent.

    Returns:
        subprocess.Popen: Process object of the running TensorBoard child
    """
    import threading  # kept function-local; the module does not import it

    cmd = [
        sys.executable, "-m", "tensorboard.main",
        f"--logdir={logdir}",
        f"--port={port}",
        f"--host={host}",
        # NOTE(review): presumably a workaround that switches off
        # TensorBoard's fast loading path - confirm it is still wanted.
        "--load_fast=false",
    ]
    if not open_browser:
        cmd.append("--window_title=TensorBoard")

    logger.info(f"Launching TensorBoard: {' '.join(cmd)}")

    # Start TensorBoard without waiting for it to finish; stderr is merged
    # into stdout so a single reader sees everything.
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
        bufsize=1
    )

    # Echo startup output. Stop as soon as the URL line is seen: TensorBoard
    # normally prints fewer than 10 lines, so waiting for the line cap alone
    # would block here until the child exits.
    for line_count, line in enumerate(process.stdout, start=1):
        text = line.strip()
        logger.info(f"TensorBoard: {text}")
        if "TensorBoard" in line and "http://" in line:
            url = text.split("http://")[1].split(" ")[0]
            logger.info(f"TensorBoard available at: http://{url}")
            break
        if line_count >= 10:
            break

    def read_output():
        # Discard everything else the child prints
        for _ in process.stdout:
            pass

    threading.Thread(target=read_output, daemon=True).start()
    return process
def main():
    """
    Command-line entry point: pick a port, start TensorBoard and wait for it

    Returns:
        int: 1 if the port range is malformed or no port could be obtained,
        0 after a keyboard interrupt, otherwise TensorBoard's own exit code
    """
    cli = argparse.ArgumentParser(description='Launch TensorBoard with automatic port management')
    cli.add_argument('--logdir', type=str, default='NN/models/saved/logs',
                     help='Directory containing TensorBoard event files')
    cli.add_argument('--preferred-port', type=int, default=6007,
                     help='Preferred port to use')
    cli.add_argument('--port-range', type=str, default='6000-7000',
                     help='Port range to try if preferred port is unavailable (format: min-max)')
    cli.add_argument('--host', type=str, default='localhost',
                     help='Host to bind to')
    cli.add_argument('--no-browser', action='store_true',
                     help='Do not open browser automatically')
    cli.add_argument('--kill-stale', action='store_true',
                     help='Kill stale debug instances before starting')
    args = cli.parse_args()

    # The range must look like "min-max"; a wrong number of parts or a
    # non-integer part both surface as ValueError.
    try:
        min_port, max_port = (int(bound) for bound in args.port_range.split('-'))
    except ValueError:
        logger.error(f"Invalid port range format: {args.port_range}. Use format: min-max")
        return 1

    # Optional clean-up of leftovers from earlier runs
    if args.kill_stale:
        logger.info("Killing stale debug instances...")
        count = kill_stale_debug_instances()[0]
        logger.info(f"Killed {count} stale instances")

    # Settle on a port, giving up if none can be obtained
    try:
        port = get_port_with_fallback(args.preferred_port, min_port, max_port)
        logger.info(f"Using port {port} for TensorBoard")
    except RuntimeError as e:
        logger.error(str(e))
        return 1

    # The log directory is created on demand
    logdir = os.path.abspath(args.logdir)
    os.makedirs(logdir, exist_ok=True)

    process = launch_tensorboard(
        logdir=logdir,
        port=port,
        host=args.host,
        open_browser=not args.no_browser,
    )

    # Block until TensorBoard ends; Ctrl+C shuts it down, escalating from
    # terminate to kill after five seconds.
    try:
        return_code = process.wait()
        if return_code != 0:
            logger.error(f"TensorBoard exited with code {return_code}")
        return return_code
    except KeyboardInterrupt:
        logger.info("Received keyboard interrupt, shutting down TensorBoard...")
        process.terminate()
        try:
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            logger.warning("TensorBoard didn't terminate gracefully, forcing kill")
            process.kill()
        return 0


if __name__ == "__main__":
    raise SystemExit(main())

238
utils/port_manager.py Normal file
View File

@ -0,0 +1,238 @@
#!/usr/bin/env python3
"""
Port Management Utility
This script provides utilities to:
1. Find available ports in a specified range
2. Kill stale processes running on specific ports
3. Kill all debug/training instances
Usage:
- As a module: import port_manager and use its functions
- Directly: python port_manager.py --kill-stale --min-port 6000 --max-port 7000
"""
import os
import sys
import socket
import argparse
import psutil
import logging
import time
import signal
from typing import List, Tuple, Optional, Set
# Root logging: timestamped INFO-level records; `logger` is this module's
# own named logger.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger('port_manager')
# Command-line fragments that mark a process as a debug/training instance.
# kill_stale_debug_instances() treats a process as stale when any of these
# occurs as a substring of its command line.
DEBUG_PROCESS_KEYWORDS = [
    'tensorboard',
    'python train_',
    'realtime.py',
    'train_rl_with_realtime.py',
]
def is_port_in_use(port: int) -> bool:
    """
    Report whether something accepts TCP connections on a local port

    Args:
        port (int): Port number to probe on localhost (IPv4)

    Returns:
        bool: True if the connection attempt succeeds, False otherwise
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # connect_ex returns an error code instead of raising; 0 means success
        status = probe.connect_ex(('localhost', port))
    finally:
        probe.close()
    return status == 0
def find_available_port(start_port: int, end_port: int) -> Optional[int]:
    """
    Return the lowest port in an inclusive range that is not in use

    Args:
        start_port (int): Lower bound of port range
        end_port (int): Upper bound of port range (included in the search)

    Returns:
        Optional[int]: First free port found, or None if every port is taken
    """
    candidates = range(start_port, end_port + 1)
    return next((port for port in candidates if not is_port_in_use(port)), None)
def get_process_by_port(port: int) -> List[psutil.Process]:
    """
    Get processes that have an inet socket on a specific local port

    Args:
        port (int): Local port number to look for

    Returns:
        List[psutil.Process]: Processes using the port. Each process appears
        at most once, even when it holds several sockets on that port.
    """
    processes = []
    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            # psutil 6.0 deprecates Process.connections() in favour of
            # net_connections(); use the new name when it exists.
            list_connections = getattr(proc, 'net_connections', None) or proc.connections
            # any() stops at the first matching socket, so a process is
            # recorded once rather than once per socket.
            if any(conn.laddr and conn.laddr.port == port
                   for conn in list_connections(kind='inet')):
                processes.append(proc)
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # Best effort: processes that vanished or cannot be inspected
            # are skipped.
            pass
    return processes
def kill_process_by_port(port: int) -> Tuple[int, List[str]]:
    """
    Kill processes using a specific port

    Each process is first asked to terminate. Any that is still alive after a
    0.5 second grace period is force-killed, and the function then waits
    briefly for it to disappear so the port is released before returning.

    Args:
        port (int): Port number to check

    Returns:
        Tuple[int, List[str]]: Count of processes that were sent a terminate
        request and their command lines (or names when no command line is
        available)
    """
    killed = []
    signalled = []
    seen_pids = set()
    for proc in get_process_by_port(port):
        # A process may be reported more than once; signal and count it once
        if proc.pid in seen_pids:
            continue
        seen_pids.add(proc.pid)
        try:
            cmdline = proc.cmdline()
            proc_name = " ".join(cmdline) if cmdline else proc.name()
            logger.info(f"Terminating process {proc.pid}: {proc_name}")
            proc.terminate()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # Already gone or not ours to signal: nothing to record
            continue
        killed.append(proc_name)
        signalled.append(proc)

    # Wait for graceful exits; this returns early once every process is gone
    _, alive = psutil.wait_procs(signalled, timeout=0.5)

    # Force kill any remaining processes
    for proc in alive:
        try:
            logger.info(f"Force killing process {proc.pid}")
            proc.kill()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            pass
    if alive:
        # Let the kill take effect before callers re-check the port
        psutil.wait_procs(alive, timeout=0.5)

    return len(killed), killed
def kill_stale_debug_instances() -> Tuple[int, Set[str]]:
    """
    Kill all stale debug and training instances based on their command lines

    A process is considered stale when its command line (or its name, if no
    command line is available) contains any entry of DEBUG_PROCESS_KEYWORDS.
    The calling process and its ancestors are never touched, even if they
    match: a launcher whose own command line contains a keyword would
    otherwise terminate itself or the shell that started it.

    Matching processes are asked to terminate. Those still alive one second
    later are force-killed. Only processes signalled in the first step are
    force-killed, so a process started in the meantime is left alone.

    Returns:
        Tuple[int, Set[str]]: Count of processes that were sent a terminate
        request and the set of their command lines
    """
    protected_pids = {os.getpid()}
    try:
        protected_pids.update(parent.pid for parent in psutil.Process().parents())
    except (psutil.Error, AttributeError):
        # Best effort: the ancestor chain may be unreadable, and
        # Process.parents() does not exist on older psutil releases.
        pass

    killed_count = 0
    killed_procs = set()
    signalled = []
    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        if proc.pid in protected_pids:
            continue
        try:
            cmdline = proc.cmdline()
            cmd = " ".join(cmdline) if cmdline else proc.name()
            # Check if this is a debug/training process we should kill
            if not any(keyword in cmd for keyword in DEBUG_PROCESS_KEYWORDS):
                continue
            logger.info(f"Terminating stale process {proc.pid}: {cmd}")
            proc.terminate()
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            continue
        killed_count += 1
        killed_procs.add(cmd)
        signalled.append(proc)

    # Give processes time to terminate; returns early once all are gone
    _, alive = psutil.wait_procs(signalled, timeout=1)

    # Force kill any remaining processes
    for proc in alive:
        try:
            logger.info(f"Force killing stale process {proc.pid}")
            proc.kill()
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            pass

    return killed_count, killed_procs
def get_port_with_fallback(preferred_port: int, min_port: int, max_port: int) -> int:
    """
    Try to use preferred port, fall back to any available port in range

    WARNING: this function frees ports by killing processes. If the preferred
    port is busy, the processes using it are killed. If the whole fallback
    range is busy, the processes on every port in the range are killed before
    one last search.

    Args:
        preferred_port (int): Preferred port to use
        min_port (int): Minimum port in fallback range
        max_port (int): Maximum port in fallback range (inclusive)

    Returns:
        int: Available port number

    Raises:
        RuntimeError: If no port in the range is available even after the
            range has been freed
    """
    # First try the preferred port
    if not is_port_in_use(preferred_port):
        return preferred_port

    # If preferred port is in use, try to free it
    logger.info(f"Preferred port {preferred_port} is in use, attempting to free it")
    kill_count, _ = kill_process_by_port(preferred_port)
    if kill_count > 0 and not is_port_in_use(preferred_port):
        logger.info(f"Successfully freed port {preferred_port}")
        return preferred_port

    # If we couldn't free the preferred port, find another available port.
    # The result is Optional[int], so compare against None explicitly
    # rather than relying on truthiness.
    logger.info(f"Looking for available port in range {min_port}-{max_port}")
    available_port = find_available_port(min_port, max_port)
    if available_port is not None:
        logger.info(f"Using alternative port: {available_port}")
        return available_port

    # If no ports are available, force kill processes in the entire range
    logger.warning(f"No available ports in range {min_port}-{max_port}, freeing ports")
    for port in range(min_port, max_port + 1):
        kill_process_by_port(port)

    # Try again
    available_port = find_available_port(min_port, max_port)
    if available_port is not None:
        logger.info(f"Using port {available_port} after freeing")
        return available_port

    logger.error(f"Could not find available port even after freeing range {min_port}-{max_port}")
    raise RuntimeError(f"No available ports in range {min_port}-{max_port}")
if __name__ == '__main__':
    # Command-line front end; the selected actions run in this order:
    # kill stale instances, free one port, then pick a port.
    parser = argparse.ArgumentParser(description='Port management utility')
    for flag, options in (
        ('--kill-stale', dict(action='store_true', help='Kill all stale debug instances')),
        ('--free-port', dict(type=int, help='Free a specific port')),
        ('--find-port', dict(action='store_true', help='Find an available port')),
        ('--min-port', dict(type=int, default=6000, help='Minimum port in range')),
        ('--max-port', dict(type=int, default=7000, help='Maximum port in range')),
        ('--preferred-port', dict(type=int, help='Preferred port to use')),
    ):
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    if args.kill_stale:
        count, procs = kill_stale_debug_instances()
        logger.info(f"Killed {count} stale processes")
        for proc in procs:
            logger.info(f" - {proc}")

    if args.free_port:
        count, killed = kill_process_by_port(args.free_port)
        logger.info(f"Killed {count} processes using port {args.free_port}")
        for proc in killed:
            logger.info(f" - {proc}")

    if args.find_port or args.preferred_port:
        # With no preferred port given, the search starts at the range minimum
        preferred = args.preferred_port or args.min_port
        port = get_port_with_fallback(preferred, args.min_port, args.max_port)
        print(port)  # Print only the port number for easy capture in scripts