gogo2/utils/launch_tensorboard.py
Dobromir Popov c0872248ab misc
2025-05-13 17:19:52 +03:00

164 lines
5.3 KiB
Python

#!/usr/bin/env python3
"""
TensorBoard Launcher with Automatic Port Management
This script launches TensorBoard with automatic port fallback if the preferred port is in use.
When run with --kill-stale, it also kills any stale debug instances that might be running before launching.
Usage:
python launch_tensorboard.py --logdir=path/to/logs --preferred-port=6007 --port-range=6000-7000
"""
import os
import sys
import subprocess
import argparse
import logging
from pathlib import Path
# Make the project root (the parent of this file's directory) importable so the
# ``utils`` package below resolves when this script is executed directly.
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
if project_root not in sys.path:
    sys.path.append(project_root)

# Must come after the sys.path adjustment above.
from utils.port_manager import get_port_with_fallback, kill_stale_debug_instances  # noqa: E402

# Root logging setup: INFO and above, with timestamp, logger name and level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger('tensorboard_launcher')
def _extract_tensorboard_url(line):
    """Return the ``http://`` URL announced in a TensorBoard output line, or None."""
    if "TensorBoard" not in line or "http://" not in line:
        return None
    address = line.strip().split("http://")[1].split(" ")[0]
    return f"http://{address}"


def launch_tensorboard(logdir, port, host='localhost', open_browser=True):
    """
    Launch TensorBoard as a child process on the specified port.

    The child's combined stdout/stderr is logged until TensorBoard announces
    its URL (or at most 10 lines have been read, or the child exits). After
    that, the remaining output is drained by a daemon thread so the pipe can
    never fill up and stall the child.

    Args:
        logdir (str): Path to log directory
        port (int): Port to use
        host (str): Host to bind to
        open_browser (bool): Whether to open the announced URL in the default
            web browser once TensorBoard reports it

    Returns:
        subprocess.Popen: Process object of the running TensorBoard child
    """
    # Local imports: only this function needs them.
    import threading
    import webbrowser

    max_startup_lines = 10

    cmd = [
        sys.executable, "-m", "tensorboard.main",
        f"--logdir={logdir}",
        f"--port={port}",
        f"--host={host}",
    ]
    # Use TensorBoard's standard data loader instead of the optional fast one.
    # NOTE(review): the original comment claimed this improves startup time;
    # confirm against the TensorBoard version in use.
    cmd.append("--load_fast=false")
    # Kept for backward compatibility. This flag only sets the page title;
    # it has no influence on whether a browser is opened.
    if not open_browser:
        cmd.append("--window_title=TensorBoard")

    logger.info(f"Launching TensorBoard: {' '.join(cmd)}")

    # Start TensorBoard without waiting for it to finish. stderr is merged
    # into stdout so a single line-buffered text pipe carries everything.
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
        bufsize=1,
    )

    # Log the startup output. Stop as soon as the URL shows up: TensorBoard
    # usually prints only a handful of lines, so waiting for a fixed count
    # would block here for as long as the server keeps running.
    for line_count, line in enumerate(process.stdout, start=1):
        logger.info(f"TensorBoard: {line.strip()}")
        url = _extract_tensorboard_url(line)
        if url is not None:
            logger.info(f"TensorBoard available at: {url}")
            if open_browser:
                try:
                    webbrowser.open(url)
                except webbrowser.Error as exc:
                    logger.warning(f"Could not open browser: {exc}")
            break
        if line_count >= max_startup_lines:
            break

    # Continue reading output in background to prevent pipe from filling.
    def read_output():
        for remaining in process.stdout:
            logger.debug("TensorBoard: %s", remaining.rstrip())

    threading.Thread(target=read_output, daemon=True).start()
    return process
def main():
    """
    Command-line entry point: pick a free port and run TensorBoard on it.

    Parses the command line, optionally kills stale debug instances, obtains
    a port via ``get_port_with_fallback``, makes sure the log directory
    exists, launches TensorBoard and then waits for it to exit.

    Returns:
        int: 1 if the port range is invalid or no port could be obtained;
        TensorBoard's own return code when it exits by itself; 0 after a
        keyboard interrupt.
    """
    parser = argparse.ArgumentParser(description='Launch TensorBoard with automatic port management')
    parser.add_argument('--logdir', type=str, default='NN/models/saved/logs',
                        help='Directory containing TensorBoard event files')
    parser.add_argument('--preferred-port', type=int, default=6007,
                        help='Preferred port to use')
    parser.add_argument('--port-range', type=str, default='6000-7000',
                        help='Port range to try if preferred port is unavailable (format: min-max)')
    parser.add_argument('--host', type=str, default='localhost',
                        help='Host to bind to')
    parser.add_argument('--no-browser', action='store_true',
                        help='Do not open browser automatically')
    parser.add_argument('--kill-stale', action='store_true',
                        help='Kill stale debug instances before starting')
    args = parser.parse_args()

    # Parse port range. ValueError covers both non-numeric parts and a wrong
    # number of '-'-separated fields (tuple unpacking failure).
    try:
        min_port, max_port = map(int, args.port_range.split('-'))
    except ValueError:
        logger.error(f"Invalid port range format: {args.port_range}. Use format: min-max")
        return 1
    # Reject reversed or out-of-range bounds up front with a clear message.
    if not 0 < min_port <= max_port <= 65535:
        logger.error(f"Invalid port range: {args.port_range}. Expected 1 <= min <= max <= 65535")
        return 1

    # Kill stale instances if requested. Only the first element of the
    # returned pair is used here, as the number of killed instances.
    if args.kill_stale:
        logger.info("Killing stale debug instances...")
        count, _ = kill_stale_debug_instances()
        logger.info(f"Killed {count} stale instances")

    # Get an available port
    try:
        port = get_port_with_fallback(args.preferred_port, min_port, max_port)
    except RuntimeError as e:
        logger.error(str(e))
        return 1
    logger.info(f"Using port {port} for TensorBoard")

    # Ensure log directory exists
    logdir = os.path.abspath(args.logdir)
    os.makedirs(logdir, exist_ok=True)

    # Launch TensorBoard
    process = launch_tensorboard(
        logdir=logdir,
        port=port,
        host=args.host,
        open_browser=not args.no_browser,
    )

    # Wait for process to end (it shouldn't unless there's an error or user kills it)
    try:
        return_code = process.wait()
    except KeyboardInterrupt:
        logger.info("Received keyboard interrupt, shutting down TensorBoard...")
        process.terminate()
        try:
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            logger.warning("TensorBoard didn't terminate gracefully, forcing kill")
            process.kill()
            # Reap the killed child so it does not linger as a zombie.
            process.wait()
        return 0

    if return_code != 0:
        logger.error(f"TensorBoard exited with code {return_code}")
    return return_code
# Script entry point: propagate main()'s result as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())