164 lines
5.3 KiB
Python
164 lines
5.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
TensorBoard Launcher with Automatic Port Management

This script launches TensorBoard and automatically falls back to another port
from the configured range if the preferred port is in use.
When --kill-stale is passed, it also kills any stale debug instances before
starting.

Usage:
    python launch_tensorboard.py --logdir=path/to/logs --preferred-port=6007 --port-range=6000-7000 [--kill-stale]
"""
|
|
|
|
import argparse
import logging
import os
import subprocess
import sys
import threading
import webbrowser
from pathlib import Path
|
|
|
|
# Make the project root (the parent of the directory holding this script)
# importable, so the project-local `utils` package imported below resolves
# when the script is run directly.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if project_root not in sys.path:
    sys.path.append(project_root)

# This import has to come after the sys.path adjustment above.
from utils.port_manager import (
    get_port_with_fallback,
    kill_stale_debug_instances,
)
|
|
|
|
# Root logging setup: INFO and above, with timestamp, logger name and level
# in front of every message.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-wide logger used by the launcher functions below.
logger = logging.getLogger('tensorboard_launcher')
|
|
|
|
def launch_tensorboard(logdir, port, host='localhost', open_browser=True):
    """
    Launch TensorBoard as a child process on the specified port.

    TensorBoard's stdout and stderr are merged into one pipe. The first lines
    of that output are logged until the startup banner containing the serving
    URL is seen, a fixed number of lines has been read, or the output ends.
    The function then returns while TensorBoard keeps running; whatever it
    prints afterwards is read and discarded by a daemon thread so the pipe
    cannot fill up and stall the child.

    Args:
        logdir (str): Path to log directory
        port (int): Port to use
        host (str): Host to bind to
        open_browser (bool): Whether to open the serving URL in the default
            web browser once TensorBoard has reported it

    Returns:
        subprocess.Popen: Process object of the running TensorBoard instance
    """
    # Upper bound on how many startup lines are echoed to the log.
    max_startup_lines = 10

    cmd = [
        sys.executable, "-m", "tensorboard.main",
        f"--logdir={logdir}",
        f"--port={port}",
        f"--host={host}",
        # Turn off TensorBoard's optional "fast" data-loading mode.
        # NOTE(review): presumably kept for compatibility/stability rather
        # than speed -- confirm the intent.
        "--load_fast=false",
    ]

    logger.info("Launching TensorBoard: %s", ' '.join(cmd))

    # Start TensorBoard without waiting for it to finish.
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # merge stderr so a single reader sees everything
        universal_newlines=True,
        bufsize=1,  # line-buffered text pipe
    )

    # Echo the startup output, but stop as soon as the URL banner shows up so
    # the caller gets the process back while TensorBoard is still running.
    for line_count, line in enumerate(process.stdout, start=1):
        text = line.strip()
        logger.info("TensorBoard: %s", text)

        if "TensorBoard" in text and "http://" in text:
            _, _, after_scheme = text.partition("http://")
            url = "http://" + after_scheme.split(" ", 1)[0]
            logger.info("TensorBoard available at: %s", url)

            # The command line above has no browser-related option, so the
            # launcher opens the page itself when asked to.
            if open_browser:
                try:
                    webbrowser.open(url)
                except webbrowser.Error as exc:
                    # A missing browser must not take the launcher down.
                    logger.warning("Could not open a browser for %s: %s", url, exc)
            break

        if line_count >= max_startup_lines:
            break

    def read_output():
        # Discard the remaining output; the loop ends when the pipe hits EOF.
        for _ in process.stdout:
            pass

    # Continue reading output in background to prevent the pipe from filling.
    threading.Thread(target=read_output, daemon=True).start()

    return process
|
|
|
|
def main():
    """
    Command-line entry point: pick a port, start TensorBoard and wait for it.

    Steps, in order: parse the arguments, validate the port range, optionally
    kill stale debug instances (--kill-stale), obtain a port from
    get_port_with_fallback, create the log directory if it is missing, launch
    TensorBoard and block until it exits or the user presses Ctrl+C.

    Returns:
        int: 1 if the port range is malformed or invalid, or if
        get_port_with_fallback raises RuntimeError; TensorBoard's own return
        code if it exits by itself; 0 after a shutdown triggered by Ctrl+C.
    """
    parser = argparse.ArgumentParser(description='Launch TensorBoard with automatic port management')
    parser.add_argument('--logdir', type=str, default='NN/models/saved/logs',
                        help='Directory containing TensorBoard event files')
    parser.add_argument('--preferred-port', type=int, default=6007,
                        help='Preferred port to use')
    parser.add_argument('--port-range', type=str, default='6000-7000',
                        help='Port range to try if preferred port is unavailable (format: min-max)')
    parser.add_argument('--host', type=str, default='localhost',
                        help='Host to bind to')
    parser.add_argument('--no-browser', action='store_true',
                        help='Do not open browser automatically')
    parser.add_argument('--kill-stale', action='store_true',
                        help='Kill stale debug instances before starting')

    args = parser.parse_args()

    # Parse port range. Both a non-numeric part and a wrong number of parts
    # surface as ValueError (from int() or from the tuple unpacking).
    try:
        min_port, max_port = map(int, args.port_range.split('-'))
    except ValueError:
        logger.error(f"Invalid port range format: {args.port_range}. Use format: min-max")
        return 1

    # Reject inverted or out-of-range bounds here instead of passing them on.
    if not 1 <= min_port <= max_port <= 65535:
        logger.error(
            f"Invalid port range: {args.port_range}. "
            "Ports must satisfy 1 <= min <= max <= 65535"
        )
        return 1

    # Kill stale instances if requested
    if args.kill_stale:
        logger.info("Killing stale debug instances...")
        count, _ = kill_stale_debug_instances()
        logger.info(f"Killed {count} stale instances")

    # Get an available port
    try:
        port = get_port_with_fallback(args.preferred_port, min_port, max_port)
    except RuntimeError as e:
        logger.error(str(e))
        return 1
    logger.info(f"Using port {port} for TensorBoard")

    # Ensure log directory exists
    logdir = os.path.abspath(args.logdir)
    os.makedirs(logdir, exist_ok=True)

    # Launch TensorBoard
    process = launch_tensorboard(
        logdir=logdir,
        port=port,
        host=args.host,
        open_browser=not args.no_browser,
    )

    # Wait for process to end (it shouldn't unless there's an error or user kills it)
    try:
        return_code = process.wait()
    except KeyboardInterrupt:
        logger.info("Received keyboard interrupt, shutting down TensorBoard...")
        process.terminate()
        try:
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            logger.warning("TensorBoard didn't terminate gracefully, forcing kill")
            process.kill()
            # Reap the killed child so it is not left behind as a zombie.
            process.wait()
        return 0

    if return_code != 0:
        logger.error(f"TensorBoard exited with code {return_code}")
    return return_code
|
|
|
|
# Script entry point: run the launcher and use its return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())