#!/usr/bin/env python3
"""
TensorBoard Launcher with Automatic Port Management

This script launches TensorBoard with automatic port fallback if the
preferred port is in use. When --kill-stale is given, it first kills any
stale debug instances that might be running.

Usage:
    python launch_tensorboard.py --logdir=path/to/logs --preferred-port=6007 --port-range=6000-7000
"""

import argparse
import logging
import os
import re
import subprocess
import sys
import threading
import webbrowser
from pathlib import Path

# Add project root to path so that the project-local `utils` package
# resolves when this file is executed directly as a script.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if project_root not in sys.path:
    sys.path.append(project_root)

from utils.port_manager import get_port_with_fallback, kill_stale_debug_instances

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('tensorboard_launcher')

# Number of TensorBoard output lines echoed at INFO level; later lines are
# still consumed (so the pipe never fills) but only logged at DEBUG level.
_STARTUP_LOG_LINES = 10

# Seconds to wait after terminate() before escalating to kill().
_SHUTDOWN_TIMEOUT_SECONDS = 5

# Inclusive bounds accepted for --port-range.
_LOWEST_PORT = 1
_HIGHEST_PORT = 65535

# Matches the URL TensorBoard prints once it is serving (http or https).
_URL_PATTERN = re.compile(r"https?://\S+")


def _extract_tensorboard_url(line):
    """Return the URL announced in a TensorBoard output line, or None."""
    if "TensorBoard" not in line:
        return None
    match = _URL_PATTERN.search(line)
    return match.group(0) if match else None


def _report_line(line, line_number):
    """Log one line of TensorBoard output and return the URL it announces, if any."""
    text = line.strip()
    level = logging.INFO if line_number <= _STARTUP_LOG_LINES else logging.DEBUG
    logger.log(level, f"TensorBoard: {text}")
    return _extract_tensorboard_url(text)


def _announce_url(url, open_browser):
    """Log the serving URL and, when requested, open it in a web browser."""
    logger.info(f"TensorBoard available at: {url}")
    if not open_browser:
        return
    try:
        opened = webbrowser.open(url)
    except webbrowser.Error as exc:
        logger.warning(f"Could not open a browser for {url}: {exc}")
        return
    if not opened:
        logger.warning(f"No usable browser found; open {url} manually")


def _drain_output(process, lines_read, url_pending, open_browser):
    """Consume the remaining child output so the pipe never fills up."""
    for line in process.stdout:
        lines_read += 1
        url = _report_line(line, lines_read)
        if url_pending and url is not None:
            url_pending = False
            _announce_url(url, open_browser)


def _stop_process(process):
    """Terminate the child process, escalating to kill() after a timeout."""
    process.terminate()
    try:
        process.wait(timeout=_SHUTDOWN_TIMEOUT_SECONDS)
    except subprocess.TimeoutExpired:
        logger.warning("TensorBoard didn't terminate gracefully, forcing kill")
        process.kill()
        # Reap the killed child so it does not linger as a zombie.
        process.wait()


def _parse_port_range(spec):
    """
    Parse a 'min-max' port range specification

    Args:
        spec (str): Range in the form 'min-max', e.g. '6000-7000'

    Returns:
        tuple: (min_port, max_port) as ints

    Raises:
        ValueError: If the format is wrong or the bounds do not satisfy
            1 <= min <= max <= 65535
    """
    format_error = f"Invalid port range format: {spec}. Use format: min-max"
    parts = spec.split('-')
    if len(parts) != 2:
        raise ValueError(format_error)
    try:
        min_port, max_port = int(parts[0]), int(parts[1])
    except ValueError:
        raise ValueError(format_error) from None
    if not (_LOWEST_PORT <= min_port <= max_port <= _HIGHEST_PORT):
        raise ValueError(
            f"Invalid port range: {spec}. "
            f"Ports must satisfy {_LOWEST_PORT} <= min <= max <= {_HIGHEST_PORT}"
        )
    return min_port, max_port


def launch_tensorboard(logdir, port, host='localhost', open_browser=True):
    """
    Launch TensorBoard on the specified port

    Blocks only for the startup phase: it returns as soon as TensorBoard
    announces its URL, the startup line budget is used up, or the child
    closes its output. Remaining output is consumed by a daemon thread.

    Args:
        logdir (str): Path to log directory
        port (int): Port to use
        host (str): Host to bind to
        open_browser (bool): Whether to open browser automatically

    Returns:
        subprocess.Popen: Process object

    Raises:
        KeyboardInterrupt: Re-raised after stopping the child if the user
            interrupts during the startup phase
    """
    cmd = [
        sys.executable, "-m", "tensorboard.main",
        f"--logdir={logdir}",
        f"--port={port}",
        f"--host={host}",
        # NOTE(review): the original comment said this flag improves startup
        # times; it appears to force TensorBoard's classic data loader
        # instead. Confirm the intent against the TensorBoard flag docs.
        "--load_fast=false",
    ]

    logger.info(f"Launching TensorBoard: {' '.join(cmd)}")

    # Start TensorBoard without waiting for it to finish. stderr is merged
    # into stdout so a single reader sees everything the child prints.
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
        bufsize=1
    )

    url = None
    lines_read = 0
    try:
        for line in process.stdout:
            lines_read += 1
            url = _report_line(line, lines_read)
            # Stop as soon as the server is known to be up; waiting for a
            # fixed number of lines would block for as long as the child
            # stays quiet, i.e. usually until it exits.
            if url is not None or lines_read >= _STARTUP_LOG_LINES:
                break
    except KeyboardInterrupt:
        # The caller has no handle on the process yet, so stop it here.
        _stop_process(process)
        raise

    if url is not None:
        _announce_url(url, open_browser)

    # Continue reading output in background to prevent pipe from filling.
    # If the URL has not been seen yet, the thread keeps looking for it.
    threading.Thread(
        target=_drain_output,
        args=(process, lines_read, url is None, open_browser),
        daemon=True
    ).start()

    return process


def main():
    """
    Parse command-line arguments, pick a port and run TensorBoard

    Returns:
        int: Process exit code. 1 for an invalid port range or when no port
            could be obtained, 0 after a keyboard interrupt, otherwise the
            exit code of the TensorBoard process
    """
    parser = argparse.ArgumentParser(description='Launch TensorBoard with automatic port management')
    parser.add_argument('--logdir', type=str, default='NN/models/saved/logs',
                        help='Directory containing TensorBoard event files')
    parser.add_argument('--preferred-port', type=int, default=6007,
                        help='Preferred port to use')
    parser.add_argument('--port-range', type=str, default='6000-7000',
                        help='Port range to try if preferred port is unavailable (format: min-max)')
    parser.add_argument('--host', type=str, default='localhost',
                        help='Host to bind to')
    parser.add_argument('--no-browser', action='store_true',
                        help='Do not open browser automatically')
    parser.add_argument('--kill-stale', action='store_true',
                        help='Kill stale debug instances before starting')

    args = parser.parse_args()

    # Parse port range
    try:
        min_port, max_port = _parse_port_range(args.port_range)
    except ValueError as e:
        logger.error(str(e))
        return 1

    # Kill stale instances if requested
    if args.kill_stale:
        logger.info("Killing stale debug instances...")
        count, _ = kill_stale_debug_instances()
        logger.info(f"Killed {count} stale instances")

    # Get an available port
    try:
        port = get_port_with_fallback(args.preferred_port, min_port, max_port)
        logger.info(f"Using port {port} for TensorBoard")
    except RuntimeError as e:
        logger.error(str(e))
        return 1

    # Ensure log directory exists
    logdir = os.path.abspath(args.logdir)
    os.makedirs(logdir, exist_ok=True)

    process = None
    try:
        # Launch TensorBoard
        process = launch_tensorboard(
            logdir=logdir,
            port=port,
            host=args.host,
            open_browser=not args.no_browser
        )

        # Wait for process to end (it shouldn't unless there's an error or user kills it)
        return_code = process.wait()
        if return_code != 0:
            logger.error(f"TensorBoard exited with code {return_code}")
        return return_code
    except KeyboardInterrupt:
        logger.info("Received keyboard interrupt, shutting down TensorBoard...")
        # process is None when the interrupt arrived during startup; in that
        # case launch_tensorboard has already stopped the child.
        if process is not None:
            _stop_process(process)
        return 0


if __name__ == "__main__":
    sys.exit(main())