gogo2/run_tensorboard.py

#!/usr/bin/env python3
"""
TensorBoard Launch Script

Starts TensorBoard server for monitoring training progress.
Visualizes training metrics, rewards, state information, and model performance.

This script can be run standalone or integrated with the dashboard.
"""

import subprocess
import sys
import os
import time
import webbrowser
import argparse
from pathlib import Path
import logging

# Root logging setup: emit INFO and above, each record stamped with the
# time, the originating logger's name and the severity level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger shared by the functions in this script.
logger = logging.getLogger(__name__)

def start_tensorboard(logdir="runs", port=6006, open_browser=True):
    """
    Start a TensorBoard server as a child process.

    The log directory is created if it does not exist and its entries are
    listed in the log. TensorBoard is then launched with the current
    interpreter (``python -m tensorboard.main``). The browser is opened only
    after the process has been spawned, so a failed launch does not leave
    the user with a tab pointing at nothing.

    Args:
        logdir: Directory containing TensorBoard logs
        port: Port to run TensorBoard on
        open_browser: Whether to open browser automatically

    Returns:
        subprocess.Popen: TensorBoard process, or None if the ``tensorboard``
        package is not importable or the process could not be started.
        The process's stdout and stderr are pipes (text mode); a caller that
        waits on the process should drain them (e.g. with ``communicate()``)
        so the child cannot block on a full pipe.
    """
    # Function-scope import: only needed for the availability check below.
    import importlib.util

    # Set log directory
    runs_dir = Path(logdir)
    if not runs_dir.exists():
        logger.warning(f"No '{logdir}' directory found. Creating it.")
        runs_dir.mkdir(parents=True, exist_ok=True)

    # Check if there are any log directories
    log_dirs = sorted(runs_dir.glob("*"))
    if not log_dirs:
        logger.warning(f"No training logs found in '{logdir}' directory.")
    else:
        logger.info(f"Found {len(log_dirs)} training sessions")

        # List available sessions
        logger.info("Available training sessions:")
        for i, log_dir in enumerate(log_dirs, 1):
            logger.info(f"  {i}. {log_dir.name}")

    url = f"http://localhost:{port}"

    try:
        # The command runs `sys.executable`, which always exists, so a missing
        # tensorboard package never surfaces as FileNotFoundError from Popen.
        # Check for the package explicitly to report it properly.
        if importlib.util.find_spec("tensorboard") is None:
            logger.error("TensorBoard not found. Install with: pip install tensorboard")
            return None

        logger.info(f"Starting TensorBoard on port {port}...")

        # TensorBoard command line with enhanced options
        cmd = [
            sys.executable,
            "-m",
            "tensorboard.main",
            "--logdir", str(runs_dir),
            "--port", str(port),
            "--samples_per_plugin", "images=100,audio=100,text=100",
            "--reload_interval", "5",  # Reload data every 5 seconds
            "--reload_multifile", "true"  # Better handling of multiple log files
        ]

        # Start TensorBoard process
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )

    except FileNotFoundError:
        # Raised by Popen when the interpreter executable itself is missing.
        logger.error("TensorBoard not found. Install with: pip install tensorboard")
        return None
    except Exception as e:
        logger.error(f"Error starting TensorBoard: {e}")
        return None

    # Only announce the server once the process has actually been spawned.
    logger.info("TensorBoard is running with enhanced training visualization!")
    logger.info(f"View training metrics at: {url}")
    logger.info("Available dashboards:")
    logger.info("   - SCALARS: Training metrics, rewards, and losses")
    logger.info("   - HISTOGRAMS: Feature distributions and model weights")
    logger.info("   - TIME SERIES: Training progress over time")

    # Try to open browser automatically if requested. webbrowser.open()
    # reports failure through its return value as well as by raising.
    if open_browser:
        try:
            if webbrowser.open(url):
                logger.info("Browser opened automatically")
            else:
                logger.warning(f"Could not open browser automatically; open {url} manually")
        except Exception as e:
            logger.warning(f"Could not open browser automatically: {e}")

    # Return process for management
    return process

def _stop_process(process):
    """Terminate the child process, escalating to kill if it does not exit within 5 seconds."""
    process.terminate()
    try:
        process.wait(timeout=5)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()  # Reap the killed child so it does not linger as a zombie


def main():
    """Launch TensorBoard with enhanced visualization options

    Parses ``--port``, ``--logdir``, ``--no-browser`` and
    ``--dashboard-integration`` from the command line and starts TensorBoard.
    In dashboard integration mode the function returns right after the launch;
    otherwise it blocks until TensorBoard exits or the user presses Ctrl+C.

    Returns:
        int: Exit status for ``sys.exit`` - 0 on success or user interrupt,
        1 if TensorBoard could not be started, exited with a non-zero status,
        or an unexpected error occurred while waiting.
    """

    # Parse command line arguments
    parser = argparse.ArgumentParser(description="Launch TensorBoard for training visualization")
    parser.add_argument("--port", type=int, default=6006, help="Port to run TensorBoard on")
    parser.add_argument("--logdir", type=str, default="runs", help="Directory containing TensorBoard logs")
    parser.add_argument("--no-browser", action="store_true", help="Don't open browser automatically")
    parser.add_argument("--dashboard-integration", action="store_true", help="Run in dashboard integration mode")
    args = parser.parse_args()

    # Start TensorBoard
    process = start_tensorboard(
        logdir=args.logdir,
        port=args.port,
        open_browser=not args.no_browser
    )

    if process is None:
        return 1

    # If running in dashboard integration mode, return immediately
    if args.dashboard_integration:
        return 0

    # Otherwise, wait for process to complete
    try:
        print("\n" + "="*70)
        print("🔥 TensorBoard is running with enhanced training visualization!")
        print(f"📈 View training metrics at: http://localhost:{args.port}")
        print("⏹️  Press Ctrl+C to stop TensorBoard")
        print("="*70 + "\n")

        # communicate() instead of wait(): the child's stdout/stderr are
        # pipes, and waiting without reading them can deadlock once the
        # child fills the OS pipe buffer. communicate() drains both pipes
        # until the process exits.
        _, stderr_output = process.communicate()

    except KeyboardInterrupt:
        print("\n🛑 TensorBoard stopped")
        _stop_process(process)
        return 0
    except Exception as e:
        print(f"❌ Error: {e}")
        # Do not leave the server running behind a failed launcher.
        _stop_process(process)
        return 1

    # Surface a failed server (e.g. port already in use) instead of
    # reporting success and discarding its error output.
    if process.returncode != 0:
        print(f"❌ TensorBoard exited with code {process.returncode}", file=sys.stderr)
        if stderr_output:
            print(stderr_output.strip(), file=sys.stderr)
        return 1

    return 0

# Script entry point: run main() and hand its return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())