Files
gogo2/run_tensorboard.py
2025-07-22 15:44:59 +03:00

155 lines
5.1 KiB
Python

#!/usr/bin/env python3
"""
TensorBoard Launch Script
Starts TensorBoard server for monitoring training progress.
Visualizes training metrics, rewards, state information, and model performance.
This script can be run standalone or integrated with the dashboard.
"""
import subprocess
import sys
import os
import time
import webbrowser
import argparse
from pathlib import Path
import logging
# Root logging setup: INFO threshold, with each record rendered as
# "<timestamp> - <logger name> - <level> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Logger shared by the functions in this script, named after the module.
logger = logging.getLogger(__name__)
def _wait_until_listening(process, port, timeout, poll_interval=0.25):
    """
    Wait until something accepts TCP connections on localhost:port.
    Args:
        process: The launched subprocess.Popen; waiting stops if it exits
        port: TCP port to probe on localhost
        timeout: Maximum number of seconds to keep probing
        poll_interval: Seconds between probes (also the connect timeout)
    Returns:
        bool: True as soon as a connection succeeds; False if the process
        exits first or the timeout elapses without a successful connection
    """
    import socket  # local import: only needed for this readiness probe

    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if process.poll() is not None:
            # The child is gone, so nothing of ours will ever listen.
            return False
        try:
            with socket.create_connection(("localhost", port), timeout=poll_interval):
                return True
        except OSError:
            # Not accepting connections yet; back off briefly and retry.
            time.sleep(poll_interval)
    return False


def start_tensorboard(logdir="runs", port=6006, open_browser=True, startup_timeout=15.0):
    """
    Start TensorBoard server programmatically
    The server is launched first; only afterwards (and only when a browser
    was requested) does the function wait for the port to accept connections
    and open the browser, so the first page load is not refused.
    Args:
        logdir: Directory containing TensorBoard logs (created if missing)
        port: Port to run TensorBoard on
        open_browser: Whether to open browser automatically
        startup_timeout: Seconds to wait for the server to accept connections
            before opening the browser; 0 opens the browser without waiting
    Returns:
        subprocess.Popen: TensorBoard process, or None if TensorBoard is not
        installed, could not be launched, or exited during startup.
        The process is created with stdout and stderr as text-mode pipes;
        the caller should drain them (e.g. with communicate()) because a
        full, unread pipe blocks the child.
    """
    import importlib.util  # local import: used only for the install check

    # Set log directory
    runs_dir = Path(logdir)
    if not runs_dir.exists():
        logger.warning(f"No '{logdir}' directory found. Creating it.")
        runs_dir.mkdir(parents=True, exist_ok=True)

    # Check if there are any log directories
    log_dirs = sorted(runs_dir.glob("*"))
    if not log_dirs:
        logger.warning(f"No training logs found in '{logdir}' directory.")
    else:
        logger.info(f"Found {len(log_dirs)} training sessions")
        # List available sessions
        logger.info("Available training sessions:")
        for i, log_dir in enumerate(log_dirs, 1):
            logger.info(f" {i}. {log_dir.name}")

    # The command runs "<this interpreter> -m tensorboard.main", so a missing
    # package never raises FileNotFoundError from Popen; the child would just
    # exit. Check for the package in this interpreter up front instead.
    if importlib.util.find_spec("tensorboard") is None:
        logger.error("TensorBoard not found. Install with: pip install tensorboard")
        return None

    # TensorBoard command line with enhanced options
    cmd = [
        sys.executable,
        "-m",
        "tensorboard.main",
        "--logdir", str(runs_dir),
        "--port", str(port),
        "--samples_per_plugin", "images=100,audio=100,text=100",
        "--reload_interval", "5",  # Reload data every 5 seconds
        "--reload_multifile", "true",  # Better handling of multiple log files
    ]
    url = f"http://localhost:{port}"

    try:
        logger.info(f"Starting TensorBoard on port {port}...")
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except Exception as e:
        logger.error(f"Error starting TensorBoard: {e}")
        return None

    # Only block on startup when a browser will be pointed at the server.
    ready = False
    if open_browser:
        ready = _wait_until_listening(process, port, startup_timeout)

    # Surface an early exit (bad arguments, port bind failure, ...) instead of
    # handing a dead process back to the caller.
    if process.poll() is not None:
        try:
            _, err_output = process.communicate(timeout=5)
        except subprocess.TimeoutExpired:
            err_output = ""
        logger.error(
            f"TensorBoard exited during startup (exit code {process.returncode}): "
            f"{(err_output or '').strip()}"
        )
        return None

    logger.info("TensorBoard is running with enhanced training visualization!")
    logger.info(f"View training metrics at: {url}")
    logger.info("Available dashboards:")
    logger.info(" - SCALARS: Training metrics, rewards, and losses")
    logger.info(" - HISTOGRAMS: Feature distributions and model weights")
    logger.info(" - TIME SERIES: Training progress over time")

    # Try to open browser automatically if requested
    if open_browser:
        if not ready:
            logger.warning(
                f"TensorBoard did not accept connections on port {port} within "
                f"{startup_timeout} seconds; opening browser anyway"
            )
        try:
            # webbrowser.open reports failure via its return value.
            if webbrowser.open(url):
                logger.info("Browser opened automatically")
            else:
                logger.warning(f"Could not open browser automatically; visit {url}")
        except Exception as e:
            logger.warning(f"Could not open browser automatically: {e}")

    # Return process for management
    return process
def _shutdown_process(process, timeout=5):
    """Terminate the child, escalating to kill if it ignores the request, and reap it."""
    process.terminate()
    try:
        process.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()


def main():
    """
    Launch TensorBoard with enhanced visualization options
    Parses --port, --logdir, --no-browser and --dashboard-integration from
    the command line, starts TensorBoard and, unless running in dashboard
    integration mode, blocks until it exits or the user presses Ctrl+C.
    Returns:
        int: 0 on success or user interrupt, 1 if TensorBoard could not be
        started, exited with a non-zero status, or an error occurred
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(description="Launch TensorBoard for training visualization")
    parser.add_argument("--port", type=int, default=6006, help="Port to run TensorBoard on")
    parser.add_argument("--logdir", type=str, default="runs", help="Directory containing TensorBoard logs")
    parser.add_argument("--no-browser", action="store_true", help="Don't open browser automatically")
    parser.add_argument("--dashboard-integration", action="store_true", help="Run in dashboard integration mode")
    args = parser.parse_args()

    # Start TensorBoard
    process = start_tensorboard(
        logdir=args.logdir,
        port=args.port,
        open_browser=not args.no_browser,
    )
    if process is None:
        return 1

    # If running in dashboard integration mode, return immediately
    if args.dashboard_integration:
        return 0

    # Otherwise, wait for process to complete
    print("\n" + "="*70)
    print("🔥 TensorBoard is running with enhanced training visualization!")
    print(f"📈 View training metrics at: http://localhost:{args.port}")
    print("⏹️ Press Ctrl+C to stop TensorBoard")
    print("="*70 + "\n")

    try:
        # communicate() keeps draining the child's stdout/stderr pipes while
        # waiting; a bare wait() can deadlock once an unread pipe fills up.
        _, err_output = process.communicate()
    except KeyboardInterrupt:
        _shutdown_process(process)
        print("\n🛑 TensorBoard stopped")
        return 0
    except Exception as e:
        print(f"❌ Error: {e}")
        # Do not leave the server running behind a failed launcher.
        _shutdown_process(process)
        return 1

    if process.returncode != 0:
        # TensorBoard ended on its own with an error; show why.
        print(f"❌ Error: TensorBoard exited with code {process.returncode}")
        if err_output:
            print(err_output.strip(), file=sys.stderr)
        return 1
    return 0
# Script entry point: run main() and exit with the status code it returns.
if __name__ == "__main__":
    raise SystemExit(main())