155 lines
5.1 KiB
Python
155 lines
5.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
TensorBoard Launch Script

Starts a TensorBoard server for monitoring training progress.
Visualizes training metrics, rewards, state information, and model performance.

This script can be run standalone or integrated with the dashboard.
"""
|
|
|
|
import argparse
import importlib.util
import logging
import os
import subprocess
import sys
import time
import webbrowser
from pathlib import Path
|
|
|
|
# Logging setup: INFO-level records, each prefixed with a timestamp, the
# emitting logger's name, and the severity.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger used by every function in this script.
logger = logging.getLogger(__name__)
|
|
|
|
def _tensorboard_available():
    """Return True when the ``tensorboard`` package is importable by this interpreter."""
    try:
        return importlib.util.find_spec("tensorboard") is not None
    except (ImportError, ValueError):
        # find_spec raises ValueError when the module is already loaded but
        # carries no spec; in that case it is present, so trust sys.modules.
        return "tensorboard" in sys.modules


def start_tensorboard(logdir="runs", port=6006, open_browser=True, startup_grace=1.0):
    """
    Start a TensorBoard server as a child process.

    The log directory is created if it does not exist, the entries found in
    it are listed, and ``python -m tensorboard.main`` is launched with the
    current interpreter. The browser is only opened once the child process
    has been spawned and has survived the startup grace period.

    Args:
        logdir: Directory containing TensorBoard logs.
        port: Port to run TensorBoard on.
        open_browser: Whether to open the browser automatically.
        startup_grace: Seconds to wait for an immediate failure (for example
            a port that is already in use) before reporting success.
            ``0`` or ``None`` performs a single non-blocking check.

    Returns:
        subprocess.Popen: The running TensorBoard process, or ``None`` when
        TensorBoard is not installed, could not be spawned, or exited during
        the grace period.

    Note:
        The child's stdout and stderr are pipes. A caller that keeps the
        process alive for a long time should drain them (for example with
        ``Popen.communicate()``), otherwise the child can block once a pipe
        buffer fills up.
    """
    # Make sure the log directory exists so TensorBoard has something to watch.
    runs_dir = Path(logdir)
    if not runs_dir.exists():
        logger.warning(f"No '{logdir}' directory found. Creating it.")
        runs_dir.mkdir(parents=True, exist_ok=True)

    # Report what is currently inside the log directory.
    log_dirs = sorted(runs_dir.glob("*"))
    if not log_dirs:
        logger.warning(f"No training logs found in '{logdir}' directory.")
    else:
        logger.info(f"Found {len(log_dirs)} training sessions")
        logger.info("Available training sessions:")
        for i, log_dir in enumerate(log_dirs, 1):
            logger.info(f" {i}. {log_dir.name}")

    # The command runs sys.executable, which always exists, so a missing
    # tensorboard package never surfaces as FileNotFoundError from Popen.
    # Check for the package explicitly instead.
    if not _tensorboard_available():
        logger.error("TensorBoard not found. Install with: pip install tensorboard")
        return None

    cmd = [
        sys.executable,
        "-m",
        "tensorboard.main",
        "--logdir", str(runs_dir),
        "--port", str(port),
        "--samples_per_plugin", "images=100,audio=100,text=100",
        "--reload_interval", "5",  # Reload data every 5 seconds
        "--reload_multifile", "true",  # Better handling of multiple log files
    ]

    logger.info(f"Starting TensorBoard on port {port}...")
    try:
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except FileNotFoundError:
        logger.error("TensorBoard not found. Install with: pip install tensorboard")
        return None
    except Exception as e:
        logger.error(f"Error starting TensorBoard: {e}")
        return None

    # Give the child a moment to fail fast before announcing that it is running.
    grace = 0 if startup_grace is None else max(startup_grace, 0)
    try:
        exit_code = process.wait(timeout=grace)
    except subprocess.TimeoutExpired:
        pass  # Still running: this is the success path.
    else:
        # The process is already dead, so communicate() returns promptly and
        # hands back whatever it wrote to stderr.
        _, err_text = process.communicate()
        detail = (err_text or "").strip()
        logger.error(
            f"Error starting TensorBoard: process exited with code {exit_code}"
            + (f": {detail}" if detail else "")
        )
        return None

    # Only point the browser at the server once the process is confirmed alive.
    if open_browser:
        try:
            if webbrowser.open(f"http://localhost:{port}"):
                logger.info("Browser opened automatically")
            else:
                logger.warning(
                    "Could not open browser automatically: no usable browser found"
                )
        except Exception as e:
            logger.warning(f"Could not open browser automatically: {e}")

    logger.info("TensorBoard is running with enhanced training visualization!")
    logger.info(f"View training metrics at: http://localhost:{port}")
    logger.info("Available dashboards:")
    logger.info(" - SCALARS: Training metrics, rewards, and losses")
    logger.info(" - HISTOGRAMS: Feature distributions and model weights")
    logger.info(" - TIME SERIES: Training progress over time")

    # Hand the process back so the caller can manage its lifetime.
    return process
|
|
|
|
def main():
    """
    Launch TensorBoard from the command line.

    Parses ``--port``, ``--logdir``, ``--no-browser`` and
    ``--dashboard-integration``, starts TensorBoard through
    ``start_tensorboard`` and, unless running in dashboard integration mode,
    blocks until the server exits or the user presses Ctrl+C.

    Returns:
        int: ``0`` on a clean exit or user interrupt, ``1`` when TensorBoard
        could not be started, exited with a non-zero status, or an
        unexpected error occurred while waiting for it.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(description="Launch TensorBoard for training visualization")
    parser.add_argument("--port", type=int, default=6006, help="Port to run TensorBoard on")
    parser.add_argument("--logdir", type=str, default="runs", help="Directory containing TensorBoard logs")
    parser.add_argument("--no-browser", action="store_true", help="Don't open browser automatically")
    parser.add_argument("--dashboard-integration", action="store_true", help="Run in dashboard integration mode")
    args = parser.parse_args()

    # Start TensorBoard
    process = start_tensorboard(
        logdir=args.logdir,
        port=args.port,
        open_browser=not args.no_browser,
    )
    if process is None:
        return 1

    # In dashboard integration mode, return immediately and leave the
    # TensorBoard process running.
    if args.dashboard_integration:
        return 0

    try:
        print("\n" + "="*70)
        print("🔥 TensorBoard is running with enhanced training visualization!")
        print(f"📈 View training metrics at: http://localhost:{args.port}")
        print("⏹️ Press Ctrl+C to stop TensorBoard")
        print("="*70 + "\n")

        # communicate() rather than wait(): the child's stdout/stderr are
        # pipes, and wait() can deadlock once the child fills a pipe buffer
        # that nobody is reading.
        _, stderr_text = process.communicate()

    except KeyboardInterrupt:
        print("\n🛑 TensorBoard stopped")
        process.terminate()
        try:
            process.communicate(timeout=5)
        except subprocess.TimeoutExpired:
            # Graceful shutdown took too long: force it, then reap the child.
            process.kill()
            process.communicate()
        return 0

    except Exception as e:
        print(f"❌ Error: {e}")
        return 1

    # Surface a failed server instead of reporting success unconditionally.
    if process.returncode != 0:
        print(f"❌ Error: TensorBoard exited with code {process.returncode}")
        if stderr_text:
            print(stderr_text.strip())
        return 1
    return 0
|
|
|
|
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())