gogo2/start_monitoring.py
Dobromir Popov 310f3c5bf9 wip
2025-05-24 09:59:11 +03:00

160 lines
5.3 KiB
Python

#!/usr/bin/env python3
"""
Helper script to start monitoring services for RL training
"""
import subprocess
import sys
import time
import requests
import os
import json
from pathlib import Path
# Candidate ports for TensorBoard, tried in ascending order (6006 through 6012)
TENSORBOARD_PORTS = list(range(6006, 6013))
def check_port(port, service_name):
    """Probe ``http://localhost:<port>`` and report whether anything answered.

    Args:
        port: Local port to send an HTTP GET to (3 second timeout).
        service_name: Label used in the message printed on success.

    Returns:
        True if the request completed (a status line is printed as a side
        effect), False if ``requests`` raised a ``RequestException``.
    """
    url = f"http://localhost:{port}"
    try:
        requests.get(url, timeout=3)
    except requests.exceptions.RequestException:
        # Connection refused, timeout, etc. -- treat as "not running"
        return False

    print(f"{service_name} is running on port {port}")
    return True
def is_port_in_use(port):
    """Report whether *port* on localhost is currently unavailable for binding.

    A temporary TCP/IPv4 socket is bound to ``('localhost', port)``; the
    socket is always closed before returning.

    Returns:
        False if the bind succeeded, True if it raised ``OSError``.
    """
    import socket

    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(('localhost', port))
    except OSError:
        # Bind was rejected -- the port is considered taken
        return True
    else:
        return False
    finally:
        probe.close()
def find_available_port(ports_list, service_name):
    """Return the first port in *ports_list* that is not in use.

    Ports are tried in the order given. A line is printed for every port
    that is skipped because it is busy, and one for the port selected.

    Args:
        ports_list: Iterable of candidate port numbers.
        service_name: Label used in the "found" message.

    Returns:
        The first free port, or None if every candidate is in use.
    """
    for candidate in ports_list:
        if is_port_in_use(candidate):
            print(f"⚠️ Port {candidate} is already in use")
            continue
        print(f"🔍 Found available port {candidate} for {service_name}")
        return candidate
    return None
def save_port_config(tensorboard_port, web_dashboard_port=8051):
    """Save the monitoring port configuration to ``monitoring_ports.json``.

    The file is written (overwriting any existing one) relative to the
    current working directory.

    Args:
        tensorboard_port: Port TensorBoard is serving on.
        web_dashboard_port: Port chosen for the web dashboard. Defaults to
            8051, the value that used to be hard-coded here, so existing
            single-argument callers produce exactly the same file.
    """
    config = {
        "tensorboard_port": tensorboard_port,
        "web_dashboard_port": web_dashboard_port,
    }
    # Explicit encoding keeps the output independent of the platform locale
    with open("monitoring_ports.json", "w", encoding="utf-8") as f:
        json.dump(config, f, indent=2)
    print("💾 Port configuration saved to monitoring_ports.json")
def start_tensorboard():
    """Start TensorBoard in the background on an available port.

    Steps:
      1. If something already answers HTTP on one of ``TENSORBOARD_PORTS``,
         reuse it.
      2. Otherwise pick the first free port, make sure ``runs/`` exists and
         launch ``python -m tensorboard`` as a child process.
      3. Poll the port every 2 seconds (up to 15 times, i.e. 30 seconds)
         until it answers. If the child process exits before the port
         answers, give up immediately instead of waiting out the timeout.

    On success the chosen port is persisted via ``save_port_config``.

    Returns:
        The port TensorBoard is reachable on, or None if no port was free,
        the process exited early, the timeout elapsed, or any exception was
        raised (the error is printed, not propagated).
    """
    try:
        # First check if TensorBoard is already running on any of our ports
        for port in TENSORBOARD_PORTS:
            if check_port(port, "TensorBoard"):
                print(f"✅ TensorBoard already running on port {port}")
                save_port_config(port)
                return port

        # Find an available port
        port = find_available_port(TENSORBOARD_PORTS, "TensorBoard")
        if port is None:
            print(f"❌ No available ports found in range {TENSORBOARD_PORTS}")
            return None

        print(f"🚀 Starting TensorBoard on port {port}...")

        # Create runs directory if it doesn't exist
        Path("runs").mkdir(exist_ok=True)

        # Same command on every platform; only the Popen options differ
        command = [
            sys.executable, "-m", "tensorboard",
            "--logdir=runs", f"--port={port}", "--reload_interval=1",
        ]
        if os.name == 'nt':  # Windows: give TensorBoard its own console
            popen_options = {"creationflags": subprocess.CREATE_NEW_CONSOLE}
        else:  # Linux/Mac: discard the child's output
            popen_options = {
                "stdout": subprocess.DEVNULL,
                "stderr": subprocess.DEVNULL,
            }
        process = subprocess.Popen(command, **popen_options)

        # Wait for TensorBoard to start
        print(f"⏳ Waiting for TensorBoard to start on port {port}...")
        for _ in range(15):
            time.sleep(2)
            if check_port(port, "TensorBoard"):
                save_port_config(port)
                return port
            # Checked after the port probe so a server that is up always
            # wins; a dead child can never start answering, so stop early.
            exit_code = process.poll()
            if exit_code is not None:
                print(f"❌ TensorBoard process exited early (exit code {exit_code})")
                return None

        print(f"⚠️ TensorBoard failed to start on port {port} within 30 seconds")
        return None
    except Exception as e:
        # Best-effort helper: report the problem and let the caller continue
        print(f"❌ Error starting TensorBoard: {e}")
        return None
def check_web_dashboard_port():
    """Pick a port for the web dashboard, preferring 8051.

    Returns:
        8051 when it is free; otherwise the first free port among
        8052-8055. If none of those is free either, 8051 is still returned
        (after printing an error line), so the result is always an int.
    """
    default_port = 8051

    if not is_port_in_use(default_port):
        print(f"✅ Web dashboard port {default_port} is available")
        return default_port

    print(f"⚠️ Web dashboard port {default_port} is in use")

    # Walk the fallback ports in order and take the first free one
    for fallback in (8052, 8053, 8054, 8055):
        if is_port_in_use(fallback):
            continue
        print(f"🔍 Alternative port {fallback} available for web dashboard")
        return fallback

    print("❌ No alternative ports found for web dashboard")
    return default_port
def main():
    """Set up monitoring for RL training and print a status summary.

    Picks a web dashboard port, starts (or reuses) TensorBoard, then prints
    the resulting status, the suggested training command and the URLs.
    """
    separator = "=" * 60

    print(separator)
    print("🎯 RL TRAINING MONITORING SETUP")
    print(separator)

    # Check web dashboard port
    web_port = check_web_dashboard_port()

    # Start TensorBoard
    tensorboard_port = start_tensorboard()

    print("\n" + separator)
    print("📊 MONITORING STATUS")
    print(separator)

    if tensorboard_port:
        print(f"✅ TensorBoard: http://localhost:{tensorboard_port}")
        # Update port config
        save_port_config(tensorboard_port)
    else:
        print("❌ TensorBoard: Failed to start")
        print(" Manual start: python -m tensorboard --logdir=runs --port=6007")

    if web_port:
        print(f"✅ Web Dashboard: Ready on port {web_port}")

    print("\n🎯 Ready to start RL training!")
    # The --web-port hint depends only on the dashboard port: it must be
    # shown whenever a non-default port was selected, even if TensorBoard
    # itself failed to start, otherwise the suggested command would try to
    # use the already-occupied default port.
    if web_port and web_port != 8051:
        print(f"Run: python train_realtime_with_tensorboard.py --episodes 10 --web-port {web_port}")
    else:
        print("Run: python train_realtime_with_tensorboard.py --episodes 10")

    print("\n📋 Available URLs:")
    if tensorboard_port:
        print(f" 📊 TensorBoard: http://localhost:{tensorboard_port}")
    if web_port:
        print(f" 🌐 Web Dashboard: http://localhost:{web_port} (starts with training)")
# Run the monitoring setup only when executed as a script, not on import
if __name__ == "__main__":
    main()