#!/usr/bin/env python3
"""
Training Monitor Script

Quick script to check the status of realtime training and show key metrics.
"""

import glob
import os
import subprocess
import sys
import time
from datetime import datetime
from pathlib import Path

_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"


def _format_timestamp(epoch_seconds):
    """Render a POSIX timestamp as local time, e.g. ``2024-01-31 09:15:00``."""
    return datetime.fromtimestamp(epoch_seconds).strftime(_TIMESTAMP_FORMAT)


def _creation_time(stat_result):
    """Return the best available creation timestamp from an ``os.stat_result``.

    ``st_ctime`` is the creation time only on Windows; on POSIX it is the
    time of the last metadata change. Prefer ``st_birthtime`` wherever the
    platform exposes it and fall back to ``st_ctime`` otherwise.
    """
    return getattr(stat_result, "st_birthtime", stat_result.st_ctime)


def _report_tensorboard_logs(runs_dir):
    """Print a summary of the three last-sorted ``rl_training_*`` log dirs."""
    if not runs_dir.exists():
        return

    # Sessions are ordered by directory name, not by timestamp.
    log_dirs = sorted(runs_dir.glob("rl_training_*"), key=lambda p: p.name)

    print("\n📊 RECENT TENSORBOARD LOGS:")
    for log_dir in log_dirs[-3:]:
        event_files = list(log_dir.glob("*.tfevents.*"))

        print(f" 📁 {log_dir.name}")
        print(f" Created: {_format_timestamp(_creation_time(log_dir.stat()))}")
        print(f" Event files: {len(event_files)}")
        if event_files:
            # The most recently written event file marks the last update.
            last_update = max(f.stat().st_mtime for f in event_files)
            print(f" Last update: {_format_timestamp(last_update)}")
        print()


def _python_process_lines():
    """Return the process-listing lines that mention a Python interpreter.

    Uses ``tasklist`` on Windows and ``ps`` elsewhere.

    Raises:
        OSError: the listing tool could not be started.
        subprocess.CalledProcessError: the tool exited with a non-zero status.
    """
    if sys.platform == "win32":
        command, needle = ["tasklist"], "python.exe"
    else:
        command, needle = ["ps", "-A", "-o", "pid,args"], "python"

    # No shell is needed for a fixed argument list. errors="replace" keeps an
    # oddly-encoded process name from aborting the whole listing.
    result = subprocess.run(
        command,
        capture_output=True,
        text=True,
        errors="replace",
        check=True,
    )
    return [line for line in result.stdout.splitlines() if needle in line]


def _report_processes():
    """Print how many Python processes are running and list the first five."""
    print("🔍 PROCESS STATUS:")
    try:
        python_processes = _python_process_lines()
    except (OSError, subprocess.SubprocessError) as e:
        # Best effort: a missing or failing tool must not stop the report.
        print(f" Error checking processes: {e}")
        return

    print(f" Python processes running: {len(python_processes)}")
    for i, proc in enumerate(python_processes[:5], start=1):  # Show first 5
        print(f" {i}. {proc.strip()}")


def _report_saved_models(models_dir):
    """Print the count of saved agent checkpoints and the three newest ones."""
    if not models_dir.exists():
        return

    # Stat each file once and keep (mtime, name) pairs for sorting.
    stamped = [
        (model_file.stat().st_mtime, model_file.name)
        for model_file in models_dir.glob("realtime_agent_*.pt")
    ]
    stamped.sort(key=lambda pair: pair[0])

    print(f"\n💾 SAVED MODELS: {len(stamped)}")
    for mtime, name in stamped[-3:]:
        print(f" 📄 {name} - {_format_timestamp(mtime)}")


def check_training_status():
    """Check status of training processes and logs.

    Prints, in order: recent TensorBoard log directories under ``runs/``,
    running Python processes, the local service URLs, and saved model
    checkpoints under ``models/rl/``. Both paths are resolved against the
    current working directory. Sections whose directory does not exist are
    skipped. The service URLs are only printed; they are not probed.
    """
    print("=" * 60)
    print("REALTIME RL TRAINING STATUS CHECK")
    print("=" * 60)

    _report_tensorboard_logs(Path("runs"))
    _report_processes()

    print("\n🌐 WEB SERVICES:")
    print(" TensorBoard: http://localhost:6006")
    print(" Web Dashboard: http://localhost:8051")

    _report_saved_models(Path("models/rl"))

    print("\n" + "=" * 60)
    print("✅ MONITORING URLs:")
    print("📊 TensorBoard: http://localhost:6006")
    print("🌐 Dashboard: http://localhost:8051")
    print("=" * 60)


if __name__ == "__main__":
    # The report contains emoji; on a console or pipe whose encoding cannot
    # represent them, substitute a placeholder instead of raising.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(errors="replace")

    try:
        check_training_status()
    except KeyboardInterrupt:
        print("\nMonitoring stopped.")
    except Exception as e:
        print(f"Error: {e}")