gogo2/web/tensorboard_integration.py

#!/usr/bin/env python3
"""
TensorBoard Integration for Dashboard

This module provides integration between the trading dashboard and TensorBoard,
allowing training metrics to be visualized in real-time.
"""

import os
import sys
import subprocess
import threading
import time
import logging
import webbrowser
from pathlib import Path
from typing import Optional, Dict, Any

logger = logging.getLogger(__name__)

class TensorBoardIntegration:
    """
    TensorBoard integration for dashboard

    Launches a TensorBoard server as a child process, forwards its console
    output to this module's logger, and exposes simple status queries.

    Attributes:
        log_dir: Directory handed to TensorBoard via ``--logdir``.
        port: TCP port TensorBoard is asked to listen on.
        process: The ``subprocess.Popen`` handle of the last launch, or None.
        url: ``http://localhost:<port>`` URL of the server.
        is_running: Cached liveness flag; refreshed by
            ``is_tensorboard_running()`` and by the monitor thread.
        latest_metrics: Reserved dict for metrics (not populated here).
    """

    # Upper bound (seconds) spent waiting to see whether the child process
    # exits right after launch (e.g. bad arguments, port already taken).
    STARTUP_WAIT_SECONDS = 2

    # Grace period (seconds) between terminate() and kill() on shutdown.
    STOP_TIMEOUT_SECONDS = 5

    def __init__(self, log_dir: str = "runs", port: int = 6006):
        """
        Initialize TensorBoard integration

        Args:
            log_dir: Directory containing TensorBoard logs
            port: Port to run TensorBoard on
        """
        self.log_dir = log_dir
        self.port = port
        self.process: Optional[subprocess.Popen] = None
        self.url = f"http://localhost:{port}"
        self.is_running = False
        self.latest_metrics: Dict[str, Any] = {}

        # Create log directory if it doesn't exist
        os.makedirs(log_dir, exist_ok=True)

    def start_tensorboard(self, open_browser: bool = False) -> bool:
        """
        Start TensorBoard server in a separate process

        Args:
            open_browser: Whether to open browser automatically

        Returns:
            bool: True if TensorBoard is running when this method returns
            (either freshly started or already running), False otherwise.
            Errors are logged rather than raised.
        """
        # Poll the real process instead of trusting the cached flag, so a
        # server that died unnoticed can be restarted.
        if self.is_tensorboard_running():
            logger.info("TensorBoard is already running")
            return True

        try:
            # Check if TensorBoard is available
            try:
                import tensorboard
                logger.info("TensorBoard version %s available", tensorboard.__version__)
            except ImportError:
                logger.warning("TensorBoard not installed. Install with: pip install tensorboard")
                return False

            # Make sure the log directory exists before handing it to TensorBoard
            if not Path(self.log_dir).exists():
                logger.warning("Log directory %s does not exist", self.log_dir)
                os.makedirs(self.log_dir, exist_ok=True)
                logger.info("Created log directory %s", self.log_dir)

            cmd = [
                sys.executable,
                "-m",
                "tensorboard.main",
                "--logdir", self.log_dir,
                "--port", str(self.port),
                "--reload_interval", "5",  # Reload data every 5 seconds
                "--reload_multifile", "true",  # Better handling of multiple log files
            ]

            logger.info("Starting TensorBoard: %s", " ".join(cmd))

            # stderr is merged into stdout so that a single reader (the
            # monitor thread) drains everything the child writes. With two
            # separate pipes and only one being read, the child would block
            # once the unread pipe's buffer filled up.
            process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
            )
            self.process = process

            # Give TensorBoard a moment to start. wait() returns as soon as
            # the child exits, so a failed launch is reported immediately
            # instead of after the full delay.
            try:
                process.wait(timeout=self.STARTUP_WAIT_SECONDS)
            except subprocess.TimeoutExpired:
                pass  # still alive after the grace period: treat as started
            else:
                output, _ = process.communicate()
                self.is_running = False
                logger.error("TensorBoard failed to start: %s", output)
                return False

            self.is_running = True
            logger.info("TensorBoard started at %s", self.url)

            # Open browser if requested
            if open_browser:
                try:
                    webbrowser.open(self.url)
                    logger.info("Browser opened automatically")
                except Exception as e:
                    logger.warning("Could not open browser: %s", e)

            # Start monitoring thread
            threading.Thread(target=self._monitor_process, daemon=True).start()
            return True

        except Exception as e:
            # Boundary handler: callers expect a bool, never an exception.
            logger.error("Error starting TensorBoard: %s", e)
            return False

    def _monitor_process(self):
        """Forward TensorBoard output to the logger until the process exits"""
        # Bind the handle once: self.process may be replaced by a later
        # start_tensorboard() call while this thread is still draining.
        process = self.process
        if process is None:
            return

        try:
            stream = process.stdout
            if stream is not None:
                with stream:
                    # Iterating the pipe blocks per line and ends at EOF,
                    # i.e. when the child has closed its output.
                    for raw_line in stream:
                        line = raw_line.strip()
                        if line:
                            logger.debug("TensorBoard: %s", line)

            # EOF on the pipe does not guarantee the child has exited yet;
            # wait() also reaps it so no zombie is left behind.
            process.wait()
            logger.info("TensorBoard process has ended")

        except Exception as e:
            logger.error("Error monitoring TensorBoard process: %s", e)

        finally:
            # Only clear the flag if this thread still owns the current
            # process; otherwise a newer launch would be marked as stopped.
            if self.process is process:
                self.is_running = False

    def stop_tensorboard(self):
        """Stop TensorBoard server"""
        process = self.process
        if process is not None and process.poll() is None:
            try:
                process.terminate()
                process.wait(timeout=self.STOP_TIMEOUT_SECONDS)
                logger.info("TensorBoard stopped")
            except subprocess.TimeoutExpired:
                process.kill()
                process.wait()  # reap the killed child
                logger.warning("TensorBoard process killed after timeout")
            except Exception as e:
                logger.error("Error stopping TensorBoard: %s", e)

        # Resynchronise the flag with the actual process state. This is
        # False when nothing is running (including when the process had
        # already exited), and stays True only if stopping failed.
        self.is_tensorboard_running()

    def get_tensorboard_url(self) -> str:
        """Get TensorBoard URL"""
        return self.url

    def is_tensorboard_running(self) -> bool:
        """
        Check if TensorBoard is running

        Polls the child process and refreshes ``self.is_running`` with the
        result, so the cached flag cannot drift from the real state.
        """
        running = self.process is not None and self.process.poll() is None
        self.is_running = running
        return running

    def get_latest_metrics(self) -> Dict[str, Any]:
        """
        Get latest training metrics from TensorBoard

        This is a placeholder - in a real implementation, you would
        parse TensorBoard event files to extract metrics

        Returns:
            Dict with keys ``training_active``, ``tensorboard_url`` and
            ``metrics_available``; both booleans mirror whether the
            TensorBoard process is currently alive.
        """
        # In a real implementation, you would parse TensorBoard event files
        # For now, return placeholder data
        running = self.is_tensorboard_running()
        return {
            "training_active": running,
            "tensorboard_url": self.url,
            "metrics_available": running,
        }

# Module-wide shared instance, created on first request
_tensorboard_integration = None

def get_tensorboard_integration(log_dir: str = "runs", port: int = 6006) -> TensorBoardIntegration:
    """
    Return the shared TensorBoardIntegration, creating it on first use

    The arguments are only consulted when the instance does not exist yet;
    once it has been created, later calls return that same object and
    their arguments are not used.

    Args:
        log_dir: Directory containing TensorBoard logs
        port: Port to run TensorBoard on

    Returns:
        TensorBoardIntegration: The module-level shared instance
    """
    global _tensorboard_integration

    existing = _tensorboard_integration
    if existing is not None:
        return existing

    created = TensorBoardIntegration(log_dir, port)
    _tensorboard_integration = created
    return created