203 lines
6.9 KiB
Python
203 lines
6.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
TensorBoard Integration for Dashboard

This module provides integration between the trading dashboard and TensorBoard,
allowing training metrics to be visualized in real time.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import subprocess
|
|
import threading
|
|
import time
|
|
import logging
|
|
import webbrowser
|
|
from pathlib import Path
|
|
from typing import Optional, Dict, Any
|
|
|
|
# Module-level logger named after this module
logger = logging.getLogger(__name__)
|
|
|
|
class TensorBoardIntegration:
    """
    TensorBoard integration for dashboard

    Starts a TensorBoard server as a child process of the current Python
    interpreter, tracks whether that process is still alive, and exposes the
    server URL plus a small status dictionary.
    """

    def __init__(self, log_dir: str = "runs", port: int = 6006):
        """
        Initialize TensorBoard integration

        Args:
            log_dir: Directory containing TensorBoard logs (created if missing)
            port: Port to run TensorBoard on
        """
        self.log_dir = log_dir
        self.port = port
        # Popen handle of the TensorBoard child process; None until started
        self.process: Optional[subprocess.Popen] = None
        self.url = f"http://localhost:{port}"
        # Cached flag; is_tensorboard_running() polls the real process state
        self.is_running = False
        # Never written by this class after construction
        self.latest_metrics: Dict[str, Any] = {}

        # Create log directory if it doesn't exist
        os.makedirs(log_dir, exist_ok=True)

    def _build_command(self) -> list:
        """Build the argv list used to launch TensorBoard with this interpreter"""
        return [
            sys.executable,
            "-m",
            "tensorboard.main",
            "--logdir", self.log_dir,
            "--port", str(self.port),
            "--reload_interval", "5",  # Reload data every 5 seconds
            "--reload_multifile", "true",  # Better handling of multiple log files
        ]

    def start_tensorboard(self, open_browser: bool = False, startup_wait: float = 2.0) -> bool:
        """
        Start TensorBoard server in a separate process

        Args:
            open_browser: Whether to open browser automatically
            startup_wait: Seconds to wait for the process to survive before
                it is considered started. An early exit within this window
                is reported as a failure without waiting the full period.

        Returns:
            bool: True if TensorBoard was started successfully (or was
            already running), False otherwise
        """
        # Poll the process instead of trusting the cached flag, which can be
        # stale if the process died without the monitor thread updating it.
        if self.is_tensorboard_running():
            self.is_running = True
            logger.info("TensorBoard is already running")
            return True
        self.is_running = False

        try:
            # Check if TensorBoard is available
            try:
                import tensorboard
                logger.info(f"TensorBoard version {tensorboard.__version__} available")
            except ImportError:
                logger.warning("TensorBoard not installed. Install with: pip install tensorboard")
                return False

            # The directory may have been removed since __init__ created it
            if not Path(self.log_dir).exists():
                logger.warning(f"Log directory {self.log_dir} does not exist")
                os.makedirs(self.log_dir, exist_ok=True)
                logger.info(f"Created log directory {self.log_dir}")

            cmd = self._build_command()
            logger.info(f"Starting TensorBoard: {' '.join(cmd)}")

            # stderr is merged into stdout so that the single pipe drained by
            # the monitor thread carries all output. A separate stderr pipe
            # that nobody reads can fill up and block the child process.
            process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
            )
            self.process = process

            # Give TensorBoard a moment to start; wait() returns early if the
            # process exits, so startup failures are detected immediately.
            try:
                process.wait(timeout=max(startup_wait, 0.0))
            except subprocess.TimeoutExpired:
                started = True
            else:
                started = False

            if not started:
                output, _ = process.communicate()
                logger.error(f"TensorBoard failed to start: {(output or '').strip()}")
                return False

            self.is_running = True
            logger.info(f"TensorBoard started at {self.url}")

            # Open browser if requested; failure here is not fatal
            if open_browser:
                try:
                    webbrowser.open(self.url)
                    logger.info("Browser opened automatically")
                except Exception as e:
                    logger.warning(f"Could not open browser: {e}")

            # Start monitoring thread (daemon: must not keep the app alive)
            threading.Thread(target=self._monitor_process, daemon=True).start()

            return True

        except Exception as e:
            logger.error(f"Error starting TensorBoard: {e}")
            return False

    def _monitor_process(self):
        """Forward TensorBoard output to the debug log until the process exits"""
        # Bind the handle once so a later restart cannot swap it mid-loop
        process = self.process
        if process is None:
            self.is_running = False
            return

        try:
            if process.stdout is not None:
                # Iterating blocks until the child closes its end (EOF)
                for raw_line in process.stdout:
                    line = raw_line.strip()
                    if line:
                        logger.debug(f"TensorBoard: {line}")
                process.stdout.close()

            # Reap the child so it does not linger as a zombie
            process.wait()
            logger.info("TensorBoard process has ended")

        except Exception as e:
            logger.error(f"Error monitoring TensorBoard process: {e}")

        finally:
            # Only update the flag if this is still the current process
            if self.process is process:
                self.is_running = process.poll() is None

    def stop_tensorboard(self):
        """Stop TensorBoard server (terminate, then kill after a 5 s timeout)"""
        process = self.process
        if process is not None and process.poll() is None:
            try:
                process.terminate()
                process.wait(timeout=5)
                logger.info("TensorBoard stopped")
            except subprocess.TimeoutExpired:
                process.kill()
                # Reap the killed child; kill() alone leaves a zombie
                process.wait()
                logger.warning("TensorBoard process killed after timeout")
            except Exception as e:
                logger.error(f"Error stopping TensorBoard: {e}")

        self.is_running = False

    def get_tensorboard_url(self) -> str:
        """Get TensorBoard URL"""
        return self.url

    def is_tensorboard_running(self) -> bool:
        """Check if TensorBoard is running by polling the child process"""
        process = self.process
        return process is not None and process.poll() is None

    def get_latest_metrics(self) -> Dict[str, Any]:
        """
        Get latest training metrics from TensorBoard

        This is a placeholder - in a real implementation, you would
        parse TensorBoard event files to extract metrics. For now only the
        server status is reported, based on the live process state rather
        than the cached ``is_running`` flag.

        Returns:
            Dict with keys ``training_active``, ``tensorboard_url`` and
            ``metrics_available``
        """
        running = self.is_tensorboard_running()
        return {
            "training_active": running,
            "tensorboard_url": self.url,
            "metrics_available": running,
        }
|
|
|
|
# Module-wide shared instance, created lazily on first request
_tensorboard_integration = None


def get_tensorboard_integration(log_dir: str = "runs", port: int = 6006) -> TensorBoardIntegration:
    """
    Return the shared TensorBoardIntegration, creating it on first use

    The arguments are only used when the instance is first created; once an
    instance exists it is returned as-is and the arguments are ignored.

    Args:
        log_dir: Directory containing TensorBoard logs
        port: Port to run TensorBoard on

    Returns:
        TensorBoardIntegration: Singleton instance
    """
    global _tensorboard_integration

    existing = _tensorboard_integration
    if existing is not None:
        return existing

    created = TensorBoardIntegration(log_dir, port)
    _tensorboard_integration = created
    return created