#!/usr/bin/env python3
"""
TensorBoard Logger Utility

This module provides a centralized way to log training metrics to TensorBoard.
It ensures consistent logging across different training components.
"""
import os
import logging
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, Optional, Union, List

# Import conditionally to handle missing dependencies gracefully:
# the module stays importable even when torch/tensorboard is absent.
try:
    from torch.utils.tensorboard import SummaryWriter
    TENSORBOARD_AVAILABLE = True
except ImportError:
    TENSORBOARD_AVAILABLE = False

logger = logging.getLogger(__name__)


class TensorBoardLogger:
    """
    Centralized TensorBoard logging utility for training metrics.

    Provides a consistent interface for logging metrics to TensorBoard across
    different training components. Every logging method is a safe no-op when
    the logger is disabled or TensorBoard is unavailable, and writer errors
    are logged (never raised) so metric logging can't interrupt training.
    """

    def __init__(self,
                 log_dir: Optional[str] = None,
                 experiment_name: Optional[str] = None,
                 enabled: bool = True):
        """
        Initialize TensorBoard logger.

        Args:
            log_dir: Base directory for TensorBoard logs (default: 'runs')
            experiment_name: Name of the experiment (default: timestamp)
            enabled: Whether TensorBoard logging is enabled
        """
        self.enabled = enabled and TENSORBOARD_AVAILABLE
        self.writer = None

        if not self.enabled:
            if not TENSORBOARD_AVAILABLE:
                logger.warning(
                    "TensorBoard not available. Install with: pip install tensorboard"
                )
            return

        # Set up log directory
        if log_dir is None:
            log_dir = "runs"

        # Create experiment name if not provided
        if experiment_name is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            experiment_name = f"training_{timestamp}"

        # Create full log path (only set when logging is actually enabled)
        self.log_dir = os.path.join(log_dir, experiment_name)

        # Create writer; on failure, disable rather than crash the caller.
        try:
            self.writer = SummaryWriter(log_dir=self.log_dir)
            logger.info(f"TensorBoard logging enabled at: {self.log_dir}")
        except Exception as e:
            logger.error(f"Failed to initialize TensorBoard: {e}")
            self.enabled = False

    def log_scalar(self, tag: str, value: float, step: int) -> None:
        """
        Log a scalar value to TensorBoard.

        Args:
            tag: Metric name
            value: Metric value
            step: Training step
        """
        if not self.enabled or self.writer is None:
            return
        try:
            self.writer.add_scalar(tag, value, step)
        except Exception as e:
            logger.warning(f"Failed to log scalar {tag}: {e}")

    def log_scalars(self, main_tag: str, tag_value_dict: Dict[str, float],
                    step: int) -> None:
        """
        Log multiple scalar values with the same main tag.

        Args:
            main_tag: Main tag for the metrics
            tag_value_dict: Dictionary of tag names to values
            step: Training step
        """
        if not self.enabled or self.writer is None:
            return
        try:
            self.writer.add_scalars(main_tag, tag_value_dict, step)
        except Exception as e:
            logger.warning(f"Failed to log scalars for {main_tag}: {e}")

    def log_histogram(self, tag: str, values, step: int) -> None:
        """
        Log a histogram to TensorBoard.

        Args:
            tag: Histogram name
            values: Values to create histogram from (array-like or tensor)
            step: Training step
        """
        if not self.enabled or self.writer is None:
            return
        try:
            self.writer.add_histogram(tag, values, step)
        except Exception as e:
            logger.warning(f"Failed to log histogram {tag}: {e}")

    def log_training_metrics(self, metrics: Dict[str, Any], step: int,
                             prefix: str = "Training") -> None:
        """
        Log training metrics to TensorBoard.

        Numeric values are logged as scalars; array-like values (anything
        with a ``shape`` attribute, e.g. numpy arrays or tensors) are logged
        as histograms. Other value types are silently skipped.

        Args:
            metrics: Dictionary of metric names to values
            step: Training step
            prefix: Prefix for metric names
        """
        if not self.enabled or self.writer is None:
            return
        for name, value in metrics.items():
            if isinstance(value, (int, float)):
                self.log_scalar(f"{prefix}/{name}", value, step)
            elif hasattr(value, "shape"):
                # For numpy arrays or tensors. No extra try/except needed:
                # log_histogram already traps and warns on writer errors
                # (the previous bare `except: pass` was redundant and also
                # swallowed KeyboardInterrupt/SystemExit).
                self.log_histogram(f"{prefix}/{name}", value, step)

    def log_model_metrics(self, model_name: str, metrics: Dict[str, Any],
                          step: int) -> None:
        """
        Log model-specific metrics to TensorBoard.

        Only numeric values are logged; other types are skipped.

        Args:
            model_name: Name of the model
            metrics: Dictionary of metric names to values
            step: Training step
        """
        if not self.enabled or self.writer is None:
            return
        for name, value in metrics.items():
            if isinstance(value, (int, float)):
                self.log_scalar(f"Model/{model_name}/{name}", value, step)

    def log_reward_metrics(self, symbol: str, metrics: Dict[str, float],
                           step: int) -> None:
        """
        Log reward-related metrics to TensorBoard.

        Args:
            symbol: Trading symbol
            metrics: Dictionary of metric names to values
            step: Training step
        """
        if not self.enabled or self.writer is None:
            return
        for name, value in metrics.items():
            self.log_scalar(f"Rewards/{symbol}/{name}", value, step)

    def log_state_metrics(self, symbol: str, state_info: Dict[str, Any],
                          step: int) -> None:
        """
        Log state-related metrics to TensorBoard.

        Recognized keys in ``state_info``: "size" (scalar), "quality"
        (scalar), and "feature_counts" (a dict of feature type -> count).
        Unrecognized keys are ignored.

        Args:
            symbol: Trading symbol
            state_info: Dictionary of state information
            step: Training step
        """
        if not self.enabled or self.writer is None:
            return

        # Log state size
        if "size" in state_info:
            self.log_scalar(f"State/{symbol}/Size", state_info["size"], step)

        # Log state quality
        if "quality" in state_info:
            self.log_scalar(f"State/{symbol}/Quality", state_info["quality"], step)

        # Log feature counts
        if "feature_counts" in state_info:
            for feature_type, count in state_info["feature_counts"].items():
                self.log_scalar(f"State/{symbol}/Features/{feature_type}", count, step)

    def close(self) -> None:
        """Close the TensorBoard writer"""
        if self.enabled and self.writer is not None:
            try:
                self.writer.close()
                logger.info("TensorBoard writer closed")
            except Exception as e:
                logger.warning(f"Error closing TensorBoard writer: {e}")