# gogo2/core/model_output_manager.py

"""
Model Output Manager

This module provides a centralized storage and management system for model outputs,
enabling cross-model feeding and evaluation.
"""

import os
import json
import logging
import time
from datetime import datetime
from typing import Dict, List, Optional, Any
from threading import Lock

from .data_models import ModelOutput

# Module-level logger named after this module; used by ModelOutputManager
# for informational and error messages.
logger = logging.getLogger(__name__)

class ModelOutputManager:
    """
    Centralized storage and management system for model outputs

    This class:
    1. Stores model outputs for all models
    2. Provides access to current and historical outputs
    3. Handles persistence of outputs to disk
    4. Supports evaluation of model performance

    In-memory state is guarded by ``outputs_lock``. Persisted outputs are
    written as one JSON file per output, directly inside ``cache_dir``.
    """

    def __init__(self, cache_dir: str = "cache/model_outputs", max_history: int = 1000):
        """
        Initialize the model output manager

        Args:
            cache_dir: Directory to store model outputs
            max_history: Maximum number of historical outputs to keep per model
        """
        self.cache_dir = cache_dir
        self.max_history = max_history
        self.outputs_lock = Lock()

        # Current outputs for each model and symbol
        # {symbol: {model_name: ModelOutput}}
        self.current_outputs: Dict[str, Dict[str, ModelOutput]] = {}

        # Historical outputs for each model and symbol
        # {symbol: {model_name: List[ModelOutput]}}
        self.historical_outputs: Dict[str, Dict[str, List[ModelOutput]]] = {}

        # Performance metrics for each model and symbol
        # {symbol: {model_name: Dict[str, Any]}}
        self.performance_metrics: Dict[str, Dict[str, Dict[str, Any]]] = {}

        # Create cache directory if it doesn't exist
        os.makedirs(cache_dir, exist_ok=True)

        logger.info(f"ModelOutputManager initialized with cache_dir: {cache_dir}")

    def _store_in_memory(self, model_output: "ModelOutput") -> None:
        """
        Record an output as current and append it to the history (no disk I/O)

        Args:
            model_output: Model output to record
        """
        symbol = model_output.symbol
        model_name = model_output.model_name

        with self.outputs_lock:
            self.current_outputs.setdefault(symbol, {})[model_name] = model_output

            history = self.historical_outputs.setdefault(symbol, {}).setdefault(model_name, [])
            history.append(model_output)

            # Trim by deleting the oldest entries. A negative slice such as
            # history[-max_history:] would keep everything when max_history is 0.
            overflow = len(history) - max(self.max_history, 0)
            if overflow > 0:
                del history[:overflow]

    def store_output(self, model_output: "ModelOutput") -> bool:
        """
        Store a model output in memory and persist it to disk

        Persistence is best-effort: a failure to write the file is logged by
        ``_persist_output`` and does not make this method return False.

        Args:
            model_output: Model output to store

        Returns:
            bool: True if the output was stored in memory, False otherwise
        """
        try:
            self._store_in_memory(model_output)

            # Persist output to disk (outside the lock, so file I/O does not
            # block readers)
            self._persist_output(model_output)

            return True

        except Exception as e:
            logger.error(f"Error storing model output: {e}")
            return False

    def get_current_output(self, symbol: str, model_name: str) -> Optional["ModelOutput"]:
        """
        Get the current output for a model and symbol

        Args:
            symbol: Symbol to get output for
            model_name: Model name to get output for

        Returns:
            ModelOutput: Current output, or None if not available
        """
        try:
            with self.outputs_lock:
                return self.current_outputs.get(symbol, {}).get(model_name)

        except Exception as e:
            logger.error(f"Error getting current output: {e}")
            return None

    def get_all_current_outputs(self, symbol: str) -> Dict[str, "ModelOutput"]:
        """
        Get all current outputs for a symbol

        Args:
            symbol: Symbol to get outputs for

        Returns:
            Dict[str, ModelOutput]: Shallow copy of the model name to output mapping
        """
        try:
            with self.outputs_lock:
                return dict(self.current_outputs.get(symbol, {}))

        except Exception as e:
            logger.error(f"Error getting all current outputs: {e}")
            return {}

    def get_historical_outputs(self, symbol: str, model_name: str, limit: Optional[int] = None) -> List["ModelOutput"]:
        """
        Get historical outputs for a model and symbol

        Args:
            symbol: Symbol to get outputs for
            model_name: Model name to get outputs for
            limit: Maximum number of (most recent) outputs to return, None for
                all. A limit of zero or less returns an empty list.

        Returns:
            List[ModelOutput]: New list of historical outputs, oldest first
        """
        try:
            # outputs[-0:] would be the whole list, so handle non-positive
            # limits explicitly.
            if limit is not None and limit <= 0:
                return []

            with self.outputs_lock:
                history = self.historical_outputs.get(symbol, {}).get(model_name)
                if not history:
                    return []
                if limit is None:
                    return list(history)
                return history[-limit:]

        except Exception as e:
            logger.error(f"Error getting historical outputs: {e}")
            return []

    def evaluate_model_performance(self, symbol: str, model_name: str) -> Dict[str, Any]:
        """
        Evaluate model performance based on historical outputs

        Args:
            symbol: Symbol to evaluate
            model_name: Model name to evaluate

        Returns:
            Dict[str, Any]: Performance metrics. With history available the
            keys are 'confidence' (mean confidence), 'samples' and
            'last_update' (ISO timestamp string). With no history, zeroed
            'accuracy', 'confidence' and 'samples'. On failure, {'error': message}.
        """
        try:
            # Get historical outputs
            outputs = self.get_historical_outputs(symbol, model_name)

            if not outputs:
                return {'accuracy': 0.0, 'confidence': 0.0, 'samples': 0}

            # Calculate metrics
            total_outputs = len(outputs)
            avg_confidence = sum(output.confidence for output in outputs) / total_outputs

            # For now, we don't have ground truth to calculate accuracy
            # In the future, we can add this by comparing predictions to actual market movements

            metrics = {
                'confidence': avg_confidence,
                'samples': total_outputs,
                'last_update': datetime.now().isoformat()
            }

            # Store metrics
            with self.outputs_lock:
                self.performance_metrics.setdefault(symbol, {})[model_name] = metrics

            return metrics

        except Exception as e:
            logger.error(f"Error evaluating model performance: {e}")
            return {'error': str(e)}

    def get_performance_metrics(self, symbol: str, model_name: str) -> Dict[str, Any]:
        """
        Get performance metrics for a model and symbol

        Returns the metrics stored by the last evaluation if there are any;
        otherwise evaluates the model now.

        Args:
            symbol: Symbol to get metrics for
            model_name: Model name to get metrics for

        Returns:
            Dict[str, Any]: Performance metrics (a copy when served from the store)
        """
        try:
            with self.outputs_lock:
                stored = self.performance_metrics.get(symbol, {}).get(model_name)
                if stored is not None:
                    return stored.copy()

            # If no metrics are available, calculate them
            return self.evaluate_model_performance(symbol, model_name)

        except Exception as e:
            logger.error(f"Error getting performance metrics: {e}")
            return {'error': str(e)}

    def _persist_output(self, model_output: "ModelOutput") -> bool:
        """
        Persist a model output to disk

        The file is written directly inside ``cache_dir`` (the location that
        ``load_outputs_from_disk`` and ``cleanup_old_outputs`` scan) and is
        named ``{model_name}_{symbol}_{timestamp}.json`` with '/' in the symbol
        replaced by '_'.

        Args:
            model_output: Model output to persist

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            # Make sure the cache directory still exists
            os.makedirs(self.cache_dir, exist_ok=True)

            # Convert ModelOutput to dictionary
            # Don't store hidden states in file (too large)
            output_dict = {
                'model_type': model_output.model_type,
                'model_name': model_output.model_name,
                'symbol': model_output.symbol,
                'timestamp': model_output.timestamp.isoformat(),
                'confidence': model_output.confidence,
                'predictions': model_output.predictions,
                'metadata': model_output.metadata
            }

            # Serialize before opening the file so a serialization error
            # cannot leave a truncated JSON file behind.
            payload = json.dumps(output_dict, indent=2)

            # Microsecond-resolution timestamp keeps names distinct for
            # outputs produced within the same second.
            safe_symbol = model_output.symbol.replace('/', '_')
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            stem = f"{model_output.model_name}_{safe_symbol}_{timestamp}"

            # Exclusive-create mode guarantees an existing file is never
            # overwritten; on a name clash, retry with a numeric suffix.
            suffix = 0
            while True:
                filename = f"{stem}.json" if suffix == 0 else f"{stem}_{suffix}.json"
                try:
                    with open(os.path.join(self.cache_dir, filename), 'x', encoding='utf-8') as f:
                        f.write(payload)
                    break
                except FileExistsError:
                    suffix += 1

            return True

        except Exception as e:
            logger.error(f"Error persisting model output: {e}")
            return False

    def load_outputs_from_disk(self, symbol: str = None, model_name: str = None) -> int:
        """
        Load model outputs from disk into memory

        Loaded outputs are added to the in-memory stores only; they are not
        written back to disk. They are applied in ascending ``timestamp``
        order, so the newest loaded output becomes the current one.

        Args:
            symbol: Symbol to load outputs for, None for all
            model_name: Model name to load outputs for, None for all

        Returns:
            int: Number of outputs loaded
        """
        try:
            # Find all output files
            import glob

            safe_symbol = symbol.replace('/', '_') if symbol else None

            # The filename pattern is only a coarse pre-filter; escape the
            # literal parts so glob metacharacters in names are not interpreted.
            if symbol and model_name:
                name_pattern = f"{glob.escape(model_name)}_{glob.escape(safe_symbol)}*.json"
            elif symbol:
                name_pattern = f"*_{glob.escape(safe_symbol)}*.json"
            elif model_name:
                name_pattern = f"{glob.escape(model_name)}_*.json"
            else:
                name_pattern = "*.json"
            pattern = os.path.join(glob.escape(self.cache_dir), name_pattern)

            # Sorted for a deterministic processing order
            output_files = sorted(glob.glob(pattern))

            if not output_files:
                logger.info(f"No output files found for pattern: {pattern}")
                return 0

            # Load each file
            loaded_outputs = []
            for filepath in output_files:
                try:
                    with open(filepath, 'r', encoding='utf-8') as f:
                        output_dict = json.load(f)

                    # Exact match on the stored fields: the filename pattern is
                    # a prefix match and would also accept e.g. "ETH_USDT"
                    # when "ETH" was requested.
                    if symbol and output_dict['symbol'].replace('/', '_') != safe_symbol:
                        continue
                    if model_name and output_dict['model_name'] != model_name:
                        continue

                    # Create ModelOutput
                    loaded_outputs.append(ModelOutput(
                        model_type=output_dict['model_type'],
                        model_name=output_dict['model_name'],
                        symbol=output_dict['symbol'],
                        timestamp=datetime.fromisoformat(output_dict['timestamp']),
                        confidence=output_dict['confidence'],
                        predictions=output_dict['predictions'],
                        hidden_states={},  # Don't load hidden states from disk
                        metadata=output_dict.get('metadata', {})
                    ))

                except Exception as e:
                    logger.error(f"Error loading output file {filepath}: {e}")

            # Apply oldest first so history order and the "current" output
            # reflect the output timestamps.
            try:
                loaded_outputs = sorted(loaded_outputs, key=lambda output: output.timestamp)
            except TypeError as e:
                # e.g. a mix of naive and timezone-aware timestamps
                logger.warning(f"Could not order loaded outputs by timestamp: {e}")

            # Store in memory only; going through store_output would write
            # every loaded record back to disk as a new file.
            for model_output in loaded_outputs:
                self._store_in_memory(model_output)

            loaded_count = len(loaded_outputs)
            logger.info(f"Loaded {loaded_count} model outputs from disk")
            return loaded_count

        except Exception as e:
            logger.error(f"Error loading outputs from disk: {e}")
            return 0

    def cleanup_old_outputs(self, max_age_days: int = 30) -> int:
        """
        Clean up old output files

        Deletes ``*.json`` files directly inside ``cache_dir`` whose
        modification time is older than ``max_age_days`` days.

        Args:
            max_age_days: Maximum age of files to keep in days

        Returns:
            int: Number of files deleted
        """
        try:
            # Find all output files
            import glob
            output_files = glob.glob(os.path.join(glob.escape(self.cache_dir), "*.json"))

            if not output_files:
                return 0

            # Calculate cutoff time
            cutoff_time = time.time() - (max_age_days * 24 * 60 * 60)

            # Delete old files
            deleted_count = 0
            for filepath in output_files:
                try:
                    # Delete if last modified before the cutoff
                    if os.path.getmtime(filepath) < cutoff_time:
                        os.remove(filepath)
                        deleted_count += 1

                except Exception as e:
                    logger.error(f"Error deleting file {filepath}: {e}")

            logger.info(f"Deleted {deleted_count} old model output files")
            return deleted_count

        except Exception as e:
            logger.error(f"Error cleaning up old outputs: {e}")
            return 0