# gogo2/model_manager.py
"""
Enhanced Model Management System for Trading Dashboard
This system provides:
- Automatic cleanup of old model checkpoints
- Best model tracking with performance metrics
- Configurable retention policies
- Startup model loading
- Performance-based model selection
"""
import json
import logging
from dataclasses import asdict, dataclass
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional

import torch

logger = logging.getLogger(__name__)

@dataclass
class ModelMetrics:
    """Performance metrics for model evaluation"""
    accuracy: float = 0.0
    profit_factor: float = 0.0
    win_rate: float = 0.0
    sharpe_ratio: float = 0.0
    max_drawdown: float = 0.0
    total_trades: int = 0
    avg_trade_duration: float = 0.0
    confidence_score: float = 0.0

    def get_composite_score(self) -> float:
        """Calculate composite performance score"""
        # Weighted composite score
        weights = {
            'profit_factor': 0.3,
            'sharpe_ratio': 0.25,
            'win_rate': 0.2,
            'accuracy': 0.15,
            'confidence_score': 0.1
        }
        # Normalize values to the 0-1 range
        normalized_pf = min(max(self.profit_factor / 3.0, 0), 1)  # PF of 3+ maps to 1.0
        normalized_sharpe = min(max((self.sharpe_ratio + 2) / 4, 0), 1)  # Sharpe -2..2 -> 0..1
        normalized_win_rate = self.win_rate
        normalized_accuracy = self.accuracy
        normalized_confidence = self.confidence_score
        # Drawdown penalty: scales linearly from 1.0 at 0% drawdown to 0.0 at >=20%
        drawdown_penalty = max(0, 1 - self.max_drawdown / 0.2)
        score = (
            weights['profit_factor'] * normalized_pf +
            weights['sharpe_ratio'] * normalized_sharpe +
            weights['win_rate'] * normalized_win_rate +
            weights['accuracy'] * normalized_accuracy +
            weights['confidence_score'] * normalized_confidence
        ) * drawdown_penalty
        return min(max(score, 0), 1)
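
# A minimal worked example of the composite score (hypothetical numbers,
# not from the source):
#
#     m = ModelMetrics(accuracy=0.62, profit_factor=2.4, win_rate=0.6,
#                      sharpe_ratio=1.0, max_drawdown=0.1, confidence_score=0.7)
#     m.get_composite_score()
#     # weighted sum: 0.3*0.8 + 0.25*0.75 + 0.2*0.6 + 0.15*0.62 + 0.1*0.7 = 0.7105
#     # drawdown penalty: 1 - 0.1/0.2 = 0.5  ->  final score 0.7105 * 0.5 ~= 0.355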

@dataclass
class ModelInfo:
    """Complete model information and metadata"""
    model_type: str  # 'cnn', 'rl', 'transformer'
    model_name: str
    file_path: str
    creation_time: datetime
    last_updated: datetime
    file_size_mb: float
    metrics: ModelMetrics
    training_episodes: int = 0
    model_version: str = "1.0"

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization"""
        data = asdict(self)
        data['creation_time'] = self.creation_time.isoformat()
        data['last_updated'] = self.last_updated.isoformat()
        return data

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ModelInfo':
        """Create from a dictionary (inverse of to_dict)"""
        data = dict(data)  # copy so the caller's dict is not mutated
        data['creation_time'] = datetime.fromisoformat(data['creation_time'])
        data['last_updated'] = datetime.fromisoformat(data['last_updated'])
        data['metrics'] = ModelMetrics(**data['metrics'])
        return cls(**data)
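
# Serialization round-trip sketch (hypothetical values), showing that
# to_dict/from_dict survive a pass through JSON:
#
#     info = ModelInfo(model_type='cnn', model_name='cnn_2action_20250101_000000',
#                      file_path='/tmp/model.pt', creation_time=datetime.now(),
#                      last_updated=datetime.now(), file_size_mb=12.5,
#                      metrics=ModelMetrics())
#     restored = ModelInfo.from_dict(json.loads(json.dumps(info.to_dict())))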

class ModelManager:
    """Enhanced model management system"""

    def __init__(self, base_dir: str = ".", config: Optional[Dict[str, Any]] = None):
        self.base_dir = Path(base_dir)
        self.config = config or self._get_default_config()
        # Model directories
        self.models_dir = self.base_dir / "models"
        self.nn_models_dir = self.base_dir / "NN" / "models"
        self.registry_file = self.models_dir / "model_registry.json"
        self.best_models_dir = self.models_dir / "best_models"
        # Create directories
        self.best_models_dir.mkdir(parents=True, exist_ok=True)
        # Model registry: model name -> ModelInfo
        self.model_registry: Dict[str, ModelInfo] = {}
        self._load_registry()
        logger.info(f"Model Manager initialized - Base: {self.base_dir}")
        logger.info(f"Retention policy: Keep {self.config['max_models_per_type']} best models per type")

    def _get_default_config(self) -> Dict[str, Any]:
        """Get default configuration"""
        return {
            'max_models_per_type': 3,          # Keep top 3 models per type
            'max_total_models': 10,            # Maximum total models to keep
            'cleanup_frequency_hours': 24,     # Cleanup every 24 hours
            'min_performance_threshold': 0.3,  # Minimum composite score
            'max_checkpoint_age_days': 7,      # Delete checkpoints older than 7 days
            'auto_cleanup_enabled': True,
            'backup_before_cleanup': True,
            'model_size_limit_mb': 100,        # Individual model size limit
            'total_storage_limit_gb': 5.0      # Total storage limit
        }
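
    # Note: a config dict passed to __init__ replaces the defaults wholesale
    # (it is not merged), so include every key when overriding. Sketch with
    # illustrative values:
    #
    #     cfg = ModelManager()._get_default_config()
    #     cfg['max_models_per_type'] = 5
    #     manager = ModelManager(config=cfg)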

    def _load_registry(self):
        """Load model registry from file"""
        try:
            if self.registry_file.exists():
                with open(self.registry_file, 'r') as f:
                    data = json.load(f)
                self.model_registry = {
                    k: ModelInfo.from_dict(v) for k, v in data.items()
                }
                logger.info(f"Loaded {len(self.model_registry)} models from registry")
            else:
                logger.info("No existing model registry found")
        except Exception as e:
            logger.error(f"Error loading model registry: {e}")
            self.model_registry = {}

    def _save_registry(self):
        """Save model registry to file"""
        try:
            self.models_dir.mkdir(parents=True, exist_ok=True)
            with open(self.registry_file, 'w') as f:
                data = {k: v.to_dict() for k, v in self.model_registry.items()}
                json.dump(data, f, indent=2, default=str)
            logger.info(f"Saved registry with {len(self.model_registry)} models")
        except Exception as e:
            logger.error(f"Error saving model registry: {e}")

    def cleanup_all_existing_models(self, confirm: bool = False) -> Dict[str, Any]:
        """
        Clean up all existing model files and prepare for 2-action system training

        Args:
            confirm: If True, perform the cleanup. If False, only report what would be cleaned

        Returns:
            Dict with cleanup statistics
        """
        cleanup_stats = {
            'files_found': 0,
            'files_deleted': 0,
            'directories_cleaned': 0,
            'space_freed_mb': 0.0,
            'errors': []
        }
        # Model file patterns for both 2-action and legacy 3-action systems
        model_patterns = [
            "**/*.pt", "**/*.pth", "**/*.h5", "**/*.pkl", "**/*.joblib", "**/*.model",
            "**/checkpoint_*", "**/model_*", "**/cnn_*", "**/dqn_*", "**/rl_*"
        ]
        # Directories to clean
        model_directories = [
            "models/saved",
            "NN/models/saved",
            "NN/models/saved/checkpoints",
            "NN/models/saved/realtime_checkpoints",
            "NN/models/saved/realtime_ticks_checkpoints",
            "model_backups"
        ]
        try:
            # Scan for files to be cleaned
            for directory in model_directories:
                dir_path = self.base_dir / directory
                if dir_path.exists():
                    for pattern in model_patterns:
                        for file_path in dir_path.glob(pattern):
                            if file_path.is_file():
                                cleanup_stats['files_found'] += 1
                                file_size = file_path.stat().st_size / (1024 * 1024)  # MB
                                cleanup_stats['space_freed_mb'] += file_size
                                if confirm:
                                    try:
                                        file_path.unlink()
                                        cleanup_stats['files_deleted'] += 1
                                        logger.info(f"Deleted model file: {file_path}")
                                    except Exception as e:
                                        cleanup_stats['errors'].append(f"Failed to delete {file_path}: {e}")
            # Clean up empty checkpoint directories
            for directory in model_directories:
                dir_path = self.base_dir / directory
                if dir_path.exists():
                    # Walk deepest-first so parents emptied by this pass are removed too
                    for subdir in sorted(dir_path.rglob("*"), key=lambda p: len(p.parts), reverse=True):
                        if subdir.is_dir() and not any(subdir.iterdir()):
                            if confirm:
                                try:
                                    subdir.rmdir()
                                    cleanup_stats['directories_cleaned'] += 1
                                    logger.info(f"Removed empty directory: {subdir}")
                                except Exception as e:
                                    cleanup_stats['errors'].append(f"Failed to remove directory {subdir}: {e}")
            if confirm:
                # Reset the registry for a fresh start with the 2-action system.
                # The registry stays a flat name -> ModelInfo mapping so that
                # _load_registry and register_model keep working against it.
                self.model_registry = {}
                self._save_registry()
                logger.info("=" * 60)
                logger.info("MODEL CLEANUP COMPLETED - 2-ACTION SYSTEM READY")
                logger.info(f"Files deleted: {cleanup_stats['files_deleted']}")
                logger.info(f"Space freed: {cleanup_stats['space_freed_mb']:.2f} MB")
                logger.info(f"Directories cleaned: {cleanup_stats['directories_cleaned']}")
                logger.info("Registry reset for 2-action system (BUY/SELL)")
                logger.info("Ready for fresh training with intelligent position management")
                logger.info("=" * 60)
            else:
                logger.info("=" * 60)
                logger.info("MODEL CLEANUP PREVIEW - 2-ACTION SYSTEM MIGRATION")
                logger.info(f"Files to delete: {cleanup_stats['files_found']}")
                logger.info(f"Space to free: {cleanup_stats['space_freed_mb']:.2f} MB")
                logger.info("Run with confirm=True to perform cleanup")
                logger.info("=" * 60)
        except Exception as e:
            cleanup_stats['errors'].append(f"Cleanup error: {e}")
            logger.error(f"Error during model cleanup: {e}")
        return cleanup_stats

    def register_model(self, model_path: str, model_type: str, metrics: Optional[ModelMetrics] = None) -> str:
        """
        Register a new model in the 2-action system

        Args:
            model_path: Path to the model file
            model_type: Type of model ('cnn', 'rl', 'transformer')
            metrics: Performance metrics

        Returns:
            str: Unique model name/ID
        """
        if not Path(model_path).exists():
            raise FileNotFoundError(f"Model file not found: {model_path}")
        # Generate unique model name
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        model_name = f"{model_type}_2action_{timestamp}"
        # Get file info
        file_path = Path(model_path)
        file_size_mb = file_path.stat().st_size / (1024 * 1024)
        # Default metrics for 2-action system
        if metrics is None:
            metrics = ModelMetrics(
                accuracy=0.0,
                profit_factor=1.0,
                win_rate=0.5,
                sharpe_ratio=0.0,
                max_drawdown=0.0,
                confidence_score=0.5
            )
        # Create model info
        model_info = ModelInfo(
            model_type=model_type,
            model_name=model_name,
            file_path=str(file_path.absolute()),
            creation_time=datetime.now(),
            last_updated=datetime.now(),
            file_size_mb=file_size_mb,
            metrics=metrics,
            model_version="2.0"  # 2-action system version
        )
        # Add to the registry (keyed by name, consistent with _load_registry)
        self.model_registry[model_name] = model_info
        self._save_registry()
        # Cleanup old models if necessary
        self._cleanup_models_by_type(model_type)
        logger.info(f"Registered 2-action model: {model_name}")
        logger.info(f"Model type: {model_type}, Size: {file_size_mb:.2f} MB")
        logger.info(f"Performance score: {metrics.get_composite_score():.4f}")
        return model_name
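
    # Usage sketch (hypothetical path and metric values):
    #
    #     manager = ModelManager()
    #     name = manager.register_model(
    #         model_path='NN/models/saved/cnn_latest.pt',
    #         model_type='cnn',
    #         metrics=ModelMetrics(profit_factor=1.8, win_rate=0.55, sharpe_ratio=0.9),
    #     )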

    def _should_keep_model(self, model_info: ModelInfo) -> bool:
        """Determine if a model should be kept based on performance"""
        score = model_info.metrics.get_composite_score()
        # Check minimum threshold
        if score < self.config['min_performance_threshold']:
            return False
        # Check size limit
        if model_info.file_size_mb > self.config['model_size_limit_mb']:
            logger.warning(f"Model too large: {model_info.file_size_mb:.1f}MB > {self.config['model_size_limit_mb']}MB")
            return False
        # Check if better than existing models of the same type
        existing_models = self.get_models_by_type(model_info.model_type)
        if len(existing_models) >= self.config['max_models_per_type']:
            # Find the worst performing model
            worst_model = min(existing_models.values(), key=lambda m: m.metrics.get_composite_score())
            if score <= worst_model.metrics.get_composite_score():
                return False
        return True

    def _cleanup_models_by_type(self, model_type: str):
        """Clean up old models of a specific type, keeping only the best ones"""
        models_of_type = self.get_models_by_type(model_type)
        max_keep = self.config['max_models_per_type']
        if len(models_of_type) <= max_keep:
            return
        # Sort by performance score, best first
        sorted_models = sorted(
            models_of_type.items(),
            key=lambda x: x[1].metrics.get_composite_score(),
            reverse=True
        )
        # Keep only the best max_keep models; remove the rest
        models_to_remove = sorted_models[max_keep:]
        for model_name, model_info in models_to_remove:
            try:
                # Remove file
                model_path = Path(model_info.file_path)
                if model_path.exists():
                    model_path.unlink()
                # Remove from registry
                del self.model_registry[model_name]
                logger.info(f"Removed old model: {model_name} (Score: {model_info.metrics.get_composite_score():.3f})")
            except Exception as e:
                logger.error(f"Error removing model {model_name}: {e}")
        # Persist the pruned registry
        self._save_registry()

    def get_models_by_type(self, model_type: str) -> Dict[str, ModelInfo]:
        """Get all models of a specific type"""
        return {
            name: info for name, info in self.model_registry.items()
            if info.model_type == model_type
        }

    def get_best_model(self, model_type: str) -> Optional[ModelInfo]:
        """Get the best performing model of a specific type"""
        models_of_type = self.get_models_by_type(model_type)
        if not models_of_type:
            return None
        return max(models_of_type.values(), key=lambda m: m.metrics.get_composite_score())
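
    # Usage sketch:
    #
    #     best_cnn = manager.get_best_model('cnn')
    #     if best_cnn:
    #         print(best_cnn.model_name, best_cnn.metrics.get_composite_score())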

    def load_best_models(self) -> Dict[str, Any]:
        """Load the best model of each type"""
        loaded_models = {}
        for model_type in ['cnn', 'rl', 'transformer']:
            best_model = self.get_best_model(model_type)
            if best_model:
                try:
                    model_path = Path(best_model.file_path)
                    if model_path.exists():
                        # Load the model
                        model_data = torch.load(model_path, map_location='cpu')
                        loaded_models[model_type] = {
                            'model': model_data,
                            'info': best_model,
                            'path': str(model_path)
                        }
                        logger.info(f"Loaded best {model_type} model: {best_model.model_name} "
                                    f"(Score: {best_model.metrics.get_composite_score():.3f})")
                    else:
                        logger.warning(f"Best {model_type} model file not found: {model_path}")
                except Exception as e:
                    logger.error(f"Error loading {model_type} model: {e}")
            else:
                logger.info(f"No {model_type} model available")
        return loaded_models

    def update_model_performance(self, model_name: str, metrics: ModelMetrics):
        """Update performance metrics for a model"""
        if model_name in self.model_registry:
            self.model_registry[model_name].metrics = metrics
            self.model_registry[model_name].last_updated = datetime.now()
            self._save_registry()
            logger.info(f"Updated metrics for {model_name}: Score {metrics.get_composite_score():.3f}")
        else:
            logger.warning(f"Model {model_name} not found in registry")

    def get_storage_stats(self) -> Dict[str, Any]:
        """Get storage usage statistics"""
        total_size_mb = 0.0
        model_count = 0
        for model_info in self.model_registry.values():
            total_size_mb += model_info.file_size_mb
            model_count += 1
        # Check actual storage usage
        actual_size_mb = 0.0
        if self.best_models_dir.exists():
            actual_size_mb = sum(
                f.stat().st_size for f in self.best_models_dir.rglob('*') if f.is_file()
            ) / 1024 / 1024
        return {
            'total_models': model_count,
            'registered_size_mb': total_size_mb,
            'actual_size_mb': actual_size_mb,
            'storage_limit_gb': self.config['total_storage_limit_gb'],
            'utilization_percent': (actual_size_mb / 1024) / self.config['total_storage_limit_gb'] * 100,
            'models_by_type': {
                model_type: len(self.get_models_by_type(model_type))
                for model_type in ['cnn', 'rl', 'transformer']
            }
        }
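
    # Example return value (illustrative numbers):
    #
    #     {'total_models': 4, 'registered_size_mb': 210.3, 'actual_size_mb': 198.7,
    #      'storage_limit_gb': 5.0, 'utilization_percent': 3.9,
    #      'models_by_type': {'cnn': 2, 'rl': 2, 'transformer': 0}}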

    def get_model_leaderboard(self) -> List[Dict[str, Any]]:
        """Get model performance leaderboard"""
        leaderboard = []
        for model_name, model_info in self.model_registry.items():
            leaderboard.append({
                'name': model_name,
                'type': model_info.model_type,
                'score': model_info.metrics.get_composite_score(),
                'profit_factor': model_info.metrics.profit_factor,
                'win_rate': model_info.metrics.win_rate,
                'sharpe_ratio': model_info.metrics.sharpe_ratio,
                'size_mb': model_info.file_size_mb,
                'age_days': (datetime.now() - model_info.creation_time).days,
                'last_updated': model_info.last_updated.strftime('%Y-%m-%d %H:%M')
            })
        # Sort by score, best first
        leaderboard.sort(key=lambda x: x['score'], reverse=True)
        return leaderboard
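
    # Each leaderboard entry is a flat dict, e.g. (illustrative):
    #
    #     {'name': 'cnn_2action_20250101_000000', 'type': 'cnn', 'score': 0.41,
    #      'profit_factor': 1.8, 'win_rate': 0.55, 'sharpe_ratio': 0.9,
    #      'size_mb': 45.2, 'age_days': 3, 'last_updated': '2025-01-04 12:00'}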

    def cleanup_checkpoints(self) -> Dict[str, Any]:
        """Clean up old checkpoint files"""
        cleanup_summary = {
            'deleted_files': 0,
            'freed_space_mb': 0.0,
            'errors': []
        }
        cutoff_date = datetime.now() - timedelta(days=self.config['max_checkpoint_age_days'])
        # Search for checkpoint files (rglob already searches recursively)
        checkpoint_patterns = [
            "checkpoint_*.pt",
            "model_*.pt",
            "*checkpoint*",
            "epoch_*.pt"
        ]
        for pattern in checkpoint_patterns:
            for file_path in self.base_dir.rglob(pattern):
                if "best_models" not in str(file_path) and file_path.is_file():
                    try:
                        file_time = datetime.fromtimestamp(file_path.stat().st_mtime)
                        if file_time < cutoff_date:
                            size_mb = file_path.stat().st_size / 1024 / 1024
                            file_path.unlink()
                            cleanup_summary['deleted_files'] += 1
                            cleanup_summary['freed_space_mb'] += size_mb
                    except Exception as e:
                        error_msg = f"Error deleting checkpoint {file_path}: {e}"
                        logger.error(error_msg)
                        cleanup_summary['errors'].append(error_msg)
        if cleanup_summary['deleted_files'] > 0:
            logger.info(f"Checkpoint cleanup: Deleted {cleanup_summary['deleted_files']} files, "
                        f"freed {cleanup_summary['freed_space_mb']:.1f}MB")
        return cleanup_summary
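
    # Typically invoked from a periodic maintenance task, e.g.:
    #
    #     summary = manager.cleanup_checkpoints()
    #     if summary['errors']:
    #         logger.warning(f"{len(summary['errors'])} checkpoint deletions failed")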


def create_model_manager() -> ModelManager:
    """Create and initialize a ModelManager with default settings"""
    return ModelManager()


# Example usage
if __name__ == "__main__":
    # Configure logging
    logging.basicConfig(level=logging.INFO)
    # Create model manager
    manager = ModelManager()
    # Clean up all existing models (with confirmation)
    print("WARNING: This will delete ALL existing models!")
    print("Type 'CONFIRM' to proceed:")
    user_input = input().strip()
    if user_input == "CONFIRM":
        cleanup_result = manager.cleanup_all_existing_models(confirm=True)
        print("\nCleanup complete:")
        print(f"- Deleted {cleanup_result['files_deleted']} files")
        print(f"- Freed {cleanup_result['space_freed_mb']:.1f}MB of space")
        print(f"- Cleaned {cleanup_result['directories_cleaned']} directories")
        if cleanup_result['errors']:
            print(f"- {len(cleanup_result['errors'])} errors occurred")
    else:
        print("Cleanup cancelled")