models cleanup

2025-05-30 03:20:05 +03:00
parent 75dbac1761
commit 2a148b0ac6
21 changed files with 937 additions and 570 deletions
--- a/cleanup_and_setup_models.py
+++ b/cleanup_and_setup_models.py
@@ -0,0 +1,285 @@
+#!/usr/bin/env python3
+"""
+Model Cleanup and Training Setup Script
+
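+This script:
+1. Backs up current models
+2. Removes old/conflicting models
+3. Sets up the training progression tracking file
+4. Creates a clean model directory structure
+5. Checks that the essential saved model is present
+6. Writes the model registry configuration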
+"""
+
+import os
+import shutil
+import json
+import logging
+from datetime import datetime
+from pathlib import Path
+import torch
+
+# Setup logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+class ModelCleanupManager:
+    """Manager for cleaning up and organizing model files"""
+    
+    def __init__(self):
+        self.root_dir = Path(".")
+        self.models_dir = self.root_dir / "models"
+        self.backup_dir = self.root_dir / "model_backups" / f"backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+        self.training_progress_file = self.models_dir / "training_progress.json"
+        
+        # Create backup directory
+        self.backup_dir.mkdir(parents=True, exist_ok=True)
+        logger.info(f"Created backup directory: {self.backup_dir}")
+    
+    def backup_existing_models(self):
+        """Backup all existing models before cleanup"""
+        logger.info("🔄 Backing up existing models...")
+        
+        model_files = [
+            # CNN models
+            "models/cnn_final_20250331_001817.pt.pt",
+            "models/cnn_best.pt.pt", 
+            "models/cnn_BTC_USDT_*.pt",
+            "models/cnn_BTC_USD_*.pt",
+            
+            # RL models
+            "models/trading_agent_*.pt",
+            "models/trading_agent_*.backup",
+            
+            # Other models
+            "models/saved/cnn_model_best.pt"
+        ]
+        
+        # Backup model files
+        backup_count = 0
+        for pattern in model_files:
+            for file_path in self.root_dir.glob(pattern):
+                if file_path.is_file():
+                    backup_path = self.backup_dir / file_path.relative_to(self.root_dir)
+                    backup_path.parent.mkdir(parents=True, exist_ok=True)
+                    shutil.copy2(file_path, backup_path)
+                    backup_count += 1
+                    logger.info(f"  📁 Backed up: {file_path}")
+        
+        logger.info(f"✅ Backed up {backup_count} model files to {self.backup_dir}")
+    
+    def clean_old_models(self):
+        """Remove old/conflicting model files"""
+        logger.info("🧹 Cleaning old model files...")
+        
+        files_to_remove = [
+            # Old CNN models with architecture conflicts
+            "models/cnn_final_20250331_001817.pt.pt",
+            "models/cnn_best.pt.pt",
+            "models/cnn_BTC_USDT_20250329_021800.pt",
+            "models/cnn_BTC_USDT_20250329_021448.pt", 
+            "models/cnn_BTC_USD_20250329_020711.pt",
+            "models/cnn_BTC_USD_20250329_020430.pt",
+            "models/cnn_BTC_USD_20250329_015217.pt",
+            
+            # Old RL models
+            "models/trading_agent_final.pt",
+            "models/trading_agent_best_pnl.pt",
+            "models/trading_agent_best_reward.pt",
+            "models/trading_agent_final.pt.backup",
+            "models/trading_agent_best_net_pnl.pt",
+            "models/trading_agent_best_net_pnl.pt.backup",
+            "models/trading_agent_best_pnl.pt.backup",
+            "models/trading_agent_best_reward.pt.backup",
+            "models/trading_agent_live_trained.pt",
+            
+            # Checkpoint files
+            "models/trading_agent_checkpoint_1650.pt.minimal",
+            "models/trading_agent_checkpoint_1650.pt.params.json",
+            "models/trading_agent_best_net_pnl.pt.policy.jit",
+            "models/trading_agent_best_net_pnl.pt.params.json",
+            "models/trading_agent_best_pnl.pt.params.json"
+        ]
+        
+        removed_count = 0
+        for file_path in files_to_remove:
+            path = Path(file_path)
+            if path.exists():
+                path.unlink()
+                removed_count += 1
+                logger.info(f"  🗑️ Removed: {path}")
+        
+        logger.info(f"✅ Removed {removed_count} old model files")
+    
+    def setup_training_progression(self):
+        """Set up training progression tracking system"""
+        logger.info("📊 Setting up training progression system...")
+        
+        # Create training progress structure
+        training_progress = {
+            "created": datetime.now().isoformat(),
+            "version": "1.0",
+            "models": {
+                "cnn": {
+                    "current_version": 1,
+                    "best_model": None,
+                    "training_history": [],
+                    "architecture": {
+                        "input_channels": 5,
+                        "window_size": 20,
+                        "output_classes": 3
+                    }
+                },
+                "rl": {
+                    "current_version": 1,
+                    "best_model": None,
+                    "training_history": [],
+                    "architecture": {
+                        "state_size": 100,
+                        "action_space": 3,
+                        "hidden_size": 256
+                    }
+                },
+                "williams_cnn": {
+                    "current_version": 1,
+                    "best_model": None,
+                    "training_history": [],
+                    "architecture": {
+                        "input_shape": [900, 50],
+                        "output_size": 10,
+                        "enabled": False  # Disabled until TensorFlow available
+                    }
+                }
+            },
+            "training_stats": {
+                "total_sessions": 0,
+                "best_accuracy": 0.0,
+                "best_pnl": 0.0,
+                "last_training": None
+            }
+        }
+        
+        # Save training progress
+        with open(self.training_progress_file, 'w') as f:
+            json.dump(training_progress, f, indent=2)
+        
+        logger.info(f"✅ Created training progress file: {self.training_progress_file}")
+    
+    def create_model_directories(self):
+        """Create clean model directory structure"""
+        logger.info("📁 Creating clean model directory structure...")
+        
+        directories = [
+            "models/cnn/current",
+            "models/cnn/training", 
+            "models/cnn/best",
+            "models/rl/current",
+            "models/rl/training",
+            "models/rl/best", 
+            "models/williams_cnn/current",
+            "models/williams_cnn/training",
+            "models/williams_cnn/best",
+            "models/checkpoints",
+            "models/training_logs"
+        ]
+        
+        for directory in directories:
+            Path(directory).mkdir(parents=True, exist_ok=True)
+            logger.info(f"  📂 Created: {directory}")
+        
+        logger.info("✅ Model directory structure created")
+    
+    def initialize_fresh_models(self):
+        """Initialize fresh model files for training"""
+        logger.info("🆕 Initializing fresh models...")
+        
+        # Keep only the essential saved model
+        essential_models = ["models/saved/cnn_model_best.pt"]
+        
+        for model_path in essential_models:
+            if Path(model_path).exists():
+                logger.info(f"  ✅ Keeping essential model: {model_path}")
+            else:
+                logger.warning(f"  ⚠️ Essential model not found: {model_path}")
+        
+        logger.info("✅ Fresh model initialization complete")
+    
+    def update_model_registry(self):
+        """Update model registry to use new structure"""
+        logger.info("⚙️ Updating model registry configuration...")
+        
+        registry_config = {
+            "model_paths": {
+                "cnn_current": "models/cnn/current/",
+                "cnn_best": "models/cnn/best/",
+                "rl_current": "models/rl/current/",
+                "rl_best": "models/rl/best/",
+                "williams_current": "models/williams_cnn/current/",
+                "williams_best": "models/williams_cnn/best/"
+            },
+            "auto_load_best": True,
+            "memory_limit_gb": 8.0,
+            "training_enabled": True
+        }
+        
+        config_path = Path("models/registry_config.json")
+        with open(config_path, 'w') as f:
+            json.dump(registry_config, f, indent=2)
+        
+        logger.info(f"✅ Model registry config saved: {config_path}")
+    
+    def run_cleanup(self):
+        """Execute complete cleanup and setup process"""
+        logger.info("🚀 Starting model cleanup and setup process...")
+        logger.info("=" * 60)
+        
+        try:
+            # Step 1: Backup existing models
+            self.backup_existing_models()
+            
+            # Step 2: Clean old conflicting models
+            self.clean_old_models()
+            
+            # Step 3: Setup training progression system
+            self.setup_training_progression()
+            
+            # Step 4: Create clean directory structure
+            self.create_model_directories()
+            
+            # Step 5: Initialize fresh models
+            self.initialize_fresh_models()
+            
+            # Step 6: Update model registry
+            self.update_model_registry()
+            
+            logger.info("=" * 60)
+            logger.info("✅ Model cleanup and setup completed successfully!")
+            logger.info(f"📁 Backup created at: {self.backup_dir}")
+            logger.info("🔄 Ready for fresh training with enhanced RL!")
+            
+        except Exception as e:
+            logger.error(f"❌ Error during cleanup: {e}")
+            import traceback
+            logger.error(traceback.format_exc())
+            raise
+
+def main():
+    """Main execution function"""
+    print("🧹 MODEL CLEANUP AND TRAINING SETUP")
+    print("=" * 50)
+    print("This script will:")
+    print("1. Backup existing models")  
+    print("2. Remove old/conflicting models")
+    print("3. Set up training progression tracking")
+    print("4. Create clean directory structure")
+    print("5. Initialize fresh training environment")
+    print("=" * 50)
+    
+    response = input("Continue? (y/N): ").strip().lower()
+    if response != 'y':
+        print("❌ Cleanup cancelled")
+        return
+    
+    cleanup_manager = ModelCleanupManager()
+    cleanup_manager.run_cleanup()
+
+# Run the interactive cleanup only when executed as a script, not on import.
+if __name__ == "__main__":
+    main()