models cleanup
This commit is contained in:
285
cleanup_and_setup_models.py
Normal file
285
cleanup_and_setup_models.py
Normal file
@ -0,0 +1,285 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Model Cleanup and Training Setup Script

This script:
1. Backs up current models
2. Removes old/conflicting models
3. Sets up the training progression tracking file
4. Creates a clean model directory structure
5. Checks that the essential saved model is still present
6. Writes the model registry configuration
|
||||
"""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
import torch
|
||||
|
||||
# Configure the root logger once, at import time: INFO level, with a
# timestamp and the level name in front of every message.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# Module-level logger used by everything else in this script.
logger = logging.getLogger(__name__)
|
||||
|
||||
class ModelCleanupManager:
    """Back up, prune and reorganise the model files below ``<root_dir>/models``.

    Every path is resolved against ``root_dir`` (the current working
    directory by default). Constructing the manager creates a fresh,
    timestamped backup directory under ``<root_dir>/model_backups``.
    """

    # Glob patterns (relative to root_dir) of the model files to back up.
    _BACKUP_PATTERNS = (
        # CNN models
        "models/cnn_final_20250331_001817.pt.pt",
        "models/cnn_best.pt.pt",
        "models/cnn_BTC_USDT_*.pt",
        "models/cnn_BTC_USD_*.pt",
        # RL models
        "models/trading_agent_*.pt",
        "models/trading_agent_*.backup",
        # Other models
        "models/saved/cnn_model_best.pt",
    )

    # Files (relative to root_dir) deleted by clean_old_models().
    _FILES_TO_REMOVE = (
        # Old CNN models with architecture conflicts
        "models/cnn_final_20250331_001817.pt.pt",
        "models/cnn_best.pt.pt",
        "models/cnn_BTC_USDT_20250329_021800.pt",
        "models/cnn_BTC_USDT_20250329_021448.pt",
        "models/cnn_BTC_USD_20250329_020711.pt",
        "models/cnn_BTC_USD_20250329_020430.pt",
        "models/cnn_BTC_USD_20250329_015217.pt",
        # Old RL models
        "models/trading_agent_final.pt",
        "models/trading_agent_best_pnl.pt",
        "models/trading_agent_best_reward.pt",
        "models/trading_agent_final.pt.backup",
        "models/trading_agent_best_net_pnl.pt",
        "models/trading_agent_best_net_pnl.pt.backup",
        "models/trading_agent_best_pnl.pt.backup",
        "models/trading_agent_best_reward.pt.backup",
        "models/trading_agent_live_trained.pt",
        # Checkpoint files
        "models/trading_agent_checkpoint_1650.pt.minimal",
        "models/trading_agent_checkpoint_1650.pt.params.json",
        "models/trading_agent_best_net_pnl.pt.policy.jit",
        "models/trading_agent_best_net_pnl.pt.params.json",
        "models/trading_agent_best_pnl.pt.params.json",
    )

    # Directories (relative to root_dir) making up the clean layout.
    _MODEL_DIRECTORIES = (
        "models/cnn/current",
        "models/cnn/training",
        "models/cnn/best",
        "models/rl/current",
        "models/rl/training",
        "models/rl/best",
        "models/williams_cnn/current",
        "models/williams_cnn/training",
        "models/williams_cnn/best",
        "models/checkpoints",
        "models/training_logs",
    )

    # Models that are expected to survive the cleanup.
    _ESSENTIAL_MODELS = ("models/saved/cnn_model_best.pt",)

    def __init__(self, root_dir: "str | Path" = "."):
        """Resolve the working paths and create the backup directory.

        Args:
            root_dir: Project root that contains (or will contain) the
                ``models`` directory. Defaults to the current directory.
        """
        self.root_dir = Path(root_dir)
        self.models_dir = self.root_dir / "models"
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        self.backup_dir = self.root_dir / "model_backups" / f"backup_{timestamp}"
        self.training_progress_file = self.models_dir / "training_progress.json"

        # Create backup directory
        self.backup_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Created backup directory: {self.backup_dir}")

    def backup_existing_models(self):
        """Copy every known model file into the backup directory.

        The candidates are the files matching the backup glob patterns plus
        every file that clean_old_models() is going to delete, so nothing is
        removed later without a copy existing first. The layout relative to
        ``root_dir`` is preserved inside the backup directory.
        """
        logger.info("🔄 Backing up existing models...")

        # A set de-duplicates files reachable both through a glob pattern and
        # through the removal list.
        candidates = set()
        for pattern in self._BACKUP_PATTERNS:
            candidates.update(self.root_dir.glob(pattern))
        candidates.update(self.root_dir / rel for rel in self._FILES_TO_REMOVE)

        backup_count = 0
        for file_path in sorted(candidates):
            if not file_path.is_file():
                continue
            backup_path = self.backup_dir / file_path.relative_to(self.root_dir)
            backup_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(file_path, backup_path)
            backup_count += 1
            logger.info(f" 📁 Backed up: {file_path}")

        logger.info(f"✅ Backed up {backup_count} model files to {self.backup_dir}")

    def clean_old_models(self):
        """Delete the old/conflicting model files; missing files are skipped."""
        logger.info("🧹 Cleaning old model files...")

        removed_count = 0
        for relative_path in self._FILES_TO_REMOVE:
            path = self.root_dir / relative_path
            try:
                path.unlink()
            except FileNotFoundError:
                # Already gone: nothing to clean for this entry.
                continue
            removed_count += 1
            logger.info(f" 🗑️ Removed: {path}")

        logger.info(f"✅ Removed {removed_count} old model files")

    def setup_training_progression(self):
        """Write a fresh ``training_progress.json`` into the models directory.

        An existing progress file is overwritten. The models directory is
        created first if it does not exist yet.
        """
        logger.info("📊 Setting up training progression system...")

        # Create training progress structure
        training_progress = {
            "created": datetime.now().isoformat(),
            "version": "1.0",
            "models": {
                "cnn": {
                    "current_version": 1,
                    "best_model": None,
                    "training_history": [],
                    "architecture": {
                        "input_channels": 5,
                        "window_size": 20,
                        "output_classes": 3,
                    },
                },
                "rl": {
                    "current_version": 1,
                    "best_model": None,
                    "training_history": [],
                    "architecture": {
                        "state_size": 100,
                        "action_space": 3,
                        "hidden_size": 256,
                    },
                },
                "williams_cnn": {
                    "current_version": 1,
                    "best_model": None,
                    "training_history": [],
                    "architecture": {
                        "input_shape": [900, 50],
                        "output_size": 10,
                        "enabled": False,  # Disabled until TensorFlow available
                    },
                },
            },
            "training_stats": {
                "total_sessions": 0,
                "best_accuracy": 0.0,
                "best_pnl": 0.0,
                "last_training": None,
            },
        }

        # This step runs before the directory layout is created, so the
        # models directory may not exist yet.
        self.training_progress_file.parent.mkdir(parents=True, exist_ok=True)

        # Save training progress
        with open(self.training_progress_file, 'w', encoding="utf-8") as f:
            json.dump(training_progress, f, indent=2)

        logger.info(f"✅ Created training progress file: {self.training_progress_file}")

    def create_model_directories(self):
        """Create the clean model directory layout (existing dirs are kept)."""
        logger.info("📁 Creating clean model directory structure...")

        for directory in self._MODEL_DIRECTORIES:
            (self.root_dir / directory).mkdir(parents=True, exist_ok=True)
            logger.info(f" 📂 Created: {directory}")

        logger.info("✅ Model directory structure created")

    def initialize_fresh_models(self):
        """Report whether each essential saved model is still present.

        No model file is created here; a missing essential model is only
        logged as a warning.
        """
        logger.info("🆕 Initializing fresh models...")

        # Keep only the essential saved model
        for model_path in self._ESSENTIAL_MODELS:
            if (self.root_dir / model_path).exists():
                logger.info(f" ✅ Keeping essential model: {model_path}")
            else:
                logger.warning(f" ⚠️ Essential model not found: {model_path}")

        logger.info("✅ Fresh model initialization complete")

    def update_model_registry(self):
        """Write ``registry_config.json`` describing the new directory layout."""
        logger.info("⚙️ Updating model registry configuration...")

        registry_config = {
            "model_paths": {
                "cnn_current": "models/cnn/current/",
                "cnn_best": "models/cnn/best/",
                "rl_current": "models/rl/current/",
                "rl_best": "models/rl/best/",
                "williams_current": "models/williams_cnn/current/",
                "williams_best": "models/williams_cnn/best/",
            },
            "auto_load_best": True,
            "memory_limit_gb": 8.0,
            "training_enabled": True,
        }

        config_path = self.models_dir / "registry_config.json"
        # Allow calling this step on its own, before the layout exists.
        config_path.parent.mkdir(parents=True, exist_ok=True)
        with open(config_path, 'w', encoding="utf-8") as f:
            json.dump(registry_config, f, indent=2)

        logger.info(f"✅ Model registry config saved: {config_path}")

    def run_cleanup(self):
        """Execute the complete cleanup and setup process, in order.

        Raises:
            Exception: any error raised by a step is logged with its
                traceback and then re-raised unchanged.
        """
        logger.info("🚀 Starting model cleanup and setup process...")
        logger.info("=" * 60)

        try:
            # Step 1: Backup existing models
            self.backup_existing_models()

            # Step 2: Clean old conflicting models
            self.clean_old_models()

            # Step 3: Setup training progression system
            self.setup_training_progression()

            # Step 4: Create clean directory structure
            self.create_model_directories()

            # Step 5: Initialize fresh models
            self.initialize_fresh_models()

            # Step 6: Update model registry
            self.update_model_registry()

            logger.info("=" * 60)
            logger.info("✅ Model cleanup and setup completed successfully!")
            logger.info(f"📁 Backup created at: {self.backup_dir}")
            logger.info("🔄 Ready for fresh training with enhanced RL!")

        except Exception as e:
            # logger.exception records the message together with the traceback.
            logger.exception(f"❌ Error during cleanup: {e}")
            raise
|
||||
|
||||
def main():
    """Describe the cleanup, ask for confirmation, then run it.

    Only ``y`` or ``yes`` (case-insensitive) starts the cleanup. Any other
    answer, including a closed or exhausted stdin, cancels before the
    cleanup manager is even constructed, so no file is touched.
    """
    print("🧹 MODEL CLEANUP AND TRAINING SETUP")
    print("=" * 50)
    print("This script will:")
    print("1. Backup existing models")
    print("2. Remove old/conflicting models")
    print("3. Set up training progression tracking")
    print("4. Create clean directory structure")
    print("5. Initialize fresh training environment")
    print("6. Update model registry configuration")
    print("=" * 50)

    try:
        response = input("Continue? (y/N): ").strip().lower()
    except EOFError:
        # Non-interactive stdin: fall back to the prompt's default answer, "N".
        response = ""

    if response not in ("y", "yes"):
        print("❌ Cleanup cancelled")
        return

    cleanup_manager = ModelCleanupManager()
    cleanup_manager.run_cleanup()


if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user