285 lines
10 KiB
Python
285 lines
10 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Model Cleanup and Training Setup Script

This script:
1. Backs up current models
2. Cleans old/conflicting models
3. Sets up proper training progression system
4. Initializes fresh model training
|
|
"""
|
|
|
|
import os
|
|
import shutil
|
|
import json
|
|
import logging
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
import torch
|
|
|
|
# Configure logging once at import time: INFO-level records rendered as
# "<timestamp> - <level> - <message>", then grab this module's logger.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

logger = logging.getLogger(__name__)
|
|
|
|
class ModelCleanupManager:
    """Manager for cleaning up and organizing model files.

    All paths are resolved against ``root_dir`` (the current working
    directory by default).  Constructing the manager immediately creates a
    timestamped backup directory under ``<root_dir>/model_backups``.

    Args:
        root_dir: Project root that contains (or will contain) ``models/``.
            Defaults to ``"."`` so ``ModelCleanupManager()`` keeps working.
    """

    # Glob patterns (relative to the root) of model files worth backing up.
    BACKUP_PATTERNS = (
        # CNN models
        "models/cnn_final_20250331_001817.pt.pt",
        "models/cnn_best.pt.pt",
        "models/cnn_BTC_USDT_*.pt",
        "models/cnn_BTC_USD_*.pt",
        # RL models
        "models/trading_agent_*.pt",
        "models/trading_agent_*.backup",
        # Other models
        "models/saved/cnn_model_best.pt",
    )

    # Exact files (relative to the root) deleted by ``clean_old_models``.
    OBSOLETE_FILES = (
        # Old CNN models with architecture conflicts
        "models/cnn_final_20250331_001817.pt.pt",
        "models/cnn_best.pt.pt",
        "models/cnn_BTC_USDT_20250329_021800.pt",
        "models/cnn_BTC_USDT_20250329_021448.pt",
        "models/cnn_BTC_USD_20250329_020711.pt",
        "models/cnn_BTC_USD_20250329_020430.pt",
        "models/cnn_BTC_USD_20250329_015217.pt",
        # Old RL models
        "models/trading_agent_final.pt",
        "models/trading_agent_best_pnl.pt",
        "models/trading_agent_best_reward.pt",
        "models/trading_agent_final.pt.backup",
        "models/trading_agent_best_net_pnl.pt",
        "models/trading_agent_best_net_pnl.pt.backup",
        "models/trading_agent_best_pnl.pt.backup",
        "models/trading_agent_best_reward.pt.backup",
        "models/trading_agent_live_trained.pt",
        # Checkpoint files
        "models/trading_agent_checkpoint_1650.pt.minimal",
        "models/trading_agent_checkpoint_1650.pt.params.json",
        "models/trading_agent_best_net_pnl.pt.policy.jit",
        "models/trading_agent_best_net_pnl.pt.params.json",
        "models/trading_agent_best_pnl.pt.params.json",
    )

    # Directory layout (relative to the root) created for fresh training.
    MODEL_DIRECTORIES = (
        "models/cnn/current",
        "models/cnn/training",
        "models/cnn/best",
        "models/rl/current",
        "models/rl/training",
        "models/rl/best",
        "models/williams_cnn/current",
        "models/williams_cnn/training",
        "models/williams_cnn/best",
        "models/checkpoints",
        "models/training_logs",
    )

    # Models that are expected to survive the cleanup.
    ESSENTIAL_MODELS = ("models/saved/cnn_model_best.pt",)

    def __init__(self, root_dir="."):
        self.root_dir = Path(root_dir)
        self.models_dir = self.root_dir / "models"
        self.backup_dir = self.root_dir / "model_backups" / f"backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        self.training_progress_file = self.models_dir / "training_progress.json"

        # Create backup directory
        self.backup_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Created backup directory: {self.backup_dir}")

    def backup_existing_models(self):
        """Copy existing model files into the backup directory.

        Backs up everything matching ``BACKUP_PATTERNS`` *and* every file
        listed in ``OBSOLETE_FILES``, so nothing that ``clean_old_models``
        deletes is lost without a copy.  The directory layout below the root
        is preserved inside the backup.
        """
        logger.info("🔄 Backing up existing models...")

        candidates = []
        for pattern in self.BACKUP_PATTERNS:
            candidates.extend(self.root_dir.glob(pattern))
        # Files scheduled for deletion are not all covered by the globs above
        # (e.g. *.pt.minimal, *.pt.params.json), so add them explicitly.
        candidates.extend(self.root_dir / name for name in self.OBSOLETE_FILES)

        backup_count = 0
        # dict.fromkeys de-duplicates while keeping discovery order.
        for file_path in dict.fromkeys(candidates):
            if not file_path.is_file():
                continue
            backup_path = self.backup_dir / file_path.relative_to(self.root_dir)
            backup_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(file_path, backup_path)
            backup_count += 1
            logger.info(f" 📁 Backed up: {file_path}")

        logger.info(f"✅ Backed up {backup_count} model files to {self.backup_dir}")

    def clean_old_models(self):
        """Delete the old/conflicting model files listed in ``OBSOLETE_FILES``.

        Missing entries are skipped silently; only regular files are removed.
        """
        logger.info("🧹 Cleaning old model files...")

        removed_count = 0
        for name in self.OBSOLETE_FILES:
            path = self.root_dir / name
            # is_file() rather than exists(): unlink() would raise on a directory.
            if path.is_file():
                path.unlink()
                removed_count += 1
                logger.info(f" 🗑️ Removed: {path}")

        logger.info(f"✅ Removed {removed_count} old model files")

    def setup_training_progression(self):
        """Write a fresh ``training_progress.json`` into the models directory.

        The models directory is created if it does not exist yet, so this
        step no longer depends on ``create_model_directories`` having run.
        An existing progress file is overwritten.
        """
        logger.info("📊 Setting up training progression system...")

        # Create training progress structure
        training_progress = {
            "created": datetime.now().isoformat(),
            "version": "1.0",
            "models": {
                "cnn": {
                    "current_version": 1,
                    "best_model": None,
                    "training_history": [],
                    "architecture": {
                        "input_channels": 5,
                        "window_size": 20,
                        "output_classes": 3,
                    },
                },
                "rl": {
                    "current_version": 1,
                    "best_model": None,
                    "training_history": [],
                    "architecture": {
                        "state_size": 100,
                        "action_space": 3,
                        "hidden_size": 256,
                    },
                },
                "williams_cnn": {
                    "current_version": 1,
                    "best_model": None,
                    "training_history": [],
                    "architecture": {
                        "input_shape": [900, 50],
                        "output_size": 10,
                        "enabled": False,  # Disabled until TensorFlow available
                    },
                },
            },
            "training_stats": {
                "total_sessions": 0,
                "best_accuracy": 0.0,
                "best_pnl": 0.0,
                "last_training": None,
            },
        }

        # Save training progress
        self.training_progress_file.parent.mkdir(parents=True, exist_ok=True)
        with open(self.training_progress_file, 'w', encoding='utf-8') as f:
            json.dump(training_progress, f, indent=2)

        logger.info(f"✅ Created training progress file: {self.training_progress_file}")

    def create_model_directories(self):
        """Create the directory tree listed in ``MODEL_DIRECTORIES``."""
        logger.info("📁 Creating clean model directory structure...")

        for directory in self.MODEL_DIRECTORIES:
            (self.root_dir / directory).mkdir(parents=True, exist_ok=True)
            logger.info(f" 📂 Created: {directory}")

        logger.info("✅ Model directory structure created")

    def initialize_fresh_models(self):
        """Report whether each model in ``ESSENTIAL_MODELS`` is still present.

        Nothing is created or modified; a missing model is only logged as a
        warning.
        """
        logger.info("🆕 Initializing fresh models...")

        # Keep only the essential saved model
        for model_path in self.ESSENTIAL_MODELS:
            if (self.root_dir / model_path).exists():
                logger.info(f" ✅ Keeping essential model: {model_path}")
            else:
                logger.warning(f" ⚠️ Essential model not found: {model_path}")

        logger.info("✅ Fresh model initialization complete")

    def update_model_registry(self):
        """Write ``models/registry_config.json`` describing the new layout.

        The models directory is created if needed.  The paths stored in the
        config are the literal root-relative strings shown below.
        """
        logger.info("⚙️ Updating model registry configuration...")

        registry_config = {
            "model_paths": {
                "cnn_current": "models/cnn/current/",
                "cnn_best": "models/cnn/best/",
                "rl_current": "models/rl/current/",
                "rl_best": "models/rl/best/",
                "williams_current": "models/williams_cnn/current/",
                "williams_best": "models/williams_cnn/best/",
            },
            "auto_load_best": True,
            "memory_limit_gb": 8.0,
            "training_enabled": True,
        }

        config_path = self.root_dir / "models" / "registry_config.json"
        config_path.parent.mkdir(parents=True, exist_ok=True)
        with open(config_path, 'w', encoding='utf-8') as f:
            json.dump(registry_config, f, indent=2)

        logger.info(f"✅ Model registry config saved: {config_path}")

    def run_cleanup(self):
        """Execute the complete cleanup and setup process.

        Runs backup, cleanup, progress-file creation, directory creation,
        the essential-model check and the registry update, in that order.

        Raises:
            Exception: Any error from a step is logged with its traceback
                and re-raised unchanged.
        """
        logger.info("🚀 Starting model cleanup and setup process...")
        logger.info("=" * 60)

        try:
            # Step 1: Backup existing models
            self.backup_existing_models()

            # Step 2: Clean old conflicting models
            self.clean_old_models()

            # Step 3: Setup training progression system
            self.setup_training_progression()

            # Step 4: Create clean directory structure
            self.create_model_directories()

            # Step 5: Initialize fresh models
            self.initialize_fresh_models()

            # Step 6: Update model registry
            self.update_model_registry()

            logger.info("=" * 60)
            logger.info("✅ Model cleanup and setup completed successfully!")
            logger.info(f"📁 Backup created at: {self.backup_dir}")
            logger.info("🔄 Ready for fresh training with enhanced RL!")

        except Exception as e:
            # logger.exception records the message together with the traceback.
            logger.exception(f"❌ Error during cleanup: {e}")
            raise
|
|
|
|
def main():
    """Interactive entry point: describe the cleanup, confirm, then run it.

    Prints every step the cleanup performs and asks for confirmation.
    Anything other than ``y``/``yes`` (case-insensitive) cancels, as does a
    closed or exhausted stdin, so a non-interactive run exits cleanly
    instead of failing with ``EOFError``.
    """
    print("🧹 MODEL CLEANUP AND TRAINING SETUP")
    print("=" * 50)
    print("This script will:")
    print("1. Backup existing models")
    print("2. Remove old/conflicting models")
    print("3. Set up training progression tracking")
    print("4. Create clean directory structure")
    print("5. Initialize fresh training environment")
    print("6. Write the model registry configuration")
    print("=" * 50)

    try:
        response = input("Continue? (y/N): ").strip().lower()
    except EOFError:
        # No stdin to read a confirmation from: treat as "not confirmed".
        response = ""

    if response not in ("y", "yes"):
        print("❌ Cleanup cancelled")
        return

    cleanup_manager = ModelCleanupManager()
    cleanup_manager.run_cleanup()
|
|
|
|
# Script entry point: run the interactive cleanup only when this file is
# executed directly, never as a side effect of importing it.
if __name__ == "__main__":
    main()