scalping dash also works initially
This commit is contained in:
increase_gpu_utilization.py (new file, 268 lines)
@@ -0,0 +1,268 @@
#!/usr/bin/env python3
"""
Increase GPU Utilization for Training

This script provides optimizations to maximize GPU usage during training.
"""

import torch
import torch.nn as nn
import numpy as np
import logging
from pathlib import Path
import sys

# Add project root to path
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def optimize_training_for_gpu():
    """Optimize training settings for maximum GPU utilization"""

    print("🚀 GPU TRAINING OPTIMIZATION GUIDE")
    print("=" * 50)

    # Check current GPU setup
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
        print(f"GPU: {gpu_name}")
        print(f"VRAM: {gpu_memory:.1f} GB")
        print()

        # Report current vs. recommended batch sizes
        print("📊 OPTIMAL BATCH SIZES:")
        print("Current batch sizes:")
        print("  - DQN Agent: 128")
        print("  - CNN Model: 32")
        print()

        # For an RTX 4060 with ~8GB VRAM we can increase batch sizes
        if gpu_memory >= 7.5:
            print("🔥 RECOMMENDED OPTIMIZATIONS:")
            print("  1. Increase DQN batch size: 128 → 256 or 512")
            print("  2. Increase CNN batch size: 32 → 64 or 128")
            print("  3. Use larger model variants")
            print("  4. Enable gradient accumulation")
            print()

            # Show memory usage estimates
            print("💾 MEMORY USAGE ESTIMATES:")
            print("  - Current DQN (24M params): ~1.5GB")
            print("  - Current CNN (168M params): ~3.2GB")
            print("  - Available for larger batches: ~3GB")
            print()

        print("⚡ PERFORMANCE OPTIMIZATIONS:")
        print("  1. ✅ Mixed precision training (already enabled)")
        print("  2. ✅ GPU tensors (already enabled)")
        print("  3. 🔧 Increase batch sizes")
        print("  4. 🔧 Use DataLoader with multiple workers")
        print("  5. 🔧 Pin memory for faster transfers")
        print("  6. 🔧 Compile models with torch.compile()")
        print()
    else:
        print("❌ No GPU available")
        return False

    return True

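# --- Illustrative sketch (added for this guide, not part of the original script) ---
# Items 4 and 5 of the checklist above recommend multi-worker data loading with
# pinned memory. A minimal, hypothetical example of such a DataLoader is sketched
# below; the TensorDataset of random tensors stands in for a real project dataset.
def _example_fast_dataloader():
    from torch.utils.data import DataLoader, TensorDataset

    dataset = TensorDataset(torch.randn(1024, 100),
                            torch.randint(0, 3, (1024,)))
    loader = DataLoader(
        dataset,
        batch_size=256,
        shuffle=True,
        num_workers=4,            # parallel data loading
        pin_memory=True,          # faster host -> GPU copies
        persistent_workers=True,  # keep workers alive between epochs
    )
    for features, labels in loader:
        # With pin_memory=True, non_blocking copies can overlap with compute
        features = features.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)
        break  # one batch is enough for the illustration
    return loader
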
def create_optimized_training_config():
    """Create optimized training configuration"""

    config = {
        # DQN optimizations
        'dqn': {
            'batch_size': 512,                 # Increased from 128
            'buffer_size': 100000,             # Increased from 20000
            'learning_rate': 0.0003,           # Slightly reduced for stability
            'target_update': 10,               # More frequent target updates
            'gradient_accumulation_steps': 2,  # Accumulate gradients
        },

        # CNN optimizations
        'cnn': {
            'batch_size': 128,                 # Increased from 32
            'learning_rate': 0.001,
            'epochs': 200,                     # More epochs for better learning
            'gradient_accumulation_steps': 4,
        },

        # Data loading optimizations
        'data_loading': {
            'num_workers': 4,                  # Parallel data loading
            'pin_memory': True,                # Faster CPU->GPU transfers
            'persistent_workers': True,        # Keep workers alive
        },

        # GPU optimizations
        'gpu': {
            'mixed_precision': True,
            'compile_model': True,             # Use torch.compile for speed
            'channels_last': True,             # Memory-layout optimization
        },
    }

    return config

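# --- Illustrative sketch (added for this guide, not part of the original script) ---
# The 'gradient_accumulation_steps' entries in the config above mean: run several
# smaller forward/backward passes and step the optimizer once, emulating a larger
# effective batch without extra VRAM. Model, optimizer, criterion and loader are
# placeholders here, not objects defined by this project.
def _example_gradient_accumulation(model, optimizer, criterion, loader,
                                   accumulation_steps=4):
    model.train()
    optimizer.zero_grad()
    for step, (inputs, targets) in enumerate(loader):
        loss = criterion(model(inputs), targets)
        # Scale the loss so the accumulated gradient matches one large batch
        (loss / accumulation_steps).backward()
        if (step + 1) % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
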
def apply_gpu_optimizations():
    """Apply GPU optimizations to existing models"""

    print("🔧 APPLYING GPU OPTIMIZATIONS...")
    print()

    try:
        # Test optimized DQN training
        from NN.models.dqn_agent import DQNAgent

        print("1. Testing optimized DQN Agent...")

        # Create agent with a larger batch size and replay buffer
        agent = DQNAgent(
            state_shape=(100,),
            n_actions=3,
            batch_size=512,       # Increased batch size
            buffer_size=100000,   # Larger memory
            learning_rate=0.0003
        )

        print(f"   ✅ DQN Agent with batch size {agent.batch_size}")
        print(f"   ✅ Memory buffer size: {agent.buffer_size:,}")

        # Test larger batch training
        print("   Testing larger batch training...")

        # Fill the replay buffer with random experiences
        for i in range(1000):
            state = np.random.randn(100).astype(np.float32)
            action = np.random.randint(0, 3)
            reward = np.random.randn() * 0.1
            next_state = np.random.randn(100).astype(np.float32)
            done = np.random.random() < 0.1
            agent.remember(state, action, reward, next_state, done)

        # Train with the larger batch
        loss = agent.replay()
        if loss > 0:
            print(f"   ✅ Large batch training successful, loss: {loss:.4f}")

        print()

        # Test optimized CNN
        from NN.models.enhanced_cnn import EnhancedCNN

        print("2. Testing optimized CNN...")

        model = EnhancedCNN((3, 20, 26), 3)

        # Test a larger batch
        batch_size = 128  # Increased from 32
        x = torch.randn(batch_size, 3, 20, 26, device=model.device)

        print(f"   Testing batch size: {batch_size}")

        # Forward pass
        outputs = model(x)
        if isinstance(outputs, tuple):
            print("   ✅ Large batch forward pass successful")
            print(f"   ✅ Output shape: {outputs[0].shape}")

        print()

        # Memory usage check
        if torch.cuda.is_available():
            memory_used = torch.cuda.memory_allocated() / 1024**3
            memory_total = torch.cuda.get_device_properties(0).total_memory / 1024**3
            memory_percent = (memory_used / memory_total) * 100

            print("📊 GPU Memory Usage:")
            print(f"   Used: {memory_used:.2f} GB / {memory_total:.1f} GB ({memory_percent:.1f}%)")

            if memory_percent < 70:
                print("   💡 You can increase batch sizes further!")
            elif memory_percent > 90:
                print("   ⚠️ Consider reducing batch sizes")
            else:
                print("   ✅ Good memory utilization")

        print()
        print("🎉 GPU OPTIMIZATIONS APPLIED SUCCESSFULLY!")
        print()
        print("📝 NEXT STEPS:")
        print("   1. Update your training scripts with larger batch sizes")
        print("   2. Use the optimized configurations")
        print("   3. Monitor GPU utilization during training")
        print("   4. Adjust batch sizes based on memory usage")

        return True

    except Exception as e:
        print(f"❌ Error applying optimizations: {e}")
        import traceback
        traceback.print_exc()
        return False

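# --- Illustrative sketch (added for this guide, not part of the original script) ---
# The script reports mixed precision as "already enabled" inside the project
# models; this is only a generic reminder of the standard torch.cuda.amp pattern
# (autocast + GradScaler). All arguments are placeholders; the scaler would be
# created once per training run, e.g. scaler = torch.cuda.amp.GradScaler().
def _example_mixed_precision_step(model, optimizer, criterion, scaler,
                                  inputs, targets):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = criterion(model(inputs), targets)
    scaler.scale(loss).backward()  # backward on the scaled loss
    scaler.step(optimizer)         # unscales gradients, then optimizer.step()
    scaler.update()
    return loss.item()
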
def monitor_gpu_during_training():
    """Show how to monitor GPU during training"""

    print("📊 GPU MONITORING DURING TRAINING")
    print("=" * 40)
    print()
    print("Use these commands to monitor GPU utilization:")
    print()
    print("1. NVIDIA System Management Interface:")
    print("   nvidia-smi -l 1")
    print("   (updates every 1 second)")
    print()
    print("2. Continuous monitoring:")
    print("   watch -n 1 nvidia-smi")
    print()
    print("3. Python GPU monitoring:")
    print("   python -c \"import GPUtil; GPUtil.showUtilization()\"")
    print()
    print("4. Memory monitoring in your training script:")
    print("   if torch.cuda.is_available():")
    print("       print(f'GPU Memory: {torch.cuda.memory_allocated()/1024**3:.2f}GB')")
    print()

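# --- Illustrative sketch (added for this guide, not part of the original script) ---
# A small helper mirroring command 4 above: it logs allocated/reserved/total GPU
# memory via torch.cuda, so it can be dropped into a training loop. The helper
# name is hypothetical, not an existing project function.
def _example_log_gpu_memory(tag=""):
    if not torch.cuda.is_available():
        return
    allocated = torch.cuda.memory_allocated() / 1024**3
    reserved = torch.cuda.memory_reserved() / 1024**3
    total = torch.cuda.get_device_properties(0).total_memory / 1024**3
    logger.info(f"GPU memory {tag}: allocated {allocated:.2f} GB, "
                f"reserved {reserved:.2f} GB, total {total:.1f} GB")
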
def main():
    """Main optimization function"""

    print("🚀 GPU TRAINING OPTIMIZATION TOOL")
    print("=" * 50)
    print()

    # Check GPU setup
    if not optimize_training_for_gpu():
        return 1

    # Show optimized config
    config = create_optimized_training_config()
    print("⚙️ OPTIMIZED CONFIGURATION:")
    for section, settings in config.items():
        print(f"   {section.upper()}:")
        for key, value in settings.items():
            print(f"      {key}: {value}")
        print()

    # Apply optimizations
    if not apply_gpu_optimizations():
        return 1

    # Show monitoring info
    monitor_gpu_during_training()

    print("✅ OPTIMIZATION COMPLETE!")
    print()
    print("Your training is working correctly on the GPU!")
    print("Use the optimizations above to increase GPU utilization.")

    return 0


if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)