Add AMD GPU compatibility fix for gfx1151, including fallback to CPU mode and environment variable setup

This commit is contained in:
Dobromir Popov
2025-11-22 16:06:32 +02:00
parent 8b784412b6
commit 539bd68110
10 changed files with 366 additions and 18 deletions

View File

@@ -322,12 +322,18 @@ class TradingOrchestrator:
# Initialize device - prefer the GPU (CUDA/ROCm) and fall back to CPU if the compatibility test fails
if torch.cuda.is_available():
try:
# Test CUDA availability
test_tensor = torch.tensor([1.0]).cuda()
# Test CUDA availability with actual Linear layer operation
# This catches architecture-specific issues like gfx1151 incompatibility
test_tensor = torch.randn(2, 10).cuda()
test_linear = torch.nn.Linear(10, 5).cuda()
test_result = test_linear(test_tensor)
logger.info(f"GPU compatibility test passed: {torch.cuda.get_device_name(0)}")
self.device = torch.device("cuda")
logger.info("CUDA device initialized successfully")
logger.info("CUDA/ROCm device initialized successfully")
except Exception as e:
logger.warning(f"CUDA initialization failed: {e}, falling back to CPU")
logger.warning(f"CUDA/ROCm initialization failed: {e}")
logger.warning("GPU architecture may not be supported - falling back to CPU")
logger.warning("This is common with newer AMD GPUs (gfx1151+) that require specific PyTorch builds")
self.device = torch.device("cpu")
else:
self.device = torch.device("cpu")