Add AMD GPU compatibility fix for gfx1151, including fallback to CPU mode and environment variable setup
@@ -322,12 +322,18 @@ class TradingOrchestrator:
         # Initialize device - force CPU mode to avoid CUDA errors
         if torch.cuda.is_available():
             try:
-                # Test CUDA availability
-                test_tensor = torch.tensor([1.0]).cuda()
+                # Test CUDA availability with actual Linear layer operation
+                # This catches architecture-specific issues like gfx1151 incompatibility
+                test_tensor = torch.randn(2, 10).cuda()
+                test_linear = torch.nn.Linear(10, 5).cuda()
+                test_result = test_linear(test_tensor)
+                logger.info(f"GPU compatibility test passed: {torch.cuda.get_device_name(0)}")
                 self.device = torch.device("cuda")
-                logger.info("CUDA device initialized successfully")
+                logger.info("CUDA/ROCm device initialized successfully")
             except Exception as e:
-                logger.warning(f"CUDA initialization failed: {e}, falling back to CPU")
+                logger.warning(f"CUDA/ROCm initialization failed: {e}")
+                logger.warning("GPU architecture may not be supported - falling back to CPU")
+                logger.warning("This is common with newer AMD GPUs (gfx1151+) that require specific PyTorch builds")
                 self.device = torch.device("cpu")
         else:
             self.device = torch.device("cpu")
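The hunk above covers the runtime probe and the CPU fallback; the environment variable setup mentioned in the commit summary is not visible in this hunk. The sketch below is a minimal illustration of what such setup typically looks like for ROCm on architectures like gfx1151: HSA_OVERRIDE_GFX_VERSION is a standard ROCm override, but the chosen value and the helper name configure_rocm_env are assumptions for illustration, not the project's actual code.

import os

def configure_rocm_env() -> None:
    # Hypothetical helper (not shown in this commit): HSA_OVERRIDE_GFX_VERSION
    # tells the ROCm runtime to run kernels built for a different architecture.
    # "11.0.0" (gfx1100) is a commonly used override for RDNA3-class GPUs whose
    # exact target, such as gfx1151, lacks prebuilt PyTorch kernels.
    os.environ.setdefault("HSA_OVERRIDE_GFX_VERSION", "11.0.0")

# The override must be in place before the ROCm runtime initializes,
# so set it before importing torch.
configure_rocm_env()

import torch

# Same probe pattern as in the diff: exercise a real Linear layer so that
# architecture-specific kernel failures surface here instead of mid-training.
if torch.cuda.is_available():
    try:
        torch.nn.Linear(10, 5).cuda()(torch.randn(2, 10).cuda())
        device = torch.device("cuda")
    except Exception:
        device = torch.device("cpu")
else:
    device = torch.device("cpu")

Exercising a real layer (rather than only allocating a tensor, as the removed lines did) is what makes the probe catch kernel-level incompatibilities before any model work starts.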