wip old MISC fix
@@ -318,27 +318,62 @@ class TradingOrchestrator:
 
         # Initialize confidence threshold
         self.confidence_threshold = self.config.get('confidence_threshold', 0.6)
 
-        # Determine the device to use (GPU if available, else CPU)
-        # Initialize device - force CPU mode to avoid CUDA errors
-        if torch.cuda.is_available():
-            try:
-                # Test CUDA availability with an actual Linear layer operation.
-                # This catches architecture-specific issues like gfx1151 incompatibility.
-                test_tensor = torch.randn(2, 10).cuda()
-                test_linear = torch.nn.Linear(10, 5).cuda()
-                test_result = test_linear(test_tensor)
-                logger.info(f"GPU compatibility test passed: {torch.cuda.get_device_name(0)}")
-                self.device = torch.device("cuda")
-                logger.info("CUDA/ROCm device initialized successfully")
-            except Exception as e:
-                logger.warning(f"CUDA/ROCm initialization failed: {e}")
-                logger.warning("GPU architecture may not be supported - falling back to CPU")
-                logger.warning("This is common with newer AMD GPUs (gfx1151+) that require specific PyTorch builds")
-                self.device = torch.device("cpu")
-        else:
-            self.device = torch.device("cpu")
+        # Determine the device to use from config.yaml
+        self.device = self._get_device_from_config()
+        logger.info(f"Using device: {self.device}")
+
+    def _get_device_from_config(self) -> torch.device:
+        """Get the device from config.yaml, or fall back to auto-detection."""
+        try:
+            gpu_config = self.config._config.get('gpu', {})
+
+            device_setting = gpu_config.get('device', 'auto')
+            fallback_to_cpu = gpu_config.get('fallback_to_cpu', True)
+            gpu_enabled = gpu_config.get('enabled', True)
+
+            # If the GPU is disabled in config, use the CPU
+            if not gpu_enabled:
+                logger.info("GPU disabled in config.yaml, using CPU")
+                return torch.device('cpu')
+
+            # Handle device selection
+            if device_setting == 'cpu':
+                logger.info("Device set to CPU in config.yaml")
+                return torch.device('cpu')
+            elif device_setting in ('cuda', 'auto'):
+                # Try the GPU first, with a compatibility test
+                if torch.cuda.is_available():
+                    try:
+                        # Test CUDA availability with an actual Linear layer operation.
+                        # This catches architecture-specific issues like gfx1151 incompatibility.
+                        test_tensor = torch.randn(2, 10).cuda()
+                        test_linear = torch.nn.Linear(10, 5).cuda()
+                        test_result = test_linear(test_tensor)
+                        logger.info(f"GPU compatibility test passed: {torch.cuda.get_device_name(0)}")
+                        logger.info("CUDA/ROCm device initialized successfully")
+                        return torch.device("cuda")
+                    except Exception as e:
+                        logger.warning(f"CUDA/ROCm initialization failed: {e}")
+                        logger.warning("GPU architecture may not be supported - falling back to CPU")
+                        logger.warning("This is common with newer AMD GPUs (gfx1151+) that require specific PyTorch builds")
+                        if fallback_to_cpu:
+                            return torch.device("cpu")
+                        else:
+                            raise RuntimeError("GPU compatibility test failed and fallback_to_cpu is False")
+                else:
+                    if fallback_to_cpu:
+                        logger.warning("CUDA not available, falling back to CPU")
+                        return torch.device('cpu')
+                    else:
+                        raise RuntimeError("CUDA not available and fallback_to_cpu is False")
+            else:
+                logger.warning(f"Unknown device setting '{device_setting}', using auto-detection")
+                return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+        except RuntimeError:
+            # Deliberate failure when fallback_to_cpu is False: propagate
+            # instead of letting the generic handler below swallow it
+            raise
+        except Exception as e:
+            logger.warning(f"Error reading device config: {e}, using auto-detection")
+            # Fall back to auto-detection
+            return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
     # Canonical model name aliases to eliminate ambiguity across UI/DB/FS
     # Canonical → accepted aliases (internal/legacy)
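For reference, the lookups in _get_device_from_config imply a gpu: section in config.yaml with the keys enabled, device, and fallback_to_cpu; the sketch below shows an assumed shape for that section (values and comments are illustrative, not taken from the repository's actual config), followed by a minimal standalone version of the same compatibility probe. It is useful when torch.cuda.is_available() returns True but the build cannot actually run kernels, which is the gfx1151-style breakage the warnings above describe.

    # Assumed shape of the gpu: section in config.yaml (key names taken from
    # _get_device_from_config; the values shown are illustrative defaults):
    #
    #   gpu:
    #     enabled: true          # false forces CPU regardless of hardware
    #     device: auto           # one of: auto, cuda, cpu
    #     fallback_to_cpu: true  # drop to CPU on GPU failure instead of raising
    import torch

    if not torch.cuda.is_available():
        print("CUDA/ROCm not visible to this PyTorch build")
    else:
        try:
            # Same probe as the orchestrator uses: a real Linear forward pass,
            # since torch.cuda.is_available() alone misses architecture issues.
            x = torch.randn(2, 10).cuda()
            layer = torch.nn.Linear(10, 5).cuda()
            _ = layer(x)
            print(f"GPU compatibility test passed: {torch.cuda.get_device_name(0)}")
        except Exception as e:
            print(f"GPU visible but unusable - would fall back to CPU: {e}")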