wip old MISC fix

Dobromir Popov
2025-12-08 16:56:37 +02:00
parent 81e7e6bfe6
commit 03888b6200
5 changed files with 719 additions and 343 deletions

@@ -318,27 +318,62 @@ class TradingOrchestrator:
         # Initialize confidence threshold
         self.confidence_threshold = self.config.get('confidence_threshold', 0.6)
 
-        # Determine the device to use (GPU if available, else CPU)
-        # Initialize device - force CPU mode to avoid CUDA errors
-        if torch.cuda.is_available():
-            try:
-                # Test CUDA availability with actual Linear layer operation
-                # This catches architecture-specific issues like gfx1151 incompatibility
-                test_tensor = torch.randn(2, 10).cuda()
-                test_linear = torch.nn.Linear(10, 5).cuda()
-                test_result = test_linear(test_tensor)
-                logger.info(f"GPU compatibility test passed: {torch.cuda.get_device_name(0)}")
-                self.device = torch.device("cuda")
-                logger.info("CUDA/ROCm device initialized successfully")
-            except Exception as e:
-                logger.warning(f"CUDA/ROCm initialization failed: {e}")
-                logger.warning("GPU architecture may not be supported - falling back to CPU")
-                logger.warning("This is common with newer AMD GPUs (gfx1151+) that require specific PyTorch builds")
-                self.device = torch.device("cpu")
-        else:
-            self.device = torch.device("cpu")
+        # Determine the device to use from config.yaml
+        self.device = self._get_device_from_config()
         logger.info(f"Using device: {self.device}")
 
+    def _get_device_from_config(self) -> torch.device:
+        """Get device from config.yaml or auto-detect"""
+        try:
+            gpu_config = self.config._config.get('gpu', {})
+            device_setting = gpu_config.get('device', 'auto')
+            fallback_to_cpu = gpu_config.get('fallback_to_cpu', True)
+            gpu_enabled = gpu_config.get('enabled', True)
+
+            # If GPU is disabled in config, use CPU
+            if not gpu_enabled:
+                logger.info("GPU disabled in config.yaml, using CPU")
+                return torch.device('cpu')
+
+            # Handle device selection
+            if device_setting == 'cpu':
+                logger.info("Device set to CPU in config.yaml")
+                return torch.device('cpu')
+            elif device_setting == 'cuda' or device_setting == 'auto':
+                # Try GPU first with compatibility test
+                if torch.cuda.is_available():
+                    try:
+                        # Test CUDA availability with actual Linear layer operation
+                        # This catches architecture-specific issues like gfx1151 incompatibility
+                        test_tensor = torch.randn(2, 10).cuda()
+                        test_linear = torch.nn.Linear(10, 5).cuda()
+                        test_result = test_linear(test_tensor)
+                        logger.info(f"GPU compatibility test passed: {torch.cuda.get_device_name(0)}")
+                        logger.info("CUDA/ROCm device initialized successfully")
+                        return torch.device("cuda")
+                    except Exception as e:
+                        logger.warning(f"CUDA/ROCm initialization failed: {e}")
+                        logger.warning("GPU architecture may not be supported - falling back to CPU")
+                        logger.warning("This is common with newer AMD GPUs (gfx1151+) that require specific PyTorch builds")
+                        if fallback_to_cpu:
+                            return torch.device("cpu")
+                        else:
+                            raise RuntimeError("GPU initialization failed and fallback_to_cpu is False")
+                else:
+                    if fallback_to_cpu:
+                        logger.warning("CUDA not available, falling back to CPU")
+                        return torch.device('cpu')
+                    else:
+                        raise RuntimeError("CUDA not available and fallback_to_cpu is False")
+            else:
+                logger.warning(f"Unknown device setting '{device_setting}', using auto-detection")
+                return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        except RuntimeError:
+            # Re-raise the intentional no-fallback errors above instead of
+            # letting the generic handler below swallow them
+            raise
+        except Exception as e:
+            logger.warning(f"Error reading device config: {e}, using auto-detection")
+            # Fallback to auto-detection
+            return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
         # Canonical model name aliases to eliminate ambiguity across UI/DB/FS
         # Canonical → accepted aliases (internal/legacy)
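
For reference, a minimal standalone sketch of the device-resolution logic this commit introduces. The gpu: block layout is inferred from the keys the new method reads (gpu.enabled, gpu.device, gpu.fallback_to_cpu); the resolve_device helper and the "config.yaml" path are illustrative, not part of the commit.

import torch
import yaml

def resolve_device(config_path: str = "config.yaml") -> torch.device:
    """Resolve a torch device from a config.yaml 'gpu' block (sketch).

    Assumed layout:
        gpu:
          enabled: true
          device: auto          # auto | cuda | cpu
          fallback_to_cpu: true
    """
    with open(config_path) as f:
        gpu = (yaml.safe_load(f) or {}).get('gpu', {})

    if not gpu.get('enabled', True) or gpu.get('device', 'auto') == 'cpu':
        return torch.device('cpu')

    if torch.cuda.is_available():
        try:
            # Same probe as the commit: a real Linear forward pass catches
            # architecture issues (e.g. gfx1151) that is_available() misses
            torch.nn.Linear(10, 5).cuda()(torch.randn(2, 10).cuda())
            return torch.device('cuda')
        except Exception:
            if not gpu.get('fallback_to_cpu', True):
                raise
    elif not gpu.get('fallback_to_cpu', True):
        raise RuntimeError("CUDA not available and fallback_to_cpu is False")
    return torch.device('cpu')

if __name__ == "__main__":
    print(resolve_device())

Running the probe once at startup is cheap and turns a crash deep inside training into a clean CPU fallback, which is why the commit exercises a real layer instead of trusting torch.cuda.is_available() alone.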