Add AMD GPU compatibility fix for gfx1151, including fallback to CPU mode and environment variable setup

This commit is contained in:
Dobromir Popov
2025-11-22 16:06:32 +02:00
parent 8b784412b6
commit 539bd68110
10 changed files with 366 additions and 18 deletions

View File

@@ -238,6 +238,7 @@ class ModelManager:
def _load_metadata(self) -> Dict[str, Any]:
"""Load model metadata with legacy support"""
metadata = {'models': {}, 'last_updated': datetime.now().isoformat()}
migration_needed = False
# First try to load from new unified metadata
if self.metadata_file.exists():
@@ -248,7 +249,7 @@ class ModelManager:
except Exception as e:
logger.error(f"Error loading unified metadata: {e}")
# Also load legacy metadata for backward compatibility
# Also load legacy metadata for backward compatibility (one-time migration)
if self.legacy_registry_file.exists():
try:
with open(self.legacy_registry_file, 'r') as f:
@@ -295,12 +296,19 @@ class ModelManager:
'checkpoints': model_info.get('checkpoints', [])
}
logger.info(f"Migrated legacy metadata for {model_name}: {legacy_path}")
migration_needed = True
logger.info(f"Loaded legacy metadata from {self.legacy_registry_file}")
if migration_needed:
logger.info(f"Loaded legacy metadata from {self.legacy_registry_file}")
except Exception as e:
logger.error(f"Error loading legacy metadata: {e}")
# Save metadata to persist migration
if migration_needed:
self._save_metadata(metadata)
logger.info("Legacy metadata migration completed and saved to unified format")
return metadata
def _load_checkpoint_metadata(self) -> Dict[str, List[Dict[str, Any]]]:
@@ -443,6 +451,18 @@ class ModelManager:
self.checkpoint_metadata[model_name] = checkpoints[:max_checkpoints]
self._save_checkpoint_metadata()
def _save_metadata(self, metadata: Optional[Dict[str, Any]] = None):
    """Save model metadata to the unified metadata file.

    Args:
        metadata: Mapping to persist. When ``None``, ``self.metadata`` is
            written instead. An explicitly passed empty dict is honoured
            rather than being replaced by ``self.metadata``.

    The mapping's ``'last_updated'`` key is overwritten in place with the
    current ``datetime.now()`` timestamp (ISO 8601) before writing.
    Failures are logged and swallowed; nothing is raised to the caller.
    """
    try:
        # Compare against None instead of relying on truthiness so an empty
        # dict passed by the caller is not silently swapped for self.metadata.
        data = metadata if metadata is not None else self.metadata
        data['last_updated'] = datetime.now().isoformat()
        # Serialize before opening the file: opening with 'w' truncates it,
        # so a serialization error raised mid-dump would otherwise leave a
        # truncated/invalid metadata file behind.
        payload = json.dumps(data, indent=2)
        with open(self.metadata_file, 'w') as f:
            f.write(payload)
        logger.debug(f"Saved model metadata to {self.metadata_file}")
    except Exception as e:
        logger.error(f"Error saving model metadata: {e}")
def _save_checkpoint_metadata(self):
"""Save checkpoint metadata to file"""
try: