Best checkpoint file not found

This commit is contained in:
Dobromir Popov
2025-07-03 00:44:31 +03:00
parent d15ebf54ca
commit 568ec049db
5 changed files with 340 additions and 5 deletions

View File

@@ -87,7 +87,7 @@ class CheckpointManager:
performance_score = self._calculate_performance_score(performance_metrics)
if not force_save and not self._should_save_checkpoint(model_name, performance_score):
logger.info(f"Skipping checkpoint save for {model_name} - performance not improved")
logger.debug(f"Skipping checkpoint save for {model_name} - performance not improved")
return None
success = self._save_model_file(model, checkpoint_path, model_type)
@@ -140,10 +140,11 @@ class CheckpointManager:
best_checkpoint = max(self.checkpoints[model_name], key=lambda x: x.performance_score)
if not Path(best_checkpoint.file_path).exists():
logger.error(f"Best checkpoint file not found: {best_checkpoint.file_path}")
# temporarily disable logging to avoid spam
# logger.error(f"Best checkpoint file not found: {best_checkpoint.file_path}")
return None
logger.info(f"Loading best checkpoint for {model_name}: {best_checkpoint.checkpoint_id}")
logger.debug(f"Loading best checkpoint for {model_name}: {best_checkpoint.checkpoint_id}")
return best_checkpoint.file_path, best_checkpoint
except Exception as e: