diff --git a/ANNOTATE/web/app.py b/ANNOTATE/web/app.py
index e8b0cc3..547fdc0 100644
--- a/ANNOTATE/web/app.py
+++ b/ANNOTATE/web/app.py
@@ -245,7 +245,9 @@ class AnnotationDashboard:
             return best_checkpoint
 
         except Exception as e:
-            logger.debug(f"Error getting checkpoint info for {model_name}: {e}")
+            logger.error(f"Error getting checkpoint info for {model_name}: {e}")
+            import traceback
+            logger.error(traceback.format_exc())
             return None
 
     def _load_model_lazy(self, model_name: str) -> dict:
@@ -1280,6 +1282,7 @@ class AnnotationDashboard:
                 # Don't call training_adapter.get_available_models() as it may return objects
 
                 # Build model state dict with checkpoint info
+                logger.info(f"Building model states for {len(self.available_models)} models: {self.available_models}")
                 model_states = []
                 for model_name in self.available_models:
                     is_loaded = model_name in self.loaded_models
@@ -1303,9 +1306,13 @@ class AnnotationDashboard:
 
                     # If not loaded, try to read best checkpoint from disk
                     if not checkpoint_info:
-                        checkpoint_info = self._get_best_checkpoint_info(model_name)
-                        if checkpoint_info:
-                            checkpoint_info['source'] = 'disk'
+                        try:
+                            checkpoint_info = self._get_best_checkpoint_info(model_name)
+                            if checkpoint_info:
+                                checkpoint_info['source'] = 'disk'
+                        except Exception as e:
+                            logger.error(f"Error reading checkpoint for {model_name}: {e}")
+                            # Continue without checkpoint info
 
                     model_states.append({
                         'name': model_name,
@@ -1315,6 +1322,7 @@ class AnnotationDashboard:
                         'checkpoint': checkpoint_info  # Checkpoint metadata (loaded or from disk)
                     })
 
+                logger.info(f"Returning {len(model_states)} model states")
                 return jsonify({
                     'success': True,
                     'models': model_states,
diff --git a/test_checkpoint_reading.py b/test_checkpoint_reading.py
new file mode 100644
index 0000000..9ceb49a
--- /dev/null
+++ b/test_checkpoint_reading.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+"""Test checkpoint reading without loading models"""
+
+import os
+import glob
+import torch
+
+def test_checkpoint_reading():
+    """Test reading checkpoint info from disk"""
+
+    checkpoint_dirs = {
+        'Transformer': 'models/checkpoints/transformer',
+        'CNN': 'models/checkpoints/enhanced_cnn',
+        'DQN': 'models/checkpoints/dqn_agent'
+    }
+
+    for model_name, checkpoint_dir in checkpoint_dirs.items():
+        print(f"\n{model_name}:")
+        print(f"  Directory: {checkpoint_dir}")
+        print(f"  Exists: {os.path.exists(checkpoint_dir)}")
+
+        if not os.path.exists(checkpoint_dir):
+            print(f"  ❌ Directory not found")
+            continue
+
+        # Find checkpoint files
+        checkpoint_files = glob.glob(os.path.join(checkpoint_dir, '*.pt'))
+        print(f"  Checkpoints found: {len(checkpoint_files)}")
+
+        if not checkpoint_files:
+            print(f"  ❌ No checkpoint files")
+            continue
+
+        # Try to load best checkpoint
+        best_checkpoint = None
+        best_accuracy = -1
+
+        for cp_file in checkpoint_files:
+            try:
+                print(f"  Loading: {os.path.basename(cp_file)}")
+                checkpoint = torch.load(cp_file, map_location='cpu')
+
+                epoch = checkpoint.get('epoch', 0)
+                loss = checkpoint.get('loss', 0.0)
+                accuracy = checkpoint.get('accuracy', 0.0)
+
+                print(f"    Epoch: {epoch}, Loss: {loss:.6f}, Accuracy: {accuracy:.2%}")
+
+                if accuracy > best_accuracy:
+                    best_accuracy = accuracy
+                    best_checkpoint = {
+                        'filename': os.path.basename(cp_file),
+                        'epoch': epoch,
+                        'loss': loss,
+                        'accuracy': accuracy
+                    }
+            except Exception as e:
+                print(f"    ❌ Error: {e}")
+
+        if best_checkpoint:
+            print(f"  ✅ Best: {best_checkpoint['filename']}")
+            print(f"     Epoch: {best_checkpoint['epoch']}, Loss: {best_checkpoint['loss']:.6f}, Accuracy: {best_checkpoint['accuracy']:.2%}")
+        else:
+            print(f"  ❌ No valid checkpoint found")
+
+if __name__ == '__main__':
+    test_checkpoint_reading()