reduce logging. actual training

This commit is contained in:
Dobromir Popov
2025-10-31 03:52:41 +02:00
parent 6ac324289c
commit 1bf41e06a8
9 changed files with 1700 additions and 74 deletions

View File

@@ -0,0 +1,184 @@
# Logging Configuration
## Issue: Excessive Werkzeug Logs
### Problem
```
2025-10-31 03:23:53,478 - werkzeug - INFO - 127.0.0.1 - - [31/Oct/2025 03:23:53] "POST /api/training-progress HTTP/1.1" 200 -
2025-10-31 03:23:55,519 - werkzeug - INFO - 127.0.0.1 - - [31/Oct/2025 03:23:55] "POST /api/training-progress HTTP/1.1" 200 -
2025-10-31 03:23:56,533 - werkzeug - INFO - 127.0.0.1 - - [31/Oct/2025 03:23:56] "POST /api/training-progress HTTP/1.1" 200 -
...
```
**Cause**: The frontend polls `/api/training-progress` every 1-2 seconds, and Flask's werkzeug logger logs every request at INFO level.
---
## Solution
### Fixed in `ANNOTATE/web/app.py`
```python
# Initialize Flask app
self.server = Flask(
__name__,
template_folder='templates',
static_folder='static'
)
# Suppress werkzeug request logs (reduce noise from polling endpoints)
werkzeug_logger = logging.getLogger('werkzeug')
werkzeug_logger.setLevel(logging.WARNING) # Only show warnings and errors, not INFO
```
**Result**: Werkzeug will now only log warnings and errors, not every request.
---
## Logging Levels
### Before (Noisy)
```
INFO - Every request logged
INFO - GET /api/chart-data
INFO - POST /api/training-progress
INFO - GET /static/css/style.css
... (hundreds of lines per minute)
```
### After (Clean)
```
WARNING - Only important events
ERROR - Only errors
... (quiet unless something is wrong)
```
---
## Customization
### Show Only Errors
```python
werkzeug_logger.setLevel(logging.ERROR) # Only errors
```
### Show All Requests (Debug Mode)
```python
werkzeug_logger.setLevel(logging.INFO) # All requests (default)
```
### Selective Filtering
```python
# Custom filter to exclude specific endpoints
class ExcludeEndpointFilter(logging.Filter):
def filter(self, record):
# Exclude training-progress endpoint
return '/api/training-progress' not in record.getMessage()
werkzeug_logger.addFilter(ExcludeEndpointFilter())
```
---
## Other Loggers
### Application Logger
```python
# Your application logs (keep at INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
```
### Third-Party Libraries
```python
# Suppress noisy third-party loggers
logging.getLogger('urllib3').setLevel(logging.WARNING)
logging.getLogger('requests').setLevel(logging.WARNING)
logging.getLogger('matplotlib').setLevel(logging.WARNING)
```
---
## Log File Configuration
### Current Setup
```python
log_file = Path(__file__).parent.parent / 'logs' / f'annotate_{datetime.now().strftime("%Y%m%d")}.log'
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler(log_file),
logging.StreamHandler(sys.stdout)
]
)
```
### Recommended: Separate Log Files
```python
# Application logs
app_log = 'logs/annotate_app.log'
app_handler = logging.FileHandler(app_log)
app_handler.setLevel(logging.INFO)
# Request logs (if needed)
request_log = 'logs/annotate_requests.log'
request_handler = logging.FileHandler(request_log)
request_handler.setLevel(logging.DEBUG)
# Configure werkzeug to use separate file
werkzeug_logger = logging.getLogger('werkzeug')
werkzeug_logger.addHandler(request_handler)
werkzeug_logger.setLevel(logging.WARNING) # Still suppress in main log
```
---
## Summary
### What Changed
- ✅ Werkzeug logger set to WARNING level
- ✅ No more INFO logs for every request
- ✅ Still logs errors and warnings
- ✅ Application logs unchanged
### Result
```
Before: 100+ log lines per minute (polling)
After: 0-5 log lines per minute (only important events)
```
### To Revert
```python
# Show all requests again
werkzeug_logger.setLevel(logging.INFO)
```
---
## Best Practices
1. **Production**: Use WARNING or ERROR for werkzeug
2. **Development**: Use INFO for debugging
3. **Polling Endpoints**: Always suppress or use separate log file
4. **Application Logs**: Keep at INFO or DEBUG as needed
5. **Third-Party**: Suppress noisy libraries
---
## Testing
After the change, you should see:
```
✅ No more werkzeug INFO logs
✅ Application logs still visible
✅ Errors still logged
✅ Clean console output
```
If you need to see requests for debugging:
```python
# Temporarily enable
logging.getLogger('werkzeug').setLevel(logging.INFO)
```

View File

@@ -615,7 +615,7 @@ class RealTrainingAdapter:
# Show breakdown of before/after
before_count = sum(1 for s in negative_samples if 'before' in str(s.get('timestamp', '')))
after_count = len(negative_samples) - before_count
logger.info(f" -> {before_count} beforesignal, {after_count} after signal")
logger.info(f" -> {before_count} before signal, {after_count} after signal")
except Exception as e:
logger.error(f" Error preparing test case {i+1}: {e}")
@@ -1413,12 +1413,17 @@ class RealTrainingAdapter:
result = trainer.train_step(batch)
if result is not None:
epoch_loss += result.get('total_loss', 0.0)
epoch_accuracy += result.get('accuracy', 0.0)
batch_loss = result.get('total_loss', 0.0)
batch_accuracy = result.get('accuracy', 0.0)
epoch_loss += batch_loss
epoch_accuracy += batch_accuracy
num_batches += 1
if (i + 1) % 100 == 0:
logger.info(f" Batch {i + 1}/{len(converted_batches)}, Loss: {result.get('total_loss', 0.0):.6f}, Accuracy: {result.get('accuracy', 0.0):.2%}")
# Log first batch and every 100th batch for debugging
if (i + 1) == 1 or (i + 1) % 100 == 0:
logger.info(f" Batch {i + 1}/{len(converted_batches)}, Loss: {batch_loss:.6f}, Accuracy: {batch_accuracy:.4f}")
else:
logger.warning(f" Batch {i + 1} returned None result - skipping")
except Exception as e:
logger.error(f" Error in batch {i + 1}: {e}")

View File

@@ -130,6 +130,10 @@ class AnnotationDashboard:
static_folder='static'
)
# Suppress werkzeug request logs (reduce noise from polling endpoints)
werkzeug_logger = logging.getLogger('werkzeug')
werkzeug_logger.setLevel(logging.WARNING) # Only show warnings and errors, not INFO
# Initialize Dash app (optional component)
self.app = Dash(
__name__,
@@ -1125,6 +1129,90 @@ class AnnotationDashboard:
}
})
# Live Training API Endpoints
@self.server.route('/api/live-training/start', methods=['POST'])
def start_live_training():
    """Start live inference and training mode"""
    try:
        orchestrator = self.orchestrator
        # No orchestrator wired up -> report a server-side failure.
        if not orchestrator:
            failure = {
                'success': False,
                'error': 'Orchestrator not available'
            }
            return jsonify(failure), 500
        started = orchestrator.start_live_training()
        if not started:
            failure = {
                'success': False,
                'error': 'Failed to start live training'
            }
            return jsonify(failure), 500
        return jsonify({
            'success': True,
            'status': 'started',
            'message': 'Live training mode started'
        })
    except Exception as e:
        logger.error(f"Error starting live training: {e}")
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500
@self.server.route('/api/live-training/stop', methods=['POST'])
def stop_live_training():
    """Stop live inference and training mode"""
    try:
        orchestrator = self.orchestrator
        # Guard clause: nothing to stop without an orchestrator.
        if not orchestrator:
            failure = {
                'success': False,
                'error': 'Orchestrator not available'
            }
            return jsonify(failure), 500
        stopped = orchestrator.stop_live_training()
        if not stopped:
            failure = {
                'success': False,
                'error': 'Failed to stop live training'
            }
            return jsonify(failure), 500
        return jsonify({
            'success': True,
            'status': 'stopped',
            'message': 'Live training mode stopped'
        })
    except Exception as e:
        logger.error(f"Error stopping live training: {e}")
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500
@self.server.route('/api/live-training/status', methods=['GET'])
def get_live_training_status():
    """Get live training status and statistics"""
    try:
        orchestrator = self.orchestrator
        # Unlike start/stop, a missing orchestrator is reported with a
        # 200 response here so pollers see "inactive" rather than an error.
        if not orchestrator:
            return jsonify({
                'success': False,
                'active': False,
                'error': 'Orchestrator not available'
            })
        active = orchestrator.is_live_training_active()
        # Stats are only meaningful (and only fetched) while training is live.
        training_stats = orchestrator.get_live_training_stats() if active else {}
        return jsonify({
            'success': True,
            'active': active,
            'stats': training_stats
        })
    except Exception as e:
        logger.error(f"Error getting live training status: {e}")
        return jsonify({
            'success': False,
            'active': False,
            'error': str(e)
        })
@self.server.route('/api/available-models', methods=['GET'])
def get_available_models():
"""Get list of available models with their load status"""