reduce logging. actual training

This commit is contained in:
Dobromir Popov
2025-10-31 03:52:41 +02:00
parent 6ac324289c
commit 1bf41e06a8
9 changed files with 1700 additions and 74 deletions

View File

@@ -0,0 +1,184 @@
# Logging Configuration
## Issue: Excessive Werkzeug Logs
### Problem
```
2025-10-31 03:23:53,478 - werkzeug - INFO - 127.0.0.1 - - [31/Oct/2025 03:23:53] "POST /api/training-progress HTTP/1.1" 200 -
2025-10-31 03:23:55,519 - werkzeug - INFO - 127.0.0.1 - - [31/Oct/2025 03:23:55] "POST /api/training-progress HTTP/1.1" 200 -
2025-10-31 03:23:56,533 - werkzeug - INFO - 127.0.0.1 - - [31/Oct/2025 03:23:56] "POST /api/training-progress HTTP/1.1" 200 -
...
```
**Cause**: The frontend polls `/api/training-progress` every 1-2 seconds, and Flask's werkzeug logger logs every request at INFO level.
---
## Solution
### Fixed in `ANNOTATE/web/app.py`
```python
# Initialize Flask app
self.server = Flask(
__name__,
template_folder='templates',
static_folder='static'
)
# Suppress werkzeug request logs (reduce noise from polling endpoints)
werkzeug_logger = logging.getLogger('werkzeug')
werkzeug_logger.setLevel(logging.WARNING) # Only show warnings and errors, not INFO
```
**Result**: Werkzeug will now only log warnings and errors, not every request.
---
## Logging Levels
### Before (Noisy)
```
INFO - Every request logged
INFO - GET /api/chart-data
INFO - POST /api/training-progress
INFO - GET /static/css/style.css
... (hundreds of lines per minute)
```
### After (Clean)
```
WARNING - Only important events
ERROR - Only errors
... (quiet unless something is wrong)
```
---
## Customization
### Show Only Errors
```python
werkzeug_logger.setLevel(logging.ERROR) # Only errors
```
### Show All Requests (Debug Mode)
```python
werkzeug_logger.setLevel(logging.INFO) # All requests (default)
```
### Selective Filtering
```python
# Custom filter to exclude specific endpoints
class ExcludeEndpointFilter(logging.Filter):
def filter(self, record):
# Exclude training-progress endpoint
return '/api/training-progress' not in record.getMessage()
werkzeug_logger.addFilter(ExcludeEndpointFilter())
```
---
## Other Loggers
### Application Logger
```python
# Your application logs (keep at INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
```
### Third-Party Libraries
```python
# Suppress noisy third-party loggers
logging.getLogger('urllib3').setLevel(logging.WARNING)
logging.getLogger('requests').setLevel(logging.WARNING)
logging.getLogger('matplotlib').setLevel(logging.WARNING)
```
---
## Log File Configuration
### Current Setup
```python
log_file = Path(__file__).parent.parent / 'logs' / f'annotate_{datetime.now().strftime("%Y%m%d")}.log'
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler(log_file),
logging.StreamHandler(sys.stdout)
]
)
```
### Recommended: Separate Log Files
```python
# Application logs
app_log = 'logs/annotate_app.log'
app_handler = logging.FileHandler(app_log)
app_handler.setLevel(logging.INFO)
# Request logs (if needed)
request_log = 'logs/annotate_requests.log'
request_handler = logging.FileHandler(request_log)
request_handler.setLevel(logging.DEBUG)
# Configure werkzeug to use separate file
werkzeug_logger = logging.getLogger('werkzeug')
werkzeug_logger.addHandler(request_handler)
werkzeug_logger.setLevel(logging.WARNING) # Still suppress in main log
```
---
## Summary
### What Changed
- ✅ Werkzeug logger set to WARNING level
- ✅ No more INFO logs for every request
- ✅ Still logs errors and warnings
- ✅ Application logs unchanged
### Result
```
Before: 100+ log lines per minute (polling)
After: 0-5 log lines per minute (only important events)
```
### To Revert
```python
# Show all requests again
werkzeug_logger.setLevel(logging.INFO)
```
---
## Best Practices
1. **Production**: Use WARNING or ERROR for werkzeug
2. **Development**: Use INFO for debugging
3. **Polling Endpoints**: Always suppress or use separate log file
4. **Application Logs**: Keep at INFO or DEBUG as needed
5. **Third-Party**: Suppress noisy libraries
---
## Testing
After the change, you should see:
```
✅ No more werkzeug INFO logs
✅ Application logs still visible
✅ Errors still logged
✅ Clean console output
```
If you need to see requests for debugging:
```python
# Temporarily enable
logging.getLogger('werkzeug').setLevel(logging.INFO)
```

View File

@@ -615,7 +615,7 @@ class RealTrainingAdapter:
# Show breakdown of before/after
before_count = sum(1 for s in negative_samples if 'before' in str(s.get('timestamp', '')))
after_count = len(negative_samples) - before_count
logger.info(f" -> {before_count} beforesignal, {after_count} after signal")
logger.info(f" -> {before_count} before signal, {after_count} after signal")
except Exception as e:
logger.error(f" Error preparing test case {i+1}: {e}")
@@ -1413,12 +1413,17 @@ class RealTrainingAdapter:
result = trainer.train_step(batch)
if result is not None:
epoch_loss += result.get('total_loss', 0.0)
epoch_accuracy += result.get('accuracy', 0.0)
batch_loss = result.get('total_loss', 0.0)
batch_accuracy = result.get('accuracy', 0.0)
epoch_loss += batch_loss
epoch_accuracy += batch_accuracy
num_batches += 1
if (i + 1) % 100 == 0:
logger.info(f" Batch {i + 1}/{len(converted_batches)}, Loss: {result.get('total_loss', 0.0):.6f}, Accuracy: {result.get('accuracy', 0.0):.2%}")
# Log first batch and every 100th batch for debugging
if (i + 1) == 1 or (i + 1) % 100 == 0:
logger.info(f" Batch {i + 1}/{len(converted_batches)}, Loss: {batch_loss:.6f}, Accuracy: {batch_accuracy:.4f}")
else:
logger.warning(f" Batch {i + 1} returned None result - skipping")
except Exception as e:
logger.error(f" Error in batch {i + 1}: {e}")

View File

@@ -130,6 +130,10 @@ class AnnotationDashboard:
static_folder='static'
)
# Suppress werkzeug request logs (reduce noise from polling endpoints)
werkzeug_logger = logging.getLogger('werkzeug')
werkzeug_logger.setLevel(logging.WARNING) # Only show warnings and errors, not INFO
# Initialize Dash app (optional component)
self.app = Dash(
__name__,
@@ -1125,6 +1129,90 @@ class AnnotationDashboard:
}
})
# Live Training API Endpoints
@self.server.route('/api/live-training/start', methods=['POST'])
def start_live_training():
    """Start live inference and training mode"""
    try:
        orchestrator = self.orchestrator
        # No orchestrator wired up -> report a server-side failure.
        if not orchestrator:
            failure = {
                'success': False,
                'error': 'Orchestrator not available'
            }
            return jsonify(failure), 500
        started = orchestrator.start_live_training()
        if not started:
            failure = {
                'success': False,
                'error': 'Failed to start live training'
            }
            return jsonify(failure), 500
        return jsonify({
            'success': True,
            'status': 'started',
            'message': 'Live training mode started'
        })
    except Exception as e:
        logger.error(f"Error starting live training: {e}")
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500
@self.server.route('/api/live-training/stop', methods=['POST'])
def stop_live_training():
    """Stop live inference and training mode"""
    try:
        orchestrator = self.orchestrator
        # Guard clause: nothing to stop without an orchestrator.
        if not orchestrator:
            failure = {
                'success': False,
                'error': 'Orchestrator not available'
            }
            return jsonify(failure), 500
        stopped = orchestrator.stop_live_training()
        if not stopped:
            failure = {
                'success': False,
                'error': 'Failed to stop live training'
            }
            return jsonify(failure), 500
        return jsonify({
            'success': True,
            'status': 'stopped',
            'message': 'Live training mode stopped'
        })
    except Exception as e:
        logger.error(f"Error stopping live training: {e}")
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500
@self.server.route('/api/live-training/status', methods=['GET'])
def get_live_training_status():
    """Get live training status and statistics"""
    try:
        orchestrator = self.orchestrator
        # Unlike start/stop, a missing orchestrator is reported with a
        # 200 response here so pollers see "inactive" rather than an error.
        if not orchestrator:
            return jsonify({
                'success': False,
                'active': False,
                'error': 'Orchestrator not available'
            })
        active = orchestrator.is_live_training_active()
        # Stats are only meaningful (and only fetched) while training is live.
        training_stats = orchestrator.get_live_training_stats() if active else {}
        return jsonify({
            'success': True,
            'active': active,
            'stats': training_stats
        })
    except Exception as e:
        logger.error(f"Error getting live training status: {e}")
        return jsonify({
            'success': False,
            'active': False,
            'error': str(e)
        })
@self.server.route('/api/available-models', methods=['GET'])
def get_available_models():
"""Get list of available models with their load status"""