memory leak fixes

This commit is contained in:
Dobromir Popov
2025-11-13 16:05:15 +02:00
parent 13b6fafaf8
commit b0b24f36b2
2 changed files with 52 additions and 24 deletions

View File

@@ -118,20 +118,24 @@ class MemoryGuard:
if usage['at_limit']:
self.limit_exceeded_count += 1
logger.error(f"🔴 MEMORY LIMIT EXCEEDED: {usage['rss_gb']:.2f}GB / {usage['max_gb']:.1f}GB")
logger.error(f"MEMORY LIMIT EXCEEDED: {usage['rss_gb']:.2f}GB / {usage['max_gb']:.1f}GB")
# Aggressive cleanup
self._aggressive_cleanup()
# Check again after cleanup
usage_after = self.get_memory_usage()
if raise_on_limit:
raise MemoryError(
f"Memory limit exceeded: {usage['rss_gb']:.2f}GB / {usage['max_gb']:.1f}GB. "
f"Increase max_memory_gb or reduce batch size."
f"After cleanup: {usage_after['rss_gb']:.2f}GB. "
f"STOP TRAINING - Memory limit enforced!"
)
elif usage['at_warning']:
self.warning_count += 1
logger.warning(f"⚠️ Memory warning: {usage['rss_gb']:.2f}GB / {usage['max_gb']:.1f}GB ({usage['usage_percent']:.1f}%)")
logger.warning(f"Memory warning: {usage['rss_gb']:.2f}GB / {usage['max_gb']:.1f}GB ({usage['usage_percent']:.1f}%)")
if self.auto_cleanup:
self._trigger_cleanup()