Debugging web UI

This commit is contained in:
Dobromir Popov
2025-08-05 15:58:51 +03:00
parent 622d059aae
commit bf4d43f6f7
8 changed files with 571 additions and 58 deletions

View File

@ -52,7 +52,7 @@ class MemoryMonitor:
Provides detailed memory analytics and automatic GC optimization.
"""
def __init__(self, enable_tracemalloc: bool = True, snapshot_interval: float = 30.0):
def __init__(self, enable_tracemalloc: bool = False, snapshot_interval: float = 60.0):
"""
Initialize memory monitor.
@ -132,6 +132,15 @@ class MemoryMonitor:
if self.auto_gc_enabled:
self._optimize_gc()
# Periodic cleanup to prevent memory leaks in the monitor itself
if hasattr(self, '_cleanup_counter'):
self._cleanup_counter += 1
else:
self._cleanup_counter = 1
if self._cleanup_counter % 10 == 0: # Every 10 cycles
self._cleanup_monitor_data()
time.sleep(self.snapshot_interval)
except Exception as e:
@ -195,30 +204,47 @@ class MemoryMonitor:
logger.error(f"Error taking memory snapshot: {e}")
def _update_object_counts(self) -> None:
"""Update object counts by type"""
"""Update object counts by type (limited to prevent memory leaks)"""
try:
# Count objects by type
object_counts = defaultdict(int)
# Only track specific object types to avoid creating too many objects
tracked_types = {
'dict', 'list', 'tuple', 'str', 'function', 'type',
'SystemMetrics', 'MetricPoint', 'MemorySnapshot'
}
for obj in gc.get_objects():
obj_type = type(obj).__name__
object_counts[obj_type] += 1
# Count only tracked object types
object_counts = {}
all_objects = gc.get_objects()
# Store counts with timestamp
for tracked_type in tracked_types:
count = sum(1 for obj in all_objects if type(obj).__name__ == tracked_type)
if count > 0:
object_counts[tracked_type] = count
# Store counts with timestamp (only for tracked types)
timestamp = get_current_timestamp()
for obj_type, count in object_counts.items():
self.object_counts[obj_type].append((timestamp, count))
# Update metrics for common types
# Clean up old entries to prevent memory growth
for obj_type in list(self.object_counts.keys()):
if len(self.object_counts[obj_type]) > 50: # Keep only last 50 entries
# Remove oldest entries
while len(self.object_counts[obj_type]) > 50:
self.object_counts[obj_type].popleft()
# Update metrics for common types (less frequently)
try:
from .metrics_collector import metrics_collector
common_types = ['dict', 'list', 'tuple', 'str', 'function', 'type']
for obj_type in common_types:
if obj_type in object_counts:
metrics_collector.set_gauge(
f'memory_objects_{obj_type}',
object_counts[obj_type]
)
# Only update metrics every 5th call to reduce object creation
if not hasattr(self, '_metrics_update_counter'):
self._metrics_update_counter = 0
self._metrics_update_counter += 1
if self._metrics_update_counter % 5 == 0:
for obj_type, count in object_counts.items():
metrics_collector.set_gauge(f'memory_objects_{obj_type}', count)
except ImportError:
pass # Metrics collector not available
@ -226,22 +252,36 @@ class MemoryMonitor:
logger.error(f"Error updating object counts: {e}")
def _check_for_leaks(self) -> None:
"""Check for potential memory leaks"""
"""Check for potential memory leaks (less aggressive)"""
try:
if len(self.memory_snapshots) < 10:
return # Need more data
if len(self.memory_snapshots) < 20: # Need more data for reliable detection
return
# Check for consistent memory growth
recent_snapshots = list(self.memory_snapshots)[-10:]
# Only check every 10th call to reduce overhead
if not hasattr(self, '_leak_check_counter'):
self._leak_check_counter = 0
self._leak_check_counter += 1
if self._leak_check_counter % 10 != 0:
return
# Check for consistent memory growth over longer period
recent_snapshots = list(self.memory_snapshots)[-20:]
memory_values = [s.process_memory_mb for s in recent_snapshots]
# Simple linear regression to detect growth trend
if self._is_memory_growing(memory_values):
# More conservative growth detection
if self._is_memory_growing(memory_values, threshold=20.0): # Increased threshold
# Check object count growth
potential_leaks = self._analyze_object_growth()
for leak in potential_leaks:
if leak not in self.detected_leaks:
# Check if we already reported this leak recently
existing_leak = next(
(l for l in self.detected_leaks if l.object_type == leak.object_type),
None
)
if not existing_leak and leak.severity in ['medium', 'high']:
self.detected_leaks.append(leak)
logger.warning(f"Potential memory leak detected: {leak.object_type}")
@ -252,6 +292,10 @@ class MemoryMonitor:
except ImportError:
pass
# Clean up old leak reports (keep only last 10)
if len(self.detected_leaks) > 10:
self.detected_leaks = self.detected_leaks[-10:]
except Exception as e:
logger.error(f"Error checking for leaks: {e}")
@ -265,42 +309,53 @@ class MemoryMonitor:
return growth > threshold
def _analyze_object_growth(self) -> List[MemoryLeak]:
"""Analyze object count growth to identify potential leaks"""
"""Analyze object count growth to identify potential leaks (more conservative)"""
leaks = []
for obj_type, counts in self.object_counts.items():
if len(counts) < 10:
if len(counts) < 20: # Need more data points
continue
# Get recent counts
recent_counts = list(counts)[-10:]
# Get recent counts over longer period
recent_counts = list(counts)[-20:]
timestamps = [item[0] for item in recent_counts]
count_values = [item[1] for item in recent_counts]
# Check for growth
if len(count_values) >= 2:
growth = count_values[-1] - count_values[0]
# Check for sustained growth
if len(count_values) >= 10:
# Calculate growth over the period
start_avg = sum(count_values[:5]) / 5 # Average of first 5 values
end_avg = sum(count_values[-5:]) / 5 # Average of last 5 values
growth = end_avg - start_avg
time_diff = (timestamps[-1] - timestamps[0]).total_seconds() / 3600 # hours
if growth > 100 and time_diff > 0: # More than 100 objects growth
# More conservative thresholds
if growth > 500 and time_diff > 0.5: # More than 500 objects growth over 30+ minutes
growth_rate = growth / time_diff
# Determine severity
if growth_rate > 1000:
# Skip common types that naturally fluctuate
if obj_type in ['dict', 'list', 'tuple', 'str']:
continue
# Determine severity with higher thresholds
if growth_rate > 2000:
severity = 'high'
elif growth_rate > 100:
elif growth_rate > 500:
severity = 'medium'
else:
severity = 'low'
leak = MemoryLeak(
object_type=obj_type,
count_increase=growth,
size_increase_mb=growth * 0.001, # Rough estimate
growth_rate_per_hour=growth_rate,
severity=severity
)
leaks.append(leak)
# Only report medium and high severity leaks
if severity in ['medium', 'high']:
leak = MemoryLeak(
object_type=obj_type,
count_increase=int(growth),
size_increase_mb=growth * 0.001, # Rough estimate
growth_rate_per_hour=growth_rate,
severity=severity
)
leaks.append(leak)
return leaks
@ -346,6 +401,38 @@ class MemoryMonitor:
except Exception as e:
logger.error(f"Error optimizing GC: {e}")
def _cleanup_monitor_data(self) -> None:
    """Trim the monitor's own bookkeeping so it does not leak memory itself.

    Bounds three internal collections — memory snapshots, per-type object
    count histories, and detected-leak reports — then triggers a garbage
    collection pass. Errors are logged, never raised.
    """
    try:
        # Limit memory snapshots: once past 500, trim back to the most
        # recent 300 (hysteresis avoids trimming on every call).
        if len(self.memory_snapshots) > 500:
            while len(self.memory_snapshots) > 300:
                self.memory_snapshots.popleft()

        # Trim per-type object-count histories.
        for obj_type in list(self.object_counts.keys()):
            if len(self.object_counts[obj_type]) > 30:
                # Keep only the most recent 20 entries.
                while len(self.object_counts[obj_type]) > 20:
                    self.object_counts[obj_type].popleft()

            # BUGFIX: check for empty histories unconditionally. Previously
            # this was nested under the `> 30` branch, where the deque had
            # just been trimmed to 20 entries, so an already-empty deque
            # could never be removed.
            if len(self.object_counts[obj_type]) == 0:
                del self.object_counts[obj_type]

        # Limit detected-leak reports to the 5 most recent.
        if len(self.detected_leaks) > 5:
            self.detected_leaks = self.detected_leaks[-5:]

        # Force a small garbage collection to reclaim what we just dropped.
        gc.collect()

        logger.debug("Cleaned up memory monitor data")

    except Exception as e:
        logger.error(f"Error cleaning up monitor data: {e}")
def force_garbage_collection(self) -> Dict[str, int]:
"""Force garbage collection and return statistics"""
try: