Debugging web UI
@@ -52,7 +52,7 @@ class MemoryMonitor:
     Provides detailed memory analytics and automatic GC optimization.
     """
 
-    def __init__(self, enable_tracemalloc: bool = True, snapshot_interval: float = 30.0):
+    def __init__(self, enable_tracemalloc: bool = False, snapshot_interval: float = 60.0):
         """
         Initialize memory monitor.
 
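Note: flipping `enable_tracemalloc` off by default and doubling `snapshot_interval` cuts the monitor's own overhead, since tracemalloc hooks every allocation and spends both CPU and memory on traces. For reference, a minimal standalone sketch of the standard-library calls a tracemalloc-backed snapshot path relies on (the surrounding MemoryMonitor wiring is assumed, not shown in this diff):

import tracemalloc

tracemalloc.start()                               # begin hooking allocations (has CPU/memory cost)
workload = [str(i) * 10 for i in range(100_000)]  # stand-in workload
current, peak = tracemalloc.get_traced_memory()   # bytes currently traced / peak
snapshot = tracemalloc.take_snapshot()
for stat in snapshot.statistics('lineno')[:3]:    # top allocation sites by source line
    print(stat)
tracemalloc.stop()
print(f"current={current / 1e6:.1f} MB, peak={peak / 1e6:.1f} MB")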
@@ -132,6 +132,15 @@ class MemoryMonitor:
                 if self.auto_gc_enabled:
                     self._optimize_gc()
 
+                # Periodic cleanup to prevent memory leaks in the monitor itself
+                if hasattr(self, '_cleanup_counter'):
+                    self._cleanup_counter += 1
+                else:
+                    self._cleanup_counter = 1
+
+                if self._cleanup_counter % 10 == 0:  # Every 10 cycles
+                    self._cleanup_monitor_data()
+
                 time.sleep(self.snapshot_interval)
 
             except Exception as e:
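Note: the cleanup is gated by a modulo counter so it only runs every 10th monitoring cycle. The same pattern, isolated as a sketch (class and names hypothetical); initializing the counter up front would avoid the hasattr() probe the diff adds:

class PeriodicGate:
    """Returns True once every `every` calls."""
    def __init__(self, every: int = 10):
        self.every = every
        self._count = 0  # created in __init__, so no hasattr() check is needed

    def tick(self) -> bool:
        self._count += 1
        return self._count % self.every == 0

# usage in a monitoring loop:
gate = PeriodicGate(every=10)
for _ in range(25):
    if gate.tick():
        print("cleanup")  # fires on calls 10 and 20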
@@ -195,30 +204,47 @@ class MemoryMonitor:
             logger.error(f"Error taking memory snapshot: {e}")
 
     def _update_object_counts(self) -> None:
-        """Update object counts by type"""
+        """Update object counts by type (limited to prevent memory leaks)"""
         try:
-            # Count objects by type
-            object_counts = defaultdict(int)
-            for obj in gc.get_objects():
-                obj_type = type(obj).__name__
-                object_counts[obj_type] += 1
+            # Only track specific object types to avoid creating too many objects
+            tracked_types = {
+                'dict', 'list', 'tuple', 'str', 'function', 'type',
+                'SystemMetrics', 'MetricPoint', 'MemorySnapshot'
+            }
+
+            # Count only tracked object types
+            object_counts = {}
+            all_objects = gc.get_objects()
 
-            # Store counts with timestamp
+            for tracked_type in tracked_types:
+                count = sum(1 for obj in all_objects if type(obj).__name__ == tracked_type)
+                if count > 0:
+                    object_counts[tracked_type] = count
+
+            # Store counts with timestamp (only for tracked types)
             timestamp = get_current_timestamp()
             for obj_type, count in object_counts.items():
                 self.object_counts[obj_type].append((timestamp, count))
 
-            # Update metrics for common types
+            # Clean up old entries to prevent memory growth
+            for obj_type in list(self.object_counts.keys()):
+                if len(self.object_counts[obj_type]) > 50:  # Keep only last 50 entries
+                    # Remove oldest entries
+                    while len(self.object_counts[obj_type]) > 50:
+                        self.object_counts[obj_type].popleft()
+
+            # Update metrics for common types (less frequently)
             try:
                 from .metrics_collector import metrics_collector
-                common_types = ['dict', 'list', 'tuple', 'str', 'function', 'type']
-                for obj_type in common_types:
-                    if obj_type in object_counts:
-                        metrics_collector.set_gauge(
-                            f'memory_objects_{obj_type}',
-                            object_counts[obj_type]
-                        )
+                # Only update metrics every 5th call to reduce object creation
+                if not hasattr(self, '_metrics_update_counter'):
+                    self._metrics_update_counter = 0
+
+                self._metrics_update_counter += 1
+                if self._metrics_update_counter % 5 == 0:
+                    for obj_type, count in object_counts.items():
+                        metrics_collector.set_gauge(f'memory_objects_{obj_type}', count)
+
             except ImportError:
                 pass  # Metrics collector not available
 
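Note on the cost profile of the new counting loop: it iterates `gc.get_objects()` once per tracked type, i.e. nine full passes over the heap here. A single pass with `collections.Counter` yields the same counts in one sweep; a sketch under the same tracked-type set:

import gc
from collections import Counter

TRACKED_TYPES = {
    'dict', 'list', 'tuple', 'str', 'function', 'type',
    'SystemMetrics', 'MetricPoint', 'MemorySnapshot',
}

def count_tracked_objects() -> dict:
    # One pass over the GC-tracked heap instead of one pass per type.
    counts = Counter(type(obj).__name__ for obj in gc.get_objects())
    return {name: counts[name] for name in TRACKED_TYPES if counts[name] > 0}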
@@ -226,22 +252,36 @@ class MemoryMonitor:
             logger.error(f"Error updating object counts: {e}")
 
     def _check_for_leaks(self) -> None:
-        """Check for potential memory leaks"""
+        """Check for potential memory leaks (less aggressive)"""
         try:
-            if len(self.memory_snapshots) < 10:
-                return  # Need more data
+            if len(self.memory_snapshots) < 20:  # Need more data for reliable detection
+                return
 
-            # Check for consistent memory growth
-            recent_snapshots = list(self.memory_snapshots)[-10:]
+            # Only check every 10th call to reduce overhead
+            if not hasattr(self, '_leak_check_counter'):
+                self._leak_check_counter = 0
+
+            self._leak_check_counter += 1
+            if self._leak_check_counter % 10 != 0:
+                return
+
+            # Check for consistent memory growth over longer period
+            recent_snapshots = list(self.memory_snapshots)[-20:]
             memory_values = [s.process_memory_mb for s in recent_snapshots]
 
-            # Simple linear regression to detect growth trend
-            if self._is_memory_growing(memory_values):
+            # More conservative growth detection
+            if self._is_memory_growing(memory_values, threshold=20.0):  # Increased threshold
                 # Check object count growth
                 potential_leaks = self._analyze_object_growth()
 
                 for leak in potential_leaks:
-                    if leak not in self.detected_leaks:
+                    # Check if we already reported this leak recently
+                    existing_leak = next(
+                        (l for l in self.detected_leaks if l.object_type == leak.object_type),
+                        None
+                    )
+
+                    if not existing_leak and leak.severity in ['medium', 'high']:
                         self.detected_leaks.append(leak)
                         logger.warning(f"Potential memory leak detected: {leak.object_type}")
 
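Note: the duplicate check scans `self.detected_leaks` with `next()` for every candidate, which is O(n) per leak. A set of already-reported type names does the same dedup in O(1) per candidate; a sketch against the same fields (function name hypothetical):

def record_new_leaks(detected_leaks: list, potential_leaks: list) -> None:
    """Set-based dedup equivalent to the next() scan above."""
    reported = {leak.object_type for leak in detected_leaks}
    for leak in potential_leaks:
        if leak.object_type not in reported and leak.severity in ('medium', 'high'):
            detected_leaks.append(leak)
            reported.add(leak.object_type)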
@@ -252,6 +292,10 @@ class MemoryMonitor:
             except ImportError:
                 pass
 
+            # Clean up old leak reports (keep only last 10)
+            if len(self.detected_leaks) > 10:
+                self.detected_leaks = self.detected_leaks[-10:]
+
         except Exception as e:
             logger.error(f"Error checking for leaks: {e}")
 
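The next hunk shows only the tail of `_is_memory_growing` (`return growth > threshold`). A plausible reconstruction of the whole helper, assuming a simple absolute-growth test over the sampled window; everything except the final line is inferred, not confirmed by this diff:

def _is_memory_growing(self, memory_values: list, threshold: float = 20.0) -> bool:
    """True if process memory grew by more than `threshold` MB over the window."""
    if len(memory_values) < 2:
        return False
    growth = memory_values[-1] - memory_values[0]
    return growth > threshold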
@@ -265,42 +309,53 @@ class MemoryMonitor:
         return growth > threshold
 
     def _analyze_object_growth(self) -> List[MemoryLeak]:
-        """Analyze object count growth to identify potential leaks"""
+        """Analyze object count growth to identify potential leaks (more conservative)"""
         leaks = []
 
         for obj_type, counts in self.object_counts.items():
-            if len(counts) < 10:
+            if len(counts) < 20:  # Need more data points
                 continue
 
-            # Get recent counts
-            recent_counts = list(counts)[-10:]
+            # Get recent counts over longer period
+            recent_counts = list(counts)[-20:]
             timestamps = [item[0] for item in recent_counts]
             count_values = [item[1] for item in recent_counts]
 
-            # Check for growth
-            if len(count_values) >= 2:
-                growth = count_values[-1] - count_values[0]
+            # Check for sustained growth
+            if len(count_values) >= 10:
+                # Calculate growth over the period
+                start_avg = sum(count_values[:5]) / 5  # Average of first 5 values
+                end_avg = sum(count_values[-5:]) / 5  # Average of last 5 values
+                growth = end_avg - start_avg
 
                 time_diff = (timestamps[-1] - timestamps[0]).total_seconds() / 3600  # hours
 
-                if growth > 100 and time_diff > 0:  # More than 100 objects growth
+                # More conservative thresholds
+                if growth > 500 and time_diff > 0.5:  # More than 500 objects growth over 30+ minutes
                     growth_rate = growth / time_diff
 
-                    # Determine severity
-                    if growth_rate > 1000:
+                    # Skip common types that naturally fluctuate
+                    if obj_type in ['dict', 'list', 'tuple', 'str']:
+                        continue
+
+                    # Determine severity with higher thresholds
+                    if growth_rate > 2000:
                         severity = 'high'
-                    elif growth_rate > 100:
+                    elif growth_rate > 500:
                         severity = 'medium'
                     else:
                         severity = 'low'
 
-                    leak = MemoryLeak(
-                        object_type=obj_type,
-                        count_increase=growth,
-                        size_increase_mb=growth * 0.001,  # Rough estimate
-                        growth_rate_per_hour=growth_rate,
-                        severity=severity
-                    )
-                    leaks.append(leak)
+                    # Only report medium and high severity leaks
+                    if severity in ['medium', 'high']:
+                        leak = MemoryLeak(
+                            object_type=obj_type,
+                            count_increase=int(growth),
+                            size_increase_mb=growth * 0.001,  # Rough estimate
+                            growth_rate_per_hour=growth_rate,
+                            severity=severity
+                        )
+                        leaks.append(leak)
 
         return leaks
 
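Note how the thresholds compose: a leak is only constructed when growth exceeds 500 objects over more than 30 minutes, common container types are skipped, and 'low' results are filtered out. A compact restatement with worked examples (helper name hypothetical):

def classify_growth_rate(growth: float, hours: float) -> str:
    """Mirror of the severity thresholds above; assumes growth > 500 and hours > 0.5 already hold."""
    rate = growth / hours
    if rate > 2000:
        return 'high'
    if rate > 500:
        return 'medium'
    return 'low'  # discarded by the medium/high filter

print(classify_growth_rate(600, 0.75))  # 800 objects/hour  -> 'medium'
print(classify_growth_rate(2500, 1.0))  # 2500 objects/hour -> 'high'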
@@ -346,6 +401,38 @@ class MemoryMonitor:
         except Exception as e:
             logger.error(f"Error optimizing GC: {e}")
 
+    def _cleanup_monitor_data(self) -> None:
+        """Clean up monitor data to prevent memory leaks"""
+        try:
+            # Limit memory snapshots
+            if len(self.memory_snapshots) > 500:
+                # Keep only the most recent 300 snapshots
+                while len(self.memory_snapshots) > 300:
+                    self.memory_snapshots.popleft()
+
+            # Clean up object counts
+            for obj_type in list(self.object_counts.keys()):
+                if len(self.object_counts[obj_type]) > 30:
+                    # Keep only the most recent 20 entries
+                    while len(self.object_counts[obj_type]) > 20:
+                        self.object_counts[obj_type].popleft()
+
+                # Remove empty deques
+                if len(self.object_counts[obj_type]) == 0:
+                    del self.object_counts[obj_type]
+
+            # Limit detected leaks
+            if len(self.detected_leaks) > 5:
+                self.detected_leaks = self.detected_leaks[-5:]
+
+            # Force a small garbage collection
+            gc.collect()
+
+            logger.debug("Cleaned up memory monitor data")
+
+        except Exception as e:
+            logger.error(f"Error cleaning up monitor data: {e}")
+
     def force_garbage_collection(self) -> Dict[str, int]:
         """Force garbage collection and return statistics"""
         try:
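Closing note: the manual size caps in `_cleanup_monitor_data` (and the trimming in `_update_object_counts`) could also be enforced at construction time, since `collections.deque` accepts a `maxlen` and silently evicts the oldest entry on overflow. A sketch, assuming the containers are created in `__init__`:

from collections import defaultdict, deque

# Bounded histories: no popleft() loops needed, the deque evicts the
# oldest entry automatically once maxlen is reached.
memory_snapshots = deque(maxlen=300)
object_counts = defaultdict(lambda: deque(maxlen=20))

for i in range(25):
    object_counts['dict'].append((i, i * 10))
print(len(object_counts['dict']))  # 20 -- the five oldest entries were evicted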