Debugging web UI
@@ -52,7 +52,7 @@ class MemoryMonitor:
     Provides detailed memory analytics and automatic GC optimization.
     """
 
-    def __init__(self, enable_tracemalloc: bool = True, snapshot_interval: float = 30.0):
+    def __init__(self, enable_tracemalloc: bool = False, snapshot_interval: float = 60.0):
         """
         Initialize memory monitor.
 
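Note: flipping `enable_tracemalloc` off by default and doubling `snapshot_interval` cuts the monitor's own overhead, since tracemalloc hooks every allocation and spends both CPU and memory on traces. For reference, a minimal standalone sketch of the standard-library calls a tracemalloc-backed snapshot path relies on (the surrounding MemoryMonitor wiring is assumed, not shown in this diff):

import tracemalloc

tracemalloc.start()                               # begin hooking allocations (has CPU/memory cost)
workload = [str(i) * 10 for i in range(100_000)]  # stand-in workload
current, peak = tracemalloc.get_traced_memory()   # bytes currently traced / peak
snapshot = tracemalloc.take_snapshot()
for stat in snapshot.statistics('lineno')[:3]:    # top allocation sites by source line
    print(stat)
tracemalloc.stop()
print(f"current={current / 1e6:.1f} MB, peak={peak / 1e6:.1f} MB")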
@@ -132,6 +132,15 @@ class MemoryMonitor:
                 if self.auto_gc_enabled:
                     self._optimize_gc()
 
+                # Periodic cleanup to prevent memory leaks in the monitor itself
+                if hasattr(self, '_cleanup_counter'):
+                    self._cleanup_counter += 1
+                else:
+                    self._cleanup_counter = 1
+
+                if self._cleanup_counter % 10 == 0:  # Every 10 cycles
+                    self._cleanup_monitor_data()
+
                 time.sleep(self.snapshot_interval)
 
             except Exception as e:
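Note: the cleanup is gated by a modulo counter so it only runs every 10th monitoring cycle. The same pattern, isolated as a sketch (class and names hypothetical); initializing the counter up front would avoid the hasattr() probe the diff adds:

class PeriodicGate:
    """Returns True once every `every` calls."""
    def __init__(self, every: int = 10):
        self.every = every
        self._count = 0  # created in __init__, so no hasattr() check is needed

    def tick(self) -> bool:
        self._count += 1
        return self._count % self.every == 0

# usage in a monitoring loop:
gate = PeriodicGate(every=10)
for _ in range(25):
    if gate.tick():
        print("cleanup")  # fires on calls 10 and 20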
@@ -195,30 +204,47 @@ class MemoryMonitor:
             logger.error(f"Error taking memory snapshot: {e}")
 
     def _update_object_counts(self) -> None:
-        """Update object counts by type"""
+        """Update object counts by type (limited to prevent memory leaks)"""
         try:
-            # Count objects by type
-            object_counts = defaultdict(int)
-            for obj in gc.get_objects():
-                obj_type = type(obj).__name__
-                object_counts[obj_type] += 1
+            # Only track specific object types to avoid creating too many objects
+            tracked_types = {
+                'dict', 'list', 'tuple', 'str', 'function', 'type',
+                'SystemMetrics', 'MetricPoint', 'MemorySnapshot'
+            }
+
+            # Count only tracked object types
+            object_counts = {}
+            all_objects = gc.get_objects()
 
-            # Store counts with timestamp
+            for tracked_type in tracked_types:
+                count = sum(1 for obj in all_objects if type(obj).__name__ == tracked_type)
+                if count > 0:
+                    object_counts[tracked_type] = count
+
+            # Store counts with timestamp (only for tracked types)
             timestamp = get_current_timestamp()
             for obj_type, count in object_counts.items():
                 self.object_counts[obj_type].append((timestamp, count))
 
-            # Update metrics for common types
+            # Clean up old entries to prevent memory growth
+            for obj_type in list(self.object_counts.keys()):
+                if len(self.object_counts[obj_type]) > 50:  # Keep only last 50 entries
+                    # Remove oldest entries
+                    while len(self.object_counts[obj_type]) > 50:
+                        self.object_counts[obj_type].popleft()
+
+            # Update metrics for common types (less frequently)
             try:
                 from .metrics_collector import metrics_collector
-                common_types = ['dict', 'list', 'tuple', 'str', 'function', 'type']
-                for obj_type in common_types:
-                    if obj_type in object_counts:
-                        metrics_collector.set_gauge(
-                            f'memory_objects_{obj_type}',
-                            object_counts[obj_type]
-                        )
+                # Only update metrics every 5th call to reduce object creation
+                if not hasattr(self, '_metrics_update_counter'):
+                    self._metrics_update_counter = 0
+
+                self._metrics_update_counter += 1
+                if self._metrics_update_counter % 5 == 0:
+                    for obj_type, count in object_counts.items():
+                        metrics_collector.set_gauge(f'memory_objects_{obj_type}', count)
+
             except ImportError:
                 pass  # Metrics collector not available
 
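Note on the cost profile of the new counting loop: it iterates `gc.get_objects()` once per tracked type, i.e. nine full passes over the heap here. A single pass with `collections.Counter` yields the same counts in one sweep; a sketch under the same tracked-type set:

import gc
from collections import Counter

TRACKED_TYPES = {
    'dict', 'list', 'tuple', 'str', 'function', 'type',
    'SystemMetrics', 'MetricPoint', 'MemorySnapshot',
}

def count_tracked_objects() -> dict:
    # One pass over the GC-tracked heap instead of one pass per type.
    counts = Counter(type(obj).__name__ for obj in gc.get_objects())
    return {name: counts[name] for name in TRACKED_TYPES if counts[name] > 0}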
@@ -226,22 +252,36 @@ class MemoryMonitor:
             logger.error(f"Error updating object counts: {e}")
 
     def _check_for_leaks(self) -> None:
-        """Check for potential memory leaks"""
+        """Check for potential memory leaks (less aggressive)"""
         try:
-            if len(self.memory_snapshots) < 10:
-                return  # Need more data
+            if len(self.memory_snapshots) < 20:  # Need more data for reliable detection
+                return
 
-            # Check for consistent memory growth
-            recent_snapshots = list(self.memory_snapshots)[-10:]
+            # Only check every 10th call to reduce overhead
+            if not hasattr(self, '_leak_check_counter'):
+                self._leak_check_counter = 0
+
+            self._leak_check_counter += 1
+            if self._leak_check_counter % 10 != 0:
+                return
+
+            # Check for consistent memory growth over longer period
+            recent_snapshots = list(self.memory_snapshots)[-20:]
             memory_values = [s.process_memory_mb for s in recent_snapshots]
 
-            # Simple linear regression to detect growth trend
-            if self._is_memory_growing(memory_values):
+            # More conservative growth detection
+            if self._is_memory_growing(memory_values, threshold=20.0):  # Increased threshold
                 # Check object count growth
                 potential_leaks = self._analyze_object_growth()
 
                 for leak in potential_leaks:
-                    if leak not in self.detected_leaks:
+                    # Check if we already reported this leak recently
+                    existing_leak = next(
+                        (l for l in self.detected_leaks if l.object_type == leak.object_type),
+                        None
+                    )
+
+                    if not existing_leak and leak.severity in ['medium', 'high']:
                         self.detected_leaks.append(leak)
                         logger.warning(f"Potential memory leak detected: {leak.object_type}")
 
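Note: the duplicate check scans `self.detected_leaks` with `next()` for every candidate, which is O(n) per leak. A set of already-reported type names does the same dedup in O(1) per candidate; a sketch against the same fields (function name hypothetical):

def record_new_leaks(detected_leaks: list, potential_leaks: list) -> None:
    """Set-based dedup equivalent to the next() scan above."""
    reported = {leak.object_type for leak in detected_leaks}
    for leak in potential_leaks:
        if leak.object_type not in reported and leak.severity in ('medium', 'high'):
            detected_leaks.append(leak)
            reported.add(leak.object_type)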
@@ -252,6 +292,10 @@ class MemoryMonitor:
             except ImportError:
                 pass
 
+            # Clean up old leak reports (keep only last 10)
+            if len(self.detected_leaks) > 10:
+                self.detected_leaks = self.detected_leaks[-10:]
+
         except Exception as e:
             logger.error(f"Error checking for leaks: {e}")
 
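The next hunk shows only the tail of `_is_memory_growing` (`return growth > threshold`). A plausible reconstruction of the whole helper, assuming a simple absolute-growth test over the sampled window; everything except the final line is inferred, not confirmed by this diff:

def _is_memory_growing(self, memory_values: list, threshold: float = 20.0) -> bool:
    """True if process memory grew by more than `threshold` MB over the window."""
    if len(memory_values) < 2:
        return False
    growth = memory_values[-1] - memory_values[0]
    return growth > threshold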
@@ -265,42 +309,53 @@ class MemoryMonitor:
         return growth > threshold
 
     def _analyze_object_growth(self) -> List[MemoryLeak]:
-        """Analyze object count growth to identify potential leaks"""
+        """Analyze object count growth to identify potential leaks (more conservative)"""
         leaks = []
 
         for obj_type, counts in self.object_counts.items():
-            if len(counts) < 10:
+            if len(counts) < 20:  # Need more data points
                 continue
 
-            # Get recent counts
-            recent_counts = list(counts)[-10:]
+            # Get recent counts over longer period
+            recent_counts = list(counts)[-20:]
             timestamps = [item[0] for item in recent_counts]
             count_values = [item[1] for item in recent_counts]
 
-            # Check for growth
-            if len(count_values) >= 2:
-                growth = count_values[-1] - count_values[0]
+            # Check for sustained growth
+            if len(count_values) >= 10:
+                # Calculate growth over the period
+                start_avg = sum(count_values[:5]) / 5  # Average of first 5 values
+                end_avg = sum(count_values[-5:]) / 5  # Average of last 5 values
+                growth = end_avg - start_avg
 
                 time_diff = (timestamps[-1] - timestamps[0]).total_seconds() / 3600  # hours
 
-                if growth > 100 and time_diff > 0:  # More than 100 objects growth
+                # More conservative thresholds
+                if growth > 500 and time_diff > 0.5:  # More than 500 objects growth over 30+ minutes
                     growth_rate = growth / time_diff
 
-                    # Determine severity
-                    if growth_rate > 1000:
+                    # Skip common types that naturally fluctuate
+                    if obj_type in ['dict', 'list', 'tuple', 'str']:
+                        continue
+
+                    # Determine severity with higher thresholds
+                    if growth_rate > 2000:
                         severity = 'high'
-                    elif growth_rate > 100:
+                    elif growth_rate > 500:
                         severity = 'medium'
                     else:
                         severity = 'low'
 
-                    leak = MemoryLeak(
-                        object_type=obj_type,
-                        count_increase=growth,
-                        size_increase_mb=growth * 0.001,  # Rough estimate
-                        growth_rate_per_hour=growth_rate,
-                        severity=severity
-                    )
-                    leaks.append(leak)
+                    # Only report medium and high severity leaks
+                    if severity in ['medium', 'high']:
+                        leak = MemoryLeak(
+                            object_type=obj_type,
+                            count_increase=int(growth),
+                            size_increase_mb=growth * 0.001,  # Rough estimate
+                            growth_rate_per_hour=growth_rate,
+                            severity=severity
+                        )
+                        leaks.append(leak)
 
         return leaks
 
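Note how the thresholds compose: a leak is only constructed when growth exceeds 500 objects over more than 30 minutes, common container types are skipped, and 'low' results are filtered out. A compact restatement with worked examples (helper name hypothetical):

def classify_growth_rate(growth: float, hours: float) -> str:
    """Mirror of the severity thresholds above; assumes growth > 500 and hours > 0.5 already hold."""
    rate = growth / hours
    if rate > 2000:
        return 'high'
    if rate > 500:
        return 'medium'
    return 'low'  # discarded by the medium/high filter

print(classify_growth_rate(600, 0.75))  # 800 objects/hour  -> 'medium'
print(classify_growth_rate(2500, 1.0))  # 2500 objects/hour -> 'high'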
@@ -346,6 +401,38 @@ class MemoryMonitor:
         except Exception as e:
             logger.error(f"Error optimizing GC: {e}")
 
+    def _cleanup_monitor_data(self) -> None:
+        """Clean up monitor data to prevent memory leaks"""
+        try:
+            # Limit memory snapshots
+            if len(self.memory_snapshots) > 500:
+                # Keep only the most recent 300 snapshots
+                while len(self.memory_snapshots) > 300:
+                    self.memory_snapshots.popleft()
+
+            # Clean up object counts
+            for obj_type in list(self.object_counts.keys()):
+                if len(self.object_counts[obj_type]) > 30:
+                    # Keep only the most recent 20 entries
+                    while len(self.object_counts[obj_type]) > 20:
+                        self.object_counts[obj_type].popleft()
+
+                # Remove empty deques
+                if len(self.object_counts[obj_type]) == 0:
+                    del self.object_counts[obj_type]
+
+            # Limit detected leaks
+            if len(self.detected_leaks) > 5:
+                self.detected_leaks = self.detected_leaks[-5:]
+
+            # Force a small garbage collection
+            gc.collect()
+
+            logger.debug("Cleaned up memory monitor data")
+
+        except Exception as e:
+            logger.error(f"Error cleaning up monitor data: {e}")
+
     def force_garbage_collection(self) -> Dict[str, int]:
         """Force garbage collection and return statistics"""
         try:
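Closing note: the manual size caps in `_cleanup_monitor_data` (and the trimming in `_update_object_counts`) could also be enforced at construction time, since `collections.deque` accepts a `maxlen` and silently evicts the oldest entry on overflow. A sketch, assuming the containers are created in `__init__`:

from collections import defaultdict, deque

# Bounded histories: no popleft() loops needed, the deque evicts the
# oldest entry automatically once maxlen is reached.
memory_snapshots = deque(maxlen=300)
object_counts = defaultdict(lambda: deque(maxlen=20))

for i in range(25):
    object_counts['dict'].append((i, i * 10))
print(len(object_counts['dict']))  # 20 -- the five oldest entries were evicted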