16. monitoring
COBY/monitoring/alert_manager.py (Normal file, 671 lines added)
@@ -0,0 +1,671 @@
"""
Alert management system for performance degradation and system issues.
"""

import smtplib
import json
from typing import Dict, List, Optional, Any, Callable
from collections import defaultdict, deque
from datetime import datetime, timezone, timedelta
from dataclasses import dataclass, field
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from enum import Enum

from ..utils.logging import get_logger
from ..utils.timing import get_current_timestamp

logger = get_logger(__name__)

class AlertSeverity(Enum):
    """Alert severity levels"""
    INFO = "info"
    WARNING = "warning"
    CRITICAL = "critical"


class AlertStatus(Enum):
    """Alert status"""
    ACTIVE = "active"
    RESOLVED = "resolved"
    ACKNOWLEDGED = "acknowledged"
    SUPPRESSED = "suppressed"

@dataclass
class Alert:
    """Alert definition"""
    id: str
    name: str
    description: str
    severity: AlertSeverity
    metric_name: str
    threshold: float
    comparison: str  # 'gt', 'lt', 'gte', 'lte', 'eq', 'ne'
    duration_seconds: int
    status: AlertStatus = AlertStatus.ACTIVE
    triggered_at: Optional[datetime] = None
    resolved_at: Optional[datetime] = None
    acknowledged_at: Optional[datetime] = None
    acknowledged_by: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Convert alert to dictionary"""
        return {
            'id': self.id,
            'name': self.name,
            'description': self.description,
            'severity': self.severity.value,
            'metric_name': self.metric_name,
            'threshold': self.threshold,
            'comparison': self.comparison,
            'duration_seconds': self.duration_seconds,
            'status': self.status.value,
            'triggered_at': self.triggered_at.isoformat() if self.triggered_at else None,
            'resolved_at': self.resolved_at.isoformat() if self.resolved_at else None,
            'acknowledged_at': self.acknowledged_at.isoformat() if self.acknowledged_at else None,
            'acknowledged_by': self.acknowledged_by,
            'metadata': self.metadata
        }

@dataclass
class AlertRule:
    """Alert rule configuration"""
    name: str
    metric_name: str
    threshold: float
    comparison: str
    duration_seconds: int
    severity: AlertSeverity
    description: str = ""
    enabled: bool = True
    metadata: Dict[str, Any] = field(default_factory=dict)

@dataclass
class NotificationChannel:
    """Notification channel configuration"""
    name: str
    type: str  # 'email', 'webhook', 'slack'
    config: Dict[str, Any]
    enabled: bool = True
    severity_filter: List[AlertSeverity] = field(default_factory=list)

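# Illustrative configuration sketch (values are assumptions, not project defaults). The
# AlertRule fields follow the dataclass above; the email config keys mirror the ones read
# by AlertManager._send_email_notification further down (smtp_server, smtp_port, use_tls,
# username, password, from_email, to_emails).
#
#   rule = AlertRule(
#       name="high_cpu",
#       metric_name="cpu_usage_percent",
#       threshold=90.0,
#       comparison="gt",
#       duration_seconds=60,
#       severity=AlertSeverity.CRITICAL,
#       description="CPU usage above 90% for one minute",
#   )
#
#   email_channel = NotificationChannel(
#       name="ops_email",
#       type="email",
#       config={
#           "smtp_server": "smtp.example.com",
#           "smtp_port": 587,
#           "use_tls": True,
#           "username": "alerts@example.com",
#           "password": "...",
#           "from_email": "alerts@example.com",
#           "to_emails": ["ops@example.com"],
#       },
#       severity_filter=[AlertSeverity.WARNING, AlertSeverity.CRITICAL],
#   )
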
class AlertManager:
    """
    Manages alerts, notifications, and alert lifecycle.

    Provides comprehensive alerting with multiple notification channels
    and alert suppression capabilities.
    """

    def __init__(self):
        """Initialize alert manager"""
        # Alert storage
        self.alert_rules: Dict[str, AlertRule] = {}
        self.active_alerts: Dict[str, Alert] = {}
        self.alert_history: deque = deque(maxlen=10000)

        # Notification channels
        self.notification_channels: Dict[str, NotificationChannel] = {}

        # Alert state tracking
        self.metric_values: Dict[str, deque] = defaultdict(lambda: deque(maxlen=100))
        self.alert_triggers: Dict[str, datetime] = {}

        # Suppression rules
        self.suppression_rules: Dict[str, Dict[str, Any]] = {}

        # Callbacks
        self.alert_callbacks: List[Callable[[Alert], None]] = []

        # Statistics
        self.alerts_triggered = 0
        self.alerts_resolved = 0
        self.notifications_sent = 0

        logger.info("Alert manager initialized")

    def add_alert_rule(self, rule: AlertRule) -> None:
        """Add an alert rule"""
        self.alert_rules[rule.name] = rule
        logger.info(f"Added alert rule: {rule.name}")

    def remove_alert_rule(self, rule_name: str) -> None:
        """Remove an alert rule"""
        if rule_name in self.alert_rules:
            del self.alert_rules[rule_name]
            # Also remove any active alerts for this rule
            alerts_to_remove = [
                alert_id for alert_id, alert in self.active_alerts.items()
                if alert.name == rule_name
            ]
            for alert_id in alerts_to_remove:
                del self.active_alerts[alert_id]
            logger.info(f"Removed alert rule: {rule_name}")

    def add_notification_channel(self, channel: NotificationChannel) -> None:
        """Add a notification channel"""
        self.notification_channels[channel.name] = channel
        logger.info(f"Added notification channel: {channel.name} ({channel.type})")

    def remove_notification_channel(self, channel_name: str) -> None:
        """Remove a notification channel"""
        if channel_name in self.notification_channels:
            del self.notification_channels[channel_name]
            logger.info(f"Removed notification channel: {channel_name}")

    def update_metric_value(self, metric_name: str, value: float) -> None:
        """Update metric value and check alerts"""
        timestamp = get_current_timestamp()
        self.metric_values[metric_name].append((timestamp, value))

        # Check all alert rules for this metric
        for rule_name, rule in self.alert_rules.items():
            if rule.metric_name == metric_name and rule.enabled:
                self._check_alert_rule(rule, value, timestamp)

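    # Triggering is two-phase (see _check_alert_rule below): the first breaching reading only
    # records a trigger timestamp; the alert fires when a later reading is still breaching after
    # `duration_seconds` have elapsed, and any reading back inside the threshold clears the
    # pending trigger and resolves an active alert.
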
    def _check_alert_rule(self, rule: AlertRule, value: float, timestamp: datetime) -> None:
        """Check if an alert rule should be triggered"""
        try:
            # Check if condition is met
            condition_met = self._evaluate_condition(rule.comparison, value, rule.threshold)

            alert_id = f"{rule.name}_{rule.metric_name}"

            if condition_met:
                # Check if we need to wait for duration
                if alert_id not in self.alert_triggers:
                    self.alert_triggers[alert_id] = timestamp
                    return

                # Check if duration has passed
                trigger_time = self.alert_triggers[alert_id]
                if (timestamp - trigger_time).total_seconds() >= rule.duration_seconds:
                    # Trigger alert if not already active
                    if alert_id not in self.active_alerts:
                        self._trigger_alert(rule, value, timestamp)
            else:
                # Condition not met - clear trigger time and resolve alert if active
                self.alert_triggers.pop(alert_id, None)
                if alert_id in self.active_alerts:
                    self._resolve_alert(alert_id, timestamp)

        except Exception as e:
            logger.error(f"Error checking alert rule {rule.name}: {e}")

    def _evaluate_condition(self, comparison: str, value: float, threshold: float) -> bool:
        """Evaluate alert condition"""
        if comparison == 'gt':
            return value > threshold
        elif comparison == 'lt':
            return value < threshold
        elif comparison == 'eq':
            return abs(value - threshold) < 0.001
        elif comparison == 'ne':
            return abs(value - threshold) >= 0.001
        elif comparison == 'gte':
            return value >= threshold
        elif comparison == 'lte':
            return value <= threshold
        else:
            logger.warning(f"Unknown comparison operator: {comparison}")
            return False

    def _trigger_alert(self, rule: AlertRule, value: float, timestamp: datetime) -> None:
        """Trigger an alert"""
        try:
            alert_id = f"{rule.name}_{rule.metric_name}"

            # Create alert
            alert = Alert(
                id=alert_id,
                name=rule.name,
                description=rule.description or f"{rule.metric_name} {rule.comparison} {rule.threshold}",
                severity=rule.severity,
                metric_name=rule.metric_name,
                threshold=rule.threshold,
                comparison=rule.comparison,
                duration_seconds=rule.duration_seconds,
                triggered_at=timestamp,
                metadata={
                    'current_value': value,
                    'rule_metadata': rule.metadata
                }
            )

            # Check suppression rules
            if self._is_suppressed(alert):
                alert.status = AlertStatus.SUPPRESSED
                logger.info(f"Alert suppressed: {alert.name}")
                return

            # Store alert
            self.active_alerts[alert_id] = alert
            self.alert_history.append(alert)
            self.alerts_triggered += 1

            logger.warning(f"Alert triggered: {alert.name} - {alert.description}")

            # Send notifications
            self._send_notifications(alert)

            # Call callbacks
            for callback in self.alert_callbacks:
                try:
                    callback(alert)
                except Exception as e:
                    logger.error(f"Error in alert callback: {e}")

        except Exception as e:
            logger.error(f"Error triggering alert: {e}")

    def _resolve_alert(self, alert_id: str, timestamp: datetime) -> None:
        """Resolve an alert"""
        try:
            if alert_id in self.active_alerts:
                alert = self.active_alerts[alert_id]
                alert.status = AlertStatus.RESOLVED
                alert.resolved_at = timestamp

                # Move to history and remove from active
                self.alert_history.append(alert)
                del self.active_alerts[alert_id]
                self.alerts_resolved += 1

                logger.info(f"Alert resolved: {alert.name}")

                # Send resolution notifications
                self._send_resolution_notifications(alert)

        except Exception as e:
            logger.error(f"Error resolving alert {alert_id}: {e}")

    def _is_suppressed(self, alert: Alert) -> bool:
        """Check if alert should be suppressed"""
        for rule_name, rule in self.suppression_rules.items():
            try:
                # Check if suppression rule applies
                if self._matches_suppression_rule(alert, rule):
                    return True
            except Exception as e:
                logger.error(f"Error checking suppression rule {rule_name}: {e}")

        return False

    def _matches_suppression_rule(self, alert: Alert, rule: Dict[str, Any]) -> bool:
        """Check if alert matches suppression rule"""
        # Check alert name pattern
        if 'alert_pattern' in rule:
            import re
            if not re.match(rule['alert_pattern'], alert.name):
                return False

        # Check severity
        if 'severity' in rule:
            if alert.severity.value not in rule['severity']:
                return False

        # Check time window
        if 'time_window' in rule:
            start_time = datetime.fromisoformat(rule['time_window']['start'])
            end_time = datetime.fromisoformat(rule['time_window']['end'])
            current_time = get_current_timestamp()

            if not (start_time <= current_time <= end_time):
                return False

        return True

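    # Suppression rules are plain dicts; every key checked by _matches_suppression_rule above
    # is optional. An illustrative sketch (values are assumptions only):
    #
    #   alert_manager.add_suppression_rule("maintenance_window", {
    #       "alert_pattern": r"high_.*",          # regex matched against the alert name
    #       "severity": ["info", "warning"],      # severity values to suppress
    #       "time_window": {                      # ISO 8601 timestamps bounding the window
    #           "start": "2024-01-01T00:00:00+00:00",
    #           "end": "2024-01-01T04:00:00+00:00",
    #       },
    #   })
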
    def _send_notifications(self, alert: Alert) -> None:
        """Send notifications for an alert"""
        for channel_name, channel in self.notification_channels.items():
            try:
                if not channel.enabled:
                    continue

                # Check severity filter
                if channel.severity_filter and alert.severity not in channel.severity_filter:
                    continue

                # Send notification based on channel type
                if channel.type == 'email':
                    self._send_email_notification(alert, channel)
                elif channel.type == 'webhook':
                    self._send_webhook_notification(alert, channel)
                elif channel.type == 'slack':
                    self._send_slack_notification(alert, channel)
                else:
                    logger.warning(f"Unknown notification channel type: {channel.type}")

                self.notifications_sent += 1

            except Exception as e:
                logger.error(f"Error sending notification via {channel_name}: {e}")

    def _send_resolution_notifications(self, alert: Alert) -> None:
        """Send resolution notifications"""
        for channel_name, channel in self.notification_channels.items():
            try:
                if not channel.enabled:
                    continue

                # Send resolution notification
                if channel.type == 'email':
                    self._send_email_resolution(alert, channel)
                elif channel.type == 'webhook':
                    self._send_webhook_resolution(alert, channel)
                elif channel.type == 'slack':
                    self._send_slack_resolution(alert, channel)

            except Exception as e:
                logger.error(f"Error sending resolution notification via {channel_name}: {e}")

    def _send_email_notification(self, alert: Alert, channel: NotificationChannel) -> None:
        """Send email notification"""
        try:
            config = channel.config

            # Create message
            msg = MIMEMultipart()
            msg['From'] = config['from_email']
            msg['To'] = ', '.join(config['to_emails'])
            msg['Subject'] = f"[{alert.severity.value.upper()}] {alert.name}"

            # Create body
            body = f"""
Alert: {alert.name}
Severity: {alert.severity.value.upper()}
Description: {alert.description}
Metric: {alert.metric_name}
Current Value: {alert.metadata.get('current_value', 'N/A')}
Threshold: {alert.threshold}
Triggered At: {alert.triggered_at.isoformat() if alert.triggered_at else 'N/A'}

Alert ID: {alert.id}
"""

            msg.attach(MIMEText(body, 'plain'))

            # Send email
            with smtplib.SMTP(config['smtp_server'], config['smtp_port']) as server:
                if config.get('use_tls', True):
                    server.starttls()
                if 'username' in config and 'password' in config:
                    server.login(config['username'], config['password'])
                server.send_message(msg)

            logger.info(f"Email notification sent for alert: {alert.name}")

        except Exception as e:
            logger.error(f"Error sending email notification: {e}")

    def _send_webhook_notification(self, alert: Alert, channel: NotificationChannel) -> None:
        """Send webhook notification"""
        try:
            import requests

            config = channel.config
            payload = {
                'alert': alert.to_dict(),
                'type': 'alert_triggered'
            }

            response = requests.post(
                config['url'],
                json=payload,
                headers=config.get('headers', {}),
                timeout=config.get('timeout', 10)
            )
            response.raise_for_status()

            logger.info(f"Webhook notification sent for alert: {alert.name}")

        except Exception as e:
            logger.error(f"Error sending webhook notification: {e}")

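    # A webhook receiver gets a JSON body of the form
    #   {"type": "alert_triggered" | "alert_resolved", "alert": {...}}
    # where "alert" is the Alert.to_dict() payload defined above (id, name, severity,
    # metric_name, threshold, comparison, status, timestamps and metadata).
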
    def _send_slack_notification(self, alert: Alert, channel: NotificationChannel) -> None:
        """Send Slack notification"""
        try:
            import requests

            config = channel.config

            # Create Slack message
            color = {
                AlertSeverity.INFO: 'good',
                AlertSeverity.WARNING: 'warning',
                AlertSeverity.CRITICAL: 'danger'
            }.get(alert.severity, 'warning')

            payload = {
                'channel': config['channel'],
                'username': config.get('username', 'AlertBot'),
                'attachments': [{
                    'color': color,
                    'title': f"{alert.severity.value.upper()}: {alert.name}",
                    'text': alert.description,
                    'fields': [
                        {'title': 'Metric', 'value': alert.metric_name, 'short': True},
                        {'title': 'Current Value', 'value': str(alert.metadata.get('current_value', 'N/A')), 'short': True},
                        {'title': 'Threshold', 'value': str(alert.threshold), 'short': True},
                        {'title': 'Triggered At', 'value': alert.triggered_at.isoformat() if alert.triggered_at else 'N/A', 'short': True}
                    ],
                    'footer': f"Alert ID: {alert.id}"
                }]
            }

            response = requests.post(
                config['webhook_url'],
                json=payload,
                timeout=10
            )
            response.raise_for_status()

            logger.info(f"Slack notification sent for alert: {alert.name}")

        except Exception as e:
            logger.error(f"Error sending Slack notification: {e}")

    def _send_email_resolution(self, alert: Alert, channel: NotificationChannel) -> None:
        """Send email resolution notification"""
        try:
            config = channel.config

            # Create message
            msg = MIMEMultipart()
            msg['From'] = config['from_email']
            msg['To'] = ', '.join(config['to_emails'])
            msg['Subject'] = f"[RESOLVED] {alert.name}"

            # Create body
            duration = ""
            if alert.triggered_at and alert.resolved_at:
                duration = str(alert.resolved_at - alert.triggered_at)

            body = f"""
Alert RESOLVED: {alert.name}
Severity: {alert.severity.value.upper()}
Description: {alert.description}
Metric: {alert.metric_name}
Threshold: {alert.threshold}
Triggered At: {alert.triggered_at.isoformat() if alert.triggered_at else 'N/A'}
Resolved At: {alert.resolved_at.isoformat() if alert.resolved_at else 'N/A'}
Duration: {duration}

Alert ID: {alert.id}
"""

            msg.attach(MIMEText(body, 'plain'))

            # Send email
            with smtplib.SMTP(config['smtp_server'], config['smtp_port']) as server:
                if config.get('use_tls', True):
                    server.starttls()
                if 'username' in config and 'password' in config:
                    server.login(config['username'], config['password'])
                server.send_message(msg)

            logger.info(f"Email resolution notification sent for alert: {alert.name}")

        except Exception as e:
            logger.error(f"Error sending email resolution notification: {e}")

    def _send_webhook_resolution(self, alert: Alert, channel: NotificationChannel) -> None:
        """Send webhook resolution notification"""
        try:
            import requests

            config = channel.config
            payload = {
                'alert': alert.to_dict(),
                'type': 'alert_resolved'
            }

            response = requests.post(
                config['url'],
                json=payload,
                headers=config.get('headers', {}),
                timeout=config.get('timeout', 10)
            )
            response.raise_for_status()

            logger.info(f"Webhook resolution notification sent for alert: {alert.name}")

        except Exception as e:
            logger.error(f"Error sending webhook resolution notification: {e}")

    def _send_slack_resolution(self, alert: Alert, channel: NotificationChannel) -> None:
        """Send Slack resolution notification"""
        try:
            import requests

            config = channel.config

            duration = ""
            if alert.triggered_at and alert.resolved_at:
                duration = str(alert.resolved_at - alert.triggered_at)

            payload = {
                'channel': config['channel'],
                'username': config.get('username', 'AlertBot'),
                'attachments': [{
                    'color': 'good',
                    'title': f"RESOLVED: {alert.name}",
                    'text': f"Alert has been resolved: {alert.description}",
                    'fields': [
                        {'title': 'Duration', 'value': duration, 'short': True},
                        {'title': 'Resolved At', 'value': alert.resolved_at.isoformat() if alert.resolved_at else 'N/A', 'short': True}
                    ],
                    'footer': f"Alert ID: {alert.id}"
                }]
            }

            response = requests.post(
                config['webhook_url'],
                json=payload,
                timeout=10
            )
            response.raise_for_status()

            logger.info(f"Slack resolution notification sent for alert: {alert.name}")

        except Exception as e:
            logger.error(f"Error sending Slack resolution notification: {e}")

    def acknowledge_alert(self, alert_id: str, acknowledged_by: str) -> bool:
        """Acknowledge an alert"""
        if alert_id in self.active_alerts:
            alert = self.active_alerts[alert_id]
            alert.status = AlertStatus.ACKNOWLEDGED
            alert.acknowledged_at = get_current_timestamp()
            alert.acknowledged_by = acknowledged_by

            logger.info(f"Alert acknowledged by {acknowledged_by}: {alert.name}")
            return True

        return False

    def suppress_alert(self, alert_id: str) -> bool:
        """Suppress an alert"""
        if alert_id in self.active_alerts:
            alert = self.active_alerts[alert_id]
            alert.status = AlertStatus.SUPPRESSED

            logger.info(f"Alert suppressed: {alert.name}")
            return True

        return False

    def add_suppression_rule(self, name: str, rule: Dict[str, Any]) -> None:
        """Add alert suppression rule"""
        self.suppression_rules[name] = rule
        logger.info(f"Added suppression rule: {name}")

    def remove_suppression_rule(self, name: str) -> None:
        """Remove alert suppression rule"""
        if name in self.suppression_rules:
            del self.suppression_rules[name]
            logger.info(f"Removed suppression rule: {name}")

    def get_active_alerts(self, severity: Optional[AlertSeverity] = None) -> List[Alert]:
        """Get active alerts, optionally filtered by severity"""
        alerts = list(self.active_alerts.values())

        if severity:
            alerts = [alert for alert in alerts if alert.severity == severity]

        return sorted(alerts, key=lambda x: x.triggered_at or datetime.min, reverse=True)

    def get_alert_history(self, limit: int = 100, severity: Optional[AlertSeverity] = None) -> List[Alert]:
        """Get alert history"""
        alerts = list(self.alert_history)

        if severity:
            alerts = [alert for alert in alerts if alert.severity == severity]

        return sorted(alerts, key=lambda x: x.triggered_at or datetime.min, reverse=True)[:limit]

    def get_alert_summary(self) -> Dict[str, Any]:
        """Get alert summary statistics"""
        active_by_severity = defaultdict(int)
        for alert in self.active_alerts.values():
            active_by_severity[alert.severity.value] += 1

        return {
            'active_alerts': len(self.active_alerts),
            'active_by_severity': dict(active_by_severity),
            'total_triggered': self.alerts_triggered,
            'total_resolved': self.alerts_resolved,
            'notifications_sent': self.notifications_sent,
            'alert_rules': len(self.alert_rules),
            'notification_channels': len(self.notification_channels),
            'suppression_rules': len(self.suppression_rules)
        }

    def register_callback(self, callback: Callable[[Alert], None]) -> None:
        """Register alert callback"""
        self.alert_callbacks.append(callback)
        logger.info(f"Registered alert callback: {callback.__name__}")

    def get_stats(self) -> Dict[str, Any]:
        """Get alert manager statistics"""
        return {
            'alert_rules': len(self.alert_rules),
            'active_alerts': len(self.active_alerts),
            'alert_history_count': len(self.alert_history),
            'notification_channels': len(self.notification_channels),
            'suppression_rules': len(self.suppression_rules),
            'alerts_triggered': self.alerts_triggered,
            'alerts_resolved': self.alerts_resolved,
            'notifications_sent': self.notifications_sent,
            'registered_callbacks': len(self.alert_callbacks)
        }


# Global alert manager instance
alert_manager = AlertManager()
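
if __name__ == "__main__":
    # Minimal usage sketch, not part of the production path. It assumes the module is run as
    # `python -m COBY.monitoring.alert_manager` so the relative imports above resolve; the rule
    # name, metric name and threshold below are made-up illustration values.
    demo_rule = AlertRule(
        name="high_latency",
        metric_name="processing_latency_ms",
        threshold=100.0,
        comparison="gt",
        duration_seconds=0,
        severity=AlertSeverity.WARNING,
        description="Processing latency above 100 ms",
    )
    alert_manager.add_alert_rule(demo_rule)

    # First breach records the trigger time, the second confirms the (zero-second) duration
    # and raises the alert, and the in-range reading resolves it again.
    alert_manager.update_metric_value("processing_latency_ms", 150.0)
    alert_manager.update_metric_value("processing_latency_ms", 160.0)
    alert_manager.update_metric_value("processing_latency_ms", 50.0)

    print(json.dumps(alert_manager.get_alert_summary(), indent=2))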