Go语言AlertManager告警:实时告警系统
2026/5/11 23:25:02 网站建设 项目流程

Go语言AlertManager告警:实时告警系统

1. AlertManager概述

AlertManager负责接收并处理Prometheus发送的告警,支持去重、分组、抑制、静默等高级功能,并将告警路由到邮件、Slack、Webhook等通知渠道。

2. 告警规则

package alert type Rule struct { Name string `json:"name"` Expr string `json:"expr"` For string `json:"for"` Labels map[string]string `json:"labels"` Annotations map[string]string `json:"annotations"` } type RuleGroup struct { Name string `json:"name"` Rules []Rule `json:"rules"` } func NewRule(name, expr, forDuration string) *Rule { return &Rule{ Name: name, Expr: expr, For: forDuration, Labels: make(map[string]string), Annotations: make(map[string]string), } } func (r *Rule) WithLabel(key, value string) *Rule { r.Labels[key] = value return r } func (r *Rule) WithAnnotation(key, value string) *Rule { r.Annotations[key] = value return r } const ( HighCPUUsage = `avg(rate(container_cpu_usage_seconds_total{name=~".+"}[5m])) by (name) > 0.8` HighMemoryUsage = `avg(container_memory_usage_bytes{name=~".+"}) by (name) / avg(container_spec_memory_limit_bytes{name=~".+"}) by (name) > 0.8` HighErrorRate = `rate(http_requests_total{status=~"5.."}[5m]) > 0.05` HighLatency = `histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 1` ) func DefaultRules() []*Rule { return []*Rule{ NewRule("HighCPUUsage", HighCPUUsage, "5m"). WithLabel("severity", "warning"). WithAnnotation("summary", "High CPU usage detected"), NewRule("HighMemoryUsage", HighMemoryUsage, "5m"). WithLabel("severity", "warning"). WithAnnotation("summary", "High memory usage detected"), NewRule("HighErrorRate", HighErrorRate, "5m"). WithLabel("severity", "critical"). WithAnnotation("summary", "High error rate detected"), NewRule("HighLatency", HighLatency, "5m"). WithLabel("severity", "warning"). WithAnnotation("summary", "High latency detected"), } }

3. Webhook通知

// AlertWebhook is a decoded webhook notification: the receiver name, the
// overall status and the alerts it carries.
type AlertWebhook struct {
	receiver string
	status   string
	alerts   []Alert
}

// Alert is one alert entry inside a webhook payload.
type Alert struct {
	Status      string            `json:"status"`
	Labels      map[string]string `json:"labels"`
	Annotations map[string]string `json:"annotations"`
	StartsAt    time.Time         `json:"startsAt"`
	EndsAt      time.Time         `json:"endsAt"`
}

// UnmarshalJSON decodes the "receiver", "status" and "alerts" keys into the
// webhook. encoding/json ignores unexported struct fields, so without this
// method every AlertWebhook would decode as empty. On error the receiver is
// left untouched.
func (wh *AlertWebhook) UnmarshalJSON(data []byte) error {
	var wire struct {
		Receiver string  `json:"receiver"`
		Status   string  `json:"status"`
		Alerts   []Alert `json:"alerts"`
	}
	if err := json.Unmarshal(data, &wire); err != nil {
		return err
	}
	wh.receiver = wire.Receiver
	wh.status = wire.Status
	wh.alerts = wire.Alerts
	return nil
}

// NewAlertWebhook parses a JSON webhook payload. It returns a nil webhook and
// the decoding error when the payload is not valid JSON for this shape.
func NewAlertWebhook(payload []byte) (*AlertWebhook, error) {
	var wh AlertWebhook
	if err := json.Unmarshal(payload, &wh); err != nil {
		return nil, err
	}
	return &wh, nil
}

// Handle dispatches on the webhook status: "firing" and "resolved" each print
// one line per alert. Any other status is ignored and nil is returned.
func (wh *AlertWebhook) Handle() error {
	switch wh.status {
	case "firing":
		return wh.handleFiring()
	case "resolved":
		return wh.handleResolved()
	}
	return nil
}

// handleFiring prints one "Alert Firing" line per alert to standard output.
func (wh *AlertWebhook) handleFiring() error {
	wh.printAlerts("Firing")
	return nil
}

// handleResolved prints one "Alert Resolved" line per alert to standard output.
func (wh *AlertWebhook) handleResolved() error {
	wh.printAlerts("Resolved")
	return nil
}

// printAlerts writes "Alert <state>: <alertname> - <summary>" for every alert.
// Missing labels or annotations print as empty strings.
func (wh *AlertWebhook) printAlerts(state string) {
	for i := range wh.alerts {
		a := &wh.alerts[i]
		fmt.Printf("Alert %s: %s - %s\n", state, a.Labels["alertname"], a.Annotations["summary"])
	}
}

4. 告警处理器

type AlertHandler struct { notifiers []Notifier } type Notifier interface { Notify(alert *Alert) error } type EmailNotifier struct { smtpHost string smtpPort int from string to []string } func (n *EmailNotifier) Notify(alert *Alert) error { subject := fmt.Sprintf("[%s] %s", alert.Labels["severity"], alert.Labels["alertname"]) body := fmt.Sprintf("Alert: %s\nSummary: %s\n", alert.Labels["alertname"], alert.Annotations["summary"]) return sendEmail(n.smtpHost, n.smtpPort, n.from, n.to, subject, body) } type SlackNotifier struct { webhookURL string channel string } func (n *SlackNotifier) Notify(alert *Alert) error { payload := map[string]interface{}{ "channel": n.channel, "text": fmt.Sprintf("[%s] %s: %s", alert.Labels["severity"], alert.Labels["alertname"], alert.Annotations["summary"]), } return postJSON(n.webhookURL, payload) } func NewAlertHandler() *AlertHandler { return &AlertHandler{ notifiers: make([]Notifier, 0), } } func (h *AlertHandler) AddNotifier(notifier Notifier) { h.notifiers = append(h.notifiers, notifier) } func (h *AlertHandler) HandleWebhook(payload []byte) error { wh, err := NewAlertWebhook(payload) if err != nil { return err } for _, alert := range wh.alerts { for _, notifier := range h.notifiers { if err := notifier.Notify(&alert); err != nil { return err } } } return nil }

5. 总结

AlertManager提供了强大的告警能力,通过合理的告警规则设计和多渠道通知可以实现高效的故障响应。

需要专业的网站建设服务?

联系我们获取免费的网站建设咨询和方案报价,让我们帮助您实现业务目标

立即咨询