CANopen网络管理实战:如何用NMT命令精准控制PDO的开关(以STM32H743为例)
2026/5/12 0:26:20
AlertManager处理Prometheus发送的告警,支持分组、抑制、静默等高级功能。
package alert type Rule struct { Name string `json:"name"` Expr string `json:"expr"` For string `json:"for"` Labels map[string]string `json:"labels"` Annotations map[string]string `json:"annotations"` } type RuleGroup struct { Name string `json:"name"` Rules []Rule `json:"rules"` } func NewRule(name, expr, forDuration string) *Rule { return &Rule{ Name: name, Expr: expr, For: forDuration, Labels: make(map[string]string), Annotations: make(map[string]string), } } func (r *Rule) WithLabel(key, value string) *Rule { r.Labels[key] = value return r } func (r *Rule) WithAnnotation(key, value string) *Rule { r.Annotations[key] = value return r } const ( HighCPUUsage = `avg(rate(container_cpu_usage_seconds_total{name=~".+"}[5m])) by (name) > 0.8` HighMemoryUsage = `avg(container_memory_usage_bytes{name=~".+"}) by (name) / avg(container_spec_memory_limit_bytes{name=~".+"}) by (name) > 0.8` HighErrorRate = `rate(http_requests_total{status=~"5.."}[5m]) > 0.05` HighLatency = `histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 1` ) func DefaultRules() []*Rule { return []*Rule{ NewRule("HighCPUUsage", HighCPUUsage, "5m"). WithLabel("severity", "warning"). WithAnnotation("summary", "High CPU usage detected"), NewRule("HighMemoryUsage", HighMemoryUsage, "5m"). WithLabel("severity", "warning"). WithAnnotation("summary", "High memory usage detected"), NewRule("HighErrorRate", HighErrorRate, "5m"). WithLabel("severity", "critical"). WithAnnotation("summary", "High error rate detected"), NewRule("HighLatency", HighLatency, "5m"). WithLabel("severity", "warning"). WithAnnotation("summary", "High latency detected"), } }type AlertWebhook struct { receiver string status string alerts []Alert } type Alert struct { Status string `json:"status"` Labels map[string]string `json:"labels"` Annotations map[string]string `json:"annotations"` StartsAt time.Time `json:"startsAt"` EndsAt time.Time `json:"endsAt"` } func NewAlertWebhook(payload []byte) (*AlertWebhook, error) { var wh AlertWebhook err := json.Unmarshal(payload, &wh) if err != nil { return nil, err } return &wh, nil } func (wh *AlertWebhook) Handle() error { switch wh.status { case "firing": return wh.handleFiring() case "resolved": return wh.handleResolved() } return nil } func (wh *AlertWebhook) handleFiring() error { for _, alert := range wh.alerts { fmt.Printf("Alert Firing: %s - %s\n", alert.Labels["alertname"], alert.Annotations["summary"]) } return nil } func (wh *AlertWebhook) handleResolved() error { for _, alert := range wh.alerts { fmt.Printf("Alert Resolved: %s - %s\n", alert.Labels["alertname"], alert.Annotations["summary"]) } return nil }type AlertHandler struct { notifiers []Notifier } type Notifier interface { Notify(alert *Alert) error } type EmailNotifier struct { smtpHost string smtpPort int from string to []string } func (n *EmailNotifier) Notify(alert *Alert) error { subject := fmt.Sprintf("[%s] %s", alert.Labels["severity"], alert.Labels["alertname"]) body := fmt.Sprintf("Alert: %s\nSummary: %s\n", alert.Labels["alertname"], alert.Annotations["summary"]) return sendEmail(n.smtpHost, n.smtpPort, n.from, n.to, subject, body) } type SlackNotifier struct { webhookURL string channel string } func (n *SlackNotifier) Notify(alert *Alert) error { payload := map[string]interface{}{ "channel": n.channel, "text": fmt.Sprintf("[%s] %s: %s", alert.Labels["severity"], alert.Labels["alertname"], alert.Annotations["summary"]), } return postJSON(n.webhookURL, payload) } func NewAlertHandler() *AlertHandler { return &AlertHandler{ notifiers: make([]Notifier, 0), } } func (h *AlertHandler) AddNotifier(notifier Notifier) { h.notifiers = append(h.notifiers, notifier) } func (h *AlertHandler) HandleWebhook(payload []byte) error { wh, err := NewAlertWebhook(payload) if err != nil { return err } for _, alert := range wh.alerts { for _, notifier := range h.notifiers { if err := notifier.Notify(&alert); err != nil { return err } } } return nil }AlertManager提供了强大的告警能力,通过合理的告警规则设计和多渠道通知可以实现高效的故障响应。