Skip to content

Commit 7330dde

Browse files
committed
Add option to include or exclude alerts via their labels
1 parent e64c05a commit 7330dde

4 files changed

Lines changed: 134 additions & 19 deletions

File tree

README.md

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -153,16 +153,20 @@ Examples:
153153
| total=2 firing=1 pending=0 inactive=1
154154
155155
Flags:
156-
--exclude-alert stringArray Alerts to ignore. Can be used multiple times and supports regex.
157-
-h, --help help for alert
158-
-n, --name strings The name of one or more specific alerts to check.
159-
This parameter can be repeated e.G.: '--name alert1 --name alert2'
160-
If no name is given, all alerts will be evaluated
161-
-g, --group strings The name of one or more specific groups to check.
162-
This parameter can be repeated e.G.: '--group group1 --group group2'
163-
If no group is given, all groups will be scanned for alerts
164-
-T, --no-alerts-state string State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK (default "OK")
165-
-P, --problems Display only alerts which status is not inactive/OK. Note that in combination with the --name flag this might result in no alerts being displayed
156+
--exclude-alert stringArray Alerts to ignore. Can be used multiple times and supports regex.
157+
--exclude-label stringArray The label of one or more specific alerts to exclude.
158+
This parameter can be repeated e.g.: '--exclude-label prio=high --exclude-label another=example'
159+
-g, --group strings The name of one or more specific groups to check for alerts.
160+
This parameter can be repeated e.g.: '--group group1 --group group2'
161+
If no group is given, all groups will be scanned for alerts
162+
-h, --help help for alert
163+
--include-label stringArray The label of one or more specific alerts to include.
164+
This parameter can be repeated e.g.: '--include-label prio=high --include-label another=example'
165+
-n, --name strings The name of one or more specific alerts to check.
166+
This parameter can be repeated e.g.: '--name alert1 --name alert2'
167+
If no name is given, all alerts will be evaluated
168+
-T, --no-alerts-state string State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK (default "OK")
169+
-P, --problems Display only alerts which status is not inactive/OK. Note that in combination with the --name flag this might result in no alerts being displayed
166170
```
167171
168172
#### Checking all defined alerts

cmd/alert.go

Lines changed: 60 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,16 @@ import (
1111
"github.com/NETWAYS/go-check"
1212
"github.com/NETWAYS/go-check/perfdata"
1313
"github.com/NETWAYS/go-check/result"
14+
"github.com/prometheus/common/model"
1415
"github.com/spf13/cobra"
1516
)
1617

1718
// AlertConfig collects the options that decide which alerting rules are
// evaluated and how an empty result is reported. The fields are bound to
// command-line flags in init().
type AlertConfig struct {
1819
// AlertName lists the alert names to check (--name). When it is non-nil,
// rules whose name is not in the list are skipped.
AlertName []string
1920
// Group lists the rule groups to check (--group).
// NOTE(review): the group filtering itself is not visible in this hunk.
Group []string
2021
// ExcludeAlerts holds regular expressions (--exclude-alert); a rule whose
// name matches one of them is skipped.
ExcludeAlerts []string
22+
// ExcludeLabels holds "key=value" entries (--exclude-label); a rule that
// carries one of these labels is skipped.
ExcludeLabels []string
23+
// IncludeLabels holds "key=value" entries (--include-label); when set, a
// rule that carries none of these labels is skipped.
IncludeLabels []string
2124
// ProblemsOnly skips rules that currently have no alerts (--problems).
ProblemsOnly bool
2225
// NoAlertsState is the state to assign when no alerts are found
// (--no-alerts-state); the flag default is "OK".
NoAlertsState string
2326
}
@@ -101,31 +104,47 @@ inactive = 0`,
101104

102105
var overall result.Overall
103106

104-
for _, rl := range rules {
107+
includeLabels := sliceToMap(cliAlertConfig.IncludeLabels)
108+
excludeLabels := sliceToMap(cliAlertConfig.ExcludeLabels)
105109

110+
for _, rl := range rules {
106111
// If it's not the Alert we're looking for, Skip!
107112
if cliAlertConfig.AlertName != nil {
108113
if !slices.Contains(cliAlertConfig.AlertName, rl.AlertingRule.Name) {
109114
continue
110115
}
111116
}
112117

118+
labelsMatchedInclude := matchesLabel(rl.AlertingRule.Labels, includeLabels)
119+
120+
if len(cliAlertConfig.IncludeLabels) > 0 && !labelsMatchedInclude {
121+
// If the alert labels don't match here we can skip it.
122+
continue
123+
}
124+
113125
// Skip inactive alerts if flag is set
114126
if len(rl.AlertingRule.Alerts) == 0 && cliAlertConfig.ProblemsOnly {
115127
continue
116128
}
117129

118-
alertMatched, regexErr := matches(rl.AlertingRule.Name, cliAlertConfig.ExcludeAlerts)
130+
alertMatchedExclude, regexErr := matches(rl.AlertingRule.Name, cliAlertConfig.ExcludeAlerts)
119131

120132
if regexErr != nil {
121133
check.ExitRaw(check.Unknown, "Invalid regular expression provided:", regexErr.Error())
122134
}
123135

124-
if alertMatched {
136+
if alertMatchedExclude {
125137
// If the alert matches a regex from the list we can skip it.
126138
continue
127139
}
128140

141+
labelsMatchedExclude := matchesLabel(rl.AlertingRule.Labels, excludeLabels)
142+
143+
if len(cliAlertConfig.ExcludeLabels) > 0 && labelsMatchedExclude {
144+
// If the alert labels match here we can skip it.
145+
continue
146+
}
147+
129148
// Handle Inactive Alerts
130149
if len(rl.AlertingRule.Alerts) == 0 {
131150
// Counting states for perfdata
@@ -208,18 +227,27 @@ func init() {
208227

209228
fs.StringVarP(&cliAlertConfig.NoAlertsState, "no-alerts-state", "T", "OK", "State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK")
210229

211-
fs.StringArrayVar(&cliAlertConfig.ExcludeAlerts, "exclude-alert", []string{}, "Alerts to ignore. Can be used multiple times and supports regex.")
230+
fs.StringArrayVar(&cliAlertConfig.ExcludeAlerts, "exclude-alert", []string{},
231+
"Alerts to ignore. Can be used multiple times and supports regex.")
212232

213233
fs.StringSliceVarP(&cliAlertConfig.AlertName, "name", "n", nil,
214234
"The name of one or more specific alerts to check."+
215-
"\nThis parameter can be repeated e.G.: '--name alert1 --name alert2'"+
235+
"\nThis parameter can be repeated e.g.: '--name alert1 --name alert2'"+
216236
"\nIf no name is given, all alerts will be evaluated")
217237

218238
fs.StringSliceVarP(&cliAlertConfig.Group, "group", "g", nil,
219239
"The name of one or more specific groups to check for alerts."+
220-
"\nThis parameter can be repeated e.G.: '--group group1 --group group2'"+
240+
"\nThis parameter can be repeated e.g.: '--group group1 --group group2'"+
221241
"\nIf no group is given, all groups will be scanned for alerts")
222242

243+
fs.StringArrayVar(&cliAlertConfig.IncludeLabels, "include-label", []string{},
244+
"The label of one or more specific alerts to include."+
245+
"\nThis parameter can be repeated e.g.: '--include-label prio=high --include-label another=example'")
246+
247+
fs.StringArrayVar(&cliAlertConfig.ExcludeLabels, "exclude-label", []string{},
248+
"The label of one or more specific alerts to exclude."+
249+
"\nThis parameter can be repeated e.g.: '--exclude-label prio=high --exclude-label another=example'")
250+
223251
fs.BoolVarP(&cliAlertConfig.ProblemsOnly, "problems", "P", false,
224252
"Display only alerts which status is not inactive/OK. Note that in combination with the --name flag this might result in no alerts being displayed")
225253
}
@@ -257,3 +285,29 @@ func matches(input string, regexToExclude []string) (bool, error) {
257285

258286
return false, nil
259287
}
288+
289+
// Matches a list of labels against a list of labels
290+
func matchesLabel(labels model.LabelSet, labelsToMatch map[string]string) bool {
291+
for k, v := range labels {
292+
if dv, ok := labelsToMatch[string(k)]; ok && dv == string(v) {
293+
return true
294+
}
295+
}
296+
297+
return false
298+
}
299+
300+
// sliceToMap converts a list of "key=value" strings into a map from key to
// value.
//
// Each entry is split at its first "=", so the value itself may contain
// further "=" characters. Entries without any "=" are skipped silently. If a
// key occurs more than once, the last occurrence wins. The returned map is
// never nil.
func sliceToMap(labels []string) map[string]string {
	pairs := make(map[string]string, len(labels))

	for _, entry := range labels {
		key, value, ok := strings.Cut(entry, "=")
		if ok {
			pairs[key] = value
		}
	}

	return pairs
}

cmd/alert_test.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,60 @@ exit status 2
234234
args: []string{"run", "../main.go", "alert", "--name", "InactiveAlert"},
235235
expected: "[OK] - 1 Alerts: 0 Firing - 0 Pending - 1 Inactive\n\\_ [OK] [InactiveAlert] is inactive\n|total=1 firing=0 pending=0 inactive=1\n\n",
236236
},
237+
{
238+
name: "alert-include-label",
239+
server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
240+
w.WriteHeader(http.StatusOK)
241+
w.Write(loadTestdata(alertTestDataSet1))
242+
})),
243+
args: []string{"run", "../main.go", "alert", "--include-label", "severity=critical"},
244+
expected: `[CRITICAL] - 2 Alerts: 1 Firing - 0 Pending - 1 Inactive
245+
\_ [OK] [HostOutOfMemory] is inactive
246+
\_ [CRITICAL] [BlackboxTLS] - Job: [blackbox] on Instance: [https://localhost:443] is firing - value: -6065338.00 - {"alertname":"TLS","instance":"https://localhost:443","job":"blackbox","severity":"critical"}
247+
|total=2 firing=1 pending=0 inactive=1
248+
249+
exit status 2
250+
`,
251+
},
252+
{
253+
name: "alert-exclude-label",
254+
server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
255+
w.WriteHeader(http.StatusOK)
256+
w.Write(loadTestdata(alertTestDataSet1))
257+
})),
258+
args: []string{"run", "../main.go", "alert", "--exclude-label", "severity=critical"},
259+
expected: `[WARNING] - 1 Alerts: 0 Firing - 1 Pending - 0 Inactive
260+
\_ [WARNING] [SqlAccessDeniedRate] - Job: [mysql] on Instance: [localhost] is pending - value: 0.40 - {"alertname":"SqlAccessDeniedRate","instance":"localhost","job":"mysql","severity":"warning"}
261+
|total=1 firing=0 pending=1 inactive=0
262+
263+
exit status 1
264+
`,
265+
},
266+
{
267+
name: "alert-include-label-multiple",
268+
server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
269+
w.WriteHeader(http.StatusOK)
270+
w.Write(loadTestdata(alertTestDataSet1))
271+
})),
272+
args: []string{"run", "../main.go", "alert", "--include-label", "team=database", "--include-label", "severity=critical"},
273+
expected: `[CRITICAL] - 3 Alerts: 1 Firing - 1 Pending - 1 Inactive
274+
\_ [OK] [HostOutOfMemory] is inactive
275+
\_ [WARNING] [SqlAccessDeniedRate] - Job: [mysql] on Instance: [localhost] is pending - value: 0.40 - {"alertname":"SqlAccessDeniedRate","instance":"localhost","job":"mysql","severity":"warning"}
276+
\_ [CRITICAL] [BlackboxTLS] - Job: [blackbox] on Instance: [https://localhost:443] is firing - value: -6065338.00 - {"alertname":"TLS","instance":"https://localhost:443","job":"blackbox","severity":"critical"}
277+
|total=3 firing=1 pending=1 inactive=1
278+
279+
exit status 2
280+
`,
281+
},
282+
{
283+
name: "alert-exclude-label-multiple",
284+
server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
285+
w.WriteHeader(http.StatusOK)
286+
w.Write(loadTestdata(alertTestDataSet1))
287+
})),
288+
args: []string{"run", "../main.go", "alert", "--exclude-label", "team=database", "--exclude-label", "severity=critical"},
289+
expected: "[OK] - 0 Alerts: 0 Firing - 0 Pending - 0 Inactive\n\\_ [OK] No alerts retrieved\n|total=0 firing=0 pending=0 inactive=0\n\n",
290+
},
237291
}
238292

239293
for _, test := range tests {

testdata/unittest/alertDataset1.json

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
"query": "up",
1313
"duration": 120,
1414
"labels": {
15-
"severity": "critical"
15+
"severity": "critical",
16+
"team": "network"
1617
},
1718
"annotations": {
1819
"description": "Foo",
@@ -40,7 +41,8 @@
4041
"query": "mysql",
4142
"duration": 17280000,
4243
"labels": {
43-
"severity": "warning"
44+
"severity": "warning",
45+
"team": "database"
4446
},
4547
"annotations": {
4648
"description": "MySQL",
@@ -84,7 +86,8 @@
8486
"query": "SSL",
8587
"duration": 0,
8688
"labels": {
87-
"severity": "critical"
89+
"severity": "critical",
90+
"team": "network"
8891
},
8992
"annotations": {
9093
"description": "TLS",

0 commit comments

Comments
 (0)