Skip to content

Commit

Permalink
Add FailedEvictedPods alert
Browse files Browse the repository at this point in the history
  • Loading branch information
wirwolf committed Feb 13, 2024
1 parent 5c73505 commit ee5b49f
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 2 deletions.
4 changes: 2 additions & 2 deletions charts/kube-monitoring/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ apiVersion: v2
name: kube-monitoring
description: More rules for base kubernetes metrics
type: application
# NOTE(review): this is a diff view with the +/- markers lost. The 0.2.1 pair
# appears to be the lines removed by this commit and the 0.3.0 pair the lines
# added (the hunk reports 2 additions & 2 deletions) — confirm against the
# repository; the real Chart.yaml should carry a single version/appVersion pair.
version: 0.2.1
appVersion: 0.2.1
version: 0.3.0
appVersion: 0.3.0
icon: https://raw.githubusercontent.com/SomeBlackMagic/helm-charts/master/charts/kube-monitoring/kube-monitoring.png
keywords:
- kube
Expand Down
25 changes: 25 additions & 0 deletions charts/kube-monitoring/templates/prometheus/prometheusrule.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,31 @@ spec:
severity: "{{ $.Values.prometheus.rule.alerts.containerIsOOMKilled.severity }}"
{{- end }}

{{- if $.Values.prometheus.rule.alerts.failedEvictedPods.enabled }}
{{- /*
  FailedEvictedPods: rendered only when alerts.failedEvictedPods.enabled is set.
  The expression keeps one series per namespace/pod for pods whose phase is
  "Failed" and whose status reason is "Evicted", and must hold for 10m.

  The annotation texts are emitted as raw strings (backticks) so the Prometheus
  placeholders survive Helm rendering, and are piped through quote because the
  summary contains a colon followed by a space, which is not valid inside an
  unquoted YAML scalar.
*/}}
    # https://stackoverflow.com/a/69340335
    - alert: FailedEvictedPods
      annotations:
        summary: {{`Failed Evicted pod: {{ $labels.pod }}` | quote }}
        description: {{`Pod {{ $labels.namespace }}/{{ $labels.pod }} has been Evicted. Please check reason and remove evicted pod` | quote }}
      expr: sum by(namespace, pod) (kube_pod_status_phase{phase="Failed"} > 0 and on(namespace, pod) kube_pod_status_reason{reason="Evicted"} > 0) > 0
      for: 10m
      labels:
        severity: "{{ $.Values.prometheus.rule.alerts.failedEvictedPods.severity }}"
{{- end }}

{{- if $.Values.prometheus.rule.alerts.tooManyEvictedPods.enabled }}
{{- /*
  TooManyEvictedPodsOnNamespace: rendered only when
  alerts.tooManyEvictedPods.enabled is set. Fires when the per-namespace sum of
  kube_pod_status_reason with reason "Evicted" is greater than or equal to
  alerts.tooManyEvictedPods.threshold. No "for" clause is set on this rule.

  The annotation texts are emitted as raw strings (backticks) so the Prometheus
  placeholders survive Helm rendering, and are piped through quote because the
  description contains a colon followed by a space, which is not valid inside
  an unquoted YAML scalar.
*/}}
    # https://stackoverflow.com/a/69340335
    - alert: TooManyEvictedPodsOnNamespace
      annotations:
        summary: {{`Too many Failed Evicted Pods in {{ $labels.namespace }}` | quote }}
        description: {{`Too many Failed Evicted Pods: {{ $value }} on namespace {{ $labels.namespace }}` | quote }}
      expr: sum by(namespace) (kube_pod_status_reason{reason="Evicted"}) >= {{ $.Values.prometheus.rule.alerts.tooManyEvictedPods.threshold }}
      labels:
        severity: "{{ $.Values.prometheus.rule.alerts.tooManyEvictedPods.severity }}"
{{- end }}



{{- with $.Values.prometheus.rule.additionalAlerts }}
{{- /*
  Appends user-supplied rules after the built-in alerts, indented by 4 spaces.
  values.yaml declares additionalAlerts as a list, and nindent only accepts a
  string, so a list (or any other non-string value) is serialized with toYaml
  first. A value that is already a string is passed through unchanged.
  An empty value skips this section entirely.
*/}}
{{- if kindIs "string" . }}
{{ . | nindent 4 }}
{{- else }}
{{ toYaml . | nindent 4 }}
{{- end }}
{{- end }}
Expand Down
9 changes: 9 additions & 0 deletions charts/kube-monitoring/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@ prometheus:
# Settings for the container OOM-killed alert; severity is rendered into the
# rule's "severity" label.
# NOTE(review): the template line that reads "enabled" is outside this view —
# presumably it gates the rule like the entries below; confirm.
containerIsOOMKilled:
enabled: true
severity: critical
# FailedEvictedPods alert: "enabled" gates whether the rule is rendered,
# "severity" is rendered into the rule's "severity" label.
failedEvictedPods:
enabled: true
severity: warning
# TooManyEvictedPodsOnNamespace alert: "enabled" gates whether the rule is
# rendered, "severity" is rendered into the rule's "severity" label, and
# "threshold" is the per-namespace evicted-pod count at or above which the
# alert expression matches.
tooManyEvictedPods:
enabled: true
severity: warning
threshold: 5



additionalAlerts: []
# - alert: CertManagerCertificateReadyStatus
Expand Down

0 comments on commit ee5b49f

Please sign in to comment.