Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
---
# PrometheusRule (Prometheus Operator CRD) declaring two pod-health alerts:
# one for pods with containers terminated with reason "Error", one for pods
# stuck in the Pending phase.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    # NOTE(review): presumably this label is what the Prometheus resource's
    # ruleSelector matches on — confirm against the operator/Helm release.
    release: monitoring
  name: daniel-custom-rules-prod
  namespace: monitoring
spec:
  groups:
    - name: "pod-error-rule"
      rules:
        - alert: PodErrorAlert
          # Hold time matches the 10-minute window stated in the annotations
          # below (it was 3m, which contradicted the summary and message).
          for: 10m
          # Yields one series per (namespace, pod) that has at least one
          # container whose terminated reason is "Error".
          expr: count(kube_pod_container_status_terminated_reason{reason='Error'} > 0) by (namespace, pod)
          labels:
            severity: critical
          annotations:
            summary: Pod has been in error state for more than 10 minutes.
            message: "pod {{ $labels.pod }} in the {{ $labels.namespace }} namespace has been in error state for more than 10m"
    - name: "pod-pending-rule"
      rules:
        - alert: PodPendingFor30mAlert
          # Hold time matches the alert name and the 30-minute window stated
          # in the annotations below (it was 2m).
          for: 30m
          # Yields one series per (namespace, pod) currently reporting the
          # Pending phase.
          expr: count(kube_pod_status_phase{phase="Pending"} > 0) by (namespace, pod)
          labels:
            severity: critical
          annotations:
            summary: Pod has been pending for more than 30 minutes.
            message: "pod {{ $labels.pod }} in the {{ $labels.namespace }} namespace has been in pending state for more than 30m"
Add Comment
Please, Sign In to add comment