Guest User

traefik-prometheus-rules

a guest
Oct 25th, 2020
96
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # Prometheus alerting rules for Traefik.
  #
  # Two parallel sets of alerts are defined: one on the traefik_backend_*
  # metrics and one on the traefik_service_* metrics.
  # NOTE(review): these look like the Traefik v1 (backend) and v2 (service)
  # metric names respectively; confirm which set your deployment exports and
  # drop the other if it is never populated.
  #
  # Every expression aggregates by (backend) or by (service), so that is the
  # only label left on the resulting alert. Annotations therefore reference
  # $labels.backend / $labels.service; $labels.instance would render empty.
  groups:
  - name: traefik
    rules:
    # Fires when every server behind a backend reports down.
    # sum() is required here: count() counts series regardless of their value
    # and yields no result at all when there are none, so it can never be 0.
    # Assumes traefik_backend_server_up is a 0/1 gauge per server (see the
    # Traefik metrics documentation).
    # This alert does not cover the case where the metric disappears entirely.
    - alert: TraefikBackendDown
      expr: sum by (backend) (traefik_backend_server_up) == 0
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: 'Traefik backend down (backend {{ $labels.backend }})'
        description: 'All servers of Traefik backend {{ $labels.backend }} are down'

    # Share of requests answered with a 4xx status over the last 3 minutes,
    # per backend, as a percentage. With no traffic the ratio is NaN, which
    # does not satisfy "> 5", so idle backends do not alert.
    - alert: TraefikHighHttp4xxErrorRateBackend
      expr: >-
        sum by (backend) (rate(traefik_backend_requests_total{code=~"4.*"}[3m]))
        / sum by (backend) (rate(traefik_backend_requests_total[3m]))
        * 100 > 5
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: 'Traefik high HTTP 4xx error rate backend (backend {{ $labels.backend }})'
        description: 'Traefik backend 4xx error rate is above 5% (current value: {{ $value | humanize }}%)'

    # Same ratio as above, restricted to 5xx responses.
    - alert: TraefikHighHttp5xxErrorRateBackend
      expr: >-
        sum by (backend) (rate(traefik_backend_requests_total{code=~"5.*"}[3m]))
        / sum by (backend) (rate(traefik_backend_requests_total[3m]))
        * 100 > 5
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: 'Traefik high HTTP 5xx error rate backend (backend {{ $labels.backend }})'
        description: 'Traefik backend 5xx error rate is above 5% (current value: {{ $value | humanize }}%)'

    # Service-level counterpart of TraefikBackendDown; sum() is used for the
    # same reason (count() never evaluates to 0).
    - alert: TraefikServiceDown
      expr: sum by (service) (traefik_service_server_up) == 0
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: 'Traefik service down (service {{ $labels.service }})'
        description: 'All servers of Traefik service {{ $labels.service }} are down'

    # Share of requests answered with a 4xx status over the last 3 minutes,
    # per service, as a percentage.
    - alert: TraefikHighHttp4xxErrorRateService
      expr: >-
        sum by (service) (rate(traefik_service_requests_total{code=~"4.*"}[3m]))
        / sum by (service) (rate(traefik_service_requests_total[3m]))
        * 100 > 5
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: 'Traefik high HTTP 4xx error rate service (service {{ $labels.service }})'
        description: 'Traefik service 4xx error rate is above 5% (current value: {{ $value | humanize }}%)'

    # Same ratio as above, restricted to 5xx responses.
    - alert: TraefikHighHttp5xxErrorRateService
      expr: >-
        sum by (service) (rate(traefik_service_requests_total{code=~"5.*"}[3m]))
        / sum by (service) (rate(traefik_service_requests_total[3m]))
        * 100 > 5
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: 'Traefik high HTTP 5xx error rate service (service {{ $labels.service }})'
        description: 'Traefik service 5xx error rate is above 5% (current value: {{ $value | humanize }}%)'
RAW Paste Data