arshad75

prometheus-grafana.yaml

Jan 26th, 2019
172
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 95.65 KB | None | 0 0
  1. # Derived from ./manifests
  2. ---
  # Namespace referenced by the namespaced monitoring resources in this manifest.
  apiVersion: v1
  kind: Namespace
  metadata:
    name: monitoring
  7. ---
  # Binds the cluster-wide "prometheus" ClusterRole to the prometheus-k8s
  # ServiceAccount in the monitoring namespace.
  # Uses rbac.authorization.k8s.io/v1: the v1beta1 RBAC API is no longer
  # served as of Kubernetes 1.22.
  apiVersion: rbac.authorization.k8s.io/v1
  kind: ClusterRoleBinding
  metadata:
    name: prometheus
  roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: ClusterRole
    name: prometheus
  subjects:
    - kind: ServiceAccount
      name: prometheus-k8s
      namespace: monitoring
  20. ---
  # Read-only permissions bound to the prometheus-k8s ServiceAccount.
  # Uses rbac.authorization.k8s.io/v1: the v1beta1 RBAC API is no longer
  # served as of Kubernetes 1.22.
  apiVersion: rbac.authorization.k8s.io/v1
  kind: ClusterRole
  metadata:
    name: prometheus
  rules:
    # Core-group objects: read and watch access.
    - apiGroups: [""]
      resources:
        - nodes
        - nodes/proxy
        - services
        - endpoints
        - pods
      verbs: ["get", "list", "watch"]
    # ConfigMaps only need to be fetched, not listed or watched.
    - apiGroups: [""]
      resources:
        - configmaps
      verbs: ["get"]
    # Non-resource endpoint: GET on /metrics.
    - nonResourceURLs: ["/metrics"]
      verbs: ["get"]
  40. ---
  # ServiceAccount named as the subject of the "prometheus" ClusterRoleBinding.
  apiVersion: v1
  kind: ServiceAccount
  metadata:
    namespace: monitoring
    name: prometheus-k8s
  46. ---
  # Notification templates for Alertmanager. The alertmanager Deployment mounts
  # this ConfigMap at /etc/alertmanager-templates, and the Alertmanager config
  # loads '/etc/alertmanager-templates/*.tmpl'.
  #   default.tmpl - default templates for each notifier (Slack, HipChat,
  #                  PagerDuty, OpsGenie, VictorOps, e-mail, Pushover)
  #   slack.tmpl   - "slack.devops.text", used by the 'default' Slack receiver
  apiVersion: v1
  kind: ConfigMap
  metadata:
    name: alertmanager-templates
    namespace: monitoring
    creationTimestamp: null
  data:
    default.tmpl: |
      {{ define "__alertmanager" }}AlertManager{{ end }}
      {{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}{{ end }}

      {{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }}
      {{ define "__description" }}{{ end }}

      {{ define "__text_alert_list" }}{{ range . }}Labels:
      {{ range .Labels.SortedPairs }} - {{ .Name }} = {{ .Value }}
      {{ end }}Annotations:
      {{ range .Annotations.SortedPairs }} - {{ .Name }} = {{ .Value }}
      {{ end }}Source: {{ .GeneratorURL }}
      {{ end }}{{ end }}


      {{ define "slack.default.title" }}{{ template "__subject" . }}{{ end }}
      {{ define "slack.default.username" }}{{ template "__alertmanager" . }}{{ end }}
      {{ define "slack.default.fallback" }}{{ template "slack.default.title" . }} | {{ template "slack.default.titlelink" . }}{{ end }}
      {{ define "slack.default.pretext" }}{{ end }}
      {{ define "slack.default.titlelink" }}{{ template "__alertmanagerURL" . }}{{ end }}
      {{ define "slack.default.iconemoji" }}{{ end }}
      {{ define "slack.default.iconurl" }}{{ end }}
      {{ define "slack.default.text" }}{{ end }}


      {{ define "hipchat.default.from" }}{{ template "__alertmanager" . }}{{ end }}
      {{ define "hipchat.default.message" }}{{ template "__subject" . }}{{ end }}


      {{ define "pagerduty.default.description" }}{{ template "__subject" . }}{{ end }}
      {{ define "pagerduty.default.client" }}{{ template "__alertmanager" . }}{{ end }}
      {{ define "pagerduty.default.clientURL" }}{{ template "__alertmanagerURL" . }}{{ end }}
      {{ define "pagerduty.default.instances" }}{{ template "__text_alert_list" . }}{{ end }}


      {{ define "opsgenie.default.message" }}{{ template "__subject" . }}{{ end }}
      {{ define "opsgenie.default.description" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }}
      {{ if gt (len .Alerts.Firing) 0 -}}
      Alerts Firing:
      {{ template "__text_alert_list" .Alerts.Firing }}
      {{- end }}
      {{ if gt (len .Alerts.Resolved) 0 -}}
      Alerts Resolved:
      {{ template "__text_alert_list" .Alerts.Resolved }}
      {{- end }}
      {{- end }}
      {{ define "opsgenie.default.source" }}{{ template "__alertmanagerURL" . }}{{ end }}


      {{ define "victorops.default.message" }}{{ template "__subject" . }} | {{ template "__alertmanagerURL" . }}{{ end }}
      {{ define "victorops.default.from" }}{{ template "__alertmanager" . }}{{ end }}


      {{ define "email.default.subject" }}{{ template "__subject" . }}{{ end }}
      {{ define "email.default.html" }}
      <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
      <!--
      Style and HTML derived from https://github.com/mailgun/transactional-email-templates


      The MIT License (MIT)

      Copyright (c) 2014 Mailgun

      Permission is hereby granted, free of charge, to any person obtaining a copy
      of this software and associated documentation files (the "Software"), to deal
      in the Software without restriction, including without limitation the rights
      to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      copies of the Software, and to permit persons to whom the Software is
      furnished to do so, subject to the following conditions:

      The above copyright notice and this permission notice shall be included in all
      copies or substantial portions of the Software.

      THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
      SOFTWARE.
      -->
      <html xmlns="http://www.w3.org/1999/xhtml" xmlns="http://www.w3.org/1999/xhtml" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
      <head style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
      <meta name="viewport" content="width=device-width" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
      <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
      <title style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">{{ template "__subject" . }}</title>

      </head>

      <body itemscope="" itemtype="http://schema.org/EmailMessage" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; -webkit-font-smoothing: antialiased; -webkit-text-size-adjust: none; height: 100%; line-height: 1.6em; width: 100% !important; background-color: #f6f6f6; margin: 0; padding: 0;" bgcolor="#f6f6f6">

      <table style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; width: 100%; background-color: #f6f6f6; margin: 0;" bgcolor="#f6f6f6">
      <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
      <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0;" valign="top"></td>
      <td width="600" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; display: block !important; max-width: 600px !important; clear: both !important; width: 100% !important; margin: 0 auto; padding: 0;" valign="top">
      <div style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; max-width: 600px; display: block; margin: 0 auto; padding: 0;">
      <table width="100%" cellpadding="0" cellspacing="0" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; border-radius: 3px; background-color: #fff; margin: 0; border: 1px solid #e9e9e9;" bgcolor="#fff">
      <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
      <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 16px; vertical-align: top; color: #fff; font-weight: 500; text-align: center; border-radius: 3px 3px 0 0; background-color: #E6522C; margin: 0; padding: 20px;" align="center" bgcolor="#E6522C" valign="top">
      {{ .Alerts | len }} alert{{ if gt (len .Alerts) 1 }}s{{ end }} for {{ range .GroupLabels.SortedPairs }}
      {{ .Name }}={{ .Value }}
      {{ end }}
      </td>
      </tr>
      <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
      <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 10px;" valign="top">
      <table width="100%" cellpadding="0" cellspacing="0" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
      <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
      <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
      <a href="{{ template "__alertmanagerURL" . }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #FFF; text-decoration: none; line-height: 2em; font-weight: bold; text-align: center; cursor: pointer; display: inline-block; border-radius: 5px; text-transform: capitalize; background-color: #348eda; margin: 0; border-color: #348eda; border-style: solid; border-width: 10px 20px;">View in {{ template "__alertmanager" . }}</a>
      </td>
      </tr>
      {{ if gt (len .Alerts.Firing) 0 }}
      <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
      <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
      <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">[{{ .Alerts.Firing | len }}] Firing</strong>
      </td>
      </tr>
      {{ end }}
      {{ range .Alerts.Firing }}
      <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
      <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
      <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Labels</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
      {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
      {{ if gt (len .Annotations) 0 }}<strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Annotations</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
      {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
      <a href="{{ .GeneratorURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #348eda; text-decoration: underline; margin: 0;">Source</a><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
      </td>
      </tr>
      {{ end }}

      {{ if gt (len .Alerts.Resolved) 0 }}
      {{ if gt (len .Alerts.Firing) 0 }}
      <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
      <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
      <br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
      <hr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
      <br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
      </td>
      </tr>
      {{ end }}
      <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
      <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
      <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">[{{ .Alerts.Resolved | len }}] Resolved</strong>
      </td>
      </tr>
      {{ end }}
      {{ range .Alerts.Resolved }}
      <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
      <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
      <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Labels</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
      {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
      {{ if gt (len .Annotations) 0 }}<strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Annotations</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
      {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
      <a href="{{ .GeneratorURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #348eda; text-decoration: underline; margin: 0;">Source</a><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
      </td>
      </tr>
      {{ end }}
      </table>
      </td>
      </tr>
      </table>

      <div style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; width: 100%; clear: both; color: #999; margin: 0; padding: 20px;">
      <table width="100%" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
      <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
      <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 12px; vertical-align: top; text-align: center; color: #999; margin: 0; padding: 0 0 20px;" align="center" valign="top"><a href="{{ .ExternalURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 12px; color: #999; text-decoration: underline; margin: 0;">Sent by {{ template "__alertmanager" . }}</a></td>
      </tr>
      </table>
      </div></div>
      </td>
      <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0;" valign="top"></td>
      </tr>
      </table>

      </body>
      </html>

      {{ end }}

      {{ define "pushover.default.title" }}{{ template "__subject" . }}{{ end }}
      {{ define "pushover.default.message" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }}
      {{ if gt (len .Alerts.Firing) 0 }}
      Alerts Firing:
      {{ template "__text_alert_list" .Alerts.Firing }}
      {{ end }}
      {{ if gt (len .Alerts.Resolved) 0 }}
      Alerts Resolved:
      {{ template "__text_alert_list" .Alerts.Resolved }}
      {{ end }}
      {{ end }}
      {{ define "pushover.default.url" }}{{ template "__alertmanagerURL" . }}{{ end }}
    slack.tmpl: |
      {{ define "slack.devops.text" }}
      {{range .Alerts}}{{.Annotations.DESCRIPTION}}
      {{end}}
      {{ end }}
  252. ---
  # Alertmanager configuration. The alertmanager Deployment mounts this
  # ConfigMap at /etc/alertmanager and starts with
  # -config.file=/etc/alertmanager/config.yml.
  # NOTE(review): the SMTP credentials and Slack webhook URL below look like
  # placeholders; real values should not be kept in a ConfigMap - confirm.
  apiVersion: v1
  kind: ConfigMap
  metadata:
    name: alertmanager
    namespace: monitoring
  data:
    config.yml: |-
      global:
        # ResolveTimeout is the time after which an alert is declared resolved
        # if it has not been updated.
        resolve_timeout: 5m

        # The smarthost and SMTP sender used for mail notifications.
        smtp_smarthost: 'smtp.gmail.com:587'
        smtp_from: 'foo@bar.com'
        smtp_auth_username: 'foo@bar.com'
        smtp_auth_password: 'barfoo'

        # The API URL to use for Slack notifications.
        slack_api_url: 'https://hooks.slack.com/services/some/api/token'

      # # The directory from which notification templates are read.
      templates:
        - '/etc/alertmanager-templates/*.tmpl'

      # The root route on which each incoming alert enters.
      route:

        # The labels by which incoming alerts are grouped together. For example,
        # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
        # be batched into a single group.

        group_by: ['alertname', 'cluster', 'service']

        # When a new group of alerts is created by an incoming alert, wait at
        # least 'group_wait' to send the initial notification.
        # This way ensures that you get multiple alerts for the same group that start
        # firing shortly after another are batched together on the first
        # notification.

        group_wait: 30s

        # When the first notification was sent, wait 'group_interval' to send a batch
        # of new alerts that started firing for that group.

        group_interval: 5m

        # If an alert has successfully been sent, wait 'repeat_interval' to
        # resend them.

        #repeat_interval: 1m
        repeat_interval: 15m

        # A default receiver

        # If an alert isn't caught by a route, send it to default.
        receiver: default

        # All the above attributes are inherited by all child routes and can
        # overwritten on each.

        # The child route trees.
        routes:
          # Send severity=slack alerts to slack.
          - match:
              severity: slack
            receiver: slack_alert
          # - match:
          # severity: email
          # receiver: email_alert

      receivers:
        - name: 'default'
          slack_configs:
            - channel: '#alertmanager-test'
              text: '<!channel>{{ template "slack.devops.text" . }}'
              send_resolved: true

        - name: 'slack_alert'
          slack_configs:
            - channel: '#alertmanager-test'
              send_resolved: true
  335. ---
  # Single-replica Alertmanager. Configuration and notification templates come
  # from the "alertmanager" and "alertmanager-templates" ConfigMaps; alert state
  # lives in an emptyDir and is lost when the pod is rescheduled.
  # Uses apps/v1: Deployments are no longer served from extensions/v1beta1 as
  # of Kubernetes 1.16.
  apiVersion: apps/v1
  kind: Deployment
  metadata:
    name: alertmanager
    namespace: monitoring
  spec:
    replicas: 1
    selector:
      matchLabels:
        app: alertmanager
    template:
      metadata:
        name: alertmanager
        labels:
          app: alertmanager
      spec:
        containers:
          - name: alertmanager
            image: quay.io/prometheus/alertmanager:v0.7.1
            # Flags are kept exactly as in the original manifest for this image tag.
            args:
              - '-config.file=/etc/alertmanager/config.yml'
              - '-storage.path=/alertmanager'
            ports:
              - name: alertmanager
                containerPort: 9093
            volumeMounts:
              - name: config-volume
                mountPath: /etc/alertmanager
              - name: templates-volume
                mountPath: /etc/alertmanager-templates
              - name: alertmanager
                mountPath: /alertmanager
        volumes:
          - name: config-volume
            configMap:
              name: alertmanager
          - name: templates-volume
            configMap:
              name: alertmanager-templates
          - name: alertmanager
            emptyDir: {}
  377. ---
  # NodePort Service in front of the pods labelled app=alertmanager (port 9093).
  # The prometheus.io/* annotations are presumably consumed by the Prometheus
  # scrape configuration, which is not visible in this part of the file.
  apiVersion: v1
  kind: Service
  metadata:
    name: alertmanager
    namespace: monitoring
    labels:
      name: alertmanager
    annotations:
      prometheus.io/scrape: 'true'
      prometheus.io/path: '/metrics'
  spec:
    type: NodePort
    selector:
      app: alertmanager
    ports:
      - name: alertmanager
        protocol: TCP
        port: 9093
        targetPort: 9093
  397. ---
  # Single-replica Grafana. Admin credentials are read from the "grafana"
  # Secret (keys admin-username / admin-password); dashboards and settings are
  # stored in an emptyDir and are lost when the pod is rescheduled.
  # Uses apps/v1: Deployments are no longer served from extensions/v1beta1 as
  # of Kubernetes 1.16, and apps/v1 requires an explicit spec.selector that
  # matches the pod template labels.
  apiVersion: apps/v1
  kind: Deployment
  metadata:
    name: grafana-core
    namespace: monitoring
    labels:
      app: grafana
      component: core
  spec:
    replicas: 1
    selector:
      matchLabels:
        app: grafana
        component: core
    template:
      metadata:
        labels:
          app: grafana
          component: core
      spec:
        containers:
          # NOTE(review): ":latest" with IfNotPresent means each node runs
          # whichever build it pulled first; consider pinning a version tag.
          - image: grafana/grafana:latest
            name: grafana-core
            imagePullPolicy: IfNotPresent
            # env:
            resources:
              # keep request = limit to keep this container in guaranteed class
              limits:
                cpu: 100m
                memory: 100Mi
              requests:
                cpu: 100m
                memory: 100Mi
            env:
              # The following env variables set up basic auth with the default admin user and admin password.
              - name: GF_AUTH_BASIC_ENABLED
                value: "true"
              - name: GF_SECURITY_ADMIN_USER
                valueFrom:
                  secretKeyRef:
                    name: grafana
                    key: admin-username
              - name: GF_SECURITY_ADMIN_PASSWORD
                valueFrom:
                  secretKeyRef:
                    name: grafana
                    key: admin-password
              - name: GF_AUTH_ANONYMOUS_ENABLED
                value: "false"
              # - name: GF_AUTH_ANONYMOUS_ORG_ROLE
              #   value: Admin
              # does not really work, because of template variables in exported dashboards:
              # - name: GF_DASHBOARDS_JSON_ENABLED
              #   value: "true"
            readinessProbe:
              httpGet:
                path: /login
                port: 3000
              # initialDelaySeconds: 30
              # timeoutSeconds: 1
            volumeMounts:
              - name: grafana-persistent-storage
                mountPath: /var/lib/grafana
        volumes:
          - name: grafana-persistent-storage
            emptyDir: {}
  460. ---
  461. apiVersion: v1
  462. data:
  463. grafana-net-2-dashboard.json: |
  464. {
  465. "__inputs": [{
  466. "name": "DS_PROMETHEUS",
  467. "label": "Prometheus",
  468. "description": "",
  469. "type": "datasource",
  470. "pluginId": "prometheus",
  471. "pluginName": "Prometheus"
  472. }],
  473. "__requires": [{
  474. "type": "panel",
  475. "id": "singlestat",
  476. "name": "Singlestat",
  477. "version": ""
  478. }, {
  479. "type": "panel",
  480. "id": "text",
  481. "name": "Text",
  482. "version": ""
  483. }, {
  484. "type": "panel",
  485. "id": "graph",
  486. "name": "Graph",
  487. "version": ""
  488. }, {
  489. "type": "grafana",
  490. "id": "grafana",
  491. "name": "Grafana",
  492. "version": "3.1.0"
  493. }, {
  494. "type": "datasource",
  495. "id": "prometheus",
  496. "name": "Prometheus",
  497. "version": "1.0.0"
  498. }],
  499. "id": null,
  500. "title": "Prometheus Stats",
  501. "tags": [],
  502. "style": "dark",
  503. "timezone": "browser",
  504. "editable": true,
  505. "hideControls": true,
  506. "sharedCrosshair": false,
  507. "rows": [{
  508. "collapse": false,
  509. "editable": true,
  510. "height": 178,
  511. "panels": [{
  512. "cacheTimeout": null,
  513. "colorBackground": false,
  514. "colorValue": false,
  515. "colors": ["rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)"],
  516. "datasource": "${DS_PROMETHEUS}",
  517. "decimals": 1,
  518. "editable": true,
  519. "error": false,
  520. "format": "s",
  521. "id": 5,
  522. "interval": null,
  523. "links": [],
  524. "maxDataPoints": 100,
  525. "nullPointMode": "connected",
  526. "nullText": null,
  527. "postfix": "",
  528. "postfixFontSize": "50%",
  529. "prefix": "",
  530. "prefixFontSize": "50%",
  531. "span": 3,
  532. "sparkline": {
  533. "fillColor": "rgba(31, 118, 189, 0.18)",
  534. "full": false,
  535. "lineColor": "rgb(31, 120, 193)",
  536. "show": false
  537. },
  538. "targets": [{
  539. "expr": "(time() - process_start_time_seconds{job=\"prometheus\"})",
  540. "intervalFactor": 2,
  541. "refId": "A",
  542. "step": 4
  543. }],
  544. "thresholds": "",
  545. "title": "Uptime",
  546. "type": "singlestat",
  547. "valueFontSize": "80%",
  548. "valueMaps": [{
  549. "op": "=",
  550. "text": "N/A",
  551. "value": "null"
  552. }],
  553. "valueName": "current",
  554. "mappingTypes": [{
  555. "name": "value to text",
  556. "value": 1
  557. }, {
  558. "name": "range to text",
  559. "value": 2
  560. }],
  561. "rangeMaps": [{
  562. "from": "null",
  563. "to": "null",
  564. "text": "N/A"
  565. }],
  566. "mappingType": 1,
  567. "gauge": {
  568. "show": false,
  569. "minValue": 0,
  570. "maxValue": 100,
  571. "thresholdMarkers": true,
  572. "thresholdLabels": false
  573. }
  574. }, {
  575. "cacheTimeout": null,
  576. "colorBackground": false,
  577. "colorValue": false,
  578. "colors": ["rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)"],
  579. "datasource": "${DS_PROMETHEUS}",
  580. "editable": true,
  581. "error": false,
  582. "format": "none",
  583. "id": 6,
  584. "interval": null,
  585. "links": [],
  586. "maxDataPoints": 100,
  587. "nullPointMode": "connected",
  588. "nullText": null,
  589. "postfix": "",
  590. "postfixFontSize": "50%",
  591. "prefix": "",
  592. "prefixFontSize": "50%",
  593. "span": 3,
  594. "sparkline": {
  595. "fillColor": "rgba(31, 118, 189, 0.18)",
  596. "full": false,
  597. "lineColor": "rgb(31, 120, 193)",
  598. "show": true
  599. },
  600. "targets": [{
  601. "expr": "prometheus_local_storage_memory_series",
  602. "intervalFactor": 2,
  603. "refId": "A",
  604. "step": 4
  605. }],
  606. "thresholds": "1,5",
  607. "title": "Local Storage Memory Series",
  608. "type": "singlestat",
  609. "valueFontSize": "70%",
  610. "valueMaps": [],
  611. "valueName": "current",
  612. "mappingTypes": [{
  613. "name": "value to text",
  614. "value": 1
  615. }, {
  616. "name": "range to text",
  617. "value": 2
  618. }],
  619. "rangeMaps": [{
  620. "from": "null",
  621. "to": "null",
  622. "text": "N/A"
  623. }],
  624. "mappingType": 1,
  625. "gauge": {
  626. "show": false,
  627. "minValue": 0,
  628. "maxValue": 100,
  629. "thresholdMarkers": true,
  630. "thresholdLabels": false
  631. }
  632. }, {
  633. "cacheTimeout": null,
  634. "colorBackground": false,
  635. "colorValue": true,
  636. "colors": ["rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)"],
  637. "datasource": "${DS_PROMETHEUS}",
  638. "editable": true,
  639. "error": false,
  640. "format": "none",
  641. "id": 7,
  642. "interval": null,
  643. "links": [],
  644. "maxDataPoints": 100,
  645. "nullPointMode": "connected",
  646. "nullText": null,
  647. "postfix": "",
  648. "postfixFontSize": "50%",
  649. "prefix": "",
  650. "prefixFontSize": "50%",
  651. "span": 3,
  652. "sparkline": {
  653. "fillColor": "rgba(31, 118, 189, 0.18)",
  654. "full": false,
  655. "lineColor": "rgb(31, 120, 193)",
  656. "show": true
  657. },
  658. "targets": [{
  659. "expr": "prometheus_local_storage_indexing_queue_length",
  660. "intervalFactor": 2,
  661. "refId": "A",
  662. "step": 4
  663. }],
  664. "thresholds": "500,4000",
  665. "title": "Internal Storage Queue Length",
  666. "type": "singlestat",
  667. "valueFontSize": "70%",
  668. "valueMaps": [{
  669. "op": "=",
  670. "text": "Empty",
  671. "value": "0"
  672. }],
  673. "valueName": "current",
  674. "mappingTypes": [{
  675. "name": "value to text",
  676. "value": 1
  677. }, {
  678. "name": "range to text",
  679. "value": 2
  680. }],
  681. "rangeMaps": [{
  682. "from": "null",
  683. "to": "null",
  684. "text": "N/A"
  685. }],
  686. "mappingType": 1,
  687. "gauge": {
  688. "show": false,
  689. "minValue": 0,
  690. "maxValue": 100,
  691. "thresholdMarkers": true,
  692. "thresholdLabels": false
  693. }
  694. }, {
  695. "content": "<img src=\"http://prometheus.io/assets/prometheus_logo_grey.svg\" alt=\"Prometheus logo\" style=\"height: 40px;\">\n<span style=\"font-family: 'Open Sans', 'Helvetica Neue', Helvetica; font-size: 25px;vertical-align: text-top;color: #bbbfc2;margin-left: 10px;\">Prometheus</span>\n\n<p style=\"margin-top: 10px;\">You're using Prometheus, an open-source systems monitoring and alerting toolkit originally built at SoundCloud. For more information, check out the <a href=\"http://www.grafana.org/\">Grafana</a> and <a href=\"http://prometheus.io/\">Prometheus</a> projects.</p>",
  696. "editable": true,
  697. "error": false,
  698. "id": 9,
  699. "links": [],
  700. "mode": "html",
  701. "span": 3,
  702. "style": {},
  703. "title": "",
  704. "transparent": true,
  705. "type": "text"
  706. }],
  707. "title": "New row"
  708. }, {
  709. "collapse": false,
  710. "editable": true,
  711. "height": 227,
  712. "panels": [{
  713. "aliasColors": {
  714. "prometheus": "#C15C17",
  715. "{instance=\"localhost:9090\",job=\"prometheus\"}": "#C15C17"
  716. },
  717. "bars": false,
  718. "datasource": "${DS_PROMETHEUS}",
  719. "editable": true,
  720. "error": false,
  721. "fill": 1,
  722. "grid": {
  723. "threshold1": null,
  724. "threshold1Color": "rgba(216, 200, 27, 0.27)",
  725. "threshold2": null,
  726. "threshold2Color": "rgba(234, 112, 112, 0.22)"
  727. },
  728. "id": 3,
  729. "legend": {
  730. "avg": false,
  731. "current": false,
  732. "max": false,
  733. "min": false,
  734. "show": true,
  735. "total": false,
  736. "values": false
  737. },
  738. "lines": true,
  739. "linewidth": 2,
  740. "links": [],
  741. "nullPointMode": "connected",
  742. "percentage": false,
  743. "pointradius": 2,
  744. "points": false,
  745. "renderer": "flot",
  746. "seriesOverrides": [],
  747. "span": 9,
  748. "stack": false,
  749. "steppedLine": false,
  750. "targets": [{
  751. "expr": "rate(prometheus_local_storage_ingested_samples_total[5m])",
  752. "interval": "",
  753. "intervalFactor": 2,
  754. "legendFormat": "{{job}}",
  755. "metric": "",
  756. "refId": "A",
  757. "step": 2
  758. }],
  759. "timeFrom": null,
  760. "timeShift": null,
  761. "title": "Samples ingested (rate-5m)",
  762. "tooltip": {
  763. "shared": true,
  764. "value_type": "cumulative",
  765. "ordering": "alphabetical",
  766. "msResolution": false
  767. },
  768. "type": "graph",
  769. "yaxes": [{
  770. "show": true,
  771. "min": null,
  772. "max": null,
  773. "logBase": 1,
  774. "format": "short"
  775. }, {
  776. "show": true,
  777. "min": null,
  778. "max": null,
  779. "logBase": 1,
  780. "format": "short"
  781. }],
  782. "xaxis": {
  783. "show": true
  784. }
  785. }, {
  786. "content": "#### Samples Ingested\nThis graph displays the per-second rate of samples ingested by the Prometheus server, averaged over the last 5 minutes, per time series in the range vector. When troubleshooting an issue on IRC or GitHub, this is often the first stat requested by the Prometheus team.",
  787. "editable": true,
  788. "error": false,
  789. "id": 8,
  790. "links": [],
  791. "mode": "markdown",
  792. "span": 2.995914043583536,
  793. "style": {},
  794. "title": "",
  795. "transparent": true,
  796. "type": "text"
  797. }],
  798. "title": "New row"
  799. }, {
  800. "collapse": false,
  801. "editable": true,
  802. "height": "250px",
  803. "panels": [{
  804. "aliasColors": {
  805. "prometheus": "#F9BA8F",
  806. "{instance=\"localhost:9090\",interval=\"5s\",job=\"prometheus\"}": "#F9BA8F"
  807. },
  808. "bars": false,
  809. "datasource": "${DS_PROMETHEUS}",
  810. "editable": true,
  811. "error": false,
  812. "fill": 1,
  813. "grid": {
  814. "threshold1": null,
  815. "threshold1Color": "rgba(216, 200, 27, 0.27)",
  816. "threshold2": null,
  817. "threshold2Color": "rgba(234, 112, 112, 0.22)"
  818. },
  819. "id": 2,
  820. "legend": {
  821. "avg": false,
  822. "current": false,
  823. "max": false,
  824. "min": false,
  825. "show": true,
  826. "total": false,
  827. "values": false
  828. },
  829. "lines": true,
  830. "linewidth": 2,
  831. "links": [],
  832. "nullPointMode": "connected",
  833. "percentage": false,
  834. "pointradius": 5,
  835. "points": false,
  836. "renderer": "flot",
  837. "seriesOverrides": [],
  838. "span": 5,
  839. "stack": false,
  840. "steppedLine": false,
  841. "targets": [{
  842. "expr": "rate(prometheus_target_interval_length_seconds_count[5m])",
  843. "intervalFactor": 2,
  844. "legendFormat": "{{job}}",
  845. "refId": "A",
  846. "step": 2
  847. }],
  848. "timeFrom": null,
  849. "timeShift": null,
  850. "title": "Target Scrapes (last 5m)",
  851. "tooltip": {
  852. "shared": true,
  853. "value_type": "cumulative",
  854. "ordering": "alphabetical",
  855. "msResolution": false
  856. },
  857. "type": "graph",
  858. "yaxes": [{
  859. "show": true,
  860. "min": null,
  861. "max": null,
  862. "logBase": 1,
  863. "format": "short"
  864. }, {
  865. "show": true,
  866. "min": null,
  867. "max": null,
  868. "logBase": 1,
  869. "format": "short"
  870. }],
  871. "xaxis": {
  872. "show": true
  873. }
  874. }, {
  875. "aliasColors": {},
  876. "bars": false,
  877. "datasource": "${DS_PROMETHEUS}",
  878. "editable": true,
  879. "error": false,
  880. "fill": 1,
  881. "grid": {
  882. "threshold1": null,
  883. "threshold1Color": "rgba(216, 200, 27, 0.27)",
  884. "threshold2": null,
  885. "threshold2Color": "rgba(234, 112, 112, 0.22)"
  886. },
  887. "id": 14,
  888. "legend": {
  889. "avg": false,
  890. "current": false,
  891. "max": false,
  892. "min": false,
  893. "show": true,
  894. "total": false,
  895. "values": false
  896. },
  897. "lines": true,
  898. "linewidth": 2,
  899. "links": [],
  900. "nullPointMode": "connected",
  901. "percentage": false,
  902. "pointradius": 5,
  903. "points": false,
  904. "renderer": "flot",
  905. "seriesOverrides": [],
  906. "span": 4,
  907. "stack": false,
  908. "steppedLine": false,
  909. "targets": [{
  910. "expr": "prometheus_target_interval_length_seconds{quantile!=\"0.01\", quantile!=\"0.05\"}",
  911. "interval": "",
  912. "intervalFactor": 2,
  913. "legendFormat": "{{quantile}} ({{interval}})",
  914. "metric": "",
  915. "refId": "A",
  916. "step": 2
  917. }],
  918. "timeFrom": null,
  919. "timeShift": null,
  920. "title": "Scrape Duration",
  921. "tooltip": {
  922. "shared": true,
  923. "value_type": "cumulative",
  924. "ordering": "alphabetical",
  925. "msResolution": false
  926. },
  927. "type": "graph",
  928. "yaxes": [{
  929. "show": true,
  930. "min": null,
  931. "max": null,
  932. "logBase": 1,
  933. "format": "short"
  934. }, {
  935. "show": true,
  936. "min": null,
  937. "max": null,
  938. "logBase": 1,
  939. "format": "short"
  940. }],
  941. "xaxis": {
  942. "show": true
  943. }
  944. }, {
  945. "content": "#### Scrapes\nPrometheus scrapes metrics from instrumented jobs, either directly or via an intermediary Pushgateway for short-lived jobs. Target Scrapes shows how frequently targets are scraped, as measured over the last 5 minutes, per time series in the range vector. Scrape Duration shows how long the scrapes are taking, with percentiles available as series.",
  946. "editable": true,
  947. "error": false,
  948. "id": 11,
  949. "links": [],
  950. "mode": "markdown",
  951. "span": 3,
  952. "style": {},
  953. "title": "",
  954. "transparent": true,
  955. "type": "text"
  956. }],
  957. "title": "New row"
  958. }, {
  959. "collapse": false,
  960. "editable": true,
  961. "height": "250px",
  962. "panels": [{
  963. "aliasColors": {},
  964. "bars": false,
  965. "datasource": "${DS_PROMETHEUS}",
  966. "decimals": null,
  967. "editable": true,
  968. "error": false,
  969. "fill": 1,
  970. "grid": {
  971. "threshold1": null,
  972. "threshold1Color": "rgba(216, 200, 27, 0.27)",
  973. "threshold2": null,
  974. "threshold2Color": "rgba(234, 112, 112, 0.22)"
  975. },
  976. "id": 12,
  977. "legend": {
  978. "alignAsTable": false,
  979. "avg": false,
  980. "current": false,
  981. "hideEmpty": true,
  982. "max": false,
  983. "min": false,
  984. "show": true,
  985. "total": false,
  986. "values": false
  987. },
  988. "lines": true,
  989. "linewidth": 2,
  990. "links": [],
  991. "nullPointMode": "connected",
  992. "percentage": false,
  993. "pointradius": 5,
  994. "points": false,
  995. "renderer": "flot",
  996. "seriesOverrides": [],
  997. "span": 9,
  998. "stack": false,
  999. "steppedLine": false,
  1000. "targets": [{
  1001. "expr": "prometheus_evaluator_duration_milliseconds{quantile!=\"0.01\", quantile!=\"0.05\"}",
  1002. "interval": "",
  1003. "intervalFactor": 2,
  1004. "legendFormat": "{{quantile}}",
  1005. "refId": "A",
  1006. "step": 2
  1007. }],
  1008. "timeFrom": null,
  1009. "timeShift": null,
  1010. "title": "Rule Eval Duration",
  1011. "tooltip": {
  1012. "shared": true,
  1013. "value_type": "cumulative",
  1014. "ordering": "alphabetical",
  1015. "msResolution": false
  1016. },
  1017. "type": "graph",
  1018. "yaxes": [{
  1019. "show": true,
  1020. "min": null,
  1021. "max": null,
  1022. "logBase": 1,
  1023. "format": "ms",
  1024. "label": ""
  1025. }, {
  1026. "show": true,
  1027. "min": null,
  1028. "max": null,
  1029. "logBase": 1,
  1030. "format": "short"
  1031. }],
  1032. "xaxis": {
  1033. "show": true
  1034. }
  1035. }, {
  1036. "content": "#### Rule Evaluation Duration\nThis graph panel plots the duration for all evaluations to execute. The 50th percentile, 90th percentile and 99th percentile are shown as three separate series to help identify outliers that may be skewing the data.",
  1037. "editable": true,
  1038. "error": false,
  1039. "id": 15,
  1040. "links": [],
  1041. "mode": "markdown",
  1042. "span": 3,
  1043. "style": {},
  1044. "title": "",
  1045. "transparent": true,
  1046. "type": "text"
  1047. }],
  1048. "title": "New row"
  1049. }],
  1050. "time": {
  1051. "from": "now-5m",
  1052. "to": "now"
  1053. },
  1054. "timepicker": {
  1055. "now": true,
  1056. "refresh_intervals": ["5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"],
  1057. "time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"]
  1058. },
  1059. "templating": {
  1060. "list": []
  1061. },
  1062. "annotations": {
  1063. "list": []
  1064. },
  1065. "refresh": false,
  1066. "schemaVersion": 12,
  1067. "version": 0,
  1068. "links": [{
  1069. "icon": "info",
  1070. "tags": [],
  1071. "targetBlank": true,
  1072. "title": "Grafana Docs",
  1073. "tooltip": "",
  1074. "type": "link",
  1075. "url": "http://www.grafana.org/docs"
  1076. }, {
  1077. "icon": "info",
  1078. "tags": [],
  1079. "targetBlank": true,
  1080. "title": "Prometheus Docs",
  1081. "type": "link",
  1082. "url": "http://prometheus.io/docs/introduction/overview/"
  1083. }],
  1084. "gnetId": 2,
  1085. "description": "The official, pre-built Prometheus Stats Dashboard."
  1086. }
  1087. grafana-net-737-dashboard.json: |
  1088. {
  1089. "__inputs": [{
  1090. "name": "DS_PROMETHEUS",
  1091. "label": "prometheus",
  1092. "description": "",
  1093. "type": "datasource",
  1094. "pluginId": "prometheus",
  1095. "pluginName": "Prometheus"
  1096. }],
  1097. "__requires": [{
  1098. "type": "panel",
  1099. "id": "singlestat",
  1100. "name": "Singlestat",
  1101. "version": ""
  1102. }, {
  1103. "type": "panel",
  1104. "id": "graph",
  1105. "name": "Graph",
  1106. "version": ""
  1107. }, {
  1108. "type": "grafana",
  1109. "id": "grafana",
  1110. "name": "Grafana",
  1111. "version": "3.1.0"
  1112. }, {
  1113. "type": "datasource",
  1114. "id": "prometheus",
  1115. "name": "Prometheus",
  1116. "version": "1.0.0"
  1117. }],
  1118. "id": null,
  1119. "title": "Kubernetes Pod Resources",
  1120. "description": "Shows resource usage of Kubernetes pods.",
  1121. "tags": [
  1122. "kubernetes"
  1123. ],
  1124. "style": "dark",
  1125. "timezone": "browser",
  1126. "editable": true,
  1127. "hideControls": false,
  1128. "sharedCrosshair": false,
  1129. "rows": [{
  1130. "collapse": false,
  1131. "editable": true,
  1132. "height": "250px",
  1133. "panels": [{
  1134. "cacheTimeout": null,
  1135. "colorBackground": false,
  1136. "colorValue": true,
  1137. "colors": [
  1138. "rgba(50, 172, 45, 0.97)",
  1139. "rgba(237, 129, 40, 0.89)",
  1140. "rgba(245, 54, 54, 0.9)"
  1141. ],
  1142. "datasource": "${DS_PROMETHEUS}",
  1143. "editable": true,
  1144. "error": false,
  1145. "format": "percent",
  1146. "gauge": {
  1147. "maxValue": 100,
  1148. "minValue": 0,
  1149. "show": true,
  1150. "thresholdLabels": false,
  1151. "thresholdMarkers": true
  1152. },
  1153. "height": "180px",
  1154. "id": 4,
  1155. "interval": null,
  1156. "isNew": true,
  1157. "links": [],
  1158. "mappingType": 1,
  1159. "mappingTypes": [{
  1160. "name": "value to text",
  1161. "value": 1
  1162. }, {
  1163. "name": "range to text",
  1164. "value": 2
  1165. }],
  1166. "maxDataPoints": 100,
  1167. "nullPointMode": "connected",
  1168. "nullText": null,
  1169. "postfix": "",
  1170. "postfixFontSize": "50%",
  1171. "prefix": "",
  1172. "prefixFontSize": "50%",
  1173. "rangeMaps": [{
  1174. "from": "null",
  1175. "text": "N/A",
  1176. "to": "null"
  1177. }],
  1178. "span": 4,
  1179. "sparkline": {
  1180. "fillColor": "rgba(31, 118, 189, 0.18)",
  1181. "full": false,
  1182. "lineColor": "rgb(31, 120, 193)",
  1183. "show": false
  1184. },
  1185. "targets": [{
  1186. "expr": "sum (container_memory_working_set_bytes{id=\"/\",instance=~\"^$instance$\"}) / sum (machine_memory_bytes{instance=~\"^$instance$\"}) * 100",
  1187. "interval": "",
  1188. "intervalFactor": 2,
  1189. "legendFormat": "",
  1190. "refId": "A",
  1191. "step": 2
  1192. }],
  1193. "thresholds": "65, 90",
  1194. "timeFrom": "1m",
  1195. "timeShift": null,
  1196. "title": "Memory Working Set",
  1197. "transparent": false,
  1198. "type": "singlestat",
  1199. "valueFontSize": "80%",
  1200. "valueMaps": [{
  1201. "op": "=",
  1202. "text": "N/A",
  1203. "value": "null"
  1204. }],
  1205. "valueName": "current"
  1206. }, {
  1207. "cacheTimeout": null,
  1208. "colorBackground": false,
  1209. "colorValue": true,
  1210. "colors": [
  1211. "rgba(50, 172, 45, 0.97)",
  1212. "rgba(237, 129, 40, 0.89)",
  1213. "rgba(245, 54, 54, 0.9)"
  1214. ],
  1215. "datasource": "${DS_PROMETHEUS}",
  1216. "decimals": 2,
  1217. "editable": true,
  1218. "error": false,
  1219. "format": "percent",
  1220. "gauge": {
  1221. "maxValue": 100,
  1222. "minValue": 0,
  1223. "show": true,
  1224. "thresholdLabels": false,
  1225. "thresholdMarkers": true
  1226. },
  1227. "height": "180px",
  1228. "id": 6,
  1229. "interval": null,
  1230. "isNew": true,
  1231. "links": [],
  1232. "mappingType": 1,
  1233. "mappingTypes": [{
  1234. "name": "value to text",
  1235. "value": 1
  1236. }, {
  1237. "name": "range to text",
  1238. "value": 2
  1239. }],
  1240. "maxDataPoints": 100,
  1241. "nullPointMode": "connected",
  1242. "nullText": null,
  1243. "postfix": "",
  1244. "postfixFontSize": "50%",
  1245. "prefix": "",
  1246. "prefixFontSize": "50%",
  1247. "rangeMaps": [{
  1248. "from": "null",
  1249. "text": "N/A",
  1250. "to": "null"
  1251. }],
  1252. "span": 4,
  1253. "sparkline": {
  1254. "fillColor": "rgba(31, 118, 189, 0.18)",
  1255. "full": false,
  1256. "lineColor": "rgb(31, 120, 193)",
  1257. "show": false
  1258. },
  1259. "targets": [{
  1260. "expr": "sum(rate(container_cpu_usage_seconds_total{id=\"/\",instance=~\"^$instance$\"}[1m])) / sum (machine_cpu_cores{instance=~\"^$instance$\"}) * 100",
  1261. "interval": "10s",
  1262. "intervalFactor": 1,
  1263. "refId": "A",
  1264. "step": 10
  1265. }],
  1266. "thresholds": "65, 90",
  1267. "timeFrom": "1m",
  1268. "timeShift": null,
  1269. "title": "Cpu Usage",
  1270. "type": "singlestat",
  1271. "valueFontSize": "80%",
  1272. "valueMaps": [{
  1273. "op": "=",
  1274. "text": "N/A",
  1275. "value": "null"
  1276. }],
  1277. "valueName": "current"
  1278. }, {
  1279. "cacheTimeout": null,
  1280. "colorBackground": false,
  1281. "colorValue": true,
  1282. "colors": [
  1283. "rgba(50, 172, 45, 0.97)",
  1284. "rgba(237, 129, 40, 0.89)",
  1285. "rgba(245, 54, 54, 0.9)"
  1286. ],
  1287. "datasource": "${DS_PROMETHEUS}",
  1288. "decimals": 2,
  1289. "editable": true,
  1290. "error": false,
  1291. "format": "percent",
  1292. "gauge": {
  1293. "maxValue": 100,
  1294. "minValue": 0,
  1295. "show": true,
  1296. "thresholdLabels": false,
  1297. "thresholdMarkers": true
  1298. },
  1299. "height": "180px",
  1300. "id": 7,
  1301. "interval": null,
  1302. "isNew": true,
  1303. "links": [],
  1304. "mappingType": 1,
  1305. "mappingTypes": [{
  1306. "name": "value to text",
  1307. "value": 1
  1308. }, {
  1309. "name": "range to text",
  1310. "value": 2
  1311. }],
  1312. "maxDataPoints": 100,
  1313. "nullPointMode": "connected",
  1314. "nullText": null,
  1315. "postfix": "",
  1316. "postfixFontSize": "50%",
  1317. "prefix": "",
  1318. "prefixFontSize": "50%",
  1319. "rangeMaps": [{
  1320. "from": "null",
  1321. "text": "N/A",
  1322. "to": "null"
  1323. }],
  1324. "span": 4,
  1325. "sparkline": {
  1326. "fillColor": "rgba(31, 118, 189, 0.18)",
  1327. "full": false,
  1328. "lineColor": "rgb(31, 120, 193)",
  1329. "show": false
  1330. },
  1331. "targets": [{
  1332. "expr": "sum(container_fs_usage_bytes{id=\"/\",instance=~\"^$instance$\"}) / sum(container_fs_limit_bytes{id=\"/\",instance=~\"^$instance$\"}) * 100",
  1333. "interval": "10s",
  1334. "intervalFactor": 1,
  1335. "legendFormat": "",
  1336. "metric": "",
  1337. "refId": "A",
  1338. "step": 10
  1339. }],
  1340. "thresholds": "65, 90",
  1341. "timeFrom": "1m",
  1342. "timeShift": null,
  1343. "title": "Filesystem Usage",
  1344. "type": "singlestat",
  1345. "valueFontSize": "80%",
  1346. "valueMaps": [{
  1347. "op": "=",
  1348. "text": "N/A",
  1349. "value": "null"
  1350. }],
  1351. "valueName": "current"
  1352. }, {
  1353. "cacheTimeout": null,
  1354. "colorBackground": false,
  1355. "colorValue": false,
  1356. "colors": [
  1357. "rgba(50, 172, 45, 0.97)",
  1358. "rgba(237, 129, 40, 0.89)",
  1359. "rgba(245, 54, 54, 0.9)"
  1360. ],
  1361. "datasource": "${DS_PROMETHEUS}",
  1362. "decimals": 2,
  1363. "editable": true,
  1364. "error": false,
  1365. "format": "bytes",
  1366. "gauge": {
  1367. "maxValue": 100,
  1368. "minValue": 0,
  1369. "show": false,
  1370. "thresholdLabels": false,
  1371. "thresholdMarkers": true
  1372. },
  1373. "height": "1px",
  1374. "hideTimeOverride": true,
  1375. "id": 9,
  1376. "interval": null,
  1377. "isNew": true,
  1378. "links": [],
  1379. "mappingType": 1,
  1380. "mappingTypes": [{
  1381. "name": "value to text",
  1382. "value": 1
  1383. }, {
  1384. "name": "range to text",
  1385. "value": 2
  1386. }],
  1387. "maxDataPoints": 100,
  1388. "nullPointMode": "connected",
  1389. "nullText": null,
  1390. "postfix": "",
  1391. "postfixFontSize": "20%",
  1392. "prefix": "",
  1393. "prefixFontSize": "20%",
  1394. "rangeMaps": [{
  1395. "from": "null",
  1396. "text": "N/A",
  1397. "to": "null"
  1398. }],
  1399. "span": 2,
  1400. "sparkline": {
  1401. "fillColor": "rgba(31, 118, 189, 0.18)",
  1402. "full": false,
  1403. "lineColor": "rgb(31, 120, 193)",
  1404. "show": false
  1405. },
  1406. "targets": [{
  1407. "expr": "sum(container_memory_working_set_bytes{id=\"/\",instance=~\"^$instance$\"})",
  1408. "interval": "10s",
  1409. "intervalFactor": 1,
  1410. "refId": "A",
  1411. "step": 10
  1412. }],
  1413. "thresholds": "",
  1414. "timeFrom": "1m",
  1415. "title": "Used",
  1416. "type": "singlestat",
  1417. "valueFontSize": "50%",
  1418. "valueMaps": [{
  1419. "op": "=",
  1420. "text": "N/A",
  1421. "value": "null"
  1422. }],
  1423. "valueName": "current"
  1424. }, {
  1425. "cacheTimeout": null,
  1426. "colorBackground": false,
  1427. "colorValue": false,
  1428. "colors": [
  1429. "rgba(50, 172, 45, 0.97)",
  1430. "rgba(237, 129, 40, 0.89)",
  1431. "rgba(245, 54, 54, 0.9)"
  1432. ],
  1433. "datasource": "${DS_PROMETHEUS}",
  1434. "decimals": 2,
  1435. "editable": true,
  1436. "error": false,
  1437. "format": "bytes",
  1438. "gauge": {
  1439. "maxValue": 100,
  1440. "minValue": 0,
  1441. "show": false,
  1442. "thresholdLabels": false,
  1443. "thresholdMarkers": true
  1444. },
  1445. "height": "1px",
  1446. "hideTimeOverride": true,
  1447. "id": 10,
  1448. "interval": null,
  1449. "isNew": true,
  1450. "links": [],
  1451. "mappingType": 1,
  1452. "mappingTypes": [{
  1453. "name": "value to text",
  1454. "value": 1
  1455. }, {
  1456. "name": "range to text",
  1457. "value": 2
  1458. }],
  1459. "maxDataPoints": 100,
  1460. "nullPointMode": "connected",
  1461. "nullText": null,
  1462. "postfix": "",
  1463. "postfixFontSize": "50%",
  1464. "prefix": "",
  1465. "prefixFontSize": "50%",
  1466. "rangeMaps": [{
  1467. "from": "null",
  1468. "text": "N/A",
  1469. "to": "null"
  1470. }],
  1471. "span": 2,
  1472. "sparkline": {
  1473. "fillColor": "rgba(31, 118, 189, 0.18)",
  1474. "full": false,
  1475. "lineColor": "rgb(31, 120, 193)",
  1476. "show": false
  1477. },
  1478. "targets": [{
  1479. "expr": "sum (machine_memory_bytes{instance=~\"^$instance$\"})",
  1480. "interval": "10s",
  1481. "intervalFactor": 1,
  1482. "refId": "A",
  1483. "step": 10
  1484. }],
  1485. "thresholds": "",
  1486. "timeFrom": "1m",
  1487. "title": "Total",
  1488. "type": "singlestat",
  1489. "valueFontSize": "50%",
  1490. "valueMaps": [{
  1491. "op": "=",
  1492. "text": "N/A",
  1493. "value": "null"
  1494. }],
  1495. "valueName": "current"
  1496. }, {
  1497. "cacheTimeout": null,
  1498. "colorBackground": false,
  1499. "colorValue": false,
  1500. "colors": [
  1501. "rgba(50, 172, 45, 0.97)",
  1502. "rgba(237, 129, 40, 0.89)",
  1503. "rgba(245, 54, 54, 0.9)"
  1504. ],
  1505. "datasource": "${DS_PROMETHEUS}",
  1506. "decimals": 2,
  1507. "editable": true,
  1508. "error": false,
  1509. "format": "none",
  1510. "gauge": {
  1511. "maxValue": 100,
  1512. "minValue": 0,
  1513. "show": false,
  1514. "thresholdLabels": false,
  1515. "thresholdMarkers": true
  1516. },
  1517. "height": "1px",
  1518. "hideTimeOverride": true,
  1519. "id": 11,
  1520. "interval": null,
  1521. "isNew": true,
  1522. "links": [],
  1523. "mappingType": 1,
  1524. "mappingTypes": [{
  1525. "name": "value to text",
  1526. "value": 1
  1527. }, {
  1528. "name": "range to text",
  1529. "value": 2
  1530. }],
  1531. "maxDataPoints": 100,
  1532. "nullPointMode": "connected",
  1533. "nullText": null,
  1534. "postfix": " cores",
  1535. "postfixFontSize": "30%",
  1536. "prefix": "",
  1537. "prefixFontSize": "50%",
  1538. "rangeMaps": [{
  1539. "from": "null",
  1540. "text": "N/A",
  1541. "to": "null"
  1542. }],
  1543. "span": 2,
  1544. "sparkline": {
  1545. "fillColor": "rgba(31, 118, 189, 0.18)",
  1546. "full": false,
  1547. "lineColor": "rgb(31, 120, 193)",
  1548. "show": false
  1549. },
  1550. "targets": [{
  1551. "expr": "sum (rate (container_cpu_usage_seconds_total{id=\"/\",instance=~\"^$instance$\"}[1m]))",
  1552. "interval": "10s",
  1553. "intervalFactor": 1,
  1554. "refId": "A",
  1555. "step": 10
  1556. }],
  1557. "thresholds": "",
  1558. "timeFrom": "1m",
  1559. "timeShift": null,
  1560. "title": "Used",
  1561. "type": "singlestat",
  1562. "valueFontSize": "50%",
  1563. "valueMaps": [{
  1564. "op": "=",
  1565. "text": "N/A",
  1566. "value": "null"
  1567. }],
  1568. "valueName": "current"
  1569. }, {
  1570. "cacheTimeout": null,
  1571. "colorBackground": false,
  1572. "colorValue": false,
  1573. "colors": [
  1574. "rgba(50, 172, 45, 0.97)",
  1575. "rgba(237, 129, 40, 0.89)",
  1576. "rgba(245, 54, 54, 0.9)"
  1577. ],
  1578. "datasource": "${DS_PROMETHEUS}",
  1579. "decimals": 2,
  1580. "editable": true,
  1581. "error": false,
  1582. "format": "none",
  1583. "gauge": {
  1584. "maxValue": 100,
  1585. "minValue": 0,
  1586. "show": false,
  1587. "thresholdLabels": false,
  1588. "thresholdMarkers": true
  1589. },
  1590. "height": "1px",
  1591. "hideTimeOverride": true,
  1592. "id": 12,
  1593. "interval": null,
  1594. "isNew": true,
  1595. "links": [],
  1596. "mappingType": 1,
  1597. "mappingTypes": [{
  1598. "name": "value to text",
  1599. "value": 1
  1600. }, {
  1601. "name": "range to text",
  1602. "value": 2
  1603. }],
  1604. "maxDataPoints": 100,
  1605. "nullPointMode": "connected",
  1606. "nullText": null,
  1607. "postfix": " cores",
  1608. "postfixFontSize": "30%",
  1609. "prefix": "",
  1610. "prefixFontSize": "50%",
  1611. "rangeMaps": [{
  1612. "from": "null",
  1613. "text": "N/A",
  1614. "to": "null"
  1615. }],
  1616. "span": 2,
  1617. "sparkline": {
  1618. "fillColor": "rgba(31, 118, 189, 0.18)",
  1619. "full": false,
  1620. "lineColor": "rgb(31, 120, 193)",
  1621. "show": false
  1622. },
  1623. "targets": [{
  1624. "expr": "sum (machine_cpu_cores{instance=~\"^$instance$\"})",
  1625. "interval": "10s",
  1626. "intervalFactor": 1,
  1627. "refId": "A",
  1628. "step": 10
  1629. }],
  1630. "thresholds": "",
  1631. "timeFrom": "1m",
  1632. "title": "Total",
  1633. "type": "singlestat",
  1634. "valueFontSize": "50%",
  1635. "valueMaps": [{
  1636. "op": "=",
  1637. "text": "N/A",
  1638. "value": "null"
  1639. }],
  1640. "valueName": "current"
  1641. }, {
  1642. "cacheTimeout": null,
  1643. "colorBackground": false,
  1644. "colorValue": false,
  1645. "colors": [
  1646. "rgba(50, 172, 45, 0.97)",
  1647. "rgba(237, 129, 40, 0.89)",
  1648. "rgba(245, 54, 54, 0.9)"
  1649. ],
  1650. "datasource": "${DS_PROMETHEUS}",
  1651. "decimals": 2,
  1652. "editable": true,
  1653. "error": false,
  1654. "format": "bytes",
  1655. "gauge": {
  1656. "maxValue": 100,
  1657. "minValue": 0,
  1658. "show": false,
  1659. "thresholdLabels": false,
  1660. "thresholdMarkers": true
  1661. },
  1662. "height": "1px",
  1663. "hideTimeOverride": true,
  1664. "id": 13,
  1665. "interval": null,
  1666. "isNew": true,
  1667. "links": [],
  1668. "mappingType": 1,
  1669. "mappingTypes": [{
  1670. "name": "value to text",
  1671. "value": 1
  1672. }, {
  1673. "name": "range to text",
  1674. "value": 2
  1675. }],
  1676. "maxDataPoints": 100,
  1677. "nullPointMode": "connected",
  1678. "nullText": null,
  1679. "postfix": "",
  1680. "postfixFontSize": "50%",
  1681. "prefix": "",
  1682. "prefixFontSize": "50%",
  1683. "rangeMaps": [{
  1684. "from": "null",
  1685. "text": "N/A",
  1686. "to": "null"
  1687. }],
  1688. "span": 2,
  1689. "sparkline": {
  1690. "fillColor": "rgba(31, 118, 189, 0.18)",
  1691. "full": false,
  1692. "lineColor": "rgb(31, 120, 193)",
  1693. "show": false
  1694. },
  1695. "targets": [{
  1696. "expr": "sum(container_fs_usage_bytes{id=\"/\",instance=~\"^$instance$\"})",
  1697. "interval": "10s",
  1698. "intervalFactor": 1,
  1699. "refId": "A",
  1700. "step": 10
  1701. }],
  1702. "thresholds": "",
  1703. "timeFrom": "1m",
  1704. "title": "Used",
  1705. "type": "singlestat",
  1706. "valueFontSize": "50%",
  1707. "valueMaps": [{
  1708. "op": "=",
  1709. "text": "N/A",
  1710. "value": "null"
  1711. }],
  1712. "valueName": "current"
  1713. }, {
  1714. "cacheTimeout": null,
  1715. "colorBackground": false,
  1716. "colorValue": false,
  1717. "colors": [
  1718. "rgba(50, 172, 45, 0.97)",
  1719. "rgba(237, 129, 40, 0.89)",
  1720. "rgba(245, 54, 54, 0.9)"
  1721. ],
  1722. "datasource": "${DS_PROMETHEUS}",
  1723. "decimals": 2,
  1724. "editable": true,
  1725. "error": false,
  1726. "format": "bytes",
  1727. "gauge": {
  1728. "maxValue": 100,
  1729. "minValue": 0,
  1730. "show": false,
  1731. "thresholdLabels": false,
  1732. "thresholdMarkers": true
  1733. },
  1734. "height": "1px",
  1735. "hideTimeOverride": true,
  1736. "id": 14,
  1737. "interval": null,
  1738. "isNew": true,
  1739. "links": [],
  1740. "mappingType": 1,
  1741. "mappingTypes": [{
  1742. "name": "value to text",
  1743. "value": 1
  1744. }, {
  1745. "name": "range to text",
  1746. "value": 2
  1747. }],
  1748. "maxDataPoints": 100,
  1749. "nullPointMode": "connected",
  1750. "nullText": null,
  1751. "postfix": "",
  1752. "postfixFontSize": "50%",
  1753. "prefix": "",
  1754. "prefixFontSize": "50%",
  1755. "rangeMaps": [{
  1756. "from": "null",
  1757. "text": "N/A",
  1758. "to": "null"
  1759. }],
  1760. "span": 2,
  1761. "sparkline": {
  1762. "fillColor": "rgba(31, 118, 189, 0.18)",
  1763. "full": false,
  1764. "lineColor": "rgb(31, 120, 193)",
  1765. "show": false
  1766. },
  1767. "targets": [{
  1768. "expr": "sum (container_fs_limit_bytes{id=\"/\",instance=~\"^$instance$\"})",
  1769. "interval": "10s",
  1770. "intervalFactor": 1,
  1771. "refId": "A",
  1772. "step": 10
  1773. }],
  1774. "thresholds": "",
  1775. "timeFrom": "1m",
  1776. "title": "Total",
  1777. "type": "singlestat",
  1778. "valueFontSize": "50%",
  1779. "valueMaps": [{
  1780. "op": "=",
  1781. "text": "N/A",
  1782. "value": "null"
  1783. }],
  1784. "valueName": "current"
  1785. }, {
  1786. "aliasColors": {},
  1787. "bars": false,
  1788. "datasource": "${DS_PROMETHEUS}",
  1789. "decimals": 2,
  1790. "editable": true,
  1791. "error": false,
  1792. "fill": 1,
  1793. "grid": {
  1794. "threshold1": null,
  1795. "threshold1Color": "rgba(216, 200, 27, 0.27)",
  1796. "threshold2": null,
  1797. "threshold2Color": "rgba(234, 112, 112, 0.22)",
  1798. "thresholdLine": false
  1799. },
  1800. "height": "200px",
  1801. "id": 32,
  1802. "isNew": true,
  1803. "legend": {
  1804. "alignAsTable": true,
  1805. "avg": true,
  1806. "current": true,
  1807. "max": false,
  1808. "min": false,
  1809. "rightSide": true,
  1810. "show": true,
  1811. "sideWidth": 200,
  1812. "sort": "current",
  1813. "sortDesc": true,
  1814. "total": false,
  1815. "values": true
  1816. },
  1817. "lines": true,
  1818. "linewidth": 2,
  1819. "links": [],
  1820. "nullPointMode": "connected",
  1821. "percentage": false,
  1822. "pointradius": 5,
  1823. "points": false,
  1824. "renderer": "flot",
  1825. "seriesOverrides": [],
  1826. "span": 12,
  1827. "stack": false,
  1828. "steppedLine": false,
  1829. "targets": [{
  1830. "expr": "sum(rate(container_network_receive_bytes_total{instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m]))",
  1831. "interval": "",
  1832. "intervalFactor": 2,
  1833. "legendFormat": "receive",
  1834. "metric": "network",
  1835. "refId": "A",
  1836. "step": 240
  1837. }, {
  1838. "expr": "- sum(rate(container_network_transmit_bytes_total{instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m]))",
  1839. "interval": "",
  1840. "intervalFactor": 2,
  1841. "legendFormat": "transmit",
  1842. "metric": "network",
  1843. "refId": "B",
  1844. "step": 240
  1845. }],
  1846. "timeFrom": null,
  1847. "timeShift": null,
  1848. "title": "Network",
  1849. "tooltip": {
  1850. "msResolution": false,
  1851. "shared": true,
  1852. "sort": 0,
  1853. "value_type": "cumulative"
  1854. },
  1855. "transparent": false,
  1856. "type": "graph",
  1857. "xaxis": {
  1858. "show": true
  1859. },
  1860. "yaxes": [{
  1861. "format": "Bps",
  1862. "label": "transmit / receive",
  1863. "logBase": 1,
  1864. "max": null,
  1865. "min": null,
  1866. "show": true
  1867. }, {
  1868. "format": "Bps",
  1869. "label": null,
  1870. "logBase": 1,
  1871. "max": null,
  1872. "min": null,
  1873. "show": false
  1874. }]
  1875. }],
  1876. "showTitle": true,
  1877. "title": "all pods"
  1878. }, {
  1879. "collapse": false,
  1880. "editable": true,
  1881. "height": "250px",
  1882. "panels": [{
  1883. "aliasColors": {},
  1884. "bars": false,
  1885. "datasource": "${DS_PROMETHEUS}",
  1886. "decimals": 3,
  1887. "editable": true,
  1888. "error": false,
  1889. "fill": 0,
  1890. "grid": {
  1891. "threshold1": null,
  1892. "threshold1Color": "rgba(216, 200, 27, 0.27)",
  1893. "threshold2": null,
  1894. "threshold2Color": "rgba(234, 112, 112, 0.22)"
  1895. },
  1896. "height": "",
  1897. "id": 17,
  1898. "isNew": true,
  1899. "legend": {
  1900. "alignAsTable": true,
  1901. "avg": true,
  1902. "current": true,
  1903. "hideEmpty": true,
  1904. "hideZero": true,
  1905. "max": false,
  1906. "min": false,
  1907. "rightSide": true,
  1908. "show": true,
  1909. "sideWidth": null,
  1910. "sort": "current",
  1911. "sortDesc": true,
  1912. "total": false,
  1913. "values": true
  1914. },
  1915. "lines": true,
  1916. "linewidth": 2,
  1917. "links": [],
  1918. "nullPointMode": "connected",
  1919. "percentage": false,
  1920. "pointradius": 5,
  1921. "points": false,
  1922. "renderer": "flot",
  1923. "seriesOverrides": [],
  1924. "span": 12,
  1925. "stack": false,
  1926. "steppedLine": false,
  1927. "targets": [{
  1928. "expr": "sum(rate(container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod_name)",
  1929. "interval": "",
  1930. "intervalFactor": 2,
  1931. "legendFormat": "{{ pod_name }}",
  1932. "metric": "container_cpu",
  1933. "refId": "A",
  1934. "step": 240
  1935. }],
  1936. "timeFrom": null,
  1937. "timeShift": null,
  1938. "title": "Cpu Usage",
  1939. "tooltip": {
  1940. "msResolution": true,
  1941. "shared": false,
  1942. "sort": 2,
  1943. "value_type": "cumulative"
  1944. },
  1945. "transparent": false,
  1946. "type": "graph",
  1947. "xaxis": {
  1948. "show": true
  1949. },
  1950. "yaxes": [{
  1951. "format": "none",
  1952. "label": "cores",
  1953. "logBase": 1,
  1954. "max": null,
  1955. "min": null,
  1956. "show": true
  1957. }, {
  1958. "format": "short",
  1959. "label": null,
  1960. "logBase": 1,
  1961. "max": null,
  1962. "min": null,
  1963. "show": false
  1964. }]
  1965. }, {
  1966. "aliasColors": {},
  1967. "bars": false,
  1968. "datasource": "${DS_PROMETHEUS}",
  1969. "decimals": 2,
  1970. "editable": true,
  1971. "error": false,
  1972. "fill": 0,
  1973. "grid": {
  1974. "threshold1": null,
  1975. "threshold1Color": "rgba(216, 200, 27, 0.27)",
  1976. "threshold2": null,
  1977. "threshold2Color": "rgba(234, 112, 112, 0.22)"
  1978. },
  1979. "id": 33,
  1980. "isNew": true,
  1981. "legend": {
  1982. "alignAsTable": true,
  1983. "avg": true,
  1984. "current": true,
  1985. "hideEmpty": true,
  1986. "hideZero": true,
  1987. "max": false,
  1988. "min": false,
  1989. "rightSide": true,
  1990. "show": true,
  1991. "sideWidth": null,
  1992. "sort": "current",
  1993. "sortDesc": true,
  1994. "total": false,
  1995. "values": true
  1996. },
  1997. "lines": true,
  1998. "linewidth": 2,
  1999. "links": [],
  2000. "nullPointMode": "null",
  2001. "percentage": false,
  2002. "pointradius": 5,
  2003. "points": false,
  2004. "renderer": "flot",
  2005. "seriesOverrides": [],
  2006. "span": 12,
  2007. "stack": false,
  2008. "steppedLine": false,
  2009. "targets": [{
  2010. "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}) by (pod_name)",
  2011. "interval": "",
  2012. "intervalFactor": 2,
  2013. "legendFormat": "{{ pod_name }}",
  2014. "metric": "",
  2015. "refId": "A",
  2016. "step": 240
  2017. }],
  2018. "timeFrom": null,
  2019. "timeShift": null,
  2020. "title": "Memory Working Set",
  2021. "tooltip": {
  2022. "msResolution": false,
  2023. "shared": false,
  2024. "sort": 2,
  2025. "value_type": "cumulative"
  2026. },
  2027. "type": "graph",
  2028. "xaxis": {
  2029. "show": true
  2030. },
  2031. "yaxes": [{
  2032. "format": "bytes",
  2033. "label": "used",
  2034. "logBase": 1,
  2035. "max": null,
  2036. "min": null,
  2037. "show": true
  2038. }, {
  2039. "format": "short",
  2040. "label": null,
  2041. "logBase": 1,
  2042. "max": null,
  2043. "min": null,
  2044. "show": false
  2045. }]
  2046. }, {
  2047. "aliasColors": {},
  2048. "bars": false,
  2049. "datasource": "${DS_PROMETHEUS}",
  2050. "decimals": 2,
  2051. "editable": true,
  2052. "error": false,
  2053. "fill": 1,
  2054. "grid": {
  2055. "threshold1": null,
  2056. "threshold1Color": "rgba(216, 200, 27, 0.27)",
  2057. "threshold2": null,
  2058. "threshold2Color": "rgba(234, 112, 112, 0.22)"
  2059. },
  2060. "id": 16,
  2061. "isNew": true,
  2062. "legend": {
  2063. "alignAsTable": true,
  2064. "avg": true,
  2065. "current": true,
  2066. "hideEmpty": true,
  2067. "hideZero": true,
  2068. "max": false,
  2069. "min": false,
  2070. "rightSide": true,
  2071. "show": true,
  2072. "sideWidth": 200,
  2073. "sort": "avg",
  2074. "sortDesc": true,
  2075. "total": false,
  2076. "values": true
  2077. },
  2078. "lines": true,
  2079. "linewidth": 2,
  2080. "links": [],
  2081. "nullPointMode": "null",
  2082. "percentage": false,
  2083. "pointradius": 5,
  2084. "points": false,
  2085. "renderer": "flot",
  2086. "seriesOverrides": [],
  2087. "span": 12,
  2088. "stack": false,
  2089. "steppedLine": false,
  2090. "targets": [{
  2091. "expr": "sum (rate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod_name)",
  2092. "interval": "",
  2093. "intervalFactor": 2,
  2094. "legendFormat": "{{ pod_name }} < in",
  2095. "metric": "network",
  2096. "refId": "A",
  2097. "step": 240
  2098. }, {
  2099. "expr": "- sum (rate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod_name)",
  2100. "interval": "",
  2101. "intervalFactor": 2,
  2102. "legendFormat": "{{ pod_name }} > out",
  2103. "metric": "network",
  2104. "refId": "B",
  2105. "step": 240
  2106. }],
  2107. "timeFrom": null,
  2108. "timeShift": null,
  2109. "title": "Network",
  2110. "tooltip": {
  2111. "msResolution": false,
  2112. "shared": false,
  2113. "sort": 2,
  2114. "value_type": "cumulative"
  2115. },
  2116. "type": "graph",
  2117. "xaxis": {
  2118. "show": true
  2119. },
  2120. "yaxes": [{
  2121. "format": "Bps",
  2122. "label": "transmit / receive",
  2123. "logBase": 1,
  2124. "max": null,
  2125. "min": null,
  2126. "show": true
  2127. }, {
  2128. "format": "short",
  2129. "label": null,
  2130. "logBase": 1,
  2131. "max": null,
  2132. "min": null,
  2133. "show": false
  2134. }]
  2135. }, {
  2136. "aliasColors": {},
  2137. "bars": false,
  2138. "datasource": "${DS_PROMETHEUS}",
  2139. "decimals": 2,
  2140. "editable": true,
  2141. "error": false,
  2142. "fill": 1,
  2143. "grid": {
  2144. "threshold1": null,
  2145. "threshold1Color": "rgba(216, 200, 27, 0.27)",
  2146. "threshold2": null,
  2147. "threshold2Color": "rgba(234, 112, 112, 0.22)"
  2148. },
  2149. "id": 34,
  2150. "isNew": true,
  2151. "legend": {
  2152. "alignAsTable": true,
  2153. "avg": true,
  2154. "current": true,
  2155. "hideEmpty": true,
  2156. "hideZero": true,
  2157. "max": false,
  2158. "min": false,
  2159. "rightSide": true,
  2160. "show": true,
  2161. "sideWidth": 200,
  2162. "sort": "current",
  2163. "sortDesc": true,
  2164. "total": false,
  2165. "values": true
  2166. },
  2167. "lines": true,
  2168. "linewidth": 2,
  2169. "links": [],
  2170. "nullPointMode": "null",
  2171. "percentage": false,
  2172. "pointradius": 5,
  2173. "points": false,
  2174. "renderer": "flot",
  2175. "seriesOverrides": [],
  2176. "span": 12,
  2177. "stack": false,
  2178. "steppedLine": false,
  2179. "targets": [{
  2180. "expr": "sum(container_fs_usage_bytes{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}) by (pod_name)",
  2181. "interval": "",
  2182. "intervalFactor": 2,
  2183. "legendFormat": "{{ pod_name }}",
  2184. "metric": "network",
  2185. "refId": "A",
  2186. "step": 240
  2187. }],
  2188. "timeFrom": null,
  2189. "timeShift": null,
  2190. "title": "Filesystem",
  2191. "tooltip": {
  2192. "msResolution": false,
  2193. "shared": false,
  2194. "sort": 2,
  2195. "value_type": "cumulative"
  2196. },
  2197. "type": "graph",
  2198. "xaxis": {
  2199. "show": true
  2200. },
  2201. "yaxes": [{
  2202. "format": "bytes",
  2203. "label": "used",
  2204. "logBase": 1,
  2205. "max": null,
  2206. "min": null,
  2207. "show": true
  2208. }, {
  2209. "format": "short",
  2210. "label": null,
  2211. "logBase": 1,
  2212. "max": null,
  2213. "min": null,
  2214. "show": false
  2215. }]
  2216. }],
  2217. "showTitle": true,
  2218. "title": "each pod"
  2219. }],
  2220. "time": {
  2221. "from": "now-3d",
  2222. "to": "now"
  2223. },
  2224. "timepicker": {
  2225. "refresh_intervals": [
  2226. "5s",
  2227. "10s",
  2228. "30s",
  2229. "1m",
  2230. "5m",
  2231. "15m",
  2232. "30m",
  2233. "1h",
  2234. "2h",
  2235. "1d"
  2236. ],
  2237. "time_options": [
  2238. "5m",
  2239. "15m",
  2240. "1h",
  2241. "6h",
  2242. "12h",
  2243. "24h",
  2244. "2d",
  2245. "7d",
  2246. "30d"
  2247. ]
  2248. },
  2249. "templating": {
  2250. "list": [{
  2251. "allValue": ".*",
  2252. "current": {},
  2253. "datasource": "${DS_PROMETHEUS}",
  2254. "hide": 0,
  2255. "includeAll": true,
  2256. "label": "Instance",
  2257. "multi": false,
  2258. "name": "instance",
  2259. "options": [],
  2260. "query": "label_values(instance)",
  2261. "refresh": 1,
  2262. "regex": "",
  2263. "type": "query"
  2264. }, {
  2265. "current": {},
  2266. "datasource": "${DS_PROMETHEUS}",
  2267. "hide": 0,
  2268. "includeAll": true,
  2269. "label": "Namespace",
  2270. "multi": true,
  2271. "name": "namespace",
  2272. "options": [],
  2273. "query": "label_values(namespace)",
  2274. "refresh": 1,
  2275. "regex": "",
  2276. "type": "query"
  2277. }]
  2278. },
  2279. "annotations": {
  2280. "list": []
  2281. },
  2282. "refresh": false,
  2283. "schemaVersion": 12,
  2284. "version": 8,
  2285. "links": [],
  2286. "gnetId": 737
  2287. }
  2288. prometheus-datasource.json: |
  2289. {
  2290. "name": "prometheus",
  2291. "type": "prometheus",
  2292. "url": "http://prometheus:9090",
  2293. "access": "proxy",
  2294. "basicAuth": false
  2295. }
  2296. kind: ConfigMap
  2297. metadata:
  2298. creationTimestamp: null
  2299. name: grafana-import-dashboards
  2300. namespace: monitoring
  2301. ---
  2302. apiVersion: batch/v1
  2303. kind: Job
  2304. metadata:
  2305. name: grafana-import-dashboards
  2306. namespace: monitoring
  2307. labels:
  2308. app: grafana
  2309. component: import-dashboards
  2310. spec:
  2311. template:
  2312. metadata:
  2313. name: grafana-import-dashboards
  2314. labels:
  2315. app: grafana
  2316. component: import-dashboards
  2317. spec:
  2318. serviceAccountName: prometheus-k8s
  2319. initContainers:
  2320. - name: wait-for-grafana
  2321. image: giantswarm/tiny-tools
  2322. args:
  2323. - /bin/sh
  2324. - -c
  2325. - >
  2326. set -x;
  2327. while [ $(curl -sw '%{http_code}' "http://grafana:3000" -o /dev/null) -ne 200]; do
  2328. echo '.'
  2329. sleep 15;
  2330. done
  2331. containers:
  2332. - name: grafana-import-dashboards
  2333. image: giantswarm/tiny-tools
  2334. command: ["/bin/sh", "-c"]
  2335. workingDir: /opt/grafana-import-dashboards
  2336. args:
  2337. - >
  2338. for file in *-datasource.json ; do
  2339. if [ -e "$file" ] ; then
  2340. echo "importing $file" &&
  2341. curl --silent --fail --show-error \
  2342. --request POST http://${GF_ADMIN_USER}:${GF_ADMIN_PASSWORD}@grafana:3000/api/datasources \
  2343. --header "Content-Type: application/json" \
  2344. --data-binary "@$file" ;
  2345. echo "" ;
  2346. fi
  2347. done ;
  2348. for file in *-dashboard.json ; do
  2349. if [ -e "$file" ] ; then
  2350. echo "importing $file" &&
  2351. ( echo '{"dashboard":'; \
  2352. cat "$file"; \
  2353. echo ',"overwrite":true,"inputs":[{"name":"DS_PROMETHEUS","type":"datasource","pluginId":"prometheus","value":"prometheus"}]}' ) \
  2354. | jq -c '.' \
  2355. | curl --silent --fail --show-error \
  2356. --request POST http://${GF_ADMIN_USER}:${GF_ADMIN_PASSWORD}@grafana:3000/api/dashboards/import \
  2357. --header "Content-Type: application/json" \
  2358. --data-binary "@-" ;
  2359. echo "" ;
  2360. fi
  2361. done
  2362.  
  2363. env:
  2364. - name: GF_ADMIN_USER
  2365. valueFrom:
  2366. secretKeyRef:
  2367. name: grafana
  2368. key: admin-username
  2369. - name: GF_ADMIN_PASSWORD
  2370. valueFrom:
  2371. secretKeyRef:
  2372. name: grafana
  2373. key: admin-password
  2374. volumeMounts:
  2375. - name: config-volume
  2376. mountPath: /opt/grafana-import-dashboards
  2377. restartPolicy: Never
  2378. volumes:
  2379. - name: config-volume
  2380. configMap:
  2381. name: grafana-import-dashboards
  2382. ---
  2383. # apiVersion: extensions/v1beta1
  2384. # kind: Ingress
  2385. # metadata:
  2386. # name: grafana
  2387. # namespace: monitoring
  2388. # spec:
  2389. # rules:
  2390. # - host: <yourchoice>.<cluster-id>.k8s.gigantic.io
  2391. # http:
  2392. # paths:
  2393. # - path: /
  2394. # backend:
  2395. # serviceName: grafana
  2396. # servicePort: 3000
  2397. ---
  2398. apiVersion: v1
  2399. kind: Secret
  2400. data:
  2401. admin-password: YWRtaW4=
  2402. admin-username: YWRtaW4=
  2403. metadata:
  2404. name: grafana
  2405. namespace: monitoring
  2406. type: Opaque
  2407. ---
  2408. apiVersion: v1
  2409. kind: Service
  2410. metadata:
  2411. name: grafana
  2412. namespace: monitoring
  2413. labels:
  2414. app: grafana
  2415. component: core
  2416. spec:
  2417. type: NodePort
  2418. ports:
  2419. - port: 3000
  2420. selector:
  2421. app: grafana
  2422. component: core
  2423. ---
  2424. apiVersion: v1
  2425. data:
  2426. prometheus.yaml: |
  2427. global:
  2428. scrape_interval: 10s
  2429. scrape_timeout: 10s
  2430. evaluation_interval: 10s
  2431. rule_files:
  2432. - "/etc/prometheus-rules/*.rules"
  2433. scrape_configs:
  2434.  
  2435. # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L37
  2436. - job_name: 'kubernetes-nodes'
  2437. tls_config:
  2438. ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  2439. bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  2440. kubernetes_sd_configs:
  2441. - role: node
  2442. relabel_configs:
  2443. - source_labels: [__address__]
  2444. regex: '(.*):10250'
  2445. replacement: '${1}:10255'
  2446. target_label: __address__
  2447.  
  2448. # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L79
  2449. - job_name: 'kubernetes-endpoints'
  2450. kubernetes_sd_configs:
  2451. - role: endpoints
  2452. relabel_configs:
  2453. - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
  2454. action: keep
  2455. regex: true
  2456. - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
  2457. action: replace
  2458. target_label: __scheme__
  2459. regex: (https?)
  2460. - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
  2461. action: replace
  2462. target_label: __metrics_path__
  2463. regex: (.+)
  2464. - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
  2465. action: replace
  2466. target_label: __address__
  2467. regex: (.+)(?::\d+);(\d+)
  2468. replacement: $1:$2
  2469. - action: labelmap
  2470. regex: __meta_kubernetes_service_label_(.+)
  2471. - source_labels: [__meta_kubernetes_namespace]
  2472. action: replace
  2473. target_label: kubernetes_namespace
  2474. - source_labels: [__meta_kubernetes_service_name]
  2475. action: replace
  2476. target_label: kubernetes_name
  2477.  
  2478. # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L119
  2479. - job_name: 'kubernetes-services'
  2480. metrics_path: /probe
  2481. params:
  2482. module: [http_2xx]
  2483. kubernetes_sd_configs:
  2484. - role: service
  2485. relabel_configs:
  2486. - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
  2487. action: keep
  2488. regex: true
  2489. - source_labels: [__address__]
  2490. target_label: __param_target
  2491. - target_label: __address__
  2492. replacement: blackbox
  2493. - source_labels: [__param_target]
  2494. target_label: instance
  2495. - action: labelmap
  2496. regex: __meta_kubernetes_service_label_(.+)
  2497. - source_labels: [__meta_kubernetes_namespace]
  2498. target_label: kubernetes_namespace
  2499. - source_labels: [__meta_kubernetes_service_name]
  2500. target_label: kubernetes_name
  2501.  
  2502. # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L156
  2503. - job_name: 'kubernetes-pods'
  2504. kubernetes_sd_configs:
  2505. - role: pod
  2506. relabel_configs:
  2507. - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
  2508. action: keep
  2509. regex: true
  2510. - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
  2511. action: replace
  2512. target_label: __metrics_path__
  2513. regex: (.+)
  2514. - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
  2515. action: replace
  2516. regex: (.+):(?:\d+);(\d+)
  2517. replacement: ${1}:${2}
  2518. target_label: __address__
  2519. - action: labelmap
  2520. regex: __meta_kubernetes_pod_label_(.+)
  2521. - source_labels: [__meta_kubernetes_namespace]
  2522. action: replace
  2523. target_label: kubernetes_namespace
  2524. - source_labels: [__meta_kubernetes_pod_name]
  2525. action: replace
  2526. target_label: kubernetes_pod_name
  2527. - source_labels: [__meta_kubernetes_pod_container_port_number]
  2528. action: keep
  2529. regex: 9\d{3}
  2530.  
  2531. - job_name: 'kubernetes-cadvisor'
  2532. scheme: https
  2533. tls_config:
  2534. ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  2535. bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  2536. kubernetes_sd_configs:
  2537. - role: node
  2538. relabel_configs:
  2539. - action: labelmap
  2540. - action: labelmap
  2541. regex: __meta_kubernetes_node_label_(.+)
  2542. - target_label: __address__
  2543. replacement: kubernetes.default.svc:443
  2544. - source_labels: [__meta_kubernetes_node_name]
  2545. regex: (.+)
  2546. target_label: __metrics_path__
  2547. replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
  2548.  
  2549. kind: ConfigMap
  2550. metadata:
  2551. creationTimestamp: null
  2552. name: prometheus-core
  2553. namespace: monitoring
  2554. ---
  2555. apiVersion: extensions/v1beta1
  2556. kind: Deployment
  2557. metadata:
  2558. name: prometheus-core
  2559. namespace: monitoring
  2560. labels:
  2561. app: prometheus
  2562. component: core
  2563. spec:
  2564. replicas: 1
  2565. template:
  2566. metadata:
  2567. name: prometheus-main
  2568. labels:
  2569. app: prometheus
  2570. component: core
  2571. spec:
  2572. serviceAccountName: prometheus-k8s
  2573. containers:
  2574. - name: prometheus
  2575. image: prom/prometheus:v1.7.0
  2576. args:
  2577. - '-storage.local.retention=12h'
  2578. - '-storage.local.memory-chunks=500000'
  2579. - '-config.file=/etc/prometheus/prometheus.yaml'
  2580. - '-alertmanager.url=http://alertmanager:9093/'
  2581. ports:
  2582. - name: webui
  2583. containerPort: 9090
  2584. resources:
  2585. requests:
  2586. cpu: 500m
  2587. memory: 500M
  2588. limits:
  2589. cpu: 500m
  2590. memory: 500M
  2591. volumeMounts:
  2592. - name: config-volume
  2593. mountPath: /etc/prometheus
  2594. - name: rules-volume
  2595. mountPath: /etc/prometheus-rules
  2596. volumes:
  2597. - name: config-volume
  2598. configMap:
  2599. name: prometheus-core
  2600. - name: rules-volume
  2601. configMap:
  2602. name: prometheus-rules
  2603. ---
  2604. apiVersion: extensions/v1beta1
  2605. kind: Deployment
  2606. metadata:
  2607. name: kube-state-metrics
  2608. namespace: monitoring
  2609. spec:
  2610. replicas: 1
  2611. template:
  2612. metadata:
  2613. labels:
  2614. app: kube-state-metrics
  2615. spec:
  2616. serviceAccountName: kube-state-metrics
  2617. containers:
  2618. - name: kube-state-metrics
  2619. image: gcr.io/google_containers/kube-state-metrics:v0.5.0
  2620. ports:
  2621. - containerPort: 8080
  2622. ---
  2623. # ---
  2624. # apiVersion: rbac.authorization.k8s.io/v1beta1
  2625. # kind: ClusterRoleBinding
  2626. # metadata:
  2627. # name: kube-state-metrics
  2628. # roleRef:
  2629. # apiGroup: rbac.authorization.k8s.io
  2630. # kind: ClusterRole
  2631. # name: kube-state-metrics
  2632. # subjects:
  2633. # - kind: ServiceAccount
  2634. # name: kube-state-metrics
  2635. # namespace: monitoring
  2636. # ---
  2637. # apiVersion: rbac.authorization.k8s.io/v1beta1
  2638. # kind: ClusterRole
  2639. # metadata:
  2640. # name: kube-state-metrics
  2641. # rules:
  2642. # - apiGroups: [""]
  2643. # resources:
  2644. # - nodes
  2645. # - pods
  2646. # - services
  2647. # - resourcequotas
  2648. # - replicationcontrollers
  2649. # - limitranges
  2650. # verbs: ["list", "watch"]
  2651. # - apiGroups: ["extensions"]
  2652. # resources:
  2653. # - daemonsets
  2654. # - deployments
  2655. # - replicasets
  2656. # verbs: ["list", "watch"]
  2657. # ---
  2658. apiVersion: v1
  2659. kind: ServiceAccount
  2660. metadata:
  2661. name: kube-state-metrics
  2662. namespace: monitoring
  2663. ---
  2664. apiVersion: v1
  2665. kind: Service
  2666. metadata:
  2667. annotations:
  2668. prometheus.io/scrape: 'true'
  2669. name: kube-state-metrics
  2670. namespace: monitoring
  2671. labels:
  2672. app: kube-state-metrics
  2673. spec:
  2674. ports:
  2675. - name: kube-state-metrics
  2676. port: 8080
  2677. protocol: TCP
  2678. selector:
  2679. app: kube-state-metrics
  2680.  
  2681. ---
  2682. apiVersion: extensions/v1beta1
  2683. kind: DaemonSet
  2684. metadata:
  2685. name: node-directory-size-metrics
  2686. namespace: monitoring
  2687. annotations:
  2688. description: |
  2689. This `DaemonSet` provides metrics in Prometheus format about disk usage on the nodes.
  2690. The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger then `100M` for now.
  2691. The other container `caddy` just hands out the contents of that file on request via `http` on `/metrics` at port `9102` which are the defaults for Prometheus.
  2692. These are scheduled on every node in the Kubernetes cluster.
  2693. To choose directories from the node to check, just mount them on the `read-du` container below `/mnt`.
  2694. spec:
  2695. template:
  2696. metadata:
  2697. labels:
  2698. app: node-directory-size-metrics
  2699. annotations:
  2700. prometheus.io/scrape: 'true'
  2701. prometheus.io/port: '9102'
  2702. description: |
  2703. This `Pod` provides metrics in Prometheus format about disk usage on the node.
  2704. The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger then `100M` for now.
  2705. The other container `caddy` just hands out the contents of that file on request on `/metrics` at port `9102` which are the defaults for Prometheus.
  2706. This `Pod` is scheduled on every node in the Kubernetes cluster.
  2707. To choose directories from the node to check just mount them on `read-du` below `/mnt`.
  2708. spec:
  2709. containers:
  2710. - name: read-du
  2711. image: giantswarm/tiny-tools
  2712. imagePullPolicy: Always
  2713. # FIXME threshold via env var
  2714. # The
  2715. command:
  2716. - fish
  2717. - --command
  2718. - |
  2719. touch /tmp/metrics-temp
  2720. while true
  2721. for directory in (du --bytes --separate-dirs --threshold=100M /mnt)
  2722. echo $directory | read size path
  2723. echo "node_directory_size_bytes{path=\"$path\"} $size" \
  2724. >> /tmp/metrics-temp
  2725. end
  2726. mv /tmp/metrics-temp /tmp/metrics
  2727. sleep 300
  2728. end
  2729. volumeMounts:
  2730. - name: host-fs-var
  2731. mountPath: /mnt/var
  2732. readOnly: true
  2733. - name: metrics
  2734. mountPath: /tmp
  2735. - name: caddy
  2736. image: dockermuenster/caddy:0.9.3
  2737. command:
  2738. - "caddy"
  2739. - "-port=9102"
  2740. - "-root=/var/www"
  2741. ports:
  2742. - containerPort: 9102
  2743. volumeMounts:
  2744. - name: metrics
  2745. mountPath: /var/www
  2746. volumes:
  2747. - name: host-fs-var
  2748. hostPath:
  2749. path: /var
  2750. - name: metrics
  2751. emptyDir:
  2752. medium: Memory
  2753. ---
  2754. apiVersion: extensions/v1beta1
  2755. kind: DaemonSet
  2756. metadata:
  2757. name: prometheus-node-exporter
  2758. namespace: monitoring
  2759. labels:
  2760. app: prometheus
  2761. component: node-exporter
  2762. spec:
  2763. template:
  2764. metadata:
  2765. name: prometheus-node-exporter
  2766. labels:
  2767. app: prometheus
  2768. component: node-exporter
  2769. spec:
  2770. containers:
  2771. - image: prom/node-exporter:v0.14.0
  2772. name: prometheus-node-exporter
  2773. ports:
  2774. - name: prom-node-exp
  2775. #^ must be an IANA_SVC_NAME (at most 15 characters, ..)
  2776. containerPort: 9100
  2777. hostPort: 9100
  2778. hostNetwork: true
  2779. hostPID: true
  2780. ---
  2781. apiVersion: v1
  2782. kind: Service
  2783. metadata:
  2784. annotations:
  2785. prometheus.io/scrape: 'true'
  2786. name: prometheus-node-exporter
  2787. namespace: monitoring
  2788. labels:
  2789. app: prometheus
  2790. component: node-exporter
  2791. spec:
  2792. clusterIP: None
  2793. ports:
  2794. - name: prometheus-node-exporter
  2795. port: 9100
  2796. protocol: TCP
  2797. selector:
  2798. app: prometheus
  2799. component: node-exporter
  2800. type: ClusterIP
  2801. ---
  2802. apiVersion: v1
  2803. data:
  2804. cpu-usage.rules: |
  2805. ALERT NodeCPUUsage
  2806. IF (100 - (avg by (instance) (irate(node_cpu{name="node-exporter",mode="idle"}[5m])) * 100)) > 75
  2807. FOR 2m
  2808. LABELS {
  2809. severity="page"
  2810. }
  2811. ANNOTATIONS {
  2812. SUMMARY = "{{$labels.instance}}: High CPU usage detected",
  2813. DESCRIPTION = "{{$labels.instance}}: CPU usage is above 75% (current value is: {{ $value }})"
  2814. }
  2815. instance-availability.rules: |
  2816. ALERT InstanceDown
  2817. IF up == 0
  2818. FOR 1m
  2819. LABELS { severity = "page" }
  2820. ANNOTATIONS {
  2821. summary = "Instance {{ $labels.instance }} down",
  2822. description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.",
  2823. }
  2824. low-disk-space.rules: |
  2825. ALERT NodeLowRootDisk
  2826. IF ((node_filesystem_size{mountpoint="/root-disk"} - node_filesystem_free{mountpoint="/root-disk"} ) / node_filesystem_size{mountpoint="/root-disk"} * 100) > 75
  2827. FOR 2m
  2828. LABELS {
  2829. severity="page"
  2830. }
  2831. ANNOTATIONS {
  2832. SUMMARY = "{{$labels.instance}}: Low root disk space",
  2833. DESCRIPTION = "{{$labels.instance}}: Root disk usage is above 75% (current value is: {{ $value }})"
  2834. }
  2835.  
  2836. ALERT NodeLowDataDisk
  2837. IF ((node_filesystem_size{mountpoint="/data-disk"} - node_filesystem_free{mountpoint="/data-disk"} ) / node_filesystem_size{mountpoint="/data-disk"} * 100) > 75
  2838. FOR 2m
  2839. LABELS {
  2840. severity="page"
  2841. }
  2842. ANNOTATIONS {
  2843. SUMMARY = "{{$labels.instance}}: Low data disk space",
  2844. DESCRIPTION = "{{$labels.instance}}: Data disk usage is above 75% (current value is: {{ $value }})"
  2845. }
  2846. mem-usage.rules: |
  2847. ALERT NodeSwapUsage
  2848. IF (((node_memory_SwapTotal-node_memory_SwapFree)/node_memory_SwapTotal)*100) > 75
  2849. FOR 2m
  2850. LABELS {
  2851. severity="page"
  2852. }
  2853. ANNOTATIONS {
  2854. SUMMARY = "{{$labels.instance}}: Swap usage detected",
  2855. DESCRIPTION = "{{$labels.instance}}: Swap usage usage is above 75% (current value is: {{ $value }})"
  2856. }
  2857.  
  2858. ALERT NodeMemoryUsage
  2859. IF (((node_memory_MemTotal-node_memory_MemFree-node_memory_Cached)/(node_memory_MemTotal)*100)) > 75
  2860. FOR 2m
  2861. LABELS {
  2862. severity="page"
  2863. }
  2864. ANNOTATIONS {
  2865. SUMMARY = "{{$labels.instance}}: High memory usage detected",
  2866. DESCRIPTION = "{{$labels.instance}}: Memory usage is above 75% (current value is: {{ $value }})"
  2867. }
  2868. kind: ConfigMap
  2869. metadata:
  2870. creationTimestamp: null
  2871. name: prometheus-rules
  2872. namespace: monitoring
  2873. ---
  2874. apiVersion: v1
  2875. kind: Service
  2876. metadata:
  2877. name: prometheus
  2878. namespace: monitoring
  2879. labels:
  2880. app: prometheus
  2881. component: core
  2882. annotations:
  2883. prometheus.io/scrape: 'true'
  2884. spec:
  2885. type: NodePort
  2886. ports:
  2887. - port: 9090
  2888. protocol: TCP
  2889. name: webui
  2890. selector:
  2891. app: prometheus
  2892. component: core
Add Comment
Please, Sign In to add comment