From a1f608ed7454aec467fabfd7fe4c785396ce1d56 Mon Sep 17 00:00:00 2001 From: Steve Wilkerson Date: Tue, 28 Nov 2017 17:27:59 -0600 Subject: [PATCH] Add Grafana chart to OSH infra Moves the grafana chart to OSH infra along with basic rbac rules that may be tightened with future work. Change-Id: Ie14627530a73d4b7b01eb93ca5f7174d99d9caec --- grafana/Chart.yaml | 24 + grafana/requirements.yaml | 18 + grafana/templates/bin/_datasource.sh.tpl | 24 + grafana/templates/bin/_grafana.sh.tpl | 29 + grafana/templates/configmap-bin.yaml | 31 + grafana/templates/configmap-etc.yaml | 31 + grafana/templates/deployment.yaml | 107 + grafana/templates/ingress-grafana.yaml | 60 + grafana/templates/job-image-repo-sync.yaml | 68 + .../templates/job-prometheus-datasource.yaml | 71 + grafana/templates/secret-admin-creds.yaml | 28 + grafana/templates/service-ingress.yaml | 32 + grafana/templates/service.yaml | 36 + grafana/values.yaml | 11973 ++++++++++++++++ tools/gate/chart-deploys/default.yaml | 15 + 15 files changed, 12547 insertions(+) create mode 100644 grafana/Chart.yaml create mode 100644 grafana/requirements.yaml create mode 100644 grafana/templates/bin/_datasource.sh.tpl create mode 100644 grafana/templates/bin/_grafana.sh.tpl create mode 100644 grafana/templates/configmap-bin.yaml create mode 100644 grafana/templates/configmap-etc.yaml create mode 100644 grafana/templates/deployment.yaml create mode 100644 grafana/templates/ingress-grafana.yaml create mode 100644 grafana/templates/job-image-repo-sync.yaml create mode 100644 grafana/templates/job-prometheus-datasource.yaml create mode 100644 grafana/templates/secret-admin-creds.yaml create mode 100644 grafana/templates/service-ingress.yaml create mode 100644 grafana/templates/service.yaml create mode 100644 grafana/values.yaml diff --git a/grafana/Chart.yaml b/grafana/Chart.yaml new file mode 100644 index 000000000..bb5921771 --- /dev/null +++ b/grafana/Chart.yaml @@ -0,0 +1,24 @@ +# Copyright 2017 The Openstack-Helm Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v1 +description: OpenStack-Helm Grafana +name: grafana +version: 0.1.0 +home: https://grafana.com/ +sources: + - https://github.com/grafana/grafana + - https://git.openstack.org/cgit/openstack/openstack-helm-addons +maintainers: + - name: OpenStack-Helm Authors diff --git a/grafana/requirements.yaml b/grafana/requirements.yaml new file mode 100644 index 000000000..53782e69b --- /dev/null +++ b/grafana/requirements.yaml @@ -0,0 +1,18 @@ +# Copyright 2017 The Openstack-Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +dependencies: + - name: helm-toolkit + repository: http://localhost:8879/charts + version: 0.1.0 diff --git a/grafana/templates/bin/_datasource.sh.tpl b/grafana/templates/bin/_datasource.sh.tpl new file mode 100644 index 000000000..4db9ec2cd --- /dev/null +++ b/grafana/templates/bin/_datasource.sh.tpl @@ -0,0 +1,24 @@ +#!/bin/bash +{{/* +Copyright 2017 The Openstack-Helm Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +set -e # xtrace intentionally off: it would print the admin password passed to curl + +exec curl --user "${GF_SECURITY_ADMIN_USER}:${GF_SECURITY_ADMIN_PASSWORD}" "http://${GRAFANA_SERVICE}:{{ .Values.network.grafana.port }}/api/datasources" \ + -H "Content-Type: application/json;charset=UTF-8" --data-binary \ + {{- with .Values.conf.datasource }} + "{\"name\":\"{{ .name }}\",\"type\":\"{{ .type }}\",\"url\":\"$PROMETHEUS_URL\",\"database\":\"{{ .database }}\",\"jsonData\":{ {{ .jsonData }} },\"access\":\"{{ .access }}\",\"isDefault\":{{ .isDefault }}}" + {{- end }} diff --git a/grafana/templates/bin/_grafana.sh.tpl b/grafana/templates/bin/_grafana.sh.tpl new file mode 100644 index 000000000..5213591fa --- /dev/null +++ b/grafana/templates/bin/_grafana.sh.tpl @@ -0,0 +1,29 @@ +#!/bin/bash +{{/* +Copyright 2017 The Openstack-Helm Authors. 
+*/}} + +set -ex +COMMAND="${@:-start}" + +function start () { + exec /usr/sbin/grafana-server -homepath=/usr/share/grafana -config=/etc/grafana/grafana.ini +} + +function stop () { + kill -TERM 1 +} + +$COMMAND diff --git a/grafana/templates/configmap-bin.yaml b/grafana/templates/configmap-bin.yaml new file mode 100644 index 000000000..e107bbbfe --- /dev/null +++ b/grafana/templates/configmap-bin.yaml @@ -0,0 +1,31 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.configmap_bin }} +{{- $envAll := . }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-bin +data: + image-repo-sync.sh: |+ +{{- include "helm-toolkit.scripts.image_repo_sync" . | indent 4 }} + datasource.sh: | +{{ tuple "bin/_datasource.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} + grafana.sh: | +{{ tuple "bin/_grafana.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} +{{- end }} diff --git a/grafana/templates/configmap-etc.yaml b/grafana/templates/configmap-etc.yaml new file mode 100644 index 000000000..db42d493e --- /dev/null +++ b/grafana/templates/configmap-etc.yaml @@ -0,0 +1,31 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.configmap_etc }} +{{- $envAll := . }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-etc +data: + grafana.ini: |+ +{{ include "helm-toolkit.utils.to_ini" .Values.conf.grafana | indent 4 }} +{{ range $key, $value := .Values.conf.dashboards }} + {{$key}}.json: |+ +{{ toJson $value | indent 4 }} +{{ end }} +{{- end }} diff --git a/grafana/templates/deployment.yaml b/grafana/templates/deployment.yaml new file mode 100644 index 000000000..2551856a8 --- /dev/null +++ b/grafana/templates/deployment.yaml @@ -0,0 +1,107 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.deployment }} +{{- $envAll := . 
}} +{{- if .Values.images.local_registry.active -}} +{{- $_ := set .Values "pod_dependency" (merge .Values.dependencies.grafana .Values.conditional_dependencies.local_image_registry) -}} +{{- else -}} +{{- $_ := set .Values "pod_dependency" .Values.dependencies.grafana -}} +{{- end -}} + +{{- $mounts_grafana := .Values.pod.mounts.grafana.grafana }} + +{{- $serviceAccountName := "grafana" }} +{{ tuple $envAll $envAll.Values.pod_dependency $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }} +--- +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: grafana +spec: + replicas: {{ .Values.pod.replicas.grafana }} +{{ tuple $envAll | include "helm-toolkit.snippets.kubernetes_upgrades_deployment" | indent 2 }} + template: + metadata: + labels: +{{ tuple $envAll "grafana" "dashboard" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }} + annotations: + configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }} + configmap-etc-hash: {{ tuple "configmap-etc.yaml" . 
| include "helm-toolkit.utils.hash" }} + spec: + serviceAccountName: {{ $serviceAccountName }} + initContainers: +{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }} + containers: + - name: grafana +{{ tuple $envAll "grafana" | include "helm-toolkit.snippets.image" | indent 10 }} +{{ tuple $envAll $envAll.Values.pod.resources.grafana | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} + command: + - /tmp/grafana.sh + - start + ports: + - name: dashboard + containerPort: {{ .Values.network.grafana.port }} + readinessProbe: + httpGet: + path: /login + port: {{ .Values.network.grafana.port }} + initialDelaySeconds: 30 + timeoutSeconds: 30 + env: + - name: GF_SECURITY_ADMIN_USER + valueFrom: + secretKeyRef: + name: grafana-admin-creds + key: GRAFANA_ADMIN_USERNAME + - name: GF_SECURITY_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: grafana-admin-creds + key: GRAFANA_ADMIN_PASSWORD + volumeMounts: + - name: pod-etc-grafana + mountPath: /etc/grafana + - name: grafana-bin + mountPath: /tmp/grafana.sh + subPath: grafana.sh + readOnly: true + - name: grafana-etc + mountPath: /etc/grafana/grafana.ini + subPath: grafana.ini + - name: data + mountPath: /var/lib/grafana/data + {{- range $key, $value := .Values.conf.dashboards }} + - name: grafana-etc + mountPath: /var/lib/grafana/dashboards/{{$key}}.json + subPath: {{$key}}.json + {{- end }} +{{ if $mounts_grafana.volumeMounts }}{{ toYaml $mounts_grafana.volumeMounts | indent 10 }}{{ end }} + volumes: + - name: pod-etc-grafana + emptyDir: {} + - name: grafana-bin + configMap: + name: grafana-bin + defaultMode: 0555 + - name: grafana-etc + configMap: + name: grafana-etc + defaultMode: 0444 + - name: data + emptyDir: {} +{{ if $mounts_grafana.volumes }}{{ toYaml $mounts_grafana.volumes | indent 8 }}{{ end }} +{{- end }} diff --git a/grafana/templates/ingress-grafana.yaml b/grafana/templates/ingress-grafana.yaml new file mode 100644 index 000000000..43d6a6216 
--- /dev/null +++ b/grafana/templates/ingress-grafana.yaml @@ -0,0 +1,60 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.ingress }} +{{- $envAll := . }} +{{- if .Values.network.grafana.ingress.public }} +{{- $backendServiceType := "grafana" }} +{{- $backendPort := "dashboard" }} +{{- $ingressName := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} +{{- $backendName := tuple $backendServiceType "internal" $envAll | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} +{{- $hostName := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} +{{- $hostNameNamespaced := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_namespaced_endpoint_lookup" }} +{{- $hostNameFull := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_fqdn_endpoint_lookup" }} +--- +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: {{ $ingressName }} + annotations: + kubernetes.io/ingress.class: "nginx" + ingress.kubernetes.io/rewrite-target: / + ingress.kubernetes.io/proxy-body-size: {{ .Values.network.grafana.ingress.proxy_body_size }} +spec: + rules: +{{ if ne $hostNameNamespaced $hostNameFull }} +{{- range $key1, $vHost := tuple $hostName $hostNameNamespaced $hostNameFull }} + - host: {{ $vHost }} + http: + paths: + - 
path: / + backend: + serviceName: {{ $backendName }} + servicePort: {{ $backendPort }} +{{- end }} +{{- else }} +{{- range $key1, $vHost := tuple $hostName $hostNameNamespaced }} + - host: {{ $vHost }} + http: + paths: + - path: / + backend: + serviceName: {{ $backendName }} + servicePort: {{ $backendPort }} +{{- end }} +{{- end }} +{{- end }} +{{- end }} diff --git a/grafana/templates/job-image-repo-sync.yaml b/grafana/templates/job-image-repo-sync.yaml new file mode 100644 index 000000000..55a994c55 --- /dev/null +++ b/grafana/templates/job-image-repo-sync.yaml @@ -0,0 +1,68 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.job_image_repo_sync }} +{{- $envAll := . 
}} +{{- if .Values.images.local_registry.active -}} +{{- $_ := set .Values "pod_dependency" .Values.dependencies.image_repo_sync -}} + +{{- $serviceAccountName := "grafana-image-repo-sync" }} +{{ tuple $envAll $envAll.Values.pod_dependency $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: grafana-image-repo-sync +spec: + template: + metadata: + labels: +{{ tuple $envAll "grafana" "image-repo-sync" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }} + spec: + serviceAccountName: {{ $serviceAccountName }} + restartPolicy: OnFailure + nodeSelector: + {{ .Values.labels.jobs.node_selector_key }}: {{ .Values.labels.jobs.node_selector_value }} + initContainers: +{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }} + containers: + - name: image-repo-sync +{{ tuple $envAll "image_repo_sync" | include "helm-toolkit.snippets.image" | indent 10 }} +{{ tuple $envAll $envAll.Values.pod.resources.jobs.image_repo_sync | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} + env: + - name: LOCAL_REPO + value: "{{ tuple "local_image_registry" "node" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}:{{ tuple "local_image_registry" "node" "registry" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}" + - name: IMAGE_SYNC_LIST + value: "{{ include "helm-toolkit.utils.image_sync_list" . 
}}" + command: + - /tmp/image-repo-sync.sh + volumeMounts: + - name: grafana-bin + mountPath: /tmp/image-repo-sync.sh + subPath: image-repo-sync.sh + readOnly: true + - name: docker-socket + mountPath: /var/run/docker.sock + volumes: + - name: grafana-bin + configMap: + name: grafana-bin + defaultMode: 0555 + - name: docker-socket + hostPath: + path: /var/run/docker.sock +{{- end }} +{{- end }} diff --git a/grafana/templates/job-prometheus-datasource.yaml b/grafana/templates/job-prometheus-datasource.yaml new file mode 100644 index 000000000..45221f555 --- /dev/null +++ b/grafana/templates/job-prometheus-datasource.yaml @@ -0,0 +1,71 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.job_datasource }} +{{- $envAll := . 
}} + +{{- $_ := set .Values "pod_dependency" .Values.dependencies.register_datasource -}} +{{- $serviceAccountName := "grafana-register-datasource" }} +{{ tuple $envAll $envAll.Values.pod_dependency $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: grafana-register-datasource +spec: + template: + metadata: + labels: +{{ tuple $envAll "grafana" "datasource" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }} + spec: + serviceAccountName: {{ $serviceAccountName }} + restartPolicy: OnFailure + nodeSelector: + {{ .Values.labels.jobs.node_selector_key }}: {{ .Values.labels.jobs.node_selector_value }} + initContainers: +{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }} + containers: + - name: grafana-datasource +{{ tuple $envAll "datasource" | include "helm-toolkit.snippets.image" | indent 10 }} +{{ tuple $envAll $envAll.Values.pod.resources.jobs.datasource | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} + command: + - /tmp/datasource.sh + env: + - name: GF_SECURITY_ADMIN_USER + valueFrom: + secretKeyRef: + name: grafana-admin-creds + key: GRAFANA_ADMIN_USERNAME + - name: GF_SECURITY_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: grafana-admin-creds + key: GRAFANA_ADMIN_PASSWORD + - name: GRAFANA_SERVICE + value: {{ tuple "grafana" "internal" $envAll | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} + - name: PROMETHEUS_URL + value: {{ tuple "monitoring" "internal" "api" $envAll | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" }} + volumeMounts: + - name: grafana-bin + mountPath: /tmp/datasource.sh + subPath: datasource.sh + readOnly: true + volumes: + - name: grafana-bin + configMap: + name: grafana-bin + defaultMode: 0555 +{{- end }} diff --git a/grafana/templates/secret-admin-creds.yaml 
b/grafana/templates/secret-admin-creds.yaml new file mode 100644 index 000000000..2cb168d47 --- /dev/null +++ b/grafana/templates/secret-admin-creds.yaml @@ -0,0 +1,28 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.secret_admin }} +{{- $envAll := . }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: grafana-admin-creds +type: Opaque +data: + GRAFANA_ADMIN_PASSWORD: {{ .Values.endpoints.grafana.auth.admin.password | b64enc }} + GRAFANA_ADMIN_USERNAME: {{ .Values.endpoints.grafana.auth.admin.username | b64enc }} +{{- end }} diff --git a/grafana/templates/service-ingress.yaml b/grafana/templates/service-ingress.yaml new file mode 100644 index 000000000..5dbb337dd --- /dev/null +++ b/grafana/templates/service-ingress.yaml @@ -0,0 +1,32 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.service_ingress }} +{{- $envAll := . 
}} +{{- if .Values.network.grafana.ingress.public }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ tuple "grafana" "public" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} +spec: + ports: + - name: http + port: 80 + selector: + app: ingress-api +{{- end }} +{{- end }} diff --git a/grafana/templates/service.yaml b/grafana/templates/service.yaml new file mode 100644 index 000000000..3255f7ae3 --- /dev/null +++ b/grafana/templates/service.yaml @@ -0,0 +1,36 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.service }} +{{- $envAll := . }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ tuple "grafana" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} +spec: + ports: + - name: dashboard + port: {{ .Values.network.grafana.port }} + {{ if .Values.network.grafana.node_port.enabled }} + nodePort: {{ .Values.network.grafana.node_port.port }} + {{ end }} + selector: +{{ tuple $envAll "grafana" "dashboard" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }} + {{ if .Values.network.grafana.node_port.enabled }} + type: NodePort + {{ end }} +{{- end }} diff --git a/grafana/values.yaml b/grafana/values.yaml new file mode 100644 index 000000000..72c800324 --- /dev/null +++ b/grafana/values.yaml @@ -0,0 +1,11973 @@ +# Copyright 2017 The Openstack-Helm Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default values for grafana +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +images: + tags: + grafana: docker.io/grafana/grafana:4.5.2 + datasource: docker.io/kolla/ubuntu-source-heat-engine:3.0.3 + dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.2.1 + image_repo_sync: docker.io/docker:17.07.0 + pull_policy: IfNotPresent + local_registry: + active: false + exclude: + - dep_check + - image_repo_sync + +labels: + jobs: + node_selector_key: openstack-control-plane + node_selector_value: enabled + +pod: + affinity: + anti: + type: + default: preferredDuringSchedulingIgnoredDuringExecution + topologyKey: + default: kubernetes.io/hostname + mounts: + grafana: + init_container: null + grafana: + replicas: + grafana: 1 + lifecycle: + upgrades: + deployments: + revision_history: 3 + pod_replacement_strategy: RollingUpdate + rolling_update: + max_unavailable: 1 + max_surge: 3 + termination_grace_period: + grafana: + timeout: 600 + resources: + enabled: false + jobs: + image_repo_sync: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + datasource: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + grafana: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + +endpoints: + cluster_domain_suffix: cluster.local + grafana: + name: grafana + namespace: null + auth: + 
admin: + username: admin + password: admin + hosts: + default: grafana-dashboard + public: grafana + host_fqdn_override: + default: null + path: + default: null + scheme: + default: http + port: + grafana: + default: 3000 + monitoring: + name: prometheus + namespace: null + hosts: + default: prom-metrics + public: prometheus + host_fqdn_override: + default: null + path: + default: null + scheme: + default: http + port: + api: + default: 9090 + public: 80 + +dependencies: + register_datasource: + jobs: + services: + - service: grafana + endpoint: internal + grafana: + services: null + image_repo_sync: + services: + - service: local_image_registry + endpoint: internal + +conditional_dependencies: + local_image_registry: + jobs: + - grafana-image-repo-sync + services: + - service: local_image_registry + endpoint: node + +network: + grafana: + port: 3000 + node_port: + enabled: false + port: 30902 + ingress: + public: true + proxy_body_size: 1024M + + +manifests: + configmap_bin: true + configmap_dashboards: true + configmap_etc: true + deployment: true + ingress: true + job_datasource: true + job_image_repo_sync: true + secret_admin: true + service: true + service_ingress: true + +conf: + datasource: + name: prometheus + type: prometheus + database: + access: proxy + isDefault: true + grafana: + paths: + data: /var/lib/grafana/data + plugins: /var/lib/grafana/plugins + server: + protocol: http + http_port: 3000 + session: + provider: file + provider_config: sessions + cookie_name: grafana_sess + cookie_secure: false + session_life_time: 86400 + security: + admin_user: ${GF_SECURITY_ADMIN_USER} + admin_password: ${GF_SECURITY_ADMIN_PASSWORD} + cookie_username: grafana_user + cookie_remember_name: grafana_remember + login_remember_days: 7 + users: + allow_sign_up: false + allow_org_create: false + auto_assign_org: true + auto_assign_org_role: Admin + default_theme: dark + log: + mode: console + level: info + log.console: + level: info + format: console + 
dashboards.json: + enabled: true + path: /var/lib/grafana/dashboards + grafana_net: + url: https://grafana.net + dashboards: + ceph_cluster: + __inputs: + - name: prometheus + label: Prometheus + description: Prometheus.IO + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: panel + id: singlestat + name: Singlestat + version: '' + - type: panel + id: graph + name: Graph + version: '' + - type: grafana + id: grafana + name: Grafana + version: 3.1.1 + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + id: + title: Ceph - Cluster + tags: + - ceph + - cluster + style: dark + timezone: browser + editable: true + hideControls: false + sharedCrosshair: false + rows: + - collapse: false + editable: true + height: 150px + panels: + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 21 + interval: 1m + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: count(ceph_health_status) + interval: "$interval" + intervalFactor: 1 + refId: A + step: 60 + thresholds: '0,1' + title: Status + transparent: false + type: singlestat + valueFontSize: 100% + valueMaps: + - op: "=" + text: N/A + value: 'null' + - op: "=" + text: WARNING + value: '0' + - op: "=" + text: HEALTHY + value: '1' + valueName: current + - cacheTimeout: + colorBackground: false + 
colorValue: true + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 14 + interval: 1m + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: ceph_monitor_quorum_count + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: '2,3' + title: Monitors In Quorum + transparent: false + type: singlestat + valueFontSize: 100% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 22 + interval: 1m + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: true + targets: + - expr: count(ceph_pool_available_bytes) + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + 
thresholds: '' + title: Pools + transparent: false + type: singlestat + valueFontSize: 100% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + editable: true + error: false + format: bytes + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 33 + interval: 1m + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: true + targets: + - expr: ceph_cluster_capacity_bytes + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: 0.025,0.1 + title: Cluster Capacity + transparent: false + type: singlestat + valueFontSize: 100% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + editable: true + error: false + format: bytes + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 34 + interval: 1m + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 
189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: true + targets: + - expr: ceph_cluster_used_bytes + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: 0.025,0.1 + title: Used Capacity + transparent: false + type: singlestat + valueFontSize: 100% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + editable: true + error: false + format: percentunit + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 23 + interval: 1m + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: ceph_cluster_available_bytes/ceph_cluster_capacity_bytes + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: '70,80' + title: Available Capacity + transparent: false + type: singlestat + valueFontSize: 100% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + title: New row + - collapse: false + editable: true + height: 100px + panels: + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 26 + interval: + isNew: true + links: [] + mappingType: 
1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: ceph_osds_in + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: '' + title: OSDs IN + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 40, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 27 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: ceph_osds - ceph_osds_in + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: '1,1' + title: OSDs OUT + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 
+ show: false + thresholdLabels: false + thresholdMarkers: true + id: 28 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum(ceph_osd_up) + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: '' + title: OSDs UP + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 40, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 29 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: ceph_osds_down + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: '1,1' + title: OSDs DOWN + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 
54, 0.9) + datasource: prometheus + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 30 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: true + targets: + - expr: avg(ceph_osd_pgs) + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: '250,300' + title: Agerage PGs per OSD + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + editable: true + error: false + format: s + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 31 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: true + targets: + - expr: avg(ceph_osd_perf_apply_latency_seconds) + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: 0.01,0.05 + title: Agerage OSD Apply Latency + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + 
value: 'null' + valueName: avg + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + editable: true + error: false + format: s + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 32 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: true + targets: + - expr: avg(ceph_osd_perf_commit_latency_seconds) + interval: "$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: 0.01,0.05 + title: Agerage OSD Commit Latency + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + editable: true + error: false + format: s + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 24 + interval: 1m + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + repeat: + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: true + lineColor: rgb(31, 120, 193) + show: true + targets: + - expr: avg(ceph_monitor_latency_seconds) + interval: 
"$interval" + intervalFactor: 1 + legendFormat: '' + refId: A + step: 60 + thresholds: '70,80' + title: Average Monitor Latency + transparent: false + type: singlestat + valueFontSize: 100% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + title: New row + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: + Available: "#EAB839" + Total Capacity: "#447EBC" + Used: "#BF1B00" + total_avail: "#6ED0E0" + total_space: "#7EB26D" + total_used: "#890F02" + bars: false + datasource: prometheus + editable: true + error: false + fill: 4 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + height: '300' + id: 1 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 0 + links: [] + minSpan: + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: Total Capacity + fill: 0 + linewidth: 3 + stack: false + span: 4 + stack: true + steppedLine: false + targets: + - expr: ceph_cluster_available_bytes + interval: "$interval" + intervalFactor: 1 + legendFormat: Available + refId: A + step: 60 + - expr: ceph_cluster_used_bytes + interval: "$interval" + intervalFactor: 1 + legendFormat: Used + refId: B + step: 60 + - expr: ceph_cluster_capacity_bytes + interval: "$interval" + intervalFactor: 1 + legendFormat: Total Capacity + refId: C + step: 60 + timeFrom: + timeShift: + title: Capacity + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: + Total Capacity: "#7EB26D" + Used: "#BF1B00" + total_avail: "#6ED0E0" + total_space: "#7EB26D" + 
total_used: "#890F02" + bars: false + datasource: prometheus + decimals: 0 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + thresholdLine: false + height: '300' + id: 3 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + minSpan: + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 4 + stack: true + steppedLine: false + targets: + - expr: ceph_client_io_write_ops + interval: "$interval" + intervalFactor: 1 + legendFormat: Write + refId: A + step: 60 + - expr: ceph_client_io_read_ops + interval: "$interval" + intervalFactor: 1 + legendFormat: Read + refId: B + step: 60 + timeFrom: + timeShift: + title: IOPS + tooltip: + msResolution: true + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: none + label: '' + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - aliasColors: {} + bars: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + height: '300' + id: 7 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 4 + stack: true + steppedLine: false + targets: + - expr: ceph_client_io_write_bytes + interval: "$interval" + intervalFactor: 1 + legendFormat: Write + refId: A + step: 60 + - expr: 
ceph_client_io_read_bytes + interval: "$interval" + intervalFactor: 1 + legendFormat: Read + refId: B + step: 60 + timeFrom: + timeShift: + title: Throughput + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: Bps + label: + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + repeat: + showTitle: true + title: CLUSTER + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 18 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: "/^Total.*$/" + stack: false + span: 12 + stack: true + steppedLine: false + targets: + - expr: ceph_cluster_objects + interval: "$interval" + intervalFactor: 1 + legendFormat: Total + refId: A + step: 60 + - expr: ceph_degraded_objects + interval: "$interval" + intervalFactor: 1 + legendFormat: Degraded + refId: B + step: 60 + - expr: ceph_misplaced_objects + interval: "$interval" + intervalFactor: 1 + legendFormat: Misplaced + refId: C + step: 60 + timeFrom: + timeShift: + title: Objects in the Cluster + tooltip: + msResolution: false + shared: true + sort: 1 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: + threshold1: + 
threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 19 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: "/^Total.*$/" + stack: false + span: 6 + stack: true + steppedLine: false + targets: + - expr: sum(ceph_osd_pgs) + interval: "$interval" + intervalFactor: 1 + legendFormat: Total + refId: A + step: 60 + - expr: ceph_degraded_pgs + interval: "$interval" + intervalFactor: 1 + legendFormat: Degraded + refId: B + step: 60 + - expr: ceph_stale_pgs + interval: "$interval" + intervalFactor: 1 + legendFormat: Stale + refId: C + step: 60 + - expr: ceph_unclean_pgs + interval: "$interval" + intervalFactor: 1 + legendFormat: Unclean + refId: D + step: 60 + - expr: ceph_undersized_pgs + interval: "$interval" + intervalFactor: 1 + legendFormat: Undersized + refId: E + step: 60 + - expr: ceph_stuck_degraded_pgs + ceph_stuck_stale_pgs + ceph_stuck_unclean_pgs + + ceph_stuck_undersized_pgs + interval: "$interval" + intervalFactor: 1 + legendFormat: Stuck + refId: F + step: 60 + timeFrom: + timeShift: + title: PGs + tooltip: + msResolution: false + shared: true + sort: 1 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 20 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + total: false + 
values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: "/^Total.*$/" + stack: false + span: 6 + stack: true + steppedLine: false + targets: + - expr: ceph_stuck_degraded_pgs + interval: "$interval" + intervalFactor: 1 + legendFormat: Degraded + refId: F + step: 60 + - expr: ceph_stuck_stale_pgs + interval: "$interval" + intervalFactor: 1 + legendFormat: Stale + refId: A + step: 60 + - expr: ceph_stuck_unclean_pgs + interval: "$interval" + intervalFactor: 1 + legendFormat: Unclean + refId: B + step: 60 + - expr: ceph_stuck_undersized_pgs + interval: "$interval" + intervalFactor: 1 + legendFormat: Undersized + refId: C + step: 60 + timeFrom: + timeShift: + title: Stuck PGs + tooltip: + msResolution: false + shared: true + sort: 1 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + title: New row + - collapse: false + editable: true + height: 150px + panels: + - aliasColors: {} + bars: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 15 + isNew: true + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 4 + stack: false + steppedLine: false + targets: + - expr: ceph_recovery_io_bytes + interval: "$interval" + intervalFactor: 1 + legendFormat: Bytes + refId: A + step: 60 + timeFrom: + timeShift: + title: Bytes + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + 
show: true + yaxes: + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 16 + isNew: true + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: "/^.*/" + color: "#E0752D" + span: 4 + stack: false + steppedLine: false + targets: + - expr: ceph_recovery_io_keys + interval: "$interval" + intervalFactor: 1 + legendFormat: Keys + refId: A + step: 60 + timeFrom: + timeShift: + title: Keys + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 17 + isNew: true + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: "/^.*$/" + color: "#890F02" + span: 4 + stack: false + steppedLine: false + targets: + - expr: ceph_recovery_io_objects + interval: "$interval" + intervalFactor: 1 + legendFormat: Objects + refId: A + step: 60 + timeFrom: + timeShift: + title: Objects + tooltip: + msResolution: 
false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + showTitle: true + title: Recovery + time: + from: now-1h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + templating: + list: + - auto: true + auto_count: 10 + auto_min: 1m + current: + tags: [] + text: 1m + value: 1m + datasource: + hide: 0 + includeAll: false + label: Interval + multi: false + name: interval + options: + - selected: false + text: auto + value: "$__auto_interval" + - selected: true + text: 1m + value: 1m + - selected: false + text: 10m + value: 10m + - selected: false + text: 30m + value: 30m + - selected: false + text: 1h + value: 1h + - selected: false + text: 6h + value: 6h + - selected: false + text: 12h + value: 12h + - selected: false + text: 1d + value: 1d + - selected: false + text: 7d + value: 7d + - selected: false + text: 14d + value: 14d + - selected: false + text: 30d + value: 30d + query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d + refresh: 0 + type: interval + annotations: + list: [] + refresh: 1m + schemaVersion: 12 + version: 26 + links: [] + gnetId: 917 + description: "Ceph Cluster overview.\r\n" + ceph_osd: + __inputs: + - name: prometheus + label: Prometheus + description: Prometheus.IO + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: panel + id: singlestat + name: Singlestat + version: '' + - type: panel + id: graph + name: Graph + version: '' + - type: grafana + id: grafana + name: Grafana + version: 3.1.1 + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + id: + title: Ceph - OSD + tags: + - ceph + - osd + style: dark + timezone: browser + editable: true + hideControls: false + 
sharedCrosshair: false + rows: + - collapse: false + editable: true + height: 100px + panels: + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 40, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 6 + interval: + isNew: true + links: [] + mappingType: 2 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + - from: '0' + text: DOWN + to: '0.99' + - from: '0.99' + text: UP + to: '1' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: ceph_osd_up{osd="$osd"} + interval: "$interval" + intervalFactor: 1 + refId: A + step: 60 + thresholds: '0,1' + timeFrom: + title: Status + transparent: false + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: DOWN + value: '0' + - op: "=" + text: UP + value: '1' + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 40, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 8 + interval: + isNew: true + links: [] + mappingType: 2 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + - 
from: '0' + text: OUT + to: '0.99' + - from: '0.99' + text: IN + to: '1' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: ceph_osd_in{osd="$osd"} + interval: "$interval" + intervalFactor: 1 + refId: A + step: 60 + thresholds: '0,1' + timeFrom: + title: Available + transparent: false + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: DOWN + value: '0' + - op: "=" + text: UP + value: '1' + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 10 + interval: + isNew: true + links: [] + mappingType: 2 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: ceph_osds + interval: "$interval" + intervalFactor: 1 + refId: A + step: 60 + thresholds: '0,1' + timeFrom: + title: Total OSDs + transparent: false + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: DOWN + value: '0' + - op: "=" + text: UP + value: '1' + - op: "=" + text: N/A + value: 'null' + valueName: current + title: New row + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 2 + editable: true + error: false + fill: 1 + grid: + threshold1: 250 + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: 300 + 
threshold2Color: rgba(234, 112, 112, 0.22) + thresholdLine: true + id: 5 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: "/^Average.*/" + fill: 0 + stack: false + span: 10 + stack: true + steppedLine: false + targets: + - expr: ceph_osd_pgs{osd=~"$osd"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Number of PGs - {{ osd }} + refId: A + step: 60 + - expr: avg(ceph_osd_pgs) + interval: "$interval" + intervalFactor: 1 + legendFormat: Average Number of PGs in the Cluster + refId: B + step: 60 + timeFrom: + timeShift: + title: PGs + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + editable: true + error: false + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 7 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: true + targets: + - expr: ceph_osd_utilization{osd="$osd"} + interval: "$interval" + intervalFactor: 1 + legendFormat: 
'' + refId: A + step: 60 + thresholds: '60,80' + timeFrom: + title: Utilization + transparent: false + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + showTitle: true + title: 'OSD: $osd' + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 2 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + thresholdLine: false + id: 4 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: false + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 2 + points: true + renderer: flot + seriesOverrides: [] + span: 4 + stack: false + steppedLine: false + targets: + - expr: ceph_osd_perf_apply_latency_seconds{osd=~"$osd"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Apply Latency (s) - {{ osd }} + refId: A + step: 60 + - expr: ceph_osd_perf_commit_latency_seconds{osd=~"$osd"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Commit Latency (s) - {{ osd }} + refId: B + step: 60 + timeFrom: + timeShift: + title: Latency + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: s + label: + logBase: 1 + max: + min: 0 + show: true + - format: s + label: + logBase: 1 + max: + min: 0 + show: true + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 2 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 2 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + 
total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 4 + stack: true + steppedLine: false + targets: + - expr: ceph_osd_avail_bytes{osd=~"$osd"} + hide: false + interval: "$interval" + intervalFactor: 1 + legendFormat: Available - {{ osd }} + metric: ceph_osd_avail_bytes + refId: A + step: 60 + - expr: ceph_osd_used_bytes{osd=~"$osd"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Used - {{ osd }} + metric: ceph_osd_used_bytes + refId: B + step: 60 + timeFrom: + timeShift: + title: OSD Storage + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 5 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 9 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: false + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 2 + points: true + renderer: flot + seriesOverrides: [] + span: 4 + stack: false + steppedLine: false + targets: + - expr: ceph_osd_variance{osd=~"$osd"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Variance - {{ osd }} + metric: ceph_osd_variance + refId: A + step: 60 + timeFrom: + timeShift: + title: Utilization Variance + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: none + label: + logBase: 1 + max: + min: + show: true + - format: none + 
label: + logBase: 1 + max: + min: + show: true + title: New row + time: + from: now-1h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + templating: + list: + - auto: true + auto_count: 10 + auto_min: 1m + current: + selected: true + text: 1m + value: 1m + datasource: + hide: 0 + includeAll: false + label: Interval + multi: false + name: interval + options: + - selected: false + text: auto + value: "$__auto_interval" + - selected: true + text: 1m + value: 1m + - selected: false + text: 10m + value: 10m + - selected: false + text: 30m + value: 30m + - selected: false + text: 1h + value: 1h + - selected: false + text: 6h + value: 6h + - selected: false + text: 12h + value: 12h + - selected: false + text: 1d + value: 1d + - selected: false + text: 7d + value: 7d + - selected: false + text: 14d + value: 14d + - selected: false + text: 30d + value: 30d + query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d + refresh: 0 + type: interval + - current: {} + datasource: prometheus + hide: 0 + includeAll: false + label: OSD + multi: false + name: osd + options: [] + query: label_values(ceph_osd_up, osd) + refresh: 1 + regex: '' + type: query + annotations: + list: [] + refresh: 15m + schemaVersion: 12 + version: 18 + links: [] + gnetId: 923 + description: CEPH OSD Status. 
+ ceph_pool: + __inputs: + - name: prometheus + label: Prometheus + description: Prometheus.IO + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: panel + id: graph + name: Graph + version: '' + - type: panel + id: singlestat + name: Singlestat + version: '' + - type: grafana + id: grafana + name: Grafana + version: 3.1.1 + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + id: + title: Ceph - Pools + tags: + - ceph + - pools + style: dark + timezone: browser + editable: true + hideControls: false + sharedCrosshair: false + rows: + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 2 + editable: true + error: false + fill: 4 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + height: '' + id: 2 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + rightSide: true + show: true + total: false + values: true + lines: true + linewidth: 0 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: "/^Total.*$/" + fill: 0 + linewidth: 4 + stack: false + - alias: "/^Raw.*$/" + color: "#BF1B00" + fill: 0 + linewidth: 4 + span: 10 + stack: true + steppedLine: false + targets: + - expr: ceph_pool_available_bytes{pool=~"$pool"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Available - {{ pool }} + metric: ceph_pool_available_bytes + refId: A + step: 60 + - expr: ceph_pool_used_bytes{pool=~"$pool"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Used - {{ pool }} + metric: ceph_pool + refId: B + step: 60 + - expr: ceph_pool_used_bytes{pool=~"$pool"} + ceph_pool_available_bytes{pool=~"$pool"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Total - {{ pool }} + metric: ceph_pool + 
refId: C + step: 60 + - expr: ceph_pool_raw_used_bytes{pool=~"$pool"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Raw - {{ pool }} + metric: ceph_pool + refId: D + step: 60 + timeFrom: + timeShift: + title: Pool Storage + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + decimals: 2 + editable: true + error: false + format: percentunit + gauge: + maxValue: 1 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 10 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: ' ceph_pool_used_bytes{pool="$pool"} / (ceph_pool_available_bytes{pool="$pool"} + + ceph_pool_used_bytes{pool="$pool"})' + interval: "$interval" + intervalFactor: 1 + refId: A + step: 60 + thresholds: '' + title: Usage + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + showTitle: true + title: 'Pool: $pool' + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + 
height: '' + id: 7 + isNew: true + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 6 + stack: false + steppedLine: false + targets: + - expr: ceph_pool_objects_total{pool=~"$pool"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Objects - {{ pool }} + refId: A + step: 60 + - expr: ceph_pool_dirty_objects_total{pool=~"$pool"} + interval: "$interval" + intervalFactor: 1 + legendFormat: Dirty Objects - {{ pool }} + refId: B + step: 60 + timeFrom: + timeShift: + title: Objects in Pool + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: 0 + show: true + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 2 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + thresholdLine: false + id: 4 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 6 + stack: true + steppedLine: false + targets: + - expr: irate(ceph_pool_read_total{pool=~"$pool"}[3m]) + interval: "$interval" + intervalFactor: 1 + legendFormat: Read - {{ pool }} + refId: B + step: 60 + - expr: irate(ceph_pool_write_total{pool=~"$pool"}[3m]) + interval: "$interval" + intervalFactor: 1 + legendFormat: Write - {{ pool }} + refId: A + step: 60 + timeFrom: + timeShift: + title: IOPS + 
tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: none + label: IOPS + logBase: 1 + max: + min: 0 + show: true + - format: short + label: IOPS + logBase: 1 + max: + min: 0 + show: false + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 2 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 5 + interval: "$interval" + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: true + steppedLine: false + targets: + - expr: irate(ceph_pool_read_bytes_total{pool="$pool"}[3m]) + interval: "$interval" + intervalFactor: 1 + legendFormat: Read Bytes - {{ pool }} + refId: A + step: 60 + - expr: irate(ceph_pool_write_bytes_total{pool="$pool"}[3m]) + interval: "$interval" + intervalFactor: 1 + legendFormat: Written Bytes - {{ pool }} + refId: B + step: 60 + timeFrom: + timeShift: + title: Throughput + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + show: true + yaxes: + - format: Bps + label: + logBase: 1 + max: + min: 0 + show: true + - format: Bps + label: + logBase: 1 + max: + min: 0 + show: true + title: New row + time: + from: now-3h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + templating: + list: + - auto: true + auto_count: 10 + auto_min: 1m + current: + selected: true + text: 1m + value: 1m + datasource: + hide: 0 + includeAll: false + label: Interval + multi: false + name: 
interval + options: + - selected: false + text: auto + value: "$__auto_interval" + - selected: true + text: 1m + value: 1m + - selected: false + text: 10m + value: 10m + - selected: false + text: 30m + value: 30m + - selected: false + text: 1h + value: 1h + - selected: false + text: 6h + value: 6h + - selected: false + text: 12h + value: 12h + - selected: false + text: 1d + value: 1d + - selected: false + text: 7d + value: 7d + - selected: false + text: 14d + value: 14d + - selected: false + text: 30d + value: 30d + query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d + refresh: 0 + type: interval + - current: {} + datasource: prometheus + hide: 0 + includeAll: false + label: Pool + multi: false + name: pool + options: [] + query: label_values(ceph_pool_objects_total, pool) + refresh: 1 + regex: '' + type: query + annotations: + list: [] + refresh: 1m + schemaVersion: 12 + version: 22 + links: [] + gnetId: 926 + description: Ceph Pools dashboard. + etcd: + __inputs: + - name: prometheus + label: prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: grafana + id: grafana + name: Grafana + version: 4.4.1 + - type: panel + id: graph + name: Graph + version: '' + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + - type: panel + id: singlestat + name: Singlestat + version: '' + annotations: + list: [] + editable: true + gnetId: 3070 + graphTooltip: 0 + hideControls: false + id: + links: [] + rows: + - collapse: false + height: 250 + panels: + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 44 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + 
nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 4 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: max(etcd_server_has_leader) + format: time_series + intervalFactor: 2 + refId: A + step: 600 + thresholds: '0,1' + title: Etcd has a leader? + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: 'YES' + value: '1' + - op: "=" + text: 'NO' + value: '0' + valueName: avg + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 42 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 4 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: max(etcd_server_leader_changes_seen_total) + format: time_series + intervalFactor: 2 + refId: A + step: 600 + thresholds: '' + title: The number of leader changes seen + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 43 + interval: + 
links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 4 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: max(etcd_server_proposals_failed_total) + format: time_series + intervalFactor: 2 + refId: A + step: 600 + thresholds: '' + title: The total number of failed proposals seen + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + - collapse: false + height: 252 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 0 + id: 23 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(rate(grpc_server_started_total{grpc_type="unary"}[5m])) + format: time_series + intervalFactor: 2 + legendFormat: RPC Rate + metric: grpc_server_started_total + refId: A + step: 60 + - expr: sum(rate(grpc_server_handled_total{grpc_type="unary",grpc_code!="OK"}[5m])) + format: time_series + intervalFactor: 2 + legendFormat: RPC Failed Rate + metric: grpc_server_handled_total + refId: B + step: 60 + thresholds: [] + timeFrom: + timeShift: + title: RPC Rate + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: 
true + values: [] + yaxes: + - format: ops + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 0 + id: 41 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: true + steppedLine: false + targets: + - expr: sum(grpc_server_started_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) + - sum(grpc_server_handled_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) + format: time_series + intervalFactor: 2 + legendFormat: Watch Streams + metric: grpc_server_handled_total + refId: A + step: 60 + - expr: sum(grpc_server_started_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) + - sum(grpc_server_handled_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) + format: time_series + intervalFactor: 2 + legendFormat: Lease Streams + metric: grpc_server_handled_total + refId: B + step: 60 + thresholds: [] + timeFrom: + timeShift: + title: Active Streams + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: '' + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Row + titleSize: h6 + - collapse: false + height: 250px + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + decimals: + editable: true + error: false + fill: 0 + grid: {} + id: 1 + legend: + avg: false + 
current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: etcd_debugging_mvcc_db_total_size_in_bytes + format: time_series + hide: false + interval: '' + intervalFactor: 2 + legendFormat: "{{instance}} DB Size" + metric: '' + refId: A + step: 120 + thresholds: [] + timeFrom: + timeShift: + title: DB Size + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + logBase: 1 + max: + min: + show: true + - format: short + logBase: 1 + max: + min: + show: false + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 0 + grid: {} + id: 3 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 1 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: true + targets: + - expr: histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) + by (instance, le)) + format: time_series + hide: false + intervalFactor: 2 + legendFormat: "{{instance}} WAL fsync" + metric: etcd_disk_wal_fsync_duration_seconds_bucket + refId: A + step: 120 + - expr: histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) + by (instance, le)) + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}} DB fsync" + metric: etcd_disk_backend_commit_duration_seconds_bucket + refId: B + step: 120 + thresholds: [] + timeFrom: + timeShift: + title: Disk 
Sync Duration + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: s + logBase: 1 + max: + min: + show: true + - format: short + logBase: 1 + max: + min: + show: false + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 0 + id: 29 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - expr: process_resident_memory_bytes + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}} Resident Memory" + metric: process_resident_memory_bytes + refId: A + step: 120 + thresholds: [] + timeFrom: + timeShift: + title: Memory + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + - collapse: false + height: 250px + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 5 + id: 22 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: true + steppedLine: false + targets: + - expr: 
rate(etcd_network_client_grpc_received_bytes_total[5m]) + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}} Client Traffic In" + metric: etcd_network_client_grpc_received_bytes_total + refId: A + step: 120 + thresholds: [] + timeFrom: + timeShift: + title: Client Traffic In + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: Bps + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 5 + id: 21 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: true + steppedLine: false + targets: + - expr: rate(etcd_network_client_grpc_sent_bytes_total[5m]) + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}} Client Traffic Out" + metric: etcd_network_client_grpc_sent_bytes_total + refId: A + step: 120 + thresholds: [] + timeFrom: + timeShift: + title: Client Traffic Out + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: Bps + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 0 + id: 20 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 2 + links: [] + 
nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: sum(rate(etcd_network_peer_received_bytes_total[5m])) by (instance) + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}} Peer Traffic In" + metric: etcd_network_peer_received_bytes_total + refId: A + step: 120 + thresholds: [] + timeFrom: + timeShift: + title: Peer Traffic In + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: Bps + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + decimals: + editable: true + error: false + fill: 0 + grid: {} + id: 16 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 3 + stack: false + steppedLine: false + targets: + - expr: sum(rate(etcd_network_peer_sent_bytes_total[5m])) by (instance) + format: time_series + hide: false + interval: '' + intervalFactor: 2 + legendFormat: "{{instance}} Peer Traffic Out" + metric: etcd_network_peer_sent_bytes_total + refId: A + step: 120 + thresholds: [] + timeFrom: + timeShift: + title: Peer Traffic Out + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: Bps + logBase: 1 + max: + min: + show: true + - format: short + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + 
title: New row + titleSize: h6 + - collapse: false + height: 250px + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 0 + id: 40 + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(rate(etcd_server_proposals_failed_total[5m])) + format: time_series + intervalFactor: 2 + legendFormat: Proposal Failure Rate + metric: etcd_server_proposals_failed_total + refId: A + step: 60 + - expr: sum(etcd_server_proposals_pending) + format: time_series + intervalFactor: 2 + legendFormat: Proposal Pending Total + metric: etcd_server_proposals_pending + refId: B + step: 60 + - expr: sum(rate(etcd_server_proposals_committed_total[5m])) + format: time_series + intervalFactor: 2 + legendFormat: Proposal Commit Rate + metric: etcd_server_proposals_committed_total + refId: C + step: 60 + - expr: sum(rate(etcd_server_proposals_applied_total[5m])) + format: time_series + intervalFactor: 2 + legendFormat: Proposal Apply Rate + refId: D + step: 60 + thresholds: [] + timeFrom: + timeShift: + title: Raft Proposals + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: '' + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + decimals: 0 + editable: true + error: false + fill: 0 + id: 19 + legend: + alignAsTable: false + avg: false + current: false + max: false + min: false + rightSide: false + show: false + total: 
false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: changes(etcd_server_leader_changes_seen_total[1d]) + format: time_series + intervalFactor: 2 + legendFormat: "{{instance}} Total Leader Elections Per Day" + metric: etcd_server_leader_changes_seen_total + refId: A + step: 60 + thresholds: [] + timeFrom: + timeShift: + title: Total Leader Elections Per Day + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + description: |- + proposals_committed_total records the total number of consensus proposals committed. This gauge should increase over time if the cluster is healthy. Several healthy members of an etcd cluster may have different total committed proposals at once. This discrepancy may be due to recovering from peers after starting, lagging behind the leader, or being the leader and therefore having the most commits. It is important to monitor this metric across all the members in the cluster; a consistently large lag between a single member and its leader indicates that member is slow or unhealthy. + + proposals_applied_total records the total number of consensus proposals applied. The etcd server applies every committed proposal asynchronously. 
The difference between proposals_committed_total and proposals_applied_total should usually be small (within a few thousands even under high load). If the difference between them continues to rise, it indicates that the etcd server is overloaded. This might happen when applying expensive queries like heavy range queries or large txn operations. + fill: 1 + id: 2 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: false + rightSide: false + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(rate(etcd_server_proposals_committed_total[5m])) + format: time_series + intervalFactor: 2 + legendFormat: total number of consensus proposals committed + metric: '' + refId: A + step: 60 + - expr: sum(rate(etcd_server_proposals_applied_total[5m])) + format: time_series + intervalFactor: 2 + legendFormat: total number of consensus proposals applied + metric: '' + refId: B + step: 60 + thresholds: [] + timeFrom: + timeShift: + title: The total number of consensus proposals committed + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: '' + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + description: indicates how many proposals are queued to commit. Rising pending + proposals suggests there is a high client load or the member cannot commit proposals. 
+ fill: 1 + id: 5 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(etcd_server_proposals_pending) + format: time_series + intervalFactor: 2 + legendFormat: Proposals pending + refId: A + step: 60 + thresholds: [] + timeFrom: + timeShift: + title: Proposals pending + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + fill: 1 + id: 7 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 12 + stack: false + steppedLine: false + targets: + - expr: sum(rate(etcd_disk_wal_fsync_duration_seconds_sum[1m])) + format: time_series + intervalFactor: 2 + legendFormat: "\tThe latency distributions of fsync called by wal" + refId: A + step: 30 + - expr: sum(rate(etcd_disk_backend_commit_duration_seconds_sum[1m])) + format: time_series + intervalFactor: 2 + legendFormat: The latency distributions of commit called by backend + refId: B + step: 30 + thresholds: [] + timeFrom: + timeShift: + title: Disks operations + tooltip: + shared: true + sort: 0 + value_type: individual + type: 
graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + fill: 1 + id: 8 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 12 + stack: false + steppedLine: false + targets: + - expr: sum(rate(etcd_network_client_grpc_received_bytes_total[1m])) + format: time_series + intervalFactor: 2 + legendFormat: The total number of bytes received by grpc clients + refId: A + step: 30 + - expr: sum(rate(etcd_network_client_grpc_sent_bytes_total[1m])) + format: time_series + intervalFactor: 2 + legendFormat: The total number of bytes sent to grpc clients + refId: B + step: 30 + thresholds: [] + timeFrom: + timeShift: + title: Network + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + description: Abnormally high snapshot duration (snapshot_save_total_duration_seconds) + indicates disk issues and might cause the cluster to be unstable. 
+ fill: 1 + id: 9 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 12 + stack: false + steppedLine: false + targets: + - expr: sum(rate(etcd_debugging_snap_save_total_duration_seconds_sum[1m])) + format: time_series + intervalFactor: 2 + legendFormat: The total latency distributions of save called by snapshot + refId: A + step: 30 + thresholds: [] + timeFrom: + timeShift: + title: Snapshot duration + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + schemaVersion: 14 + style: dark + tags: [] + templating: + list: [] + time: + from: now-6h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + timezone: browser + title: Etcd by Prometheus + version: 2 + description: Etcd Dashboard for Prometheus metrics scraper + hosts_containers: + __inputs: + - name: prometheus + label: Prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: panel + id: graph + name: Graph + version: '' + - type: panel + id: singlestat + name: Singlestat + version: '' + - type: grafana + id: grafana + name: Grafana + version: 3.1.1 + - type: datasource + id: prometheus + name: Prometheus + version: 1.3.0 + id: + title: Kubernetes cluster monitoring (via Prometheus) + description: Monitors Kubernetes cluster using 
Prometheus. Shows overall cluster CPU + / Memory / Filesystem usage as well as individual pod, containers, systemd services + statistics. Uses cAdvisor metrics only. + tags: + - kubernetes + style: dark + timezone: browser + editable: true + hideControls: false + sharedCrosshair: false + rows: + - collapse: false + editable: true + height: 200px + panels: + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 2 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + thresholdLine: false + height: 200px + id: 32 + isNew: true + legend: + alignAsTable: false + avg: true + current: true + max: false + min: false + rightSide: false + show: false + sideWidth: 200 + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: false + targets: + - expr: sum (rate (container_network_receive_bytes_total{kubernetes_io_hostname=~"^$Node$"}[5m])) + interval: 10s + intervalFactor: 1 + legendFormat: Received + metric: network + refId: A + step: 10 + - expr: '- sum (rate (container_network_transmit_bytes_total{kubernetes_io_hostname=~"^$Node$"}[5m]))' + interval: 10s + intervalFactor: 1 + legendFormat: Sent + metric: network + refId: B + step: 10 + timeFrom: + timeShift: + title: Network I/O pressure + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + transparent: false + type: graph + xaxis: + show: true + yaxes: + - format: Bps + label: + logBase: 1 + max: + min: + show: true + - format: Bps + label: + logBase: 1 + max: + min: + show: false + title: Network I/O pressure + - collapse: false + editable: true + height: 250px + panels: + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 
45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + editable: true + error: false + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + height: 180px + id: 4 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 4 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (container_memory_working_set_bytes{id="/",kubernetes_io_hostname=~"^$Node$"}) + / sum (machine_memory_bytes{kubernetes_io_hostname=~"^$Node$"}) * 100 + interval: 10s + intervalFactor: 1 + refId: A + step: 10 + thresholds: 65, 90 + title: Cluster memory usage + transparent: false + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + decimals: 2 + editable: true + error: false + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + height: 180px + id: 6 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 4 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (rate 
(container_cpu_usage_seconds_total{id="/",kubernetes_io_hostname=~"^$Node$"}[5m])) + / sum (machine_cpu_cores{kubernetes_io_hostname=~"^$Node$"}) * 100 + interval: 10s + intervalFactor: 1 + refId: A + step: 10 + thresholds: 65, 90 + title: Cluster CPU usage (5m avg) + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + decimals: 2 + editable: true + error: false + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + height: 180px + id: 7 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 4 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (container_fs_usage_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) + / sum (container_fs_limit_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) + * 100 + interval: 10s + intervalFactor: 1 + legendFormat: '' + metric: '' + refId: A + step: 10 + thresholds: 65, 90 + title: Cluster filesystem usage + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + decimals: 2 + editable: true + error: false + format: bytes + gauge: + maxValue: 100 + minValue: 0 + show: false 
+ thresholdLabels: false + thresholdMarkers: true + height: 1px + id: 9 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 20% + prefix: '' + prefixFontSize: 20% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (container_memory_working_set_bytes{id="/",kubernetes_io_hostname=~"^$Node$"}) + interval: 10s + intervalFactor: 1 + refId: A + step: 10 + thresholds: '' + title: Used + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + decimals: 2 + editable: true + error: false + format: bytes + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: 1px + id: 10 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (machine_memory_bytes{kubernetes_io_hostname=~"^$Node$"}) + interval: 10s + intervalFactor: 1 + refId: A + step: 10 + thresholds: '' + title: Total + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + 
colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + decimals: 2 + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: 1px + id: 11 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: " cores" + postfixFontSize: 30% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (rate (container_cpu_usage_seconds_total{id="/",kubernetes_io_hostname=~"^$Node$"}[5m])) + interval: 10s + intervalFactor: 1 + refId: A + step: 10 + thresholds: '' + title: Used + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + decimals: 2 + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: 1px + id: 12 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: " cores" + postfixFontSize: 30% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (machine_cpu_cores{kubernetes_io_hostname=~"^$Node$"}) + interval: 10s + 
intervalFactor: 1 + refId: A + step: 10 + thresholds: '' + title: Total + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + decimals: 2 + editable: true + error: false + format: bytes + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: 1px + id: 13 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (container_fs_usage_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) + interval: 10s + intervalFactor: 1 + refId: A + step: 10 + thresholds: '' + title: Used + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + decimals: 2 + editable: true + error: false + format: bytes + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + height: 1px + id: 14 + interval: + isNew: true + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 
'null' + text: N/A + to: 'null' + span: 2 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: sum (container_fs_limit_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) + interval: 10s + intervalFactor: 1 + refId: A + step: 10 + thresholds: '' + title: Total + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + showTitle: false + title: Total usage + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 3 + editable: true + error: false + fill: 0 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + height: '' + id: 17 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: true + targets: + - expr: sum (rate (container_cpu_usage_seconds_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (pod_name) + interval: 10s + intervalFactor: 1 + legendFormat: "{{ pod_name }}" + metric: container_cpu + refId: A + step: 10 + timeFrom: + timeShift: + title: Pods CPU usage (5m avg) + tooltip: + msResolution: true + shared: true + sort: 2 + value_type: cumulative + transparent: false + type: graph + xaxis: + show: true + yaxes: + - format: none + label: cores + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + showTitle: false + title: Pods CPU usage + - collapse: true + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + 
datasource: prometheus + decimals: 3 + editable: true + error: false + fill: 0 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + height: '' + id: 23 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: true + targets: + - expr: sum (rate (container_cpu_usage_seconds_total{systemd_service_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (systemd_service_name) + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: "{{ systemd_service_name }}" + metric: container_cpu + refId: A + step: 10 + timeFrom: + timeShift: + title: System services CPU usage (5m avg) + tooltip: + msResolution: true + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: none + label: cores + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + title: System services CPU usage + - collapse: true + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 3 + editable: true + error: false + fill: 0 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + height: '' + id: 24 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + hideEmpty: false + hideZero: false + max: false + min: false + rightSide: true + show: true + sideWidth: + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: 
flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: true + targets: + - expr: sum (rate (container_cpu_usage_seconds_total{image!="",name=~"^k8s_.*",container_name!="POD",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (container_name, pod_name) + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: 'pod: {{ pod_name }} | {{ container_name }}' + metric: container_cpu + refId: A + step: 10 + - expr: sum (rate (container_cpu_usage_seconds_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (kubernetes_io_hostname, name, image) + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: 'docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})' + metric: container_cpu + refId: B + step: 10 + - expr: sum (rate (container_cpu_usage_seconds_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (kubernetes_io_hostname, rkt_container_name) + interval: 10s + intervalFactor: 1 + legendFormat: 'rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}' + metric: container_cpu + refId: C + step: 10 + timeFrom: + timeShift: + title: Containers CPU usage (5m avg) + tooltip: + msResolution: true + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: none + label: cores + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + title: Containers CPU usage + - collapse: true + editable: true + height: 500px + panels: + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 3 + editable: true + error: false + fill: 0 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 20 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: false + show: true + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + 
nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: true + targets: + - expr: sum (rate (container_cpu_usage_seconds_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (id) + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: "{{ id }}" + metric: container_cpu + refId: A + step: 10 + timeFrom: + timeShift: + title: All processes CPU usage (5m avg) + tooltip: + msResolution: true + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: none + label: cores + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + repeat: + showTitle: false + title: All processes CPU usage + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 2 + editable: true + error: false + fill: 0 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 25 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + sideWidth: 200 + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: true + targets: + - expr: sum (container_memory_working_set_bytes{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}) + by (pod_name) + interval: 10s + intervalFactor: 1 + legendFormat: "{{ pod_name }}" + metric: container_memory_usage:sort_desc + refId: A + step: 10 + timeFrom: + timeShift: + title: Pods memory usage + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: 
bytes + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + title: Pods memory usage + - collapse: true + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 2 + editable: true + error: false + fill: 0 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 26 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + sideWidth: 200 + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: true + targets: + - expr: sum (container_memory_working_set_bytes{systemd_service_name!="",kubernetes_io_hostname=~"^$Node$"}) + by (systemd_service_name) + interval: 10s + intervalFactor: 1 + legendFormat: "{{ systemd_service_name }}" + metric: container_memory_usage:sort_desc + refId: A + step: 10 + timeFrom: + timeShift: + title: System services memory usage + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + title: System services memory usage + - collapse: true + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 2 + editable: true + error: false + fill: 0 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 27 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + sideWidth: 200 + 
sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: true + targets: + - expr: sum (container_memory_working_set_bytes{image!="",name=~"^k8s_.*",container_name!="POD",kubernetes_io_hostname=~"^$Node$"}) + by (container_name, pod_name) + interval: 10s + intervalFactor: 1 + legendFormat: 'pod: {{ pod_name }} | {{ container_name }}' + metric: container_memory_usage:sort_desc + refId: A + step: 10 + - expr: sum (container_memory_working_set_bytes{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}) + by (kubernetes_io_hostname, name, image) + interval: 10s + intervalFactor: 1 + legendFormat: 'docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})' + metric: container_memory_usage:sort_desc + refId: B + step: 10 + - expr: sum (container_memory_working_set_bytes{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}) + by (kubernetes_io_hostname, rkt_container_name) + interval: 10s + intervalFactor: 1 + legendFormat: 'rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}' + metric: container_memory_usage:sort_desc + refId: C + step: 10 + timeFrom: + timeShift: + title: Containers memory usage + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + title: Containers memory usage + - collapse: true + editable: true + height: 500px + panels: + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 2 + editable: true + error: false + fill: 0 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 28 + isNew: true + legend: + alignAsTable: true + avg: 
true + current: true + max: false + min: false + rightSide: false + show: true + sideWidth: 200 + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: true + targets: + - expr: sum (container_memory_working_set_bytes{id!="/",kubernetes_io_hostname=~"^$Node$"}) + by (id) + interval: 10s + intervalFactor: 1 + legendFormat: "{{ id }}" + metric: container_memory_usage:sort_desc + refId: A + step: 10 + timeFrom: + timeShift: + title: All processes memory usage + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + title: All processes memory usage + - collapse: false + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 2 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 16 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + sideWidth: 200 + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: false + targets: + - expr: sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (pod_name) + interval: 10s + intervalFactor: 1 + legendFormat: "-> {{ pod_name }}" + metric: network + refId: A + step: 10 + - expr: '- sum (rate 
(container_network_transmit_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (pod_name)' + interval: 10s + intervalFactor: 1 + legendFormat: "<- {{ pod_name }}" + metric: network + refId: B + step: 10 + timeFrom: + timeShift: + title: Pods network I/O (5m avg) + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: Bps + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + title: Pods network I/O + - collapse: true + editable: true + height: 250px + panels: + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 2 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: + threshold2Color: rgba(234, 112, 112, 0.22) + id: 30 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: true + show: true + sideWidth: 200 + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: false + targets: + - expr: sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (container_name, pod_name) + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: "-> pod: {{ pod_name }} | {{ container_name }}" + metric: network + refId: B + step: 10 + - expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (container_name, pod_name)' + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: "<- pod: {{ pod_name }} | {{ container_name }}" + metric: network + refId: D + step: 10 + - expr: sum (rate 
(container_network_receive_bytes_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (kubernetes_io_hostname, name, image) + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name + }})" + metric: network + refId: A + step: 10 + - expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (kubernetes_io_hostname, name, image)' + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name + }})" + metric: network + refId: C + step: 10 + - expr: sum (rate (container_network_receive_bytes_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (kubernetes_io_hostname, rkt_container_name) + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name + }}" + metric: network + refId: E + step: 10 + - expr: '- sum (rate (container_network_transmit_bytes_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (kubernetes_io_hostname, rkt_container_name)' + hide: false + interval: 10s + intervalFactor: 1 + legendFormat: "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name + }}" + metric: network + refId: F + step: 10 + timeFrom: + timeShift: + title: Containers network I/O (5m avg) + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: Bps + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + title: Containers network I/O + - collapse: true + editable: true + height: 500px + panels: + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 2 + editable: true + error: false + fill: 1 + grid: + threshold1: + threshold1Color: rgba(216, 200, 27, 0.27) + threshold2: +
threshold2Color: rgba(234, 112, 112, 0.22) + id: 29 + isNew: true + legend: + alignAsTable: true + avg: true + current: true + max: false + min: false + rightSide: false + show: true + sideWidth: 200 + sort: current + sortDesc: true + total: false + values: true + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 12 + stack: false + steppedLine: false + targets: + - expr: sum (rate (container_network_receive_bytes_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (id) + interval: 10s + intervalFactor: 1 + legendFormat: "-> {{ id }}" + metric: network + refId: A + step: 10 + - expr: '- sum (rate (container_network_transmit_bytes_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m])) + by (id)' + interval: 10s + intervalFactor: 1 + legendFormat: "<- {{ id }}" + metric: network + refId: B + step: 10 + timeFrom: + timeShift: + title: All processes network I/O (5m avg) + tooltip: + msResolution: false + shared: true + sort: 2 + value_type: cumulative + type: graph + xaxis: + show: true + yaxes: + - format: Bps + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: false + title: All processes network I/O + time: + from: now-5m + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + templating: + list: + - allValue: ".*" + current: {} + datasource: prometheus + hide: 0 + includeAll: true + multi: false + name: Node + options: [] + query: label_values(kubernetes_io_hostname) + refresh: 1 + type: query + annotations: + list: [] + refresh: 10s + schemaVersion: 12 + version: 13 + links: [] + gnetId: 315 + rabbitmq: + __inputs: + - name: prometheus + label: Prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus 
+ __requires: + - type: grafana + id: grafana + name: Grafana + version: 4.2.0 + - type: panel + id: graph + name: Graph + version: '' + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + - type: panel + id: singlestat + name: Singlestat + version: '' + annotations: + list: [] + editable: true + gnetId: 2121 + graphTooltip: 0 + hideControls: false + id: + links: [] + refresh: 5s + rows: + - collapse: false + height: 266 + panels: + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 13 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + targets: + - expr: rabbitmq_up + intervalFactor: 2 + metric: rabbitmq_up + refId: A + step: 2 + thresholds: Up,Down + timeFrom: 30s + title: RabbitMQ Server + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + - op: "=" + text: Down + value: '0' + - op: "=" + text: Up + value: '1' + valueName: current + - alert: + conditions: + - evaluator: + params: + - 1 + type: lt + operator: + type: and + query: + params: + - A + - 10s + - now + reducer: + params: [] + type: last + type: query + - evaluator: + params: [] + type: no_value + operator: + type: and + query: + params: + - A + - 10s + - now + reducer: + params: [] + type: last + type: query + executionErrorState: alerting + frequency: 60s + handler: 1 + message: One or more RabbitMQ nodes are down +
name: Node Stats alert + noDataState: no_data + notifications: [] + aliasColors: {} + bars: true + datasource: prometheus + decimals: 0 + fill: 1 + id: 12 + legend: + alignAsTable: true + avg: false + current: true + max: false + min: false + show: true + total: false + values: true + lines: false + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 9 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_running + intervalFactor: 2 + legendFormat: "{{node}}" + metric: rabbitmq_running + refId: A + step: 2 + thresholds: + - colorMode: critical + fill: true + line: true + op: lt + value: 1 + timeFrom: 30s + timeShift: + title: Node up Stats + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 0 + fill: 1 + id: 6 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 4 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_exchangesTotal + intervalFactor: 2 + legendFormat: "{{instance}}:exchanges" + metric: rabbitmq_exchangesTotal + refId: A + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: Exchanges + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: 
prometheus + decimals: 0 + fill: 1 + id: 4 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 4 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_channelsTotal + intervalFactor: 2 + legendFormat: "{{instance}}:channels" + metric: rabbitmq_channelsTotal + refId: A + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: Channels + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 0 + fill: 1 + id: 3 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 4 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_consumersTotal + intervalFactor: 2 + legendFormat: "{{instance}}:consumers" + metric: rabbitmq_consumersTotal + refId: A + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: Consumers + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 0 + fill: 1 + id: 5 + legend: + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + 
linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 4 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_connectionsTotal + intervalFactor: 2 + legendFormat: "{{instance}}:connections" + metric: rabbitmq_connectionsTotal + refId: A + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: Connections + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: prometheus + fill: 1 + id: 7 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 4 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_queuesTotal + intervalFactor: 2 + legendFormat: "{{instance}}:queues" + metric: rabbitmq_queuesTotal + refId: A + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: Queues + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 0 + fill: 1 + id: 8 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 6 + stack: false + steppedLine: 
false + targets: + - expr: sum by (vhost)(rabbitmq_queue_messages_ready) + intervalFactor: 2 + legendFormat: "{{vhost}}:ready" + metric: rabbitmq_queue_messages_ready + refId: A + step: 2 + - expr: sum by (vhost)(rabbitmq_queue_messages_published_total) + intervalFactor: 2 + legendFormat: "{{vhost}}:published" + metric: rabbitmq_queue_messages_published_total + refId: B + step: 2 + - expr: sum by (vhost)(rabbitmq_queue_messages_delivered_total) + intervalFactor: 2 + legendFormat: "{{vhost}}:delivered" + metric: rabbitmq_queue_messages_delivered_total + refId: C + step: 2 + - expr: sum by (vhost)(rabbitmq_queue_messages_unacknowledged) + intervalFactor: 2 + legendFormat: "{{vhost}}:unack" + metric: ack + refId: D + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: Messages/host + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: prometheus + decimals: 0 + fill: 1 + id: 2 + legend: + alignAsTable: true + avg: false + current: true + max: false + min: false + rightSide: false + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 6 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_queue_messages + intervalFactor: 2 + legendFormat: "{{queue}}:{{durable}}" + metric: rabbitmq_queue_messages + refId: A + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: Messages / Queue + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + 
min: + show: true + - aliasColors: {} + bars: false + datasource: prometheus + fill: 1 + id: 9 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 6 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_node_mem_used + intervalFactor: 2 + legendFormat: "{{node}}:used" + metric: rabbitmq_node_mem_used + refId: A + step: 2 + - expr: rabbitmq_node_mem_limit + intervalFactor: 2 + legendFormat: "{{node}}:limit" + metric: node_mem + refId: B + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: Memory + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: decbytes + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: prometheus + fill: 1 + id: 10 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 6 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_fd_used + intervalFactor: 2 + legendFormat: "{{node}}:used" + metric: '' + refId: A + step: 2 + - expr: rabbitmq_fd_total + intervalFactor: 2 + legendFormat: "{{node}}:total" + metric: node_mem + refId: B + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: File descriptors + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: +
logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + datasource: prometheus + fill: 1 + id: 11 + legend: + alignAsTable: true + avg: true + current: true + max: true + min: true + show: true + total: false + values: true + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + span: 6 + stack: false + steppedLine: false + targets: + - expr: rabbitmq_sockets_used + intervalFactor: 2 + legendFormat: "{{node}}:used" + metric: '' + refId: A + step: 2 + - expr: rabbitmq_sockets_total + intervalFactor: 2 + legendFormat: "{{node}}:total" + metric: '' + refId: B + step: 2 + thresholds: [] + timeFrom: + timeShift: + title: Sockets + tooltip: + shared: true + sort: 0 + value_type: individual + transparent: false + type: graph + xaxis: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + schemaVersion: 14 + style: dark + tags: [] + templating: + list: + - current: + tags: [] + text: Prometheus + value: Prometheus + hide: 0 + label: + name: datasource + options: [] + query: prometheus + refresh: 1 + regex: '' + type: datasource + time: + from: now-5m + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + timezone: browser + title: RabbitMQ Metrics + version: 17 + description: 'Basic rabbitmq host stats: Node Stats, Exchanges, Channels, Consumers, Connections, + Queues, Messages, Messages per Queue, Memory, File Descriptors, Sockets.' 
+ kubernetes_capacity_planning: + __inputs: + - name: prometheus + label: prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: grafana + id: grafana + name: Grafana + version: 4.4.1 + - type: panel + id: graph + name: Graph + version: '' + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + - type: panel + id: singlestat + name: Singlestat + version: '' + annotations: + list: [] + description: '' + editable: true + gnetId: 22 + graphTooltip: 0 + hideControls: false + id: + links: [] + refresh: false + rows: + - collapse: false + height: 250px + panels: + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: {} + id: 3 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(rate(node_cpu{mode="idle"}[2m])) * 100 + hide: false + intervalFactor: 10 + legendFormat: '' + refId: A + step: 50 + thresholds: [] + timeFrom: + timeShift: + title: Idle cpu + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: percent + label: cpu usage + logBase: 1 + max: + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: {} + id: 9 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + 
nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(node_load1) + intervalFactor: 4 + legendFormat: load 1m + refId: A + step: 20 + target: '' + - expr: sum(node_load5) + intervalFactor: 4 + legendFormat: load 5m + refId: B + step: 20 + target: '' + - expr: sum(node_load15) + intervalFactor: 4 + legendFormat: load 15m + refId: C + step: 20 + target: '' + thresholds: [] + timeFrom: + timeShift: + title: System load + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: percentunit + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + - collapse: false + height: 250px + panels: + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: {} + id: 4 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: node_memory_SwapFree{instance="172.17.0.1:9100",job="prometheus"} + yaxis: 2 + spaceLength: 10 + span: 9 + stack: true + steppedLine: false + targets: + - expr: sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) + - sum(node_memory_Cached) + intervalFactor: 2 + legendFormat: memory usage + metric: memo + refId: A + step: 10 + target: '' + - expr: sum(node_memory_Buffers) + interval: '' + intervalFactor: 2 + legendFormat: memory buffers + metric: memo + refId: B + step: 10 + target: '' + - 
expr: sum(node_memory_Cached) + interval: '' + intervalFactor: 2 + legendFormat: memory cached + metric: memo + refId: C + step: 10 + target: '' + - expr: sum(node_memory_MemFree) + interval: '' + intervalFactor: 2 + legendFormat: memory free + metric: memo + refId: D + step: 10 + target: '' + thresholds: [] + timeFrom: + timeShift: + title: Memory usage + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + editable: true + error: false + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 5 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) + - sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100" + intervalFactor: 2 + metric: '' + refId: A + step: 60 + target: '' + thresholds: 80, 90 + title: Memory usage + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + - collapse: false + height: 246 + panels: + - 
alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: {} + id: 6 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: read + yaxis: 1 + - alias: '{instance="172.17.0.1:9100"}' + yaxis: 2 + - alias: io time + yaxis: 2 + spaceLength: 10 + span: 9 + stack: false + steppedLine: false + targets: + - expr: sum(rate(node_disk_bytes_read[5m])) + hide: false + intervalFactor: 4 + legendFormat: read + refId: A + step: 20 + target: '' + - expr: sum(rate(node_disk_bytes_written[5m])) + intervalFactor: 4 + legendFormat: written + refId: B + step: 20 + - expr: sum(rate(node_disk_io_time_ms[5m])) + intervalFactor: 4 + legendFormat: io time + refId: C + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: Disk I/O + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: ms + label: + logBase: 1 + max: + min: + show: true + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + editable: true + error: false + format: percentunit + gauge: + maxValue: 1 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 12 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 
'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: (sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"})) + / sum(node_filesystem_size{device!="rootfs"}) + intervalFactor: 2 + refId: A + step: 60 + target: '' + thresholds: 0.75, 0.9 + title: Disk space usage + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + - collapse: false + height: 250px + panels: + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: {} + id: 8 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: 'transmitted ' + yaxis: 2 + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(rate(node_network_receive_bytes{device!~"lo"}[5m])) + hide: false + intervalFactor: 2 + legendFormat: '' + refId: A + step: 10 + target: '' + thresholds: [] + timeFrom: + timeShift: + title: Network received + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: {} + id: 10 + legend: + avg: false + current: false + max: false + min: false + show: true + total: 
false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: 'transmitted ' + yaxis: 2 + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum(rate(node_network_transmit_bytes{device!~"lo"}[5m])) + hide: false + intervalFactor: 2 + legendFormat: '' + refId: B + step: 10 + target: '' + thresholds: [] + timeFrom: + timeShift: + title: Network transmitted + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: bytes + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + - collapse: false + height: 276 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + fill: 1 + id: 11 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 9 + stack: false + steppedLine: false + targets: + - expr: sum(kube_pod_info) + format: time_series + intervalFactor: 2 + legendFormat: Current number of Pods + refId: A + step: 10 + - expr: sum(kube_node_status_capacity_pods) + format: time_series + intervalFactor: 2 + legendFormat: Maximum capacity of pods + refId: B + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Cluster Pod Utilization + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - 
format: short + label: + logBase: 1 + max: + min: + show: true + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + editable: true + error: false + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 7 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: 100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods) + * 100 + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 60 + target: '' + thresholds: '80,90' + title: Pod Utilization + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + schemaVersion: 14 + style: dark + tags: [] + templating: + list: [] + time: + from: now-1h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + timezone: browser + title: Kubernetes Capacity Planning + version: 4 + inputs: + - name: prometheus + pluginId: prometheus + type: datasource + value: prometheus + overwrite: true + kubernetes_cluster_health: + __inputs: + - name: prometheus + label: prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus + 
__requires: + - type: grafana + id: grafana + name: Grafana + version: 4.4.1 + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + - type: panel + id: singlestat + name: Singlestat + version: '' + annotations: + list: [] + editable: true + gnetId: + graphTooltip: 0 + hideControls: false + id: + links: [] + rows: + - collapse: false + height: 254 + panels: + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 1 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: sum(up{job=~"kube-apiserver|kube-scheduler|kube-controller-manager"} == + 0) + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '1,3' + title: Control Plane Components Down + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: Everything UP and healthy + value: 'null' + - op: "=" + text: '' + value: '' + valueName: avg + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + decimals: + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 2 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + 
maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: sum(ALERTS{alertstate="firing",alertname!="DeadMansSwitch"}) + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '3,5' + title: Alerts Firing + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: '0' + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + decimals: + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 3 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: sum(ALERTS{alertstate="pending",alertname!="DeadMansSwitch"}) + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '3,5' + title: Alerts Pending + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: '0' + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + decimals: + format: none + gauge: + maxValue: 100 + minValue: 0 + show: 
false + thresholdLabels: false + thresholdMarkers: true + id: 4 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: count(increase(kube_pod_container_status_restarts[1h]) > 5) + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '1,3' + title: Crashlooping Pods + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: '0' + value: 'null' + valueName: current + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + - collapse: false + height: 250 + panels: + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + decimals: + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 5 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: sum(kube_node_status_condition{condition="Ready",status!="true"}) + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '1,3' + title: Node Not Ready + type: singlestat + 
valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + decimals: + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 6 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: sum(kube_node_status_condition{condition="DiskPressure",status="true"}) + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '1,3' + title: Node Disk Pressure + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + decimals: + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 7 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: 
sum(kube_node_status_condition{condition="MemoryPressure",status="true"}) + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '1,3' + title: Node Memory Pressure + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + decimals: + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 8 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: sum(kube_node_spec_unschedulable) + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '1,3' + title: Nodes Unschedulable + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + schemaVersion: 14 + style: dark + tags: [] + templating: + list: [] + time: + from: now-6h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + timezone: '' + title: Kubernetes Cluster Health + version: 9 + inputs: + - name: prometheus + pluginId: prometheus + type: datasource + value: prometheus + overwrite: true + kubernetes_cluster_status: + 
__inputs: + - name: prometheus + label: prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: grafana + id: grafana + name: Grafana + version: 4.4.1 + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + - type: panel + id: singlestat + name: Singlestat + version: '' + annotations: + list: [] + editable: true + gnetId: + graphTooltip: 0 + hideControls: false + id: + links: [] + rows: + - collapse: false + height: 129 + panels: + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 5 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 6 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: sum(up{job=~"apiserver|kube-scheduler|kube-controller-manager"} == 0) + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '1,3' + title: Control Plane UP + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: UP + value: 'null' + valueName: total + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 6 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + 
value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 6 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: sum(ALERTS{alertstate="firing",alertname!="DeadMansSwitch"}) + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '3,5' + title: Alerts Firing + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: '0' + value: 'null' + valueName: current + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Cluster Health + titleSize: h6 + - collapse: false + height: 168 + panels: + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + decimals: + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 1 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: (sum(up{job="apiserver"} == 1) / count(up{job="apiserver"})) * 100 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '50,80' + title: API Servers UP + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: 
false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + decimals: + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 2 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: (sum(up{job="kube-controller-manager-discovery"} == 1) / count(up{job="kube-controller-manager-discovery"})) + * 100 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '50,80' + title: Controller Managers UP + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + decimals: + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 3 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: (sum(up{job="kube-scheduler-discovery"} == 1) / 
count(up{job="kube-scheduler-discovery"})) + * 100 + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '50,80' + title: Schedulers UP + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + - cacheTimeout: + colorBackground: false + colorValue: true + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + decimals: + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + hideTimeOverride: false + id: 4 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: count(increase(kube_pod_container_status_restarts{namespace=~"kube-system|tectonic-system"}[1h]) + > 5) + format: time_series + interval: '' + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '1,3' + title: Crashlooping Control Plane Pods + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: '0' + value: 'null' + valueName: current + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Control Plane Status + titleSize: h6 + - collapse: false + height: 158 + panels: + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 8 + interval: + links: [] + 
mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: sum(100 - (avg by (instance) (rate(node_cpu{job="node-exporter",mode="idle"}[5m])) + * 100)) / count(node_cpu{job="node-exporter",mode="idle"}) + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '80,90' + title: CPU Utilization + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 7 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) + - sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100" + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '80,90' + title: Memory Utilization + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + 
valueName: avg + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 9 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: (sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"})) + / sum(node_filesystem_size{device!="rootfs"}) * 100 + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '80,90' + title: Filesystem Utilization + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 10 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: 100 -
(sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods) + * 100 + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '80,90' + title: Pod Utilization + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Capacity Planning + titleSize: h6 + schemaVersion: 14 + style: dark + tags: [] + templating: + list: [] + time: + from: now-6h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + timezone: '' + title: Kubernetes Cluster Status + version: 3 + inputs: + - name: prometheus + pluginId: prometheus + type: datasource + value: prometheus + overwrite: true + kubernetes_control_plane: + __inputs: + - name: prometheus + label: prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: grafana + id: grafana + name: Grafana + version: 4.4.1 + - type: panel + id: graph + name: Graph + version: '' + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + - type: panel + id: singlestat + name: Singlestat + version: '' + annotations: + list: [] + editable: true + gnetId: + graphTooltip: 0 + hideControls: false + id: + links: [] + rows: + - collapse: false + height: 250px + panels: + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 1 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode:
connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: (sum(up{job="apiserver"} == 1) / count(up{job="apiserver"})) * 100 + format: time_series + intervalFactor: 2 + refId: A + step: 600 + thresholds: '50,80' + title: API Servers UP + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 2 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: (sum(up{job="kube-controller-manager-discovery"} == 1) / count(up{job="kube-controller-manager-discovery"})) + * 100 + format: time_series + intervalFactor: 2 + refId: A + step: 600 + thresholds: '50,80' + title: Controller Managers UP + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(245, 54, 54, 0.9) + - rgba(237, 129, 40, 0.89) + - rgba(50, 172, 45, 0.97) + datasource: prometheus + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false +
thresholdMarkers: true + id: 3 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: (sum(up{job="kube-scheduler-discovery"} == 1) / count(up{job="kube-scheduler-discovery"})) + * 100 + format: time_series + intervalFactor: 2 + refId: A + step: 600 + thresholds: '50,80' + title: Schedulers UP + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 4 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: max(sum by(instance) (rate(apiserver_request_count{code=~"5.."}[5m])) + / sum by(instance) (rate(apiserver_request_count[5m]))) * 100 + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 600 + thresholds: '5,10' + title: API Server Request Error Rate + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: '0' + value: 'null' + valueName: avg +
repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + fill: 1 + id: 7 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 12 + stack: false + steppedLine: false + targets: + - expr: sum by(verb) (rate(apiserver_latency_seconds:quantile[5m]) >= 0) + format: time_series + intervalFactor: 2 + legendFormat: '' + refId: A + step: 30 + thresholds: [] + timeFrom: + timeShift: + title: API Server Request Latency + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + - collapse: false + height: 250 + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + fill: 1 + id: 5 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: cluster:scheduler_e2e_scheduling_latency_seconds:quantile + format: time_series + intervalFactor: 2 + refId: A + step: 60 + thresholds: [] + timeFrom: + timeShift: + title: End to end scheduling latency + tooltip: + shared: true + sort: 0 + value_type: individual + type: 
graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: dtdurations + label: + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + fill: 1 + id: 6 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: 'null' + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: sum by(instance) (rate(apiserver_request_count{code!~"2.."}[5m])) + format: time_series + intervalFactor: 2 + legendFormat: Error Rate + refId: A + step: 60 + - expr: sum by(instance) (rate(apiserver_request_count[5m])) + format: time_series + intervalFactor: 2 + legendFormat: Request Rate + refId: B + step: 60 + thresholds: [] + timeFrom: + timeShift: + title: API Server Request Rates + tooltip: + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: Dashboard Row + titleSize: h6 + schemaVersion: 14 + style: dark + tags: [] + templating: + list: [] + time: + from: now-6h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + timezone: '' + title: Kubernetes Control Plane Status + version: 3 + inputs: + - name: prometheus + pluginId: prometheus + type: datasource + value: prometheus + overwrite: true + nodes: + __inputs: + - name: prometheus + label: prometheus + 
description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: grafana + id: grafana + name: Grafana + version: 4.4.1 + - type: panel + id: graph + name: Graph + version: '' + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + - type: panel + id: singlestat + name: Singlestat + version: '' + annotations: + list: [] + description: Dashboard to get an overview of one server + editable: true + gnetId: 22 + graphTooltip: 0 + hideControls: false + id: + links: [] + refresh: false + rows: + - collapse: false + height: 250px + panels: + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: {} + id: 3 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: 100 - (avg by (cpu) (irate(node_cpu{mode="idle", instance="$server"}[5m])) + * 100) + hide: false + intervalFactor: 10 + legendFormat: "{{cpu}}" + refId: A + step: 50 + thresholds: [] + timeFrom: + timeShift: + title: Idle cpu + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: percent + label: cpu usage + logBase: 1 + max: 100 + min: 0 + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: {} + id: 9 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + 
nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: node_load1{instance="$server"} + intervalFactor: 4 + legendFormat: load 1m + refId: A + step: 20 + target: '' + - expr: node_load5{instance="$server"} + intervalFactor: 4 + legendFormat: load 5m + refId: B + step: 20 + target: '' + - expr: node_load15{instance="$server"} + intervalFactor: 4 + legendFormat: load 15m + refId: C + step: 20 + target: '' + thresholds: [] + timeFrom: + timeShift: + title: System load + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: percentunit + label: + logBase: 1 + max: + min: + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + - collapse: false + height: 250px + panels: + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: {} + id: 4 + legend: + alignAsTable: false + avg: false + current: false + hideEmpty: false + hideZero: false + max: false + min: false + rightSide: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: node_memory_SwapFree{instance="172.17.0.1:9100",job="prometheus"} + yaxis: 2 + spaceLength: 10 + span: 9 + stack: true + steppedLine: false + targets: + - expr: node_memory_MemTotal{instance="$server"} - node_memory_MemFree{instance="$server"} + - node_memory_Buffers{instance="$server"} - node_memory_Cached{instance="$server"} + hide: false + interval: '' + intervalFactor: 2 + legendFormat: memory 
used + metric: '' + refId: C + step: 10 + - expr: node_memory_Buffers{instance="$server"} + interval: '' + intervalFactor: 2 + legendFormat: memory buffers + metric: '' + refId: E + step: 10 + - expr: node_memory_Cached{instance="$server"} + intervalFactor: 2 + legendFormat: memory cached + metric: '' + refId: F + step: 10 + - expr: node_memory_MemFree{instance="$server"} + intervalFactor: 2 + legendFormat: memory free + metric: '' + refId: D + step: 10 + thresholds: [] + timeFrom: + timeShift: + title: Memory usage + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: individual + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: '0' + show: true + - format: short + label: + logBase: 1 + max: + min: + show: true + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + editable: true + error: false + format: percent + gauge: + maxValue: 100 + minValue: 0 + show: true + thresholdLabels: false + thresholdMarkers: true + id: 5 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: ((node_memory_MemTotal{instance="$server"} - node_memory_MemFree{instance="$server"} - + node_memory_Buffers{instance="$server"} - node_memory_Cached{instance="$server"}) + / node_memory_MemTotal{instance="$server"}) * 100 + intervalFactor: 2 + refId: A + step: 60 + target: '' + thresholds: 80, 90 + title: Memory usage + type: singlestat + 
valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: avg + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + - collapse: false + height: 250px + panels: + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: {} + id: 6 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: read + yaxis: 1 + - alias: '{instance="172.17.0.1:9100"}' + yaxis: 2 + - alias: io time + yaxis: 2 + spaceLength: 10 + span: 9 + stack: false + steppedLine: false + targets: + - expr: sum by (instance) (rate(node_disk_bytes_read{instance="$server"}[2m])) + hide: false + intervalFactor: 4 + legendFormat: read + refId: A + step: 20 + target: '' + - expr: sum by (instance) (rate(node_disk_bytes_written{instance="$server"}[2m])) + intervalFactor: 4 + legendFormat: written + refId: B + step: 20 + - expr: sum by (instance) (rate(node_disk_io_time_ms{instance="$server"}[2m])) + intervalFactor: 4 + legendFormat: io time + refId: C + step: 20 + thresholds: [] + timeFrom: + timeShift: + title: Disk I/O + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: ms + label: + logBase: 1 + max: + min: + show: true + - cacheTimeout: + colorBackground: false + colorValue: false + colors: + - rgba(50, 172, 45, 0.97) + - rgba(237, 129, 40, 0.89) + - rgba(245, 54, 54, 0.9) + datasource: prometheus + editable: true + error: false + format: percentunit + gauge: + maxValue: 1 + minValue: 0 + show: true + thresholdLabels: false + 
thresholdMarkers: true + id: 7 + interval: + links: [] + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 3 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - expr: (sum(node_filesystem_size{device!="rootfs",instance="$server"}) - sum(node_filesystem_free{device!="rootfs",instance="$server"})) + / sum(node_filesystem_size{device!="rootfs",instance="$server"}) + intervalFactor: 2 + refId: A + step: 60 + target: '' + thresholds: 0.75, 0.9 + title: Disk space usage + type: singlestat + valueFontSize: 80% + valueMaps: + - op: "=" + text: N/A + value: 'null' + valueName: current + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + - collapse: false + height: 250px + panels: + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: {} + id: 8 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: 'transmitted ' + yaxis: 2 + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: rate(node_network_receive_bytes{instance="$server",device!~"lo"}[5m]) + hide: false + intervalFactor: 2 + legendFormat: "{{device}}" + refId: A + step: 10 + target: '' + thresholds: [] + timeFrom: + timeShift: + title: Network received + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + 
show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - alerting: {} + aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 1 + grid: {} + id: 10 + legend: + avg: false + current: false + max: false + min: false + show: true + total: false + values: false + lines: true + linewidth: 2 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: + - alias: 'transmitted ' + yaxis: 2 + spaceLength: 10 + span: 6 + stack: false + steppedLine: false + targets: + - expr: rate(node_network_transmit_bytes{instance="$server",device!~"lo"}[5m]) + hide: false + intervalFactor: 2 + legendFormat: "{{device}}" + refId: B + step: 10 + target: '' + thresholds: [] + timeFrom: + timeShift: + title: Network transmitted + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: bytes + label: + logBase: 1 + max: + min: + show: true + - format: bytes + label: + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: false + title: New row + titleSize: h6 + schemaVersion: 14 + style: dark + tags: [] + templating: + list: + - allValue: + current: {} + datasource: prometheus + hide: 0 + includeAll: false + label: + multi: false + name: server + options: [] + query: label_values(node_boot_time, instance) + refresh: 1 + regex: '' + sort: 0 + tagValuesQuery: '' + tags: [] + tagsQuery: '' + type: query + useTags: false + time: + from: now-1h + to: now + timepicker: + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + - 2h + - 1d + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + timezone: browser + title: Nodes + 
version: 2 + inputs: + - name: prometheus + pluginId: prometheus + type: datasource + value: prometheus + overwrite: true + openstack_control_plane: + __inputs: + - name: prometheus + label: prometheus + description: '' + type: datasource + pluginId: prometheus + pluginName: Prometheus + __requires: + - type: grafana + id: grafana + name: Grafana + version: 4.5.2 + - type: panel + id: graph + name: Graph + version: '' + - type: datasource + id: prometheus + name: Prometheus + version: 1.0.0 + - type: panel + id: singlestat + name: Singlestat + version: '' + - type: panel + id: text + name: Text + version: '' + annotations: + list: [] + editable: true + gnetId: + graphTooltip: 1 + hideControls: false + id: + links: [] + refresh: 1m + rows: + - collapse: false + height: 250px + panels: + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(200, 54, 35, 0.88) + - rgba(118, 245, 40, 0.73) + - rgba(225, 177, 40, 0.59) + datasource: prometheus + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 24 + interval: "> 60s" + links: + - dashboard: Keystone + name: Drilldown dashboard + title: Keystone + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + dsType: influxdb + expr: check_keystone_api{job="openstack-metrics", region="$region"} + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + 
interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + thresholds: '1,2' + title: Keystone + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: CRIT + value: '0' + - op: "=" + text: OK + value: '1' + - op: "=" + text: UNKW + value: '2' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(200, 54, 35, 0.88) + - rgba(118, 245, 40, 0.73) + - rgba(225, 177, 40, 0.59) + datasource: prometheus + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 23 + interval: "> 60s" + links: + - dashboard: Glance + name: Drilldown dashboard + title: Glance + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + dsType: influxdb + expr: check_glance_api{job="openstack-metrics", region="$region"} + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + thresholds: '1,2' + title: Glance + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: CRIT + value: '0' + - op: "=" + text: OK + value: '1' + - op: "=" + text: UNKW + value: '2' + valueName: current + - cacheTimeout: + 
colorBackground: true + colorValue: false + colors: + - rgba(202, 58, 40, 0.86) + - rgba(118, 245, 40, 0.73) + - rgba(225, 177, 40, 0.59) + datasource: prometheus + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 22 + interval: "> 60s" + links: + - dashboard: Heat + name: Drilldown dashboard + title: Heat + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + dsType: influxdb + expr: check_heat_api{job="openstack-metrics", region="$region"} + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + thresholds: '1,2' + title: Heat + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: CRIT + value: '0' + - op: "=" + text: OK + value: '1' + - op: "=" + text: UNKW + value: '2' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(200, 54, 35, 0.88) + - rgba(118, 245, 40, 0.73) + - rgba(225, 177, 40, 0.59) + datasource: prometheus + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 21 + interval: "> 60s" + links: + - dashboard: Neutron + name: Drilldown dashboard + title: Neutron + type: 
dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + dsType: influxdb + expr: check_neutron_api{job="openstack-metrics", region="$region"} + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + thresholds: '1,2' + title: Neutron + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: CRIT + value: '0' + - op: "=" + text: OK + value: '1' + - op: "=" + text: UNKW + value: '2' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(208, 53, 34, 0.82) + - rgba(118, 245, 40, 0.73) + - rgba(225, 177, 40, 0.59) + datasource: prometheus + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 5 + interval: "> 60s" + links: + - dashboard: Nova + name: Drilldown dashboard + title: Nova + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + 
tableColumn: '' + targets: + - column: value + condition: '' + dsType: influxdb + expr: check_nova_api{job="openstack-metrics", region="$region"} + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + thresholds: '1,2' + title: Nova + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: CRIT + value: '0' + - op: "=" + text: OK + value: '1' + - op: "=" + text: UNKW + value: '2' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(200, 54, 35, 0.88) + - rgba(118, 245, 40, 0.73) + - rgba(225, 177, 40, 0.59) + datasource: prometheus + editable: true + error: false + format: none + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 25 + interval: "> 60s" + links: + - dashboard: Ceph + name: Drilldown dashboard + title: Ceph + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + condition: '' + dsType: influxdb + expr: check_swift_api{job="openstack-metrics", region="$region"} + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + groupby_field: '' + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: 
time_series + step: 120 + thresholds: '1,2' + title: Ceph + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: CRIT + value: '0' + - op: "=" + text: OK + value: '1' + - op: "=" + text: UNKW + value: '2' + valueName: current + - content: '' + editable: true + error: false + id: 20 + links: [] + mode: markdown + span: 1 + style: {} + title: '' + type: text + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(71, 212, 59, 0.4) + - rgba(245, 150, 40, 0.73) + - rgba(225, 40, 40, 0.59) + datasource: prometheus + editable: true + error: false + format: short + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 16 + interval: ">60s" + links: + - dashboard: RabbitMQ + name: Drilldown dashboard + title: RabbitMQ + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + dsType: influxdb + expr: '' + fill: '' + format: time_series + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + thresholds: '' + title: RabbitMQ + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: OKAY + value: '0' + - op: "=" + text: WARN + value: '1' + - op: "=" + text: UNKW + value: '2' + - op: "=" + text: CRIT + value: '3' + - op: "=" + text: DOWN + value: '4' + valueName: current + - cacheTimeout: + 
colorBackground: true + colorValue: false + colors: + - rgba(71, 212, 59, 0.4) + - rgba(245, 150, 40, 0.73) + - rgba(225, 40, 40, 0.59) + datasource: prometheus + editable: true + error: false + format: short + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 15 + interval: ">60s" + links: + - dashboard: MySQL + name: Drilldown dashboard + title: MySQL + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + dsType: influxdb + fill: '' + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + interval: '' + policy: default + rawQuery: false + refId: A + resultFormat: time_series + thresholds: '' + title: MySQL + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: OKAY + value: '0' + - op: "=" + text: WARN + value: '1' + - op: "=" + text: UNKW + value: '2' + - op: "=" + text: CRIT + value: '3' + - op: "=" + text: DOWN + value: '4' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(71, 212, 59, 0.4) + - rgba(245, 150, 40, 0.73) + - rgba(225, 40, 40, 0.59) + datasource: prometheus + editable: true + error: false + format: short + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 18 + interval: ">60s" + links: + - dashUri: db/apache + dashboard: Apache + name: Drilldown dashboard + title: Apache + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to 
text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + dsType: influxdb + fill: '' + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + interval: '' + policy: default + rawQuery: false + refId: A + resultFormat: time_series + thresholds: '' + title: Apache + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: OKAY + value: '0' + - op: "=" + text: WARN + value: '1' + - op: "=" + text: UNKW + value: '2' + - op: "=" + text: CRIT + value: '3' + - op: "=" + text: DOWN + value: '4' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(71, 212, 59, 0.4) + - rgba(245, 150, 40, 0.73) + - rgba(225, 40, 40, 0.59) + datasource: prometheus + editable: true + error: false + format: short + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 10 + interval: ">60s" + links: + - dashUri: db/haproxy + dashboard: HAProxy + name: Drilldown dashboard + title: HAProxy + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + dsType: influxdb + fill: '' + function: last + groupBy: + - params: + - 
"$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + interval: '' + policy: default + rawQuery: false + refId: A + resultFormat: time_series + thresholds: '' + title: haproxy + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: OKAY + value: '0' + - op: "=" + text: WARN + value: '1' + - op: "=" + text: UNKW + value: '2' + - op: "=" + text: CRIT + value: '3' + - op: "=" + text: DOWN + value: '4' + valueName: current + - cacheTimeout: + colorBackground: true + colorValue: false + colors: + - rgba(71, 212, 59, 0.4) + - rgba(245, 150, 40, 0.73) + - rgba(225, 40, 40, 0.59) + datasource: prometheus + editable: true + error: false + format: short + gauge: + maxValue: 100 + minValue: 0 + show: false + thresholdLabels: false + thresholdMarkers: true + id: 17 + interval: ">60s" + links: + - dashUri: db/memcached + dashboard: Memcached + name: Drilldown dashboard + title: Memcached + type: dashboard + mappingType: 1 + mappingTypes: + - name: value to text + value: 1 + - name: range to text + value: 2 + maxDataPoints: 100 + nullPointMode: connected + nullText: + postfix: '' + postfixFontSize: 50% + prefix: '' + prefixFontSize: 50% + rangeMaps: + - from: 'null' + text: N/A + to: 'null' + span: 1 + sparkline: + fillColor: rgba(31, 118, 189, 0.18) + full: false + lineColor: rgb(31, 120, 193) + show: false + tableColumn: '' + targets: + - column: value + dsType: influxdb + fill: '' + function: last + groupBy: + - params: + - "$interval" + type: time + - params: + - 'null' + type: fill + groupByTags: [] + interval: '' + policy: default + rawQuery: false + refId: A + resultFormat: time_series + thresholds: '' + title: memcached + type: singlestat + valueFontSize: 50% + valueMaps: + - op: "=" + text: no data + value: 'null' + - op: "=" + text: OKAY + value: '0' + - op: "=" + text: WARN + value: '1' + - op: "=" + text: UNKW + value: '2' + - op: "=" + text: CRIT + value: '3' + - op: "=" + text: 
DOWN + value: '4' + valueName: current + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: OpenStack Services + titleSize: h6 + - collapse: false + height: 250px + panels: + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 0 + grid: {} + id: 11 + interval: "> 60s" + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - alias: free + column: value + dsType: influxdb + expr: total_used_vcpus{job="openstack-metrics", region="$region"} + total_free_vcpus{job="openstack-metrics", + region="$region"} + format: time_series + function: min + groupBy: + - params: + - "$interval" + type: time + - params: + - '0' + type: fill + groupByTags: [] + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + - alias: used + column: value + dsType: influxdb + expr: total_used_vcpus{job="openstack-metrics", region="$region"} + format: time_series + function: max + groupBy: + - params: + - "$interval" + type: time + - params: + - '0' + type: fill + groupByTags: [] + intervalFactor: 2 + policy: default + rawQuery: false + refId: B + resultFormat: time_series + step: 120 + thresholds: [] + timeFrom: + timeShift: + title: VCPUs (total vs used) + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: short + logBase: 1 + max: + min: 0 + show: true + - format: short + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + 
fill: 0 + grid: {} + id: 12 + interval: "> 60s" + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - alias: free + column: value + dsType: influxdb + expr: total_used_ram_MB{job="openstack-metrics", region="$region"} + total_free_ram_MB{job="openstack-metrics", + region="$region"} + format: time_series + function: mean + groupBy: + - params: + - "$interval" + type: time + - params: + - '0' + type: fill + groupByTags: [] + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + - alias: used + column: value + dsType: influxdb + expr: total_used_ram_MB{job="openstack-metrics", region="$region"} + format: time_series + function: mean + groupBy: + - params: + - "$interval" + type: time + - params: + - '0' + type: fill + groupByTags: [] + interval: '' + intervalFactor: 2 + policy: default + rawQuery: false + refId: B + resultFormat: time_series + step: 120 + thresholds: [] + timeFrom: + timeShift: + title: RAM (total vs used) + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: mbytes + label: '' + logBase: 1 + max: + min: 0 + show: true + - format: short + logBase: 1 + max: + min: + show: true + - aliasColors: {} + bars: false + dashLength: 10 + dashes: false + datasource: prometheus + editable: true + error: false + fill: 0 + grid: {} + id: 13 + interval: "> 60s" + legend: + avg: false + current: false + max: false + min: false + show: false + total: false + values: false + lines: true + linewidth: 1 + links: [] + nullPointMode: connected + percentage: false + pointradius: 5 + points: false + 
renderer: flot + seriesOverrides: [] + spaceLength: 10 + span: 4 + stack: false + steppedLine: false + targets: + - alias: free + column: value + dsType: influxdb + expr: total_used_disk_GB{job="openstack-metrics", region="$region"} + total_free_disk_GB{job="openstack-metrics", + region="$region"} + format: time_series + function: mean + groupBy: + - params: + - "$interval" + type: time + - params: + - '0' + type: fill + groupByTags: [] + intervalFactor: 2 + policy: default + rawQuery: false + refId: A + resultFormat: time_series + step: 120 + - alias: used + column: value + dsType: influxdb + expr: total_used_disk_GB{job="openstack-metrics", region="$region"} + format: time_series + function: mean + groupBy: + - params: + - "$interval" + type: time + - params: + - '0' + type: fill + groupByTags: [] + intervalFactor: 2 + policy: default + rawQuery: false + refId: B + resultFormat: time_series + step: 120 + thresholds: [] + timeFrom: + timeShift: + title: Disk (used vs total) + tooltip: + msResolution: false + shared: true + sort: 0 + value_type: cumulative + type: graph + xaxis: + buckets: + mode: time + name: + show: true + values: [] + yaxes: + - format: gbytes + logBase: 1 + max: + min: 0 + show: true + - format: short + logBase: 1 + max: + min: + show: true + repeat: + repeatIteration: + repeatRowId: + showTitle: true + title: Virtual compute resources + titleSize: h6 + schemaVersion: 14 + style: dark + tags: [] + templating: + enable: true + list: + - allValue: + current: {} + datasource: prometheus + hide: 0 + includeAll: false + label: + multi: false + name: region + options: [] + query: label_values(openstack_exporter_cache_refresh_duration_seconds, region) + refresh: 1 + regex: '' + sort: 0 + tagValuesQuery: '' + tags: [] + tagsQuery: '' + type: query + useTags: false + time: + from: now-1h + to: now + timepicker: + collapse: false + enable: true + notice: false + now: true + refresh_intervals: + - 5s + - 10s + - 30s + - 1m + - 5m + - 15m + - 30m + - 1h + 
- 2h + - 1d + status: Stable + time_options: + - 5m + - 15m + - 1h + - 6h + - 12h + - 24h + - 2d + - 7d + - 30d + type: timepicker + timezone: browser + title: Openstack Main1 + version: 2 diff --git a/tools/gate/chart-deploys/default.yaml b/tools/gate/chart-deploys/default.yaml index 717e6a114..911eaccb8 100644 --- a/tools/gate/chart-deploys/default.yaml +++ b/tools/gate/chart-deploys/default.yaml @@ -27,6 +27,7 @@ chart_groups: - prometheus_node_exporter - prometheus_kube_state_metrics - prometheus_alertmanager + - grafana - name: openstack_infra_logging timeout: 600 @@ -130,6 +131,20 @@ charts: ingress: public: false + grafana: + chart_name: grafana + release: prometheus-grafana + namespace: openstack + test: + enabled: false + timeout: 300 + output: false + values: + network: + grafana: + ingress: + public: false + openstack_elasticsearch: chart_name: elasticsearch release: elasticsearch