openstack-helm-infra/grafana/values.yaml
Jean-Philippe Evrard 5f5e988fb3 Point to OSH-images images
We now have a process for OSH-images image building,
using Zuul, so we should point the images by default to those
images, instead of pointing to stale images.

Without this, the osh-images build process is not used at all
(and is completely opaque to deployers), and updating the
osh-images process or patching its code has no impact on OSH.

This should fix it.

Change-Id: Ic00bd98c151669dc2485cd88e0e8c2ab05445959
2019-05-17 08:17:32 +00:00

16760 lines
429 KiB
YAML

# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Default values for grafana
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# Container images referenced by the chart, keyed by the role each one plays.
images:
  tags:
    grafana: docker.io/grafana/grafana:5.0.0
    dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.3.1
    db_init: docker.io/openstackhelm/heat:newton-ubuntu_xenial
    grafana_db_session_sync: docker.io/openstackhelm/heat:newton-ubuntu_xenial
    helm_tests: docker.io/openstackhelm/heat:newton-ubuntu_xenial
    image_repo_sync: docker.io/docker:17.07.0
  pull_policy: IfNotPresent
  local_registry:
    active: false
    # NOTE(review): presumably these image keys are skipped when the local
    # registry is active -- confirm against the helm-toolkit image helper.
    exclude:
      - dep_check
      - image_repo_sync
# Node selector key/value pairs, one group per workload type
# (grafana deployment, jobs, tests).
labels:
  grafana:
    node_selector_key: openstack-control-plane
    node_selector_value: enabled
  job:
    node_selector_key: openstack-control-plane
    node_selector_value: enabled
  test:
    node_selector_key: openstack-control-plane
    node_selector_value: enabled
# Pod-level settings: security contexts, anti-affinity, mounts, replica
# count, upgrade lifecycle and resource requests/limits.
pod:
  # One entry per workload; each carries a pod-level and a per-container
  # security context.
  security_context:
    dashboard:
      pod:
        runAsUser: 104
      container:
        grafana:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
    db_init:
      pod:
        runAsUser: 104
      container:
        grafana_db_init_session:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
        grafana_db_init:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
    db_session_sync:
      pod:
        runAsUser: 104
      container:
        grafana_db_session_sync:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
    set_admin_user:
      pod:
        runAsUser: 104
      container:
        grafana_set_admin_password:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
    test:
      pod:
        runAsUser: 104
      container:
        helm_tests:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
  affinity:
    anti:
      type:
        default: preferredDuringSchedulingIgnoredDuringExecution
      topologyKey:
        default: kubernetes.io/hostname
      weight:
        default: 10
  mounts:
    grafana:
      init_container: null
      # Explicit null: no extra mounts are defined for the grafana container.
      grafana: null
  replicas:
    grafana: 1
  lifecycle:
    upgrades:
      deployments:
        revision_history: 3
        pod_replacement_strategy: RollingUpdate
        rolling_update:
          max_unavailable: 1
          max_surge: 3
    termination_grace_period:
      grafana:
        timeout: 600
  resources:
    # NOTE(review): presumably the requests/limits below are only rendered
    # when this flag is true -- confirm against the chart templates.
    enabled: false
    jobs:
      image_repo_sync:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      bootstrap:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      db_init:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      db_init_session:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      grafana_db_session_sync:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      set_admin_user:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      tests:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
    grafana:
      requests:
        memory: "128Mi"
        cpu: "100m"
      limits:
        memory: "1024Mi"
        cpu: "2000m"
# Service endpoints the chart talks to or exposes. Each entry describes
# hosts, optional FQDN overrides, path, scheme and ports for one service.
# NOTE(review): the passwords below are placeholder defaults ("password",
# "changeme"); override them for any real deployment.
endpoints:
  cluster_domain_suffix: cluster.local
  local_image_registry:
    name: docker-registry
    namespace: docker-registry
    hosts:
      default: localhost
      internal: docker-registry
      node: localhost
    host_fqdn_override:
      default: null
    port:
      registry:
        node: 5000
  # Database holding Grafana's own data.
  oslo_db:
    namespace: null
    auth:
      admin:
        username: root
        password: password
      user:
        username: grafana
        password: password
    hosts:
      default: mariadb
    host_fqdn_override:
      default: null
    path: /grafana
    scheme: mysql+pymysql
    port:
      mysql:
        default: 3306
  # Database holding Grafana session data.
  oslo_db_session:
    namespace: null
    auth:
      admin:
        username: root
        password: password
      user:
        username: grafana_session
        password: password
    hosts:
      default: mariadb
    host_fqdn_override:
      default: null
    path: /grafana_session
    scheme: mysql+pymysql
    port:
      mysql:
        default: 3306
  grafana:
    name: grafana
    namespace: null
    auth:
      admin:
        username: admin
        password: password
    hosts:
      default: grafana-dashboard
      public: grafana
    host_fqdn_override:
      default: null
      # NOTE(srwilkers): this chart supports TLS for FQDN-overridden public
      # endpoints using the following format:
      # public:
      #   host: null
      #   tls:
      #     crt: null
      #     key: null
    path:
      default: null
    scheme:
      default: http
    port:
      grafana:
        default: 3000
        public: 80
  # Prometheus endpoint; the key name matches the `monitoring` datasource
  # entry under conf.provisioning.datasources.
  monitoring:
    name: prometheus
    namespace: null
    auth:
      user:
        username: admin
        password: changeme
    hosts:
      default: prom-metrics
      public: prometheus
    host_fqdn_override:
      default: null
    path:
      default: null
    scheme:
      default: http
    port:
      api:
        default: 80
        public: 80
  ldap:
    hosts:
      default: ldap
    auth:
      admin:
        bind_dn: "cn=admin,dc=cluster,dc=local"
        password: password
    host_fqdn_override:
      default: null
    path:
      default: "ou=People,dc=cluster,dc=local"
    scheme:
      default: ldap
    port:
      ldap:
        default: 389
# Jobs and services each component depends on. Service entries name an
# endpoint type and a key from the `endpoints` tree.
# NOTE(review): presumably consumed by the dep_check entrypoint image to
# gate startup -- confirm against the helm-toolkit dependency helpers.
dependencies:
  dynamic:
    common:
      local_image_registry:
        jobs:
          - grafana-image-repo-sync
        services:
          - endpoint: node
            service: local_image_registry
  static:
    db_init:
      services:
        - endpoint: internal
          service: oslo_db
    db_init_session:
      services:
        - endpoint: internal
          service: oslo_db
    db_session_sync:
      jobs:
        - grafana-db-init-session
      services:
        - endpoint: internal
          service: oslo_db
    grafana:
      jobs:
        - grafana-db-init
        - grafana-db-session-sync
        - grafana-set-admin-user
      services:
        - endpoint: internal
          service: oslo_db
    image_repo_sync:
      services:
        - endpoint: internal
          service: local_image_registry
    set_admin_user:
      jobs:
        - grafana-db-init
      services:
        - endpoint: internal
          service: oslo_db
    tests:
      services:
        - endpoint: internal
          service: grafana
# Exposure of the Grafana service: optional NodePort plus ingress settings.
network:
  grafana:
    node_port:
      enabled: false
      port: 30902
    ingress:
      public: true
      classes:
        namespace: "nginx"
        cluster: "nginx-cluster"
      annotations:
        nginx.ingress.kubernetes.io/rewrite-target: /
# Ingress/egress rule lists for the Grafana network policy. Each list holds
# a single empty rule; the policy manifest itself is disabled by default
# (manifests.network_policy is false).
network_policy:
  grafana:
    ingress:
      - {}
    egress:
      - {}
# Names of the Kubernetes secrets holding database, TLS and Prometheus
# credentials.
secrets:
  oslo_db:
    admin: grafana-db-admin
    user: grafana-db-user
  oslo_db_session:
    admin: grafana-session-db-admin
    user: grafana-session-db-user
  tls:
    grafana:
      grafana:
        public: grafana-tls-public
  prometheus:
    user: prometheus-user-creds
# Per-manifest on/off switches.
# NOTE(review): presumably each flag gates rendering of the like-named
# template -- confirm against the chart's templates directory.
manifests:
  configmap_bin: true
  configmap_dashboards: true
  configmap_etc: true
  deployment: true
  ingress: true
  helm_tests: true
  job_db_init: true
  job_db_init_session: true
  job_db_session_sync: true
  job_image_repo_sync: true
  job_set_admin_user: true
  network_policy: false
  secret_db: true
  secret_db_session: true
  secret_admin_creds: true
  secret_ingress_tls: true
  secret_prom_creds: true
  service: true
  service_ingress: true
# Application configuration: LDAP settings and template, Grafana
# provisioning, and the grafana configuration sections.
conf:
  ldap:
    # Values referenced by the template below via .Values.conf.ldap.config.
    config:
      base_dns:
        search: "dc=cluster,dc=local"
        group_search: "ou=Groups,dc=cluster,dc=local"
      filters:
        search: "(uid=%s)"
        group_search: "(&(objectclass=posixGroup)(memberUID=uid=%s,ou=People,dc=cluster,dc=local))"
    # LDAP configuration in TOML, containing Helm template expressions that
    # read the ldap endpoint and the config values above.
    template: |
      verbose_logging = false
      [[servers]]
      host = "{{ tuple "ldap" "internal" . | include "helm-toolkit.endpoints.hostname_fqdn_endpoint_lookup" }}"
      port = {{ tuple "ldap" "internal" "ldap" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
      use_ssl = false
      start_tls = false
      ssl_skip_verify = false
      bind_dn = "{{ .Values.endpoints.ldap.auth.admin.bind_dn }}"
      bind_password = '{{ .Values.endpoints.ldap.auth.admin.password }}'
      search_filter = "{{ .Values.conf.ldap.config.filters.search }}"
      search_base_dns = ["{{ .Values.conf.ldap.config.base_dns.search }}"]
      group_search_filter = "{{ .Values.conf.ldap.config.filters.group_search }}"
      group_search_base_dns = ["{{ .Values.conf.ldap.config.base_dns.group_search }}"]
      [servers.attributes]
      username = "uid"
      surname = "sn"
      member_of = "cn"
      email = "mail"
      [[servers.group_mappings]]
      group_dn = "{{.Values.endpoints.ldap.auth.admin.bind_dn }}"
      org_role = "Admin"
      [[servers.group_mappings]]
      group_dn = "*"
      org_role = "Viewer"
  provisioning:
    dashboards:
      apiVersion: 1
      providers:
        - name: 'osh-infra-dashboards'
          orgId: 1
          folder: ''
          type: file
          disableDeletion: false
          editable: false
          options:
            path: /var/lib/grafana/dashboards
    datasources:
      # NOTE(srwilkers): The top key for each datasource (eg: monitoring) must
      # map to the key name for the datasource's endpoint entry in the endpoints
      # tree
      monitoring:
        name: prometheus
        type: prometheus
        access: proxy
        orgId: 1
        editable: true
        basicAuth: true
  # NOTE(review): each child key looks like one section of Grafana's
  # ini-style configuration -- confirm against the configmap template.
  grafana:
    auth.ldap:
      enabled: true
      config_file: /etc/grafana/ldap.toml
    paths:
      data: /var/lib/grafana/data
      plugins: /var/lib/grafana/plugins
      provisioning: /var/lib/grafana/provisioning
    server:
      protocol: http
      http_port: 3000
    database:
      type: mysql
    session:
      provider: mysql
      provider_config: null
      cookie_name: grafana_sess
      cookie_secure: false
      session_life_time: 86400
    security:
      # Admin credentials are left as environment-variable placeholders
      # rather than literal values.
      admin_user: ${GF_SECURITY_ADMIN_USER}
      admin_password: ${GF_SECURITY_ADMIN_PASSWORD}
      cookie_username: grafana_user
      cookie_remember_name: grafana_remember
      login_remember_days: 7
    users:
      allow_sign_up: false
      allow_org_create: false
      auto_assign_org: true
      default_theme: dark
    log:
      mode: console
      level: info
    grafana_net:
      url: https://grafana.net
dashboards:
prometheus:
__inputs:
- name: prometheus
label: Prometheus
description: Prometheus which you want to monitor
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.6.0
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
- type: panel
id: text
name: Text
version: ''
annotations:
list:
- builtIn: 1
datasource: "-- Grafana --"
enable: true
hide: true
iconColor: rgba(0, 211, 255, 1)
name: Annotations & Alerts
type: dashboard
- datasource: "$datasource"
enable: true
expr: count(sum(up{instance="$instance"}) by (instance) < 1)
hide: false
iconColor: rgb(250, 44, 18)
limit: 100
name: downage
showIn: 0
step: 30s
tagKeys: instance
textFormat: prometheus down
titleFormat: Downage
type: alert
- datasource: "$datasource"
enable: true
expr: sum(changes(prometheus_config_last_reload_success_timestamp_seconds[10m]))
by (instance)
hide: false
iconColor: "#fceaca"
limit: 100
name: Reload
showIn: 0
step: 5m
tagKeys: instance
tags: []
titleFormat: Reload
type: tags
description: Dashboard for monitoring of Prometheus v2.x.x
editable: true
gnetId: 3681
graphTooltip: 1
hideControls: false
id:
links:
- icon: info
tags: []
targetBlank: true
title: 'Dashboard''s Github '
tooltip: Github repo of this dashboard
type: link
url: https://github.com/FUSAKLA/Prometheus2-grafana-dashboard
- icon: doc
tags: []
targetBlank: true
title: Prometheus Docs
tooltip: ''
type: link
url: http://prometheus.io/docs/introduction/overview/
refresh: 5m
rows:
- collapse: false
height: 161
panels:
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- "#299c46"
- rgba(237, 129, 40, 0.89)
- "#bf1b00"
datasource: prometheus
decimals: 1
format: s
gauge:
maxValue: 1000000
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 41
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: time() - process_start_time_seconds{instance="$instance"}
format: time_series
instant: false
intervalFactor: 2
refId: A
thresholds: ''
title: Uptime
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- "#299c46"
- rgba(237, 129, 40, 0.89)
- "#bf1b00"
datasource: prometheus
format: short
gauge:
maxValue: 1000000
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 42
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
- expr: prometheus_tsdb_head_series{instance="$instance"}
format: time_series
instant: false
intervalFactor: 2
refId: A
thresholds: '500000,800000,1000000'
title: Total count of time series
type: singlestat
valueFontSize: 150%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- "#299c46"
- rgba(237, 129, 40, 0.89)
- "#d44a3a"
datasource: prometheus
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 48
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: version
targets:
- expr: prometheus_build_info{instance="$instance"}
format: table
instant: true
intervalFactor: 2
refId: A
thresholds: ''
title: Version
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- "#299c46"
- rgba(237, 129, 40, 0.89)
- "#d44a3a"
datasource: prometheus
decimals: 2
format: ms
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 49
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: prometheus_tsdb_head_max_time{instance="$instance"} - prometheus_tsdb_head_min_time{instance="$instance"}
format: time_series
instant: true
intervalFactor: 2
refId: A
thresholds: ''
title: Actual head block length
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- content: <img src="https://cdn.worldvectorlogo.com/logos/prometheus.svg"/ height="140px">
height: ''
id: 50
links: []
mode: html
span: 1
title: ''
transparent: true
type: text
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- "#e6522c"
- rgba(237, 129, 40, 0.89)
- "#299c46"
datasource: prometheus
decimals: 1
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 52
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: '2'
format: time_series
intervalFactor: 2
refId: A
thresholds: '10,20'
title: ''
transparent: true
type: singlestat
valueFontSize: 200%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Header instance info
titleSize: h6
- collapse: false
height: '250'
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 15
legend:
avg: true
current: false
max: false
min: false
show: false
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: true
steppedLine: false
targets:
- expr: max(prometheus_engine_query_duration_seconds{instance="$instance"}) by
(instance, slice)
format: time_series
intervalFactor: 1
legendFormat: max duration for {{slice}}
metric: prometheus_local_storage_rushed_mode
refId: A
step: 900
thresholds: []
timeFrom:
timeShift:
title: Query elapsed time
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
label: ''
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 17
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(increase(prometheus_tsdb_head_series_created_total{instance="$instance"}[$aggregation_interval]))
by (instance)
format: time_series
intervalFactor: 2
legendFormat: created on {{ instance }}
metric: prometheus_local_storage_maintain_series_duration_seconds_count
refId: A
step: 1800
- expr: sum(increase(prometheus_tsdb_head_series_removed_total{instance="$instance"}[$aggregation_interval]))
by (instance) * -1
format: time_series
intervalFactor: 2
legendFormat: removed on {{ instance }}
refId: B
thresholds: []
timeFrom:
timeShift:
title: Head series created/deleted
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 13
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: exceeded_sample_limit on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: A
step: 1800
- expr: sum(increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: duplicate_timestamp on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: B
step: 1800
- expr: sum(increase(prometheus_target_scrapes_sample_out_of_bounds_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: out_of_bounds on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: C
step: 1800
- expr: sum(increase(prometheus_target_scrapes_sample_out_of_order_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: out_of_order on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: D
step: 1800
- expr: sum(increase(prometheus_rule_evaluation_failures_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: rule_evaluation_failure on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: G
step: 1800
- expr: sum(increase(prometheus_tsdb_compactions_failed_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: tsdb_compactions_failed on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: K
step: 1800
- expr: sum(increase(prometheus_tsdb_reloads_failures_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: tsdb_reloads_failures on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: L
step: 1800
- expr: sum(increase(prometheus_tsdb_head_series_not_found{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: head_series_not_found on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: N
step: 1800
- expr: sum(increase(prometheus_evaluator_iterations_missed_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: evaluator_iterations_missed on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: O
step: 1800
- expr: sum(increase(prometheus_evaluator_iterations_skipped_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: evaluator_iterations_skipped on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: P
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Prometheus errors
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Main info
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
description: ''
editable: true
error: false
fill: 1
grid: {}
id: 25
legend:
alignAsTable: true
avg: true
current: true
max: true
min: false
show: false
sort: max
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: prometheus_target_interval_length_seconds{instance="$instance",quantile="0.99"}
- 60
format: time_series
interval: 2m
intervalFactor: 1
legendFormat: "{{instance}}"
metric: ''
refId: A
step: 300
thresholds: []
timeFrom:
timeShift:
title: Scrape delay (counts with 1m scrape interval)
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
logBase: 1
max:
min:
show: true
- format: short
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 14
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: Queue length
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(prometheus_evaluator_duration_seconds{instance="$instance"}) by (instance,
quantile)
format: time_series
intervalFactor: 2
legendFormat: Queue length
metric: prometheus_local_storage_indexing_queue_length
refId: B
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Rule evaluation duration
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min: '0'
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Scrape & rule duration
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 18
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(increase(http_requests_total{instance="$instance"}[$aggregation_interval]))
by (instance, handler) > 0
format: time_series
intervalFactor: 2
legendFormat: "{{ handler }} on {{ instance }}"
metric: ''
refId: A
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Request count
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: none
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 16
legend:
avg: false
current: false
hideEmpty: true
hideZero: true
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: max(sum(http_request_duration_microseconds{instance="$instance"}) by (instance,
handler, quantile)) by (instance, handler) > 0
format: time_series
hide: false
intervalFactor: 2
legendFormat: "{{ handler }} on {{ instance }}"
refId: B
thresholds: []
timeFrom:
timeShift:
title: Request duration per handler
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: µs
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 19
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(increase(http_request_size_bytes{instance="$instance", quantile="0.99"}[$aggregation_interval]))
by (instance, handler) > 0
format: time_series
hide: false
intervalFactor: 2
legendFormat: "{{ handler }} in {{ instance }}"
refId: B
thresholds: []
timeFrom:
timeShift:
title: Request size by handler
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Allocated bytes: "#F9BA8F"
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max count collector: "#bf1b00"
Max count harvester: "#bf1b00"
Max to persist: "#3F6833"
RSS: "#890F02"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 8
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/Max.*/"
fill: 0
linewidth: 2
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(prometheus_engine_queries{instance="$instance"}) by (instance, handler)
format: time_series
intervalFactor: 2
legendFormat: 'Current count '
metric: last
refId: A
step: 1800
- expr: sum(prometheus_engine_queries_concurrent_max{instance="$instance"}) by
(instance, handler)
format: time_series
intervalFactor: 2
legendFormat: Max count
metric: last
refId: B
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Count of concurrent queries
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Requests & queries
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors:
Alert queue capacity on o collector: "#bf1b00"
Alert queue capacity on o harvester: "#bf1b00"
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 20
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/.*capacity.*/"
fill: 0
linewidth: 2
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(prometheus_notifications_queue_capacity{instance="$instance"})by (instance)
format: time_series
intervalFactor: 2
legendFormat: 'Alert queue capacity '
metric: prometheus_local_storage_checkpoint_last_size_bytes
refId: A
step: 1800
- expr: sum(prometheus_notifications_queue_length{instance="$instance"})by (instance)
format: time_series
intervalFactor: 2
legendFormat: 'Alert queue size on '
metric: prometheus_local_storage_checkpoint_last_size_bytes
refId: B
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Alert queue size
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 21
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(prometheus_notifications_alertmanagers_discovered{instance="$instance"})
by (instance)
format: time_series
intervalFactor: 2
legendFormat: Checkpoint chunks written/s
metric: prometheus_local_storage_checkpoint_series_chunks_written_sum
refId: A
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Count of discovered alertmanagers
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: none
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 39
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(increase(prometheus_notifications_dropped_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: notifications_dropped on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: F
step: 1800
- expr: sum(increase(prometheus_rule_evaluation_failures_total{rule_type="alerting",instance="$instance"}[$aggregation_interval]))
by (rule_type,instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: rule_evaluation_failures on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: A
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Alerting errors
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Alerting
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
# Use the dashboard's "datasource" template variable, as the sibling panels
# do, instead of a hard-coded datasource name.
datasource: "$datasource"
fill: 1
id: 45
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
# Query for the "Kubernetes SD sync count" panel: number of target syncs for
# the kubernetes-service-endpoints scrape job over the aggregation interval.
targets:
  - expr: increase(prometheus_target_sync_length_seconds_count{scrape_job="kubernetes-service-endpoints"}[$aggregation_interval])
    format: time_series
    intervalFactor: 2
    # Spelling fixed ("synces" -> "syncs").
    legendFormat: Count of target syncs
    refId: A
    step: 240
thresholds: []
timeFrom:
timeShift:
title: Kubernetes SD sync count
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 46
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: exceeded_sample_limit on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: A
step: 1800
- expr: sum(increase(prometheus_sd_file_read_errors_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
interval: ''
intervalFactor: 2
legendFormat: sd_file_read_error on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: E
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Service discovery errors
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Service discovery
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
# Use the dashboard's "datasource" template variable, as the sibling panels
# do, instead of a hard-coded datasource name.
datasource: "$datasource"
fill: 1
id: 36
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(increase(prometheus_tsdb_reloads_total{instance="$instance"}[30m]))
by (instance)
format: time_series
intervalFactor: 2
legendFormat: "{{ instance }}"
refId: A
thresholds: []
timeFrom:
timeShift:
title: Reloaded block from disk
tooltip:
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 5
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(prometheus_tsdb_blocks_loaded{instance="$instance"}) by (instance)
format: time_series
intervalFactor: 2
legendFormat: Loaded data blocks
metric: prometheus_local_storage_memory_chunkdescs
refId: A
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Loaded data blocks
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 3
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: prometheus_tsdb_head_series{instance="$instance"}
format: time_series
intervalFactor: 2
legendFormat: Time series count
metric: prometheus_local_storage_memory_series
refId: A
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Time series total count
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 1
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(rate(prometheus_tsdb_head_samples_appended_total{instance="$instance"}[$aggregation_interval]))
by (instance)
format: time_series
intervalFactor: 2
legendFormat: samples/s {{instance}}
metric: prometheus_local_storage_ingested_samples_total
refId: A
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Samples Appended per second
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: ''
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: TSDB stats
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
To persist: "#9AC48A"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 2
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/Max.*/"
fill: 0
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(prometheus_tsdb_head_chunks{instance="$instance"}) by (instance)
format: time_series
hide: false
intervalFactor: 2
legendFormat: Head chunk count
metric: prometheus_local_storage_memory_chunks
refId: A
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Head chunks count
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
# Use the dashboard's "datasource" template variable, as the sibling panels
# do, instead of a hard-coded datasource name.
datasource: "$datasource"
fill: 1
id: 35
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
# Query for the "Length of head block" panel: newest minus oldest sample
# timestamp held in the head block, per instance.
targets:
  # Folded block scalar: the two lines are joined with a single space, and
  # the leading "- " of the second line stays part of the expression.
  - expr: >-
      max(prometheus_tsdb_head_max_time{instance="$instance"}) by (instance)
      - min(prometheus_tsdb_head_min_time{instance="$instance"}) by (instance)
    format: time_series
    intervalFactor: 2
    legendFormat: '{{ instance }}'
    refId: A
thresholds: []
timeFrom:
timeShift:
title: Length of head block
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: ms
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 4
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(rate(prometheus_tsdb_head_chunks_created_total{instance="$instance"}[$aggregation_interval]))
by (instance)
format: time_series
intervalFactor: 2
legendFormat: created on {{ instance }}
refId: B
- expr: sum(rate(prometheus_tsdb_head_chunks_removed_total{instance="$instance"}[$aggregation_interval]))
by (instance) * -1
format: time_series
intervalFactor: 2
legendFormat: deleted on {{ instance }}
refId: C
thresholds: []
timeFrom:
timeShift:
title: Head Chunks Created/Deleted per second
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Head block stats
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
# Use the dashboard's "datasource" template variable, as the sibling panels
# do, instead of a hard-coded datasource name.
datasource: "$datasource"
fill: 1
id: 33
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(increase(prometheus_tsdb_compaction_duration_sum{instance="$instance"}[30m])
/ increase(prometheus_tsdb_compaction_duration_count{instance="$instance"}[30m]))
by (instance)
format: time_series
intervalFactor: 2
legendFormat: "{{ instance }}"
refId: B
thresholds: []
timeFrom:
timeShift:
title: Compaction duration
tooltip:
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
# Use the dashboard's "datasource" template variable, as the sibling panels
# do, instead of a hard-coded datasource name.
datasource: "$datasource"
fill: 1
id: 34
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(prometheus_tsdb_head_gc_duration_seconds{instance="$instance"}) by
(instance, quantile)
format: time_series
intervalFactor: 2
legendFormat: "{{ quantile }} on {{ instance }}"
refId: A
thresholds: []
timeFrom:
timeShift:
# This panel queries prometheus_tsdb_head_gc_duration_seconds, i.e. the TSDB
# head garbage collection, so the title names that rather than "Go" GC.
title: TSDB head GC duration
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
# Use the dashboard's "datasource" template variable, as the sibling panels
# do, instead of a hard-coded datasource name.
datasource: "$datasource"
fill: 1
id: 37
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(prometheus_tsdb_wal_truncate_duration_seconds{instance="$instance"})
by (instance, quantile)
format: time_series
intervalFactor: 2
legendFormat: "{{ quantile }} on {{ instance }}"
refId: A
thresholds: []
timeFrom:
timeShift:
title: WAL truncate duration seconds
tooltip:
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
# Left axis of "WAL truncate duration seconds": the plotted metric is a
# duration in seconds, so use the "s" unit like the other duration panels
# in this row.
- format: s
  label:
  logBase: 1
  max:
  min:
  show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
# Use the dashboard's "datasource" template variable, as the sibling panels
# do, instead of a hard-coded datasource name.
datasource: "$datasource"
fill: 1
id: 38
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(tsdb_wal_fsync_duration_seconds{instance="$instance"}) by (instance,
quantile)
format: time_series
intervalFactor: 2
legendFormat: "{{ quantile }} {{ instance }}"
refId: A
thresholds: []
timeFrom:
timeShift:
title: WAL fsync duration seconds
tooltip:
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Data maintenance
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors:
Allocated bytes: "#7EB26D"
Allocated bytes - 1m max: "#BF1B00"
Allocated bytes - 1m min: "#BF1B00"
Allocated bytes - 5m max: "#BF1B00"
Allocated bytes - 5m min: "#BF1B00"
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
RSS: "#447EBC"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
decimals:
editable: true
error: false
fill: 1
id: 6
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/-/"
fill: 0
- alias: collector heap size
color: "#E0752D"
fill: 0
linewidth: 2
- alias: collector kubernetes memory limit
color: "#BF1B00"
fill: 0
linewidth: 3
spaceLength: 10
span: 4
stack: false
steppedLine: false
# Queries for the "Memory" panel: resident memory of the process (B) and
# Go heap bytes currently allocated (A), each summed per instance.
targets:
  - expr: sum(process_resident_memory_bytes{instance="$instance"}) by (instance)
    format: time_series
    hide: false
    intervalFactor: 2
    legendFormat: Total resident memory - {{instance}}
    metric: process_resident_memory_bytes
    refId: B
    step: 1800
  - expr: sum(go_memstats_alloc_bytes{instance="$instance"}) by (instance)
    format: time_series
    hide: false
    intervalFactor: 2
    # Spelling fixed ("llocated" -> "allocated").
    legendFormat: Total allocated bytes - {{instance}}
    metric: go_memstats_alloc_bytes
    refId: A
    step: 1800
thresholds: []
timeFrom:
timeShift:
title: Memory
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Allocated bytes: "#F9BA8F"
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
RSS: "#890F02"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 7
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: rate(go_memstats_alloc_bytes_total{instance="$instance"}[$aggregation_interval])
format: time_series
intervalFactor: 2
legendFormat: Allocated Bytes/s
metric: go_memstats_alloc_bytes
refId: A
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Allocations per second
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
decimals: 2
editable: true
error: false
fill: 1
id: 9
legend:
alignAsTable: false
avg: false
current: false
hideEmpty: false
max: false
min: false
rightSide: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(rate(process_cpu_seconds_total{instance="$instance"}[$aggregation_interval]))
by (instance)
format: time_series
intervalFactor: 2
legendFormat: CPU/s
metric: prometheus_local_storage_ingested_samples_total
refId: B
step: 1800
thresholds: []
timeFrom:
timeShift:
title: CPU per second
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values:
- avg
yaxes:
- format: none
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: RAM&CPU
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors:
Chunks: "#1F78C1"
Chunks to persist: "#508642"
Max chunks: "#052B51"
Max to persist: "#3F6833"
bars: false
dashLength: 10
dashes: false
datasource: "$datasource"
editable: true
error: false
fill: 1
id: 47
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 12
stack: false
steppedLine: false
targets:
- expr: sum(increase(net_conntrack_dialer_conn_failed_total{instance="$instance"}[$aggregation_interval]))
by (instance) > 0
format: time_series
hide: false
interval: ''
intervalFactor: 2
legendFormat: conntrack_dialer_conn_failed on {{ instance }}
metric: prometheus_local_storage_chunk_ops_total
refId: M
step: 1800
thresholds: []
timeFrom:
timeShift:
title: Net errors
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
# Row title; the row's only panel plots net_conntrack_dialer_conn_failed_total.
title: Conntrack errors
titleSize: h6
schemaVersion: 14
style: dark
tags:
- prometheus
templating:
list:
- auto: true
auto_count: 30
auto_min: 2m
current:
text: auto
value: "$__auto_interval"
hide: 0
# Display label of the aggregation_interval template variable (spelling fixed).
label: aggregation interval
name: aggregation_interval
options:
- selected: true
text: auto
value: "$__auto_interval"
- selected: false
text: 1m
value: 1m
- selected: false
text: 10m
value: 10m
- selected: false
text: 30m
value: 30m
- selected: false
text: 1h
value: 1h
- selected: false
text: 6h
value: 6h
- selected: false
text: 12h
value: 12h
- selected: false
text: 1d
value: 1d
- selected: false
text: 7d
value: 7d
- selected: false
text: 14d
value: 14d
- selected: false
text: 30d
value: 30d
query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d
refresh: 2
type: interval
- allValue:
current: {}
datasource: "$datasource"
hide: 0
includeAll: false
label: Instance
multi: false
name: instance
options: []
query: label_values(prometheus_build_info, instance)
refresh: 2
regex: ''
sort: 2
tagValuesQuery: ''
tags: []
tagsQuery: ''
type: query
useTags: false
- current:
text: Prometheus
value: Prometheus
hide: 0
label: Prometheus datasource
name: datasource
options: []
query: prometheus
refresh: 1
regex: ''
type: datasource
- current:
text: influxdb(heapster) - kokura
value: influxdb(heapster) - kokura
hide: 0
label: InfluxDB datasource
name: influx_datasource
options: []
query: influxdb
refresh: 1
regex: ''
type: datasource
time:
from: now-7d
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: browser
title: Prometheus2.0 (v1.0.0 by FUSAKLA)
version: 8
ceph_cluster:
__inputs:
- name: prometheus
label: Prometheus
description: Prometheus.IO
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: panel
id: singlestat
name: Singlestat
version: ''
- type: panel
id: graph
name: Graph
version: ''
- type: grafana
id: grafana
name: Grafana
version: 3.1.1
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
id:
title: Ceph - Cluster
tags:
- ceph
- cluster
style: dark
timezone: browser
editable: true
hideControls: false
sharedCrosshair: false
rows:
- collapse: false
editable: true
height: 150px
panels:
# "Status" singlestat: maps the value of ceph_health_status to a text state.
# NOTE(review): assumes the exporter reports 0 = HEALTH_OK, 1 = HEALTH_WARN,
# 2 = HEALTH_ERR (as the Ceph mgr prometheus module does) - confirm for the
# exporter deployed with this chart.
- cacheTimeout:
  colorBackground: false
  colorValue: true
  # Ordered low -> high to line up with the thresholds below:
  # green (< 1), orange (>= 1 and < 2), red (>= 2).
  colors:
    - rgba(50, 172, 45, 0.97)
    - rgba(237, 129, 40, 0.89)
    - rgba(245, 54, 54, 0.9)
  datasource: prometheus
  editable: true
  error: false
  format: none
  gauge:
    maxValue: 100
    minValue: 0
    show: false
    thresholdLabels: false
    thresholdMarkers: true
  id: 21
  interval: 1m
  isNew: true
  links: []
  mappingType: 1
  mappingTypes:
    - name: value to text
      value: 1
    - name: range to text
      value: 2
  maxDataPoints: 100
  nullPointMode: connected
  nullText:
  postfix: ''
  postfixFontSize: 50%
  prefix: ''
  prefixFontSize: 50%
  rangeMaps:
    - from: 'null'
      text: N/A
      to: 'null'
  span: 2
  sparkline:
    fillColor: rgba(31, 118, 189, 0.18)
    full: false
    lineColor: rgb(31, 120, 193)
    show: false
  targets:
    # Read the metric value itself. count() only reports how many series
    # exist, so it can never reflect the health state. max() collapses the
    # result to a single series and surfaces the worst reported state.
    - expr: max(ceph_health_status{application="ceph",release_group="$ceph_cluster"})
      interval: "$interval"
      intervalFactor: 1
      refId: A
      step: 60
  thresholds: '1,2'
  title: Status
  transparent: false
  type: singlestat
  valueFontSize: 100%
  valueMaps:
    - op: "="
      text: N/A
      value: 'null'
    - op: "="
      text: HEALTHY
      value: '0'
    - op: "="
      text: WARNING
      value: '1'
    - op: "="
      text: CRITICAL
      value: '2'
  valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 22
interval: 1m
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: count(ceph_pool_max_avail{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: ''
title: Pools
transparent: false
type: singlestat
valueFontSize: 100%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 33
interval: 1m
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: 0.025,0.1
title: Cluster Capacity
transparent: false
type: singlestat
valueFontSize: 100%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 34
interval: 1m
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: 0.025,0.1
title: Used Capacity
transparent: false
type: singlestat
valueFontSize: 100%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
# "Current Utilization" gauge: used bytes divided by total bytes.
# The query yields a ratio in the range 0..1 and is rendered with the
# "percentunit" format, so the gauge range and thresholds are expressed on
# that same 0..1 scale.
- cacheTimeout:
  colorBackground: false
  colorValue: true
  colors:
    - rgba(50, 172, 45, 0.97)
    - rgba(237, 129, 40, 0.89)
    - rgba(245, 54, 54, 0.9)
  datasource: prometheus
  editable: true
  error: false
  format: percentunit
  gauge:
    # 1 == 100% for a percentunit value.
    maxValue: 1
    minValue: 0
    show: true
    thresholdLabels: false
    thresholdMarkers: true
  id: 23
  interval: 1m
  isNew: true
  links: []
  mappingType: 1
  mappingTypes:
    - name: value to text
      value: 1
    - name: range to text
      value: 2
  maxDataPoints: 100
  nullPointMode: connected
  nullText:
  postfix: ''
  postfixFontSize: 50%
  prefix: ''
  prefixFontSize: 50%
  rangeMaps:
    - from: 'null'
      text: N/A
      to: 'null'
  span: 2
  sparkline:
    fillColor: rgba(31, 118, 189, 0.18)
    full: true
    lineColor: rgb(31, 120, 193)
    show: false
  targets:
    # Both operands carry the same label filter so the division is
    # restricted to the selected cluster on either side.
    - expr: >-
        ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"}
        / ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"}
      interval: "$interval"
      intervalFactor: 1
      legendFormat: ''
      refId: A
      step: 60
  # 70% and 80% on the 0..1 ratio scale.
  thresholds: '0.7,0.8'
  title: Current Utilization
  transparent: false
  type: singlestat
  valueFontSize: 100%
  valueMaps:
    - op: "="
      text: N/A
      value: 'null'
  valueName: current
title: New row
- collapse: false
editable: true
height: 100px
panels:
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 26
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
# Query for the "OSDs IN" singlestat.
targets:
  # sum() adds up the per-OSD flag, mirroring how the sibling "OSDs UP"
  # panel uses sum(ceph_osd_up); count() would return the number of series
  # regardless of their value.
  # NOTE(review): assumes ceph_osd_in is 1 for "in" and 0 for "out" - confirm
  # against the exporter in use.
  - expr: sum(ceph_osd_in{application="ceph",release_group="$ceph_cluster"})
    interval: "$interval"
    intervalFactor: 1
    legendFormat: ''
    refId: A
    step: 60
thresholds: ''
title: OSDs IN
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 40, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 27
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
# Query for the "OSDs OUT" singlestat: all known OSDs minus those marked in.
targets:
  # The subtrahend uses sum() so only OSDs whose flag is set are deducted;
  # with count() both sides count every OSD series and the result is
  # always 0.
  # NOTE(review): assumes ceph_osd_in is 1 for "in" and 0 for "out" - confirm
  # against the exporter in use.
  - expr: >-
      count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"})
      - sum(ceph_osd_in{application="ceph",release_group="$ceph_cluster"})
    interval: "$interval"
    intervalFactor: 1
    legendFormat: ''
    refId: A
    step: 60
thresholds: '1,1'
title: OSDs OUT
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 28
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum(ceph_osd_up{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: ''
title: OSDs UP
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 40, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 29
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
# "OSDs DOWN" = total OSDs minus the OSDs that are currently up.
# ceph_osd_up is summed (as the "OSDs UP" panel does): count() counts series
# regardless of their value, so count(metadata) - count(ceph_osd_up) would
# stay at 0 even when OSDs are down.
- expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"}) - sum(ceph_osd_up{application="ceph",release_group="$ceph_cluster"})
  interval: "$interval"
  intervalFactor: 1
  legendFormat: ''
  refId: A
  step: 60
thresholds: '1,1'
title: OSDs DOWN
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 30
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: avg(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: '250,300'
title: Average PGs per OSD
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
title: New row
- collapse: false
editable: true
height: 250px
panels:
- aliasColors:
Available: "#EAB839"
Total Capacity: "#447EBC"
Used: "#BF1B00"
total_avail: "#6ED0E0"
total_space: "#7EB26D"
total_used: "#890F02"
bars: false
datasource: prometheus
editable: true
error: false
fill: 4
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: '300'
id: 1
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 0
links: []
minSpan:
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: Total Capacity
fill: 0
linewidth: 3
stack: false
span: 4
stack: true
steppedLine: false
targets:
- expr: ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"} - ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Available
refId: A
step: 60
# Used bytes of the selected cluster. The selector restricts the series to the
# cluster chosen through the $ceph_cluster template variable, matching the
# "Available" target of this panel.
- expr: ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"}
  interval: "$interval"
  intervalFactor: 1
  legendFormat: Used
  refId: B
  step: 60
# Total capacity of the selected cluster. The selector restricts the series to
# the cluster chosen through the $ceph_cluster template variable, matching the
# "Available" target of this panel.
- expr: ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"}
  interval: "$interval"
  intervalFactor: 1
  legendFormat: Total Capacity
  refId: C
  step: 60
timeFrom:
timeShift:
title: Capacity
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Total Capacity: "#7EB26D"
Used: "#BF1B00"
total_avail: "#6ED0E0"
total_space: "#7EB26D"
total_used: "#890F02"
bars: false
datasource: prometheus
decimals: 0
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
thresholdLine: false
height: '300'
id: 3
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
minSpan:
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: true
steppedLine: false
# Cluster-wide write/read operations per second.
# NOTE(review): ceph_osd_op_w / ceph_osd_op_r are presumably cumulative
# counters (as exported by the ceph-mgr prometheus module) - confirm. They are
# turned into per-second rates before summing; a plain sum() would plot an
# ever-growing total rather than IOPS. The 3m window mirrors the irate()
# usage of the pool dashboard in this file.
targets:
- expr: sum(irate(ceph_osd_op_w{application="ceph",release_group="$ceph_cluster"}[3m]))
  interval: "$interval"
  intervalFactor: 1
  legendFormat: Write
  refId: A
  step: 60
- expr: sum(irate(ceph_osd_op_r{application="ceph",release_group="$ceph_cluster"}[3m]))
  interval: "$interval"
  intervalFactor: 1
  legendFormat: Read
  refId: B
  step: 60
timeFrom:
timeShift:
title: IOPS
tooltip:
msResolution: true
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: none
label: ''
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: '300'
id: 7
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: true
steppedLine: false
# Cluster-wide client write/read throughput in bytes per second (the left
# y-axis of this panel uses the Bps format).
# NOTE(review): ceph_osd_op_in_bytes / ceph_osd_op_out_bytes are presumably
# cumulative byte counters - confirm. They are turned into per-second rates
# before summing; a plain sum() would plot total bytes, not a rate. The 3m
# window mirrors the irate() usage of the pool dashboard in this file.
targets:
- expr: sum(irate(ceph_osd_op_in_bytes{application="ceph",release_group="$ceph_cluster"}[3m]))
  interval: "$interval"
  intervalFactor: 1
  legendFormat: Write
  refId: A
  step: 60
- expr: sum(irate(ceph_osd_op_out_bytes{application="ceph",release_group="$ceph_cluster"}[3m]))
  interval: "$interval"
  intervalFactor: 1
  legendFormat: Read
  refId: B
  step: 60
timeFrom:
timeShift:
title: Throughput
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
repeat:
showTitle: true
title: CLUSTER
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 18
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/^Total.*$/"
stack: false
span: 12
stack: true
steppedLine: false
targets:
- expr: ceph_cluster_total_objects{application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Total
refId: A
step: 60
timeFrom:
timeShift:
title: Objects in the Cluster
tooltip:
msResolution: false
shared: true
sort: 1
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 19
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/^Total.*$/"
stack: false
span: 6
stack: true
steppedLine: false
targets:
- expr: sum(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Total
refId: A
step: 60
- expr: sum(ceph_pg_active{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Active
refId: B
step: 60
- expr: sum(ceph_pg_inconsistent{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Inconsistent
refId: C
step: 60
- expr: sum(ceph_pg_creating{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Creating
refId: D
step: 60
- expr: sum(ceph_pg_recovering{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Recovering
refId: E
step: 60
- expr: sum(ceph_pg_down{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Down
refId: F
step: 60
timeFrom:
timeShift:
title: PGs
tooltip:
msResolution: false
shared: true
sort: 1
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 20
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/^Total.*$/"
stack: false
span: 6
stack: true
steppedLine: false
targets:
- expr: sum(ceph_pg_degraded{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Degraded
refId: A
step: 60
- expr: sum(ceph_pg_stale{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Stale
refId: B
step: 60
- expr: sum(ceph_pg_undersized{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Undersized
refId: C
step: 60
timeFrom:
timeShift:
title: Stuck PGs
tooltip:
msResolution: false
shared: true
sort: 1
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
title: New row
time:
from: now-1h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
templating:
list:
- current: {}
hide: 0
label: Cluster
name: ceph_cluster
options: []
type: query
query: label_values(ceph_health_status, release_group)
refresh: 1
sort: 1
datasource: prometheus
- auto: true
auto_count: 10
auto_min: 1m
current:
tags: []
text: 1m
value: 1m
datasource:
hide: 0
includeAll: false
label: Interval
multi: false
name: interval
options:
- selected: false
text: auto
value: "$__auto_interval"
- selected: true
text: 1m
value: 1m
- selected: false
text: 10m
value: 10m
- selected: false
text: 30m
value: 30m
- selected: false
text: 1h
value: 1h
- selected: false
text: 6h
value: 6h
- selected: false
text: 12h
value: 12h
- selected: false
text: 1d
value: 1d
- selected: false
text: 7d
value: 7d
- selected: false
text: 14d
value: 14d
- selected: false
text: 30d
value: 30d
query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d
refresh: 0
type: interval
annotations:
list: []
refresh: 5m
schemaVersion: 12
version: 26
links: []
gnetId: 917
description: "Ceph Cluster overview.\r\n"
ceph_osd:
__inputs:
- name: prometheus
label: Prometheus
description: Prometheus.IO
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: panel
id: singlestat
name: Singlestat
version: ''
- type: panel
id: graph
name: Graph
version: ''
- type: grafana
id: grafana
name: Grafana
version: 3.1.1
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
id:
title: Ceph - OSD
tags:
- ceph
- osd
style: dark
timezone: browser
editable: true
hideControls: false
sharedCrosshair: false
rows:
- collapse: false
editable: true
height: 100px
panels:
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 40, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 6
interval:
isNew: true
links: []
mappingType: 2
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
- from: '0'
text: DOWN
to: '0.99'
- from: '0.99'
text: UP
to: '1'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ceph_osd_up{ceph_daemon="$osd",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
refId: A
step: 60
thresholds: '0,1'
timeFrom:
title: Status
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: DOWN
value: '0'
- op: "="
text: UP
value: '1'
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 40, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 8
interval:
isNew: true
links: []
mappingType: 2
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
- from: '0'
text: OUT
to: '0.99'
- from: '0.99'
text: IN
to: '1'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ceph_osd_in{ceph_daemon="$osd",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
refId: A
step: 60
thresholds: '0,1'
timeFrom:
title: Available
transparent: false
type: singlestat
valueFontSize: 80%
# Value-to-text mappings for the ceph_osd_in stat: 0 is shown as OUT and 1 as
# IN, in line with the rangeMaps of this same panel.
valueMaps:
- op: "="
  text: OUT
  value: '0'
- op: "="
  text: IN
  value: '1'
- op: "="
  text: N/A
  value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 10
interval:
isNew: true
links: []
mappingType: 2
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
refId: A
step: 60
thresholds: '0,1'
timeFrom:
title: Total OSDs
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: DOWN
value: '0'
- op: "="
text: UP
value: '1'
- op: "="
text: N/A
value: 'null'
valueName: current
title: New row
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1: 250
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2: 300
threshold2Color: rgba(234, 112, 112, 0.22)
thresholdLine: true
id: 5
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/^Average.*/"
fill: 0
stack: false
span: 10
stack: true
steppedLine: false
targets:
- expr: ceph_osd_numpg{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Number of PGs - {{ $osd }}
refId: A
step: 60
- expr: avg(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Average Number of PGs in the Cluster
refId: B
step: 60
timeFrom:
timeShift:
title: PGs
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 7
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"}/ceph_osd_stat_bytes{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"})*100
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: '60,80'
timeFrom:
title: Utilization
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
showTitle: true
title: 'OSD: $osd'
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 2
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: true
steppedLine: false
targets:
# Bytes used on the selected OSD.
- expr: ceph_osd_stat_bytes_used{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"}
  interval: "$interval"
  intervalFactor: 1
  # $osd is filled from the ceph_daemon label, so it is used as-is in the
  # legend, matching the "Available" target of this panel (an extra "osd."
  # prefix would presumably duplicate the one already in the label value).
  legendFormat: Used - {{ $osd }}
  metric: ceph_osd_used_bytes
  refId: A
  step: 60
- expr: ceph_osd_stat_bytes{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"} - ceph_osd_stat_bytes_used{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"}
hide: false
interval: "$interval"
intervalFactor: 1
legendFormat: Available - {{ $osd }}
metric: ceph_osd_avail_bytes
refId: B
step: 60
timeFrom:
timeShift:
title: OSD Storage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 5
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 9
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: false
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 2
points: true
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
targets:
# Fraction of the selected OSD's capacity that is in use (used / total).
- expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"}/ceph_osd_stat_bytes{ceph_daemon=~"$osd",application="ceph",release_group="$ceph_cluster"})
  interval: "$interval"
  intervalFactor: 1
  # The series is a utilization ratio, not available space, so the legend
  # says so.
  legendFormat: Utilization - {{ $osd }}
  metric: ceph_osd_avail_bytes
  refId: A
  step: 60
timeFrom:
timeShift:
title: Utilization Variance
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: none
label:
logBase: 1
max:
min:
show: true
- format: none
label:
logBase: 1
max:
min:
show: true
time:
from: now-1h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
templating:
list:
- current: {}
hide: 0
label: Cluster
name: ceph_cluster
options: []
type: query
query: label_values(ceph_health_status, release_group)
refresh: 1
sort: 1
datasource: prometheus
- auto: true
auto_count: 10
auto_min: 1m
current:
selected: true
text: 1m
value: 1m
datasource:
hide: 0
includeAll: false
label: Interval
multi: false
name: interval
options:
- selected: false
text: auto
value: "$__auto_interval"
- selected: true
text: 1m
value: 1m
- selected: false
text: 10m
value: 10m
- selected: false
text: 30m
value: 30m
- selected: false
text: 1h
value: 1h
- selected: false
text: 6h
value: 6h
- selected: false
text: 12h
value: 12h
- selected: false
text: 1d
value: 1d
- selected: false
text: 7d
value: 7d
- selected: false
text: 14d
value: 14d
- selected: false
text: 30d
value: 30d
query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d
refresh: 0
type: interval
- current: {}
datasource: prometheus
hide: 0
includeAll: false
label: OSD
multi: false
name: osd
options: []
query: label_values(ceph_osd_metadata{release_group="$ceph_cluster"}, ceph_daemon)
refresh: 1
regex: ''
type: query
annotations:
list: []
refresh: 15m
schemaVersion: 12
version: 18
links: []
gnetId: 923
description: CEPH OSD Status.
ceph_pool:
__inputs:
- name: prometheus
label: Prometheus
description: Prometheus.IO
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: panel
id: graph
name: Graph
version: ''
- type: panel
id: singlestat
name: Singlestat
version: ''
- type: grafana
id: grafana
name: Grafana
version: 3.1.1
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
id:
title: Ceph - Pools
tags:
- ceph
- pools
style: dark
timezone: browser
editable: true
hideControls: false
sharedCrosshair: false
rows:
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 4
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: ''
id: 2
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
rightSide: true
show: true
total: false
values: true
lines: true
linewidth: 0
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/^Total.*$/"
fill: 0
linewidth: 4
stack: false
- alias: "/^Raw.*$/"
color: "#BF1B00"
fill: 0
linewidth: 4
span: 10
stack: true
steppedLine: false
# Storage series of the selected pool.
# NOTE(review): ceph_pool_max_avail is presumably the space that can still be
# written to the pool (Ceph's "MAX AVAIL"), not its total size - confirm
# against the deployed ceph-mgr version. Under that reading:
#   Available = max_avail
#   Total     = bytes_used + max_avail
# so the stacked Used + Available series add up to the Total line.
targets:
- expr: ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} + ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
  interval: "$interval"
  intervalFactor: 1
  legendFormat: Total - {{ $pool }}
  refId: A
  step: 60
- expr: ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
  interval: "$interval"
  intervalFactor: 1
  legendFormat: Used - {{ $pool }}
  refId: B
  step: 60
- expr: ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
  interval: "$interval"
  intervalFactor: 1
  legendFormat: Available - {{ $pool }}
  refId: C
  step: 60
- expr: ceph_pool_raw_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
  interval: "$interval"
  intervalFactor: 1
  legendFormat: Raw - {{ $pool }}
  refId: D
  step: 60
timeFrom:
timeShift:
title: "[[pool_name]] Pool Storage"
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
decimals: 2
editable: true
error: false
format: percentunit
gauge:
maxValue: 1
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 10
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
# Usage ratio of the selected pool, shown on a 0..1 gauge (percentunit).
# NOTE(review): ceph_pool_max_avail is presumably the remaining writable space
# of the pool rather than its total size - confirm. The ratio is therefore
# used / (used + max_avail), which stays within 0..1; used / max_avail alone
# would exceed the gauge maximum once the pool is more than half full.
- expr: (ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} / (ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} + ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}))
  interval: "$interval"
  intervalFactor: 1
  refId: A
  step: 60
thresholds: ''
title: "[[pool_name]] Pool Usage"
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
showTitle: true
title: 'Pool: $pool'
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: ''
id: 7
isNew: true
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
targets:
- expr: ceph_pool_objects{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Objects - {{ $pool_name }}
refId: A
step: 60
- expr: ceph_pool_dirty{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Dirty Objects - {{ $pool_name }}
refId: B
step: 60
timeFrom:
timeShift:
title: Objects in Pool [[pool_name]]
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
thresholdLine: false
id: 4
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: true
steppedLine: false
targets:
- expr: irate(ceph_pool_rd{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m])
interval: "$interval"
intervalFactor: 1
legendFormat: Read - {{ $pool_name }}
refId: B
step: 60
- expr: irate(ceph_pool_wr{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m])
interval: "$interval"
intervalFactor: 1
legendFormat: Write - {{ $pool_name }}
refId: A
step: 60
timeFrom:
timeShift:
title: "[[pool_name]] Pool IOPS"
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: none
label: IOPS
logBase: 1
max:
min: 0
show: true
- format: short
label: IOPS
logBase: 1
max:
min: 0
show: false
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 5
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: true
steppedLine: false
# Per-second read/write byte rates of the selected pool.
# pool_id uses the regex matcher (=~) like every other panel of this
# dashboard, so all panels select the same series for a given $pool value.
targets:
- expr: irate(ceph_pool_rd_bytes{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m])
  interval: "$interval"
  intervalFactor: 1
  legendFormat: Read Bytes - {{ $pool_name }}
  refId: A
  step: 60
- expr: irate(ceph_pool_wr_bytes{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m])
  interval: "$interval"
  intervalFactor: 1
  legendFormat: Written Bytes - {{ $pool_name }}
  refId: B
  step: 60
timeFrom:
timeShift:
title: "[[pool_name]] Pool Throughput"
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min: 0
show: true
- format: Bps
label:
logBase: 1
max:
min: 0
show: true
title: New row
time:
from: now-3h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
templating:
list:
- current: {}
hide: 0
label: Cluster
name: ceph_cluster
options: []
type: query
query: label_values(ceph_health_status, release_group)
refresh: 1
sort: 1
datasource: prometheus
- auto: true
auto_count: 10
auto_min: 1m
current:
selected: true
text: 1m
value: 1m
datasource:
hide: 0
includeAll: false
label: Interval
multi: false
name: interval
options:
- selected: false
text: auto
value: "$__auto_interval"
- selected: true
text: 1m
value: 1m
- selected: false
text: 10m
value: 10m
- selected: false
text: 30m
value: 30m
- selected: false
text: 1h
value: 1h
- selected: false
text: 6h
value: 6h
- selected: false
text: 12h
value: 12h
- selected: false
text: 1d
value: 1d
- selected: false
text: 7d
value: 7d
- selected: false
text: 14d
value: 14d
- selected: false
text: 30d
value: 30d
query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d
refresh: 0
type: interval
- current: {}
datasource: prometheus
hide: 0
includeAll: false
label: Pool
multi: false
name: pool
options: []
query: label_values(ceph_pool_objects{release_group="$ceph_cluster"}, pool_id)
refresh: 1
regex: ''
type: query
- current: {}
datasource: prometheus
hide: 0
includeAll: false
label: Pool
multi: false
name: pool_name
options: []
query: label_values(ceph_pool_metadata{release_group="$ceph_cluster",pool_id="[[pool]]" }, name)
refresh: 1
regex: ''
type: query
annotations:
list: []
refresh: 5m
schemaVersion: 12
version: 22
links: []
gnetId: 926
description: Ceph Pools dashboard.
elasticsearch:
__inputs:
- name: prometheus
label: Prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.6.3
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
list:
- builtIn: 1
datasource: "-- Grafana --"
enable: true
hide: true
iconColor: rgba(0, 211, 255, 1)
name: Annotations & Alerts
type: dashboard
editable: true
gnetId: 4358
graphTooltip: 1
hideControls: false
id:
links: []
refresh: 5m
rows:
- collapse: false
height:
panels:
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(178, 49, 13, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
id: 8
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 5
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
- expr: (sum(elasticsearch_cluster_health_status{cluster=~"$cluster",color="green"})*2)+sum(elasticsearch_cluster_health_status{cluster=~"$cluster",color="yellow"})
format: time_series
intervalFactor: 3
legendFormat: ''
metric: ''
refId: A
step: 40
thresholds: '0,1,2'
title: Cluster health status
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: GREEN
value: '2'
- op: "="
text: YELLOW
value: '1'
- op: "="
text: RED
value: '0'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
id: 10
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: sum(elasticsearch_cluster_health_number_of_nodes{cluster=~"$cluster"})
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
metric: ''
refId: A
step: 40
thresholds: ''
title: Nodes
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
id: 9
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: elasticsearch_cluster_health_number_of_data_nodes{cluster="$cluster"}
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
metric: ''
refId: A
step: 40
thresholds: ''
title: Data nodes
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
hideTimeOverride: true
id: 16
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
- expr: elasticsearch_cluster_health_number_of_pending_tasks{cluster="$cluster"}
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
metric: ''
refId: A
step: 40
thresholds: ''
title: Pending tasks
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Cluster
titleSize: h6
- collapse: false
height: ''
panels:
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
id: 11
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
minSpan: 2
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
repeat: shard_type
span: 2.4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
- expr: elasticsearch_cluster_health_active_primary_shards{cluster="$cluster"}
intervalFactor: 2
legendFormat: ''
refId: A
step: 40
thresholds: ''
title: active primary shards
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
id: 39
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
minSpan: 2
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2.4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
- expr: elasticsearch_cluster_health_active_shards{cluster="$cluster"}
intervalFactor: 2
legendFormat: ''
refId: A
step: 40
thresholds: ''
title: active shards
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
id: 40
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
minSpan: 2
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2.4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
- expr: elasticsearch_cluster_health_initializing_shards{cluster="$cluster"}
intervalFactor: 2
legendFormat: ''
refId: A
step: 40
thresholds: ''
title: initializing shards
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
id: 41
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
minSpan: 2
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2.4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
- expr: elasticsearch_cluster_health_relocating_shards{cluster="$cluster"}
intervalFactor: 2
legendFormat: ''
refId: A
step: 40
thresholds: ''
title: relocating shards
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: '50'
id: 42
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
minSpan: 2
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2.4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
# This panel is titled "unassigned shards", so chart every unassigned shard
# rather than only the subset whose allocation is currently delayed.
- expr: elasticsearch_cluster_health_unassigned_shards{cluster="$cluster"}
intervalFactor: 2
legendFormat: ''
refId: A
step: 40
thresholds: ''
title: unassigned shards
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Shards
titleSize: h6
- collapse: false
height:
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 30
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: elasticsearch_process_cpu_percent{cluster="$cluster",es_master_node="true",name=~"$node"}
format: time_series
instant: false
interval: ''
intervalFactor: 2
legendFormat: "{{ name }} - master"
metric: ''
refId: A
step: 10
- expr: elasticsearch_process_cpu_percent{cluster="$cluster",es_data_node="true",name=~"$node"}
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ name }} - data"
metric: ''
refId: B
step: 10
thresholds: []
timeFrom:
timeShift:
title: CPU usage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: percent
label: CPU usage
logBase: 1
max: 100
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 0
grid: {}
height: '400'
id: 31
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
# JVM memory series (used / committed / max), one line per node and memory
# area. Each selector filters on the node name exactly once; the matcher was
# previously duplicated, which was redundant.
- expr: elasticsearch_jvm_memory_used_bytes{cluster="$cluster",name=~"$node"}
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ name }} - used: {{area}}"
metric: ''
refId: A
step: 10
- expr: elasticsearch_jvm_memory_committed_bytes{cluster="$cluster",name=~"$node"}
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - committed: {{area}}"
refId: B
step: 10
- expr: elasticsearch_jvm_memory_max_bytes{cluster="$cluster",name=~"$node"}
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - max: {{area}}"
refId: C
step: 10
thresholds: []
timeFrom:
timeShift:
title: JVM memory usage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label: Memory
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 32
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
# Fraction of the data filesystem in use: 1 - available / size.
# Both operands carry the same cluster and node filters so the selectors stay
# consistent with each other.
- expr: 1-(elasticsearch_filesystem_data_available_bytes{cluster="$cluster",name=~"$node"}/elasticsearch_filesystem_data_size_bytes{cluster="$cluster",name=~"$node"})
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ name }} - {{path}}"
metric: ''
refId: A
step: 10
thresholds:
- colorMode: custom
fill: true
fillColor: rgba(216, 200, 27, 0.27)
op: gt
value: 0.8
- colorMode: custom
fill: true
fillColor: rgba(234, 112, 112, 0.22)
op: gt
value: 0.9
timeFrom:
timeShift:
title: Disk usage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: percentunit
label: Disk Usage %
logBase: 1
max: 1
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 47
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
sort: max
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
# Draw transmitted traffic below the X axis. Legends in this panel render as
# "<name> -sent", so the alias must be a regex; a plain "sent" alias is an
# exact-match string and never matched any series.
- alias: "/-sent$/"
transform: negative-Y
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: irate(elasticsearch_transport_tx_size_bytes_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} -sent"
refId: D
step: 10
- expr: irate(elasticsearch_transport_rx_size_bytes_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} -received"
refId: C
step: 10
thresholds: []
timeFrom:
timeShift:
title: Network usage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: Bps
label: Bytes/sec
logBase: 1
max:
min:
show: true
- format: pps
label: ''
logBase: 1
max:
min:
show: false
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: System
titleSize: h6
- collapse: false
height: ''
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 1
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
- expr: elasticsearch_indices_docs{cluster="$cluster",name=~"$node"}
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ name }}"
metric: ''
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Documents count
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: Documents
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 24
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
- expr: irate(elasticsearch_indices_indexing_index_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Documents indexed rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: index calls/s
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 25
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
- expr: rate(elasticsearch_indices_docs_deleted{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Documents deleted rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: Documents/s
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 26
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
# The panel title and Y axis are "Documents merged rate" / "Documents/s", so
# rate the merged-documents counter rather than the merge-operations counter.
- expr: rate(elasticsearch_indices_merges_docs_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Documents merged rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: Documents/s
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Documents
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 48
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
sort: avg
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: irate(elasticsearch_indices_indexing_index_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ name }} - indexing"
metric: ''
refId: A
step: 4
- expr: irate(elasticsearch_indices_search_query_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - query"
refId: B
step: 4
- expr: irate(elasticsearch_indices_search_fetch_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - fetch"
refId: C
step: 4
- expr: irate(elasticsearch_indices_merges_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - merges"
refId: D
step: 4
- expr: irate(elasticsearch_indices_refresh_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - refresh"
refId: E
step: 4
- expr: irate(elasticsearch_indices_flush_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - flush"
refId: F
step: 4
thresholds: []
timeFrom:
timeShift:
title: Total Operations rate
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: Operations/s
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
# Graph panel: time spent per second in each class of index operation.
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 49
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
sort: avg
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
# All targets use the seconds-based counters, as the "Times" row panels do.
# The previous "*_time_ms_total" names did not share a unit with the indexing
# series plotted on the same axis.
# NOTE(review): confirm the fetch/refresh/flush metric names against the
# deployed elasticsearch_exporter version.
- expr: irate(elasticsearch_indices_indexing_index_time_seconds_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ name }} - indexing"
metric: ''
refId: A
step: 4
- expr: irate(elasticsearch_indices_search_query_time_seconds{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - query"
refId: B
step: 4
- expr: irate(elasticsearch_indices_search_fetch_time_seconds{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - fetch"
refId: C
step: 4
- expr: irate(elasticsearch_indices_merges_total_time_seconds_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - merges"
refId: D
step: 4
- expr: irate(elasticsearch_indices_refresh_time_seconds_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - refresh"
refId: E
step: 4
- expr: irate(elasticsearch_indices_flush_time_seconds{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{ name }} - flush"
refId: F
step: 4
thresholds: []
timeFrom:
timeShift:
title: Total Operations time
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
# The series above are in seconds, so the axis unit is seconds.
- format: s
label: Time
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Total Operations stats
titleSize: h6
- collapse: false
height: ''
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 33
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: 'rate(elasticsearch_indices_search_query_time_seconds{cluster="$cluster",name=~"$node"}[$interval]) '
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 4
thresholds: []
timeFrom:
timeShift:
title: Query time
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
# The plotted metric (elasticsearch_indices_search_query_time_seconds) is in
# seconds, so the axis unit is "s" (as on the "Merging time" panel), not "ms".
- format: s
label: Time
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 5
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: rate(elasticsearch_indices_indexing_index_time_seconds_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 4
thresholds: []
timeFrom:
timeShift:
title: Indexing time
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
# The plotted metric (elasticsearch_indices_indexing_index_time_seconds_total)
# is in seconds, so the axis unit is "s" (as on the "Merging time" panel),
# not "ms".
- format: s
label: Time
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 3
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: rate(elasticsearch_indices_merges_total_time_seconds_total{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 4
thresholds: []
timeFrom:
timeShift:
title: Merging time
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
label: Time
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Times
titleSize: h6
- collapse: false
height:
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 4
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
- expr: elasticsearch_indices_fielddata_memory_size_bytes{cluster="$cluster",name=~"$node"}
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Field data memory size
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label: Memory
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 34
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
- expr: rate(elasticsearch_indices_fielddata_evictions{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Field data evictions
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: Evictions/s
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 35
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
- expr: elasticsearch_indices_query_cache_memory_size_bytes{cluster="$cluster",name=~"$node"}
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Query cache size
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label: Size
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 36
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
- expr: rate(elasticsearch_indices_query_cache_evictions{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}}"
metric: ''
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Query cache evictions
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: Evictions/s
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Caches
titleSize: h6
- collapse: false
height: 728
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
id: 45
legend:
alignAsTable: true
avg: true
current: false
max: true
min: true
show: true
sort: avg
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: ' irate(elasticsearch_thread_pool_rejected_count{cluster="$cluster",name=~"$node"}[$interval])'
format: time_series
intervalFactor: 2
legendFormat: "{{name}} - {{ type }}"
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Thread Pool operations rejected
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
id: 46
legend:
alignAsTable: true
avg: true
current: false
max: true
min: true
show: true
sort: avg
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
# "Thread Pool operations queued" charts the queue depth. The previous
# expression duplicated the active-thread count shown by the neighbouring
# "Thread Pool threads active" panel.
- expr: elasticsearch_thread_pool_queue_count{cluster="$cluster",name=~"$node"}
format: time_series
intervalFactor: 2
legendFormat: "{{name}} - {{ type }}"
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Thread Pool operations queued
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
height: ''
id: 43
legend:
alignAsTable: true
avg: true
current: false
max: true
min: true
show: true
sort: avg
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
# Active thread pool threads: the raw metric value is plotted (no rate
# function is applied), filtered by the $cluster and $node template variables.
- expr: elasticsearch_thread_pool_active_count{cluster="$cluster",name=~"$node"}
format: time_series
intervalFactor: 2
legendFormat: "{{name}} - {{ type }}"
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Thread Pool threads active
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
id: 44
legend:
alignAsTable: true
avg: true
current: false
max: true
min: true
show: true
sort: avg
sortDesc: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
# Completed thread pool operations: irate() of the completed counter over the
# dashboard's $interval variable, filtered by $cluster and $node.
- expr: irate(elasticsearch_thread_pool_completed_count{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
intervalFactor: 2
legendFormat: "{{name}} - {{ type }}"
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Thread Pool operations completed
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Thread Pool
titleSize: h6
- collapse: false
height:
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 7
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: true
steppedLine: false
targets:
# Garbage collection frequency: rate() of the collection counter over the
# dashboard's $interval variable, filtered by $cluster and $node. The legend
# is built from the `name` and `gc` labels.
- expr: rate(elasticsearch_jvm_gc_collection_seconds_count{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}} - {{gc}}"
metric: ''
refId: A
step: 4
thresholds: []
timeFrom:
timeShift:
title: GC count
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: GCs
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
height: '400'
id: 27
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: true
min: true
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
# Time spent in garbage collection: rate() of the accumulated GC seconds
# (`_sum`) over the dashboard's $interval variable, filtered by $cluster and
# $node. The `_count` series only counts collections and is already shown by
# the "GC count" panel; this panel's y-axis is formatted in seconds, so it
# needs the `_sum` series.
- expr: rate(elasticsearch_jvm_gc_collection_seconds_sum{cluster="$cluster",name=~"$node"}[$interval])
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{name}} - {{gc}}"
metric: ''
refId: A
step: 4
thresholds: []
timeFrom:
timeShift:
title: GC time
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
label: Time
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: JVM Garbage Collection
titleSize: h6
schemaVersion: 14
style: dark
tags:
- elasticsearch
- App
templating:
list:
# `interval` template variable: referenced as [$interval] by the rate()/irate()
# queries of this dashboard. The selected default is the `auto` option; the
# fixed options listed below mirror the comma-separated `query` value.
- auto: true
auto_count: 30
auto_min: 10s
current:
text: auto
value: "$__auto_interval"
hide: 0
label: Interval
name: interval
options:
- selected: true
text: auto
value: "$__auto_interval"
- selected: false
text: 1m
value: 1m
- selected: false
text: 10m
value: 10m
- selected: false
text: 30m
value: 30m
- selected: false
text: 1h
value: 1h
- selected: false
text: 6h
value: 6h
- selected: false
text: 12h
value: 12h
- selected: false
text: 1d
value: 1d
- selected: false
text: 7d
value: 7d
- selected: false
text: 14d
value: 14d
- selected: false
text: 30d
value: 30d
query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d
refresh: 2
type: interval
# `cluster` template variable: single-select list of the values of the
# `cluster` label on elasticsearch_cluster_health_status. Panels reference it
# as cluster="$cluster". The UI label reads "Cluster" to match what the
# variable actually selects; the variable name itself is unchanged.
- allValue:
current: {}
datasource: "prometheus"
hide: 0
includeAll: false
label: Cluster
multi: false
name: cluster
options: []
query: label_values(elasticsearch_cluster_health_status,cluster)
refresh: 1
regex: ''
sort: 1
tagValuesQuery:
tags: []
tagsQuery:
type: query
useTags: false
# `node` template variable: multi-select (with an "all" option) list of the
# values of the `name` label on elasticsearch_process_cpu_percent. Panels
# reference it as a regex match, name=~"$node".
- allValue:
current: {}
datasource: "prometheus"
hide: 0
includeAll: true
label: node
multi: true
name: node
options: []
query: label_values(elasticsearch_process_cpu_percent,name)
refresh: 1
regex: ''
sort: 1
tagValuesQuery:
tags: []
tagsQuery:
type: query
useTags: false
time:
from: now-12h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: browser
title: Elasticsearch
version: 1
description: Elasticsearch detailed dashboard
hosts_containers:
__inputs:
- name: prometheus
label: Prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: panel
id: graph
name: Graph
version: ''
- type: panel
id: singlestat
name: Singlestat
version: ''
- type: grafana
id: grafana
name: Grafana
version: 3.1.1
- type: datasource
id: prometheus
name: Prometheus
version: 1.3.0
id:
title: Container Metrics (cAdvisor)
description: Monitors Kubernetes cluster using Prometheus. Shows overall cluster CPU
/ Memory / Filesystem usage as well as individual pod, containers, systemd services
statistics. Uses cAdvisor metrics only.
tags:
- kubernetes
style: dark
timezone: browser
editable: true
hideControls: false
sharedCrosshair: false
rows:
- collapse: false
editable: true
height: 200px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
thresholdLine: false
height: 200px
id: 32
isNew: true
legend:
alignAsTable: false
avg: true
current: true
max: false
min: false
rightSide: false
show: false
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: false
targets:
# Summed receive rate (5m window) across all series whose
# kubernetes_io_hostname matches the $Node variable.
# NOTE(review): $Node is not defined in this part of the file; presumably it
# is declared in this dashboard's templating section - confirm.
- expr: sum (rate (container_network_receive_bytes_total{kubernetes_io_hostname=~"^$Node$"}[5m]))
interval: 10s
intervalFactor: 1
legendFormat: Received
metric: network
refId: A
step: 10
# Summed transmit rate with the same filter; the leading '-' negates the value
# so the "Sent" series is plotted with the opposite sign to "Received".
- expr: '- sum (rate (container_network_transmit_bytes_total{kubernetes_io_hostname=~"^$Node$"}[5m]))'
interval: 10s
intervalFactor: 1
legendFormat: Sent
metric: network
refId: B
step: 10
timeFrom:
timeShift:
title: Network I/O pressure
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min:
show: true
- format: Bps
label:
logBase: 1
max:
min:
show: false
title: Network I/O pressure
- collapse: false
editable: true
height: 250px
panels:
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
height: 180px
id: 4
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (container_memory_working_set_bytes{id="/",kubernetes_io_hostname=~"^$Node$"})
/ sum (machine_memory_bytes{kubernetes_io_hostname=~"^$Node$"}) * 100
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: 65, 90
title: Cluster memory usage
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
height: 180px
id: 6
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{id="/",kubernetes_io_hostname=~"^$Node$"}[5m]))
/ sum (machine_cpu_cores{kubernetes_io_hostname=~"^$Node$"}) * 100
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: 65, 90
title: Cluster CPU usage (5m avg)
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
height: 180px
id: 7
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
# Filesystem usage as a percentage: used bytes divided by limit bytes, times
# 100, both summed over series matching the $Node hostname filter.
# NOTE(review): the device regex only matches names of the form /dev/sdaN or
# /dev/vdaN with a single digit N; devices named differently are excluded
# from both sums - confirm this is intended for the target hosts.
- expr: sum (container_fs_usage_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"})
/ sum (container_fs_limit_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"})
* 100
interval: 10s
intervalFactor: 1
legendFormat: ''
metric: ''
refId: A
step: 10
thresholds: 65, 90
title: Cluster filesystem usage
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 9
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 20%
prefix: ''
prefixFontSize: 20%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (container_memory_working_set_bytes{id="/",kubernetes_io_hostname=~"^$Node$"})
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Used
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 10
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (machine_memory_bytes{kubernetes_io_hostname=~"^$Node$"})
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Total
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 11
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: " cores"
postfixFontSize: 30%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{id="/",kubernetes_io_hostname=~"^$Node$"}[5m]))
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Used
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 12
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: " cores"
postfixFontSize: 30%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (machine_cpu_cores{kubernetes_io_hostname=~"^$Node$"})
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Total
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 13
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (container_fs_usage_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"})
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Used
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 14
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (container_fs_limit_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"})
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Total
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
showTitle: false
title: Total usage
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 3
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: ''
id: 17
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (pod_name)
interval: 10s
intervalFactor: 1
legendFormat: "{{ pod_name }}"
metric: container_cpu
refId: A
step: 10
timeFrom:
timeShift:
title: Pods CPU usage (5m avg)
tooltip:
msResolution: true
shared: true
sort: 2
value_type: cumulative
transparent: false
type: graph
xaxis:
show: true
yaxes:
- format: none
label: cores
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
showTitle: false
title: Pods CPU usage
- collapse: true
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 3
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: ''
id: 23
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{systemd_service_name!="",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (systemd_service_name)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "{{ systemd_service_name }}"
metric: container_cpu
refId: A
step: 10
timeFrom:
timeShift:
title: System services CPU usage (5m avg)
tooltip:
msResolution: true
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: none
label: cores
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: System services CPU usage
- collapse: true
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 3
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: ''
id: 24
isNew: true
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: false
min: false
rightSide: true
show: true
sideWidth:
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{image!="",name=~"^k8s_.*",container_name!="POD",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (container_name, pod_name)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: 'pod: {{ pod_name }} | {{ container_name }}'
metric: container_cpu
refId: A
step: 10
- expr: sum (rate (container_cpu_usage_seconds_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, name, image)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: 'docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})'
metric: container_cpu
refId: B
step: 10
- expr: sum (rate (container_cpu_usage_seconds_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, rkt_container_name)
interval: 10s
intervalFactor: 1
legendFormat: 'rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}'
metric: container_cpu
refId: C
step: 10
timeFrom:
timeShift:
title: Containers CPU usage (5m avg)
tooltip:
msResolution: true
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: none
label: cores
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: Containers CPU usage
- collapse: true
editable: true
height: 500px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 3
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 20
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: false
show: true
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (id)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "{{ id }}"
metric: container_cpu
refId: A
step: 10
timeFrom:
timeShift:
title: All processes CPU usage (5m avg)
tooltip:
msResolution: true
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: none
label: cores
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
repeat:
showTitle: false
title: All processes CPU usage
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 25
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (container_memory_working_set_bytes{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"})
by (pod_name)
interval: 10s
intervalFactor: 1
legendFormat: "{{ pod_name }}"
metric: container_memory_usage:sort_desc
refId: A
step: 10
timeFrom:
timeShift:
title: Pods memory usage
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: Pods memory usage
- collapse: true
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 26
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (container_memory_working_set_bytes{systemd_service_name!="",kubernetes_io_hostname=~"^$Node$"})
by (systemd_service_name)
interval: 10s
intervalFactor: 1
legendFormat: "{{ systemd_service_name }}"
metric: container_memory_usage:sort_desc
refId: A
step: 10
timeFrom:
timeShift:
title: System services memory usage
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: System services memory usage
- collapse: true
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 27
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (container_memory_working_set_bytes{image!="",name=~"^k8s_.*",container_name!="POD",kubernetes_io_hostname=~"^$Node$"})
by (container_name, pod_name)
interval: 10s
intervalFactor: 1
legendFormat: 'pod: {{ pod_name }} | {{ container_name }}'
metric: container_memory_usage:sort_desc
refId: A
step: 10
- expr: sum (container_memory_working_set_bytes{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"})
by (kubernetes_io_hostname, name, image)
interval: 10s
intervalFactor: 1
legendFormat: 'docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})'
metric: container_memory_usage:sort_desc
refId: B
step: 10
- expr: sum (container_memory_working_set_bytes{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"})
by (kubernetes_io_hostname, rkt_container_name)
interval: 10s
intervalFactor: 1
legendFormat: 'rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}'
metric: container_memory_usage:sort_desc
refId: C
step: 10
timeFrom:
timeShift:
title: Containers memory usage
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: Containers memory usage
- collapse: true
editable: true
height: 500px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 28
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: false
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (container_memory_working_set_bytes{id!="/",kubernetes_io_hostname=~"^$Node$"})
by (id)
interval: 10s
intervalFactor: 1
legendFormat: "{{ id }}"
metric: container_memory_usage:sort_desc
refId: A
step: 10
timeFrom:
timeShift:
title: All processes memory usage
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: All processes memory usage
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 16
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: false
targets:
- expr: sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (pod_name)
interval: 10s
intervalFactor: 1
legendFormat: "-> {{ pod_name }}"
metric: network
refId: A
step: 10
- expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (pod_name)'
interval: 10s
intervalFactor: 1
legendFormat: "<- {{ pod_name }}"
metric: network
refId: B
step: 10
timeFrom:
timeShift:
title: Pods network I/O (5m avg)
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: Pods network I/O
- collapse: true
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 30
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: false
targets:
- expr: sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (container_name, pod_name)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "-> pod: {{ pod_name }} | {{ container_name }}"
metric: network
refId: B
step: 10
- expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (container_name, pod_name)'
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "<- pod: {{ pod_name }} | {{ container_name }}"
metric: network
refId: D
step: 10
- expr: sum (rate (container_network_receive_bytes_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, name, image)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name
}})"
metric: network
refId: A
step: 10
- expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, name, image)'
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name
}})"
metric: network
refId: C
step: 10
# Inbound ("->") traffic of rkt containers. Must read the *receive* counter:
# the transmit counter is already plotted, negated, by target F ("<- rkt").
- expr: sum (rate (container_network_receive_bytes_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, rkt_container_name)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name
}}"
metric: network
refId: E
step: 10
- expr: '- sum (rate (container_network_transmit_bytes_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, rkt_container_name)'
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name
}}"
metric: network
refId: F
step: 10
timeFrom:
timeShift:
title: Containers network I/O (5m avg)
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: Containers network I/O
- collapse: true
editable: true
height: 500px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 29
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: false
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: false
targets:
- expr: sum (rate (container_network_receive_bytes_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (id)
interval: 10s
intervalFactor: 1
legendFormat: "-> {{ id }}"
metric: network
refId: A
step: 10
- expr: '- sum (rate (container_network_transmit_bytes_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (id)'
interval: 10s
intervalFactor: 1
legendFormat: "<- {{ id }}"
metric: network
refId: B
step: 10
timeFrom:
timeShift:
title: All processes network I/O (5m avg)
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: All processes network I/O
time:
from: now-5m
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
templating:
list:
- allValue: ".*"
current: {}
datasource: prometheus
hide: 0
includeAll: true
multi: false
name: Node
options: []
query: label_values(kubernetes_io_hostname)
refresh: 1
type: query
annotations:
list: []
refresh: 5m
schemaVersion: 12
version: 13
links: []
gnetId: 315
rabbitmq:
__inputs:
- name: prometheus
label: Prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.2.0
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
list: []
editable: true
gnetId: 2121
graphTooltip: 0
hideControls: false
id:
links: []
refresh: 5m
rows:
- collapse: false
height: 266
panels:
# Status tile for the RabbitMQ exporter: rabbitmq_up is rendered as text via
# valueMaps (1 -> Up, 0 -> Down) and drives the tile's background color.
- cacheTimeout:
colorBackground: true
colorValue: false
# Colors are listed from lowest to highest value range: below the first
# threshold -> red, at or above the last threshold -> green.
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 13
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: rabbitmq_up{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
metric: rabbitmq_up{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
refId: A
step: 2
# Thresholds must be numeric (the previous "Up,Down" could not be parsed, so
# the tile never left its first color). With '1,1': value 0 -> red, 1 -> green.
thresholds: '1,1'
timeFrom: 30s
title: RabbitMQ Server
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
- op: "="
text: Down
value: '0'
- op: "="
text: Up
value: '1'
valueName: current
- alert:
conditions:
- evaluator:
params:
- 1
type: lt
operator:
type: and
query:
params:
- A
- 10s
- now
reducer:
params: []
type: last
type: query
- evaluator:
params: []
type: no_value
operator:
type: and
query:
params:
- A
- 10s
- now
reducer:
params: []
type: last
type: query
executionErrorState: alerting
frequency: 60s
handler: 1
message: One or more RabbitMQ nodes are down
name: Node Stats alert
noDataState: no_data
notifications: []
aliasColors: {}
bars: true
datasource: prometheus
decimals: 0
fill: 1
id: 12
legend:
alignAsTable: true
avg: false
current: true
max: false
min: false
show: true
total: false
values: true
lines: false
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 9
stack: false
steppedLine: false
targets:
- expr: rabbitmq_running{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{node}}"
metric: rabbitmq_running
refId: A
step: 2
thresholds:
- colorMode: critical
fill: true
line: true
op: lt
value: 1
timeFrom: 30s
timeShift:
title: Node up Stats
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 6
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
targets:
- expr: rabbitmq_exchangesTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{instance}}:exchanges"
metric: rabbitmq_exchangesTotal
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Exchanges
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 4
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
targets:
- expr: rabbitmq_channelsTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{instance}}:channels"
metric: rabbitmq_channelsTotal
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Channels
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 3
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
targets:
- expr: rabbitmq_consumersTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{instance}}:consumers"
metric: rabbitmq_consumersTotal
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Consumers
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 5
legend:
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
targets:
- expr: rabbitmq_connectionsTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{instance}}:connections"
metric: rabbitmq_connectionsTotal
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Connections
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
fill: 1
id: 7
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
targets:
- expr: rabbitmq_queuesTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{instance}}:queues"
metric: rabbitmq_queuesTotal
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Queues
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 8
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
targets:
- expr: sum by (vhost)(rabbitmq_queue_messages_ready{application="prometheus_rabbitmq_exporter",release_group="$rabbit"})
intervalFactor: 2
legendFormat: "{{vhost}}:ready"
metric: rabbitmq_queue_messages_ready
refId: A
step: 2
- expr: sum by (vhost)(rabbitmq_queue_messages_published_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"})
intervalFactor: 2
legendFormat: "{{vhost}}:published"
metric: rabbitmq_queue_messages_published_total
refId: B
step: 2
- expr: sum by (vhost)(rabbitmq_queue_messages_delivered_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"})
intervalFactor: 2
legendFormat: "{{vhost}}:delivered"
metric: rabbitmq_queue_messages_delivered_total
refId: C
step: 2
- expr: sum by (vhost)(rabbitmq_queue_messages_unacknowledged{application="prometheus_rabbitmq_exporter",release_group="$rabbit"})
intervalFactor: 2
legendFormat: "{{vhost}}:unack"
metric: ack
refId: D
step: 2
thresholds: []
timeFrom:
timeShift:
title: Messages/host
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 2
legend:
alignAsTable: true
avg: false
current: true
max: false
min: false
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
targets:
- expr: rabbitmq_queue_messages{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{queue}}:{{durable}}"
metric: rabbitmq_queue_messages
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Messages / Queue
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
fill: 1
id: 9
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
targets:
- expr: rabbitmq_node_mem_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{node}}:used"
metric: rabbitmq_node_mem_used
refId: A
step: 2
- expr: rabbitmq_node_mem_limit{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{node}}:limit"
metric: node_mem
refId: B
step: 2
thresholds: []
timeFrom:
timeShift:
title: Memory
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: decbytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
fill: 1
id: 10
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
targets:
- expr: rabbitmq_fd_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{node}}:used"
metric: ''
refId: A
step: 2
- expr: rabbitmq_fd_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{node}}:total"
metric: node_mem
refId: B
step: 2
thresholds: []
timeFrom:
timeShift:
title: File descriptors
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
fill: 1
id: 11
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
targets:
- expr: rabbitmq_sockets_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{node}}:used"
metric: ''
refId: A
step: 2
- expr: rabbitmq_sockets_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}
intervalFactor: 2
legendFormat: "{{node}}:total"
metric: ''
refId: B
step: 2
thresholds: []
timeFrom:
timeShift:
title: Sockets
tooltip:
shared: true
sort: 0
value_type: individual
transparent: false
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
schemaVersion: 14
style: dark
tags: []
templating:
list:
- current:
tags: []
text: Prometheus
value: Prometheus
hide: 0
label:
name: datasource
options: []
query: prometheus
refresh: 1
regex: ''
type: datasource
- current: {}
hide: 0
label: null
name: rabbit
options: []
type: query
query: label_values(rabbitmq_up, release_group)
refresh: 1
sort: 1
datasource: prometheus
time:
from: now-5m
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: browser
title: RabbitMQ Metrics
version: 17
description: 'Basic rabbitmq host stats: Node Stats, Exchanges, Channels, Consumers, Connections,
Queues, Messages, Messages per Queue, Memory, File Descriptors, Sockets.'
kubernetes_capacity_planning:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.4.1
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
list: []
description: ''
editable: true
gnetId: 22
graphTooltip: 0
hideControls: false
id:
links: []
refresh: false
rows:
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 3
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(rate(node_cpu{mode="idle"}[2m])) * 100
hide: false
intervalFactor: 10
legendFormat: ''
refId: A
step: 50
thresholds: []
timeFrom:
timeShift:
title: Idle cpu
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: percent
label: cpu usage
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 9
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(node_load1)
intervalFactor: 4
legendFormat: load 1m
refId: A
step: 20
target: ''
- expr: sum(node_load5)
intervalFactor: 4
legendFormat: load 5m
refId: B
step: 20
target: ''
- expr: sum(node_load15)
intervalFactor: 4
legendFormat: load 15m
refId: C
step: 20
target: ''
thresholds: []
timeFrom:
timeShift:
title: System load
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
# Left axis carries the summed node_load1/5/15 series. Load averages are plain
# counts, not ratios, so they are shown with the generic "short" unit; the
# previous "percentunit" rendered a load of 2.5 as 250%.
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 4
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: node_memory_SwapFree{instance="172.17.0.1:9100",job="prometheus"}
yaxis: 2
spaceLength: 10
span: 9
stack: true
steppedLine: false
targets:
- expr: sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers)
- sum(node_memory_Cached)
intervalFactor: 2
legendFormat: memory usage
metric: memo
refId: A
step: 10
target: ''
- expr: sum(node_memory_Buffers)
interval: ''
intervalFactor: 2
legendFormat: memory buffers
metric: memo
refId: B
step: 10
target: ''
- expr: sum(node_memory_Cached)
interval: ''
intervalFactor: 2
legendFormat: memory cached
metric: memo
refId: C
step: 10
target: ''
- expr: sum(node_memory_MemFree)
interval: ''
intervalFactor: 2
legendFormat: memory free
metric: memo
refId: D
step: 10
target: ''
thresholds: []
timeFrom:
timeShift:
title: Memory usage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 5
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers)
- sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100"
intervalFactor: 2
metric: ''
refId: A
step: 60
target: ''
thresholds: 80, 90
title: Memory usage
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 246
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 6
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: read
yaxis: 1
- alias: '{instance="172.17.0.1:9100"}'
yaxis: 2
- alias: io time
yaxis: 2
spaceLength: 10
span: 9
stack: false
steppedLine: false
targets:
- expr: sum(rate(node_disk_bytes_read[5m]))
hide: false
intervalFactor: 4
legendFormat: read
refId: A
step: 20
target: ''
- expr: sum(rate(node_disk_bytes_written[5m]))
intervalFactor: 4
legendFormat: written
refId: B
step: 20
- expr: sum(rate(node_disk_io_time_ms[5m]))
intervalFactor: 4
legendFormat: io time
refId: C
step: 20
thresholds: []
timeFrom:
timeShift:
title: Disk I/O
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: ms
label:
logBase: 1
max:
min:
show: true
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percentunit
gauge:
maxValue: 1
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 12
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: (sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"}))
/ sum(node_filesystem_size{device!="rootfs"})
intervalFactor: 2
refId: A
step: 60
target: ''
thresholds: 0.75, 0.9
title: Disk space usage
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 8
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: 'transmitted '
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(rate(node_network_receive_bytes{device!~"lo"}[5m]))
hide: false
intervalFactor: 2
legendFormat: ''
refId: A
step: 10
target: ''
thresholds: []
timeFrom:
timeShift:
title: Network received
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: bytes
label:
logBase: 1
max:
min:
show: true
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 10
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: 'transmitted '
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(rate(node_network_transmit_bytes{device!~"lo"}[5m]))
hide: false
intervalFactor: 2
legendFormat: ''
refId: B
step: 10
target: ''
thresholds: []
timeFrom:
timeShift:
title: Network transmitted
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: bytes
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 276
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 11
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 9
stack: false
steppedLine: false
targets:
- expr: sum(kube_pod_info)
format: time_series
intervalFactor: 2
legendFormat: Current number of Pods
refId: A
step: 10
- expr: sum(kube_node_status_capacity_pods)
format: time_series
intervalFactor: 2
legendFormat: Maximum capacity of pods
refId: B
step: 10
thresholds: []
timeFrom:
timeShift:
title: Cluster Pod Utilization
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 7
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
# Percentage of schedulable pod slots in use. The expression computes
# 100 - (free slots / capacity) * 100, which is algebraically the same as
# sum(kube_pod_info) / sum(kube_node_status_capacity_pods) * 100.
targets:
- expr: 100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods)
* 100
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 60
target: ''
thresholds: '80,90'
title: Pod Utilization
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
schemaVersion: 14
style: dark
tags: []
templating:
list: []
time:
from: now-1h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: browser
title: Kubernetes Capacity Planning
version: 4
inputs:
- name: prometheus
pluginId: prometheus
type: datasource
value: prometheus
overwrite: true
kubernetes_cluster_status:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.4.1
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
list: []
editable: true
gnetId:
graphTooltip: 0
hideControls: false
id:
links: []
rows:
- collapse: false
height: 129
panels:
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 5
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 6
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
# Number of control-plane scrape targets that are currently down.
# `up{...} == 0` keeps only the samples whose value is 0, so they have to be
# counted; summing them (as before) always produced 0 and the '1,3'
# thresholds below could never trigger.
# NOTE(review): the "Controller Managers UP" panel of this dashboard selects
# job="kube-controller-manager-discovery"; confirm which job names this
# deployment actually exposes, since the regex here is matched in full.
targets:
- expr: count(up{job=~"apiserver|kube-scheduler|kube-controller-manager"} == 0)
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '1,3'
title: Control Plane UP
type: singlestat
valueFontSize: 80%
# When nothing is down the query returns no series; that empty result is
# shown as "UP".
valueMaps:
- op: "="
text: UP
value: 'null'
# Show the latest count rather than the sum of every sample in the time range.
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 6
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 6
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: sum(ALERTS{alertstate="firing",alertname!="DeadMansSwitch"})
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '3,5'
title: Alerts Firing
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: '0'
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Cluster Health
titleSize: h6
- collapse: false
height: 168
panels:
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
decimals:
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 1
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: (sum(up{job="apiserver"} == 1) / count(up{job="apiserver"})) * 100
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '50,80'
title: API Servers UP
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
decimals:
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 2
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: (sum(up{job="kube-controller-manager-discovery"} == 1) / count(up{job="kube-controller-manager-discovery"}))
* 100
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '50,80'
title: Controller Managers UP
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
decimals:
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 3
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: (sum(up{job="kube-scheduler-discovery"} == 1) / count(up{job="kube-scheduler-discovery"}))
* 100
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '50,80'
title: Schedulers UP
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals:
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
hideTimeOverride: false
id: 4
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: count(increase(kube_pod_container_status_restarts{namespace=~"kube-system|tectonic-system"}[1h])
> 5)
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '1,3'
title: Crashlooping Control Plane Pods
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: '0'
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Control Plane Status
titleSize: h6
- collapse: false
height: 158
panels:
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 8
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
# Cluster CPU utilisation in percent: the mean of each instance's busy share.
# The inner `avg by (instance)` already collapses the per-core series, so the
# outer aggregation must average over instances. Dividing the per-instance sum
# by the number of per-core node_cpu series under-reported multi-core nodes.
- expr: 'avg(100 - (avg by (instance) (rate(node_cpu{job="node-exporter",mode="idle"}[5m])) * 100))'
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '80,90'
title: CPU Utilization
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 7
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers)
- sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100"
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '80,90'
title: Memory Utilization
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 9
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
# Used share of all non-rootfs filesystems, scaled to 0-100 so the value
# matches this panel's `format: percent`, its 0-100 gauge range and its
# '80,90' thresholds (the bare ratio never left the 0-1 range).
- expr: '((sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"})) / sum(node_filesystem_size{device!="rootfs"})) * 100'
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '80,90'
title: Filesystem Utilization
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 10
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: 100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods)
* 100
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '80,90'
title: Pod Utilization
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Capacity Planning
titleSize: h6
schemaVersion: 14
style: dark
tags: []
templating:
list: []
time:
from: now-6h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: ''
title: Kubernetes Cluster Status
version: 3
inputs:
- name: prometheus
pluginId: prometheus
type: datasource
value: prometheus
overwrite: true
# "Nodes" dashboard (gnetId 22): overview of a single server. Every panel query
# filters on instance="$server"; the `server` template variable is populated
# from label_values(node_boot_time, instance). Imported with the `prometheus`
# datasource input and `overwrite: true`.
nodes:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.4.1
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
list: []
description: Dashboard to get an overview of one server
editable: true
gnetId: 22
graphTooltip: 0
hideControls: false
id:
links: []
refresh: false
rows:
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 3
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: 100 - (avg by (cpu) (irate(node_cpu{mode="idle", instance="$server"}[5m]))
* 100)
hide: false
intervalFactor: 10
legendFormat: "{{cpu}}"
refId: A
step: 50
thresholds: []
timeFrom:
timeShift:
title: CPU usage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: percent
label: cpu usage
logBase: 1
max: 100
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 9
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: node_load1{instance="$server"}
intervalFactor: 4
legendFormat: load 1m
refId: A
step: 20
target: ''
- expr: node_load5{instance="$server"}
intervalFactor: 4
legendFormat: load 5m
refId: B
step: 20
target: ''
- expr: node_load15{instance="$server"}
intervalFactor: 4
legendFormat: load 15m
refId: C
step: 20
target: ''
thresholds: []
timeFrom:
timeShift:
title: System load
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
# node_load1/5/15 are plain load averages, not 0-1 ratios. "short" plots them
# as plain numbers instead of rendering a load of 2.5 as "250%".
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 4
legend:
alignAsTable: false
avg: false
current: false
hideEmpty: false
hideZero: false
max: false
min: false
rightSide: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: node_memory_SwapFree{instance="172.17.0.1:9100",job="prometheus"}
yaxis: 2
spaceLength: 10
span: 9
stack: true
steppedLine: false
targets:
- expr: node_memory_MemTotal{instance="$server"} - node_memory_MemFree{instance="$server"}
- node_memory_Buffers{instance="$server"} - node_memory_Cached{instance="$server"}
hide: false
interval: ''
intervalFactor: 2
legendFormat: memory used
metric: ''
refId: C
step: 10
- expr: node_memory_Buffers{instance="$server"}
interval: ''
intervalFactor: 2
legendFormat: memory buffers
metric: ''
refId: E
step: 10
- expr: node_memory_Cached{instance="$server"}
intervalFactor: 2
legendFormat: memory cached
metric: ''
refId: F
step: 10
- expr: node_memory_MemFree{instance="$server"}
intervalFactor: 2
legendFormat: memory free
metric: ''
refId: D
step: 10
thresholds: []
timeFrom:
timeShift:
title: Memory usage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 5
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: ((node_memory_MemTotal{instance="$server"} - node_memory_MemFree{instance="$server"} -
node_memory_Buffers{instance="$server"} - node_memory_Cached{instance="$server"})
/ node_memory_MemTotal{instance="$server"}) * 100
intervalFactor: 2
refId: A
step: 60
target: ''
# Colour breakpoints, expressed in the panel's percent unit (string value).
thresholds: '80, 90'
title: Memory usage
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 6
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: read
yaxis: 1
- alias: '{instance="172.17.0.1:9100"}'
yaxis: 2
- alias: io time
yaxis: 2
spaceLength: 10
span: 9
stack: false
steppedLine: false
targets:
- expr: sum by (instance) (rate(node_disk_bytes_read{instance="$server"}[2m]))
hide: false
intervalFactor: 4
legendFormat: read
refId: A
step: 20
target: ''
- expr: sum by (instance) (rate(node_disk_bytes_written{instance="$server"}[2m]))
intervalFactor: 4
legendFormat: written
refId: B
step: 20
- expr: sum by (instance) (rate(node_disk_io_time_ms{instance="$server"}[2m]))
intervalFactor: 4
legendFormat: io time
refId: C
step: 20
thresholds: []
timeFrom:
timeShift:
title: Disk I/O
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: ms
label:
logBase: 1
max:
min:
show: true
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percentunit
gauge:
maxValue: 1
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 7
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: (sum(node_filesystem_size{device!="rootfs",instance="$server"}) - sum(node_filesystem_free{device!="rootfs",instance="$server"}))
/ sum(node_filesystem_size{device!="rootfs",instance="$server"})
intervalFactor: 2
refId: A
step: 60
target: ''
# Colour breakpoints on the 0-1 ratio scale used by this panel (string value).
thresholds: '0.75, 0.9'
title: Disk space usage
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 8
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: 'transmitted '
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: rate(node_network_receive_bytes{instance="$server",device!~"lo"}[5m])
hide: false
intervalFactor: 2
legendFormat: "{{device}}"
refId: A
step: 10
target: ''
thresholds: []
timeFrom:
timeShift:
title: Network received
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: bytes
label:
logBase: 1
max:
min:
show: true
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 10
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: 'transmitted '
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: rate(node_network_transmit_bytes{instance="$server",device!~"lo"}[5m])
hide: false
intervalFactor: 2
legendFormat: "{{device}}"
refId: B
step: 10
target: ''
thresholds: []
timeFrom:
timeShift:
title: Network transmitted
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: bytes
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
schemaVersion: 14
style: dark
tags: []
templating:
list:
- allValue:
current: {}
datasource: prometheus
hide: 0
includeAll: false
label:
multi: false
name: server
options: []
query: label_values(node_boot_time, instance)
refresh: 1
regex: ''
sort: 0
tagValuesQuery: ''
tags: []
tagsQuery: ''
type: query
useTags: false
time:
from: now-1h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: browser
title: Nodes
version: 2
inputs:
- name: prometheus
pluginId: prometheus
type: datasource
value: prometheus
overwrite: true
openstack_control_plane:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.5.2
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
- type: panel
id: text
name: Text
version: ''
annotations:
list: []
editable: true
gnetId:
graphTooltip: 1
hideControls: false
id:
links: []
refresh: 5m
rows:
- collapse: false
height: 250px
panels:
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 24
interval: "> 60s"
links:
- dashboard: Openstack Service
name: Drilldown dashboard
params: var-Service=keystone
title: Openstack Service
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_keystone_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Keystone
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 23
interval: "> 60s"
links:
- dashboard: Openstack Service
name: Drilldown dashboard
params: var-Service=glance
title: Openstack Service
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_glance_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Glance
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(202, 58, 40, 0.86)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 22
interval: "> 60s"
links:
- dashboard: Openstack Service
name: Drilldown dashboard
params: var-Service=heat
title: Openstack Service
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_heat_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Heat
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 21
interval: "> 60s"
links:
- dashboard: Openstack Service
name: Drilldown dashboard
params: var-Service=neutron
title: Openstack Service
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_neutron_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Neutron
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(208, 53, 34, 0.82)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 20
interval: "> 60s"
links:
- dashboard: Openstack Service
name: Drilldown dashboard
params: var-Service=nova
title: Openstack Service
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_nova_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Nova
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 19
interval: "> 60s"
links:
- dashboard: Openstack Service
name: Drilldown dashboard
params: var-Service=swift
title: Openstack Service
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_swift_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Ceph
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 18
interval: "> 60s"
links:
- dashboard: Openstack Service
name: Drilldown dashboard
params: var-Service=cinder
title: Openstack Service
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_cinder_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Cinder
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 17
interval: "> 60s"
links:
- dashboard: Openstack Service
name: Drilldown dashboard
params: var-Service=placement
title: Openstack Service
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_placement_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Placement
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(208, 53, 34, 0.82)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 16
interval: "> 60s"
links:
- dashboard: RabbitMQ Metrics
name: Drilldown dashboard
title: RabbitMQ Metrics
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: min(rabbitmq_up)
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: RabbitMQ
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(208, 53, 34, 0.82)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 15
interval: "> 60s"
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: min(mysql_global_status_wsrep_ready)
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: MariaDB
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
# Singlestat health tile for Nginx: maps the amount of active-connection
# samples seen over the last 5 minutes to OK (>= 1) or CRIT (0) and links to
# the "Nginx Stats" drilldown dashboard.
- cacheTimeout:
  colorBackground: true
  colorValue: false
  colors:
    - rgba(225, 177, 40, 0.59)
    - rgba(208, 53, 34, 0.82)
    - rgba(118, 245, 40, 0.73)
  datasource: prometheus
  editable: true
  error: false
  format: none
  gauge:
    maxValue: 100
    minValue: 0
    show: false
    thresholdLabels: false
    thresholdMarkers: true
  id: 14
  interval: "> 60s"
  links:
    - dashboard: Nginx Stats
      name: Drilldown dashboard
      title: Nginx Stats
      type: dashboard
  # mappingType 2 selects "range to text" (see mappingTypes below), so the
  # rangeMaps entries drive the displayed text.
  mappingType: 2
  mappingTypes:
    - name: value to text
      value: 1
    - name: range to text
      value: 2
  maxDataPoints: 100
  nullPointMode: connected
  nullText:
  postfix: ''
  postfixFontSize: 50%
  prefix: ''
  prefixFontSize: 50%
  rangeMaps:
    - from: '1'
      text: OK
      to: '99999999999999'
    - from: '0'
      text: CRIT
      to: '0'
  span: 1
  sparkline:
    fillColor: rgba(31, 118, 189, 0.18)
    full: false
    lineColor: rgb(31, 120, 193)
    show: false
  tableColumn: ''
  targets:
    - column: value
      condition: ''
      # Aggregate with sum() so the query always yields a single series: a
      # singlestat panel cannot render several series, and the bare
      # sum_over_time() returns one series per scraped nginx instance.
      expr: sum(sum_over_time(nginx_connections_total{type="active", namespace="openstack"}[5m]))
      fill: ''
      format: time_series
      function: last
      groupBy:
        - params:
            - "$interval"
          type: time
        - params:
            - 'null'
          type: fill
      groupByTags: []
      groupby_field: ''
      interval: ''
      intervalFactor: 2
      policy: default
      rawQuery: false
      refId: A
      resultFormat: time_series
      step: 120
  thresholds: '0,1'
  title: Nginx
  type: singlestat
  valueFontSize: 50%
  valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(208, 53, 34, 0.82)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 13
interval: "> 60s"
links:
- dashboard: Memcached
name: Drilldown dashboard
title: Memcached
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: min(memcached_up)
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Memcached
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: OpenStack Services
titleSize: h6
- collapse: false
height: 250px
panels:
# Graph panel: total vCPU capacity (used + free) plotted against the vCPUs
# currently in use, for the region selected by $region.
- aliasColors: {}
  bars: false
  dashLength: 10
  dashes: false
  datasource: prometheus
  editable: true
  error: false
  fill: 1
  grid: {}
  id: 11
  interval: "> 60s"
  legend:
    avg: false
    current: false
    max: false
    min: false
    show: false
    total: false
    values: false
  lines: true
  linewidth: 3
  links: []
  nullPointMode: connected
  percentage: false
  pointradius: 5
  points: false
  renderer: flot
  seriesOverrides: []
  spaceLength: 10
  span: 4
  stack: false
  steppedLine: false
  targets:
    # used + free is the total capacity, so this series is labelled "total"
    # (it was previously labelled "free", contradicting the panel title).
    - alias: total
      column: value
      expr: >-
        openstack_total_used_vcpus{job="openstack-metrics", region="$region"}
        + openstack_total_free_vcpus{job="openstack-metrics", region="$region"}
      format: time_series
      function: min
      groupBy:
        - params:
            - "$interval"
          type: time
        - params:
            - '0'
          type: fill
      groupByTags: []
      intervalFactor: 2
      # legendFormat is what names a Prometheus series in the tooltip/legend.
      legendFormat: total
      policy: default
      rawQuery: false
      refId: A
      resultFormat: time_series
      step: 120
    - alias: used
      column: value
      expr: openstack_total_used_vcpus{job="openstack-metrics", region="$region"}
      format: time_series
      function: max
      groupBy:
        - params:
            - "$interval"
          type: time
        - params:
            - '0'
          type: fill
      groupByTags: []
      intervalFactor: 2
      legendFormat: used
      policy: default
      rawQuery: false
      refId: B
      resultFormat: time_series
      step: 120
  thresholds: []
  timeFrom:
  timeShift:
  title: VCPUs (total vs used)
  tooltip:
    msResolution: false
    shared: true
    sort: 0
    value_type: cumulative
  type: graph
  xaxis:
    buckets:
    mode: time
    name:
    show: true
    values: []
  yaxes:
    - format: short
      logBase: 1
      max:
      min: 0
      show: true
    - format: short
      logBase: 1
      max:
      min:
      show: true
# Graph panel: total RAM (used + free, in MB) plotted against the RAM
# currently in use, for the region selected by $region.
- aliasColors: {}
  bars: false
  dashLength: 10
  dashes: false
  datasource: prometheus
  editable: true
  error: false
  fill: 1
  grid: {}
  id: 12
  interval: "> 60s"
  legend:
    avg: false
    current: false
    max: false
    min: false
    show: false
    total: false
    values: false
  lines: true
  linewidth: 3
  links: []
  nullPointMode: connected
  percentage: false
  pointradius: 5
  points: false
  renderer: flot
  seriesOverrides: []
  spaceLength: 10
  span: 4
  stack: false
  steppedLine: false
  targets:
    # used + free is the total capacity, so this series is labelled "total"
    # (it was previously labelled "free", contradicting the panel title).
    - alias: total
      column: value
      expr: >-
        openstack_total_used_ram_MB{job="openstack-metrics", region="$region"}
        + openstack_total_free_ram_MB{job="openstack-metrics", region="$region"}
      format: time_series
      function: mean
      groupBy:
        - params:
            - "$interval"
          type: time
        - params:
            - '0'
          type: fill
      groupByTags: []
      intervalFactor: 2
      # legendFormat is what names a Prometheus series in the tooltip/legend.
      legendFormat: total
      policy: default
      rawQuery: false
      refId: A
      resultFormat: time_series
      step: 120
    - alias: used
      column: value
      expr: openstack_total_used_ram_MB{job="openstack-metrics", region="$region"}
      format: time_series
      function: mean
      groupBy:
        - params:
            - "$interval"
          type: time
        - params:
            - '0'
          type: fill
      groupByTags: []
      interval: ''
      intervalFactor: 2
      legendFormat: used
      policy: default
      rawQuery: false
      refId: B
      resultFormat: time_series
      step: 120
  thresholds: []
  timeFrom:
  timeShift:
  title: RAM (total vs used)
  tooltip:
    msResolution: false
    shared: true
    sort: 0
    value_type: cumulative
  type: graph
  xaxis:
    buckets:
    mode: time
    name:
    show: true
    values: []
  yaxes:
    # Left axis renders values as megabytes, matching the *_ram_MB metrics.
    - format: mbytes
      label: ''
      logBase: 1
      max:
      min: 0
      show: true
    - format: short
      logBase: 1
      max:
      min:
      show: true
# Graph panel: total disk capacity (used + free, in GB) plotted against the
# disk space currently in use, for the region selected by $region.
- aliasColors: {}
  bars: false
  dashLength: 10
  dashes: false
  datasource: prometheus
  editable: true
  error: false
  fill: 1
  grid: {}
  # NOTE(review): id 13 is also used by the "Memcached" singlestat panel of
  # this dashboard; panel ids should be unique per dashboard. Left unchanged
  # here because the ids used earlier in the dashboard need checking before
  # an unused one is picked.
  id: 13
  interval: "> 60s"
  legend:
    avg: false
    current: false
    max: false
    min: false
    show: false
    total: false
    values: false
  lines: true
  linewidth: 3
  links: []
  nullPointMode: connected
  percentage: false
  pointradius: 5
  points: false
  renderer: flot
  seriesOverrides: []
  spaceLength: 10
  span: 4
  stack: false
  steppedLine: false
  targets:
    # used + free is the total capacity, so this series is labelled "total"
    # (it was previously labelled "free", contradicting the panel title).
    - alias: total
      column: value
      expr: >-
        openstack_total_used_disk_GB{job="openstack-metrics", region="$region"}
        + openstack_total_free_disk_GB{job="openstack-metrics", region="$region"}
      format: time_series
      function: mean
      groupBy:
        - params:
            - "$interval"
          type: time
        - params:
            - '0'
          type: fill
      groupByTags: []
      intervalFactor: 2
      # legendFormat is what names a Prometheus series in the tooltip/legend.
      legendFormat: total
      policy: default
      rawQuery: false
      refId: A
      resultFormat: time_series
      step: 120
    - alias: used
      column: value
      expr: openstack_total_used_disk_GB{job="openstack-metrics", region="$region"}
      format: time_series
      function: mean
      groupBy:
        - params:
            - "$interval"
          type: time
        - params:
            - '0'
          type: fill
      groupByTags: []
      intervalFactor: 2
      legendFormat: used
      policy: default
      rawQuery: false
      refId: B
      resultFormat: time_series
      step: 120
  thresholds: []
  timeFrom:
  timeShift:
  # Worded like the sibling VCPUs/RAM panels ("total vs used").
  title: Disk (total vs used)
  tooltip:
    msResolution: false
    shared: true
    sort: 0
    value_type: cumulative
  type: graph
  xaxis:
    buckets:
    mode: time
    name:
    show: true
    values: []
  yaxes:
    # Left axis renders values as gigabytes, matching the *_disk_GB metrics.
    - format: gbytes
      logBase: 1
      max:
      min: 0
      show: true
    - format: short
      logBase: 1
      max:
      min:
      show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Virtual resources
titleSize: h6
schemaVersion: 14
style: dark
tags: []
templating:
enable: true
list:
- allValue:
current: {}
datasource: prometheus
hide: 0
includeAll: false
label:
multi: false
name: region
options: []
query: label_values(openstack_exporter_cache_refresh_duration_seconds, region)
refresh: 1
regex: ''
sort: 0
tagValuesQuery: ''
tags: []
tagsQuery: ''
type: query
useTags: false
time:
from: now-1h
to: now
timepicker:
collapse: false
enable: true
notice: false
now: true
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
status: Stable
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
type: timepicker
timezone: browser
title: OpenStack Metrics
version: 2
nginx_stats:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.5.2
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
annotations:
list: []
description: Show stats from the hnlq715/nginx-vts-exporter.
editable: true
gnetId: 2949
graphTooltip: 0
hideControls: false
id:
links: []
refresh: 5m
rows:
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 7
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 12
stack: false
steppedLine: false
targets:
- expr: sum(nginx_upstream_responses_total{upstream=~"^$Upstream$"}) by (status_code,
upstream)
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ status_code }}.{{ upstream }}"
metric: nginx_upstream_response
refId: A
step: 4
thresholds: []
timeFrom:
timeShift:
title: HTTP Response Codes by Upstream
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 6
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(irate(nginx_upstream_requests_total{upstream=~"^$Upstream$"}[5m]))
by (upstream)
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ upstream }}"
metric: nginx_upstream_requests
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Upstream Requests rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 5
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(irate(nginx_upstream_bytes_total{upstream=~"^$Upstream$"}[5m])) by
(direction, upstream)
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ direction }}.{{ upstream }}"
metric: nginx_upstream_bytes
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Upstream Bytes Transfer rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
- collapse: false
height: 250px
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 1
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(irate(nginx_connections_total[5m])) by (type)
format: time_series
intervalFactor: 2
legendFormat: "{{ type }}"
metric: nginx_server_connections
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Overall Connections rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 4
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(irate(nginx_cache_total{ server_zone=~"$ingress"}[5m])) by (server_zone,
type)
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ type }}.{{ server_zone }}"
metric: nginx_server_cache
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Cache Action rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 3
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(irate(nginx_requests_total{ server_zone=~"$ingress" }[5m])) by (server_zone)
format: time_series
interval: ''
intervalFactor: 2
legendFormat: "{{ server_zone }}"
metric: nginx_server_requests
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Overall Requests rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 2
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(irate(nginx_bytes_total{ server_zone=~"$ingress" }[5m])) by (direction,
server_zone)
format: time_series
intervalFactor: 2
legendFormat: "{{ direction }}.{{ server_zone }}"
metric: nginx_server_bytes
refId: A
step: 10
thresholds: []
timeFrom:
timeShift:
title: Overall Bytes Transferred rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
schemaVersion: 14
style: dark
tags:
- prometheus
- nginx
# Template variables of the "Nginx Stats" dashboard. Both are query variables
# filled from Prometheus label values and both allow multiple selections.
templating:
  list:
    # $Upstream: values of the `upstream` label on nginx_upstream_bytes_total.
    # The upstream panels filter with upstream=~"^$Upstream$".
    - allValue: ".*"
      current: {}
      datasource: prometheus
      hide: 0
      includeAll: false
      label:
      multi: true
      name: Upstream
      options: []
      query: label_values(nginx_upstream_bytes_total, upstream)
      refresh: 1
      regex: ''
      sort: 1
      tagValuesQuery: ''
      tags: []
      tagsQuery: ''
      type: query
      useTags: false
    # $ingress: values of the `server_zone` label on nginx_bytes_total.
    # The server-zone panels filter with server_zone=~"$ingress".
    - allValue:
      current: {}
      datasource: prometheus
      hide: 0
      includeAll: false
      label:
      multi: true
      name: ingress
      options: []
      query: label_values(nginx_bytes_total, server_zone)
      refresh: 1
      # Keeps only label values made up entirely of characters other than
      # `*` and `_` (after YAML unescaping the pattern is /^[^\*_]+$/).
      regex: "/^[^\\*_]+$/"
      sort: 1
      tagValuesQuery: ''
      tags: []
      tagsQuery: ''
      type: query
      useTags: false
time:
from: now-1h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: browser
title: Nginx Stats
version: 13
openstack-service:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.5.2
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
enable: true
list: []
editable: true
gnetId:
graphTooltip: 1
hideControls: false
id:
links: []
refresh: 5m
rows:
- collapse: false
height: 250px
panels:
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(225, 177, 40, 0.59)
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 6
interval: "> 60s"
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- column: value
condition: ''
expr: openstack_check_[[Service]]_api{job="openstack-metrics"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '0,1'
title: ''
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: CRITICAL
value: '0'
- op: "="
text: OK
value: '1'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: "prometheus"
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 13
interval: "> 60s"
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: true
tableColumn: ''
targets:
- column: value
condition: ''
expr: sum(nginx_responses_total{server_zone=~"[[Service]].*", status_code="5xx"})
fill: ''
format: time_series
function: count
groupBy:
- interval: auto
params:
- auto
type: time
- params:
- '0'
type: fill
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
tags: []
thresholds: ''
title: HTTP 5xx errors
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: '0'
value: 'null'
valueName: current
# Graph panel: average upstream response time reported by nginx for the
# upstreams of the selected [[Service]], one series per upstream.
- aliasColors: {}
  bars: false
  dashLength: 10
  dashes: false
  datasource: "prometheus"
  editable: true
  error: false
  fill: 0
  grid: {}
  id: 7
  # Written as "> 60s" like every other panel of this dashboard.
  interval: "> 60s"
  legend:
    alignAsTable: true
    avg: true
    current: false
    max: true
    min: true
    show: true
    sortDesc: true
    total: false
    values: true
  lines: true
  linewidth: 1
  links: []
  nullPointMode: connected
  percentage: false
  pointradius: 5
  points: false
  renderer: flot
  seriesOverrides: []
  spaceLength: 10
  span: 8
  stack: false
  steppedLine: false
  targets:
    - expr: >-
        sum(nginx_upstream_response_msecs_avg{upstream=~"openstack-[[Service]].*"})
        by (upstream)
      format: time_series
      intervalFactor: 2
      refId: A
      step: 120
  thresholds: []
  timeFrom:
  timeShift:
  title: HTTP response time
  tooltip:
    msResolution: false
    shared: true
    sort: 0
    value_type: cumulative
  type: graph
  xaxis:
    buckets:
    mode: time
    name:
    show: true
    values: []
  yaxes:
    # The *_msecs_avg metric is expressed in milliseconds, so the axis unit
    # must be "ms"; the previous "s" overstated every value by a factor 1000.
    - format: ms
      logBase: 1
      max:
      min: 0
      show: true
    - format: short
      logBase: 1
      max:
      min: 0
      show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 1
grid: {}
id: 9
interval: "> 60s"
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: true
targets:
- alias: healthy
column: value
expr: openstack_check_[[Service]]_api
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- '0'
type: fill
groupByTags: []
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
select: []
step: 120
tags: []
thresholds: []
timeFrom:
timeShift:
title: API Availability
tooltip:
msResolution: false
shared: false
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: none
label: ''
logBase: 1
max: 1
min: 0
show: false
- format: short
logBase: 1
max:
min:
show: false
- aliasColors:
'{status_code="2xx"}': "#629E51"
'{status_code="5xx"}': "#BF1B00"
bars: true
dashLength: 10
dashes: false
datasource: "prometheus"
editable: true
error: false
fill: 0
grid: {}
id: 8
interval: "> 60s"
legend:
alignAsTable: false
avg: false
current: false
hideEmpty: false
max: false
min: false
rightSide: false
show: true
total: false
values: false
lines: false
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 8
stack: true
steppedLine: false
targets:
- expr: sum(nginx_responses_total{server_zone=~"[[Service]].*"}) by (status_code)
format: time_series
intervalFactor: 2
refId: A
step: 120
thresholds: []
timeFrom:
timeShift:
title: Number of HTTP responses
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Service Status
titleSize: h6
schemaVersion: 14
style: dark
tags: []
# Template variables of the "Openstack Service" dashboard.
templating:
  enable: true
  list:
    # $Service: a custom (static) single-select variable listing the OpenStack
    # services. Panel queries interpolate it as [[Service]], e.g.
    # openstack_check_[[Service]]_api. "cinder" is the pre-selected value.
    - allValue:
      current:
        tags: []
        text: cinder
        value: cinder
      hide: 0
      includeAll: false
      label:
      multi: false
      name: Service
      # One entry per value in `query` below; keep both lists in sync.
      options:
        - selected: false
          text: nova
          value: nova
        - selected: false
          text: glance
          value: glance
        - selected: false
          text: keystone
          value: keystone
        - selected: true
          text: cinder
          value: cinder
        - selected: false
          text: heat
          value: heat
        - selected: false
          text: placement
          value: placement
        - selected: false
          text: neutron
          value: neutron
      query: nova,glance,keystone,cinder,heat,placement,neutron
      type: custom
time:
from: now-1h
to: now
timepicker:
collapse: false
enable: true
notice: false
now: true
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
status: Stable
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
type: timepicker
timezone: browser
title: Openstack Service
version: 4
coredns:
__inputs:
- name: prometheus
label: Prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.4.3
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
annotations:
list: []
editable: true
gnetId: 5926
graphTooltip: 0
hideControls: false
id:
links: []
rows:
- collapse: false
height: 250px
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 1
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: total
yaxis: 2
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m]))
by (proto)
format: time_series
intervalFactor: 2
legendFormat: "{{proto}}"
refId: A
step: 60
- expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m]))
format: time_series
intervalFactor: 2
legendFormat: total
refId: B
step: 60
thresholds: []
timeFrom:
timeShift:
title: Requests (total)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: pps
logBase: 1
max:
min: 0
show: true
- format: pps
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 12
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: total
yaxis: 2
- alias: other
yaxis: 2
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(rate(coredns_dns_request_type_count_total{instance=~"$instance"}[5m]))
by (type)
intervalFactor: 2
legendFormat: "{{type}}"
refId: A
step: 60
thresholds: []
timeFrom:
timeShift:
title: Requests (by qtype)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: pps
logBase: 1
max:
min: 0
show: true
- format: pps
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 2
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: total
yaxis: 2
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m]))
by (zone)
intervalFactor: 2
legendFormat: "{{zone}}"
refId: A
step: 60
- expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m]))
intervalFactor: 2
legendFormat: total
refId: B
step: 60
thresholds: []
timeFrom:
timeShift:
title: Requests (by zone)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: pps
logBase: 1
max:
min: 0
show: true
- format: pps
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 10
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: total
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(rate(coredns_dns_request_do_count_total{instance=~"$instance"}[5m]))
intervalFactor: 2
legendFormat: DO
refId: A
step: 40
- expr: sum(rate(coredns_dns_request_count_total{instance=~"$instance"}[5m]))
intervalFactor: 2
legendFormat: total
refId: B
step: 40
thresholds: []
timeFrom:
timeShift:
title: Requests (DO bit)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: pps
logBase: 1
max:
min: 0
show: true
- format: pps
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 9
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: tcp:90
yaxis: 2
- alias: 'tcp:99 '
yaxis: 2
- alias: tcp:50
yaxis: 2
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m]))
by (le,proto))
intervalFactor: 2
legendFormat: "{{proto}}:99 "
refId: A
step: 60
- expr: histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m]))
by (le,proto))
intervalFactor: 2
legendFormat: "{{proto}}:90"
refId: B
step: 60
- expr: histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m]))
by (le,proto))
intervalFactor: 2
legendFormat: "{{proto}}:50"
refId: C
step: 60
thresholds: []
timeFrom:
timeShift:
title: Requests (size, udp)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min: 0
show: true
# Graph panel (id 14): request-size quantiles for TCP queries.
# Every target applies histogram_quantile() to the 5m rate of
# coredns_dns_request_size_bytes_bucket, restricted to proto="tcp" and to the
# instances matched by the $instance template variable.
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 14
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
# The aliases below are written to equal the legendFormat strings of the
# targets (with proto rendered as "tcp"). The trailing space in 'tcp:99 '
# mirrors the trailing space in target A's legendFormat; keep the two in sync.
seriesOverrides:
- alias: tcp:90
yaxis: 1
- alias: 'tcp:99 '
yaxis: 1
- alias: tcp:50
yaxis: 1
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
# Target A: 0.99 quantile.
- expr: histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m]))
  by (le,proto))
intervalFactor: 2
legendFormat: "{{proto}}:99 "
refId: A
step: 60
# Target B: 0.90 quantile.
- expr: histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m]))
  by (le,proto))
intervalFactor: 2
legendFormat: "{{proto}}:90"
refId: B
step: 60
# Target C: 0.50 quantile (median).
- expr: histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m]))
  by (le,proto))
intervalFactor: 2
legendFormat: "{{proto}}:50"
refId: C
step: 60
thresholds: []
timeFrom:
timeShift:
# Title spacing follows the sibling "Requests (size, udp)" and
# "Responses (size, tcp)" panels of this dashboard.
title: Requests (size, tcp)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
# Two y-axes: the first is formatted as bytes, the second as "short".
# Both start at 0 and leave max empty.
yaxes:
- format: bytes
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min: 0
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Row
titleSize: h6
- collapse: false
height: 250px
panels:
# Graph panel (id 5): response rate broken down by rcode.
# The single target sums the 5m rate of coredns_dns_response_rcode_count_total
# over the instances matched by $instance and groups the result by the rcode
# label, so the legend shows one entry per rcode value.
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 5
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(rate(coredns_dns_response_rcode_count_total{instance=~"$instance"}[5m]))
  by (rcode)
intervalFactor: 2
legendFormat: "{{rcode}}"
refId: A
step: 40
thresholds: []
timeFrom:
timeShift:
title: Responses (by rcode)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
# First y-axis uses the "pps" format with min 0; the second uses "short" and
# leaves both min and max empty.
yaxes:
- format: pps
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min:
show: true
# Graph panel (id 3): request-duration quantiles.
# Every target applies histogram_quantile() to the 5m rate of
# coredns_dns_request_duration_seconds_bucket for the instances matched by
# $instance. The legend strings are fixed ("99%", "90%", "50%"), so each
# target must aggregate down to a single series: all three group by "le" only.
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 3
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
# Target A: 0.99 quantile. Grouped by "le" only, like targets B and C, so the
# result is not split into several identically labelled "99%" series.
- expr: histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~"$instance"}[5m]))
  by (le))
intervalFactor: 2
legendFormat: 99%
refId: A
step: 40
# Target B: 0.90 quantile.
- expr: histogram_quantile(0.90, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~"$instance"}[5m]))
  by (le))
intervalFactor: 2
legendFormat: 90%
refId: B
step: 40
# Target C: 0.50 quantile (median).
- expr: histogram_quantile(0.50, sum(rate(coredns_dns_request_duration_seconds_bucket{instance=~"$instance"}[5m]))
  by (le))
intervalFactor: 2
legendFormat: 50%
refId: C
step: 40
thresholds: []
timeFrom:
timeShift:
title: Responses (duration)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
# First y-axis uses the "s" format with min 0; the second uses "short" and
# leaves both min and max empty.
yaxes:
- format: s
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min:
show: true
# Graph panel (id 8): response-size quantiles for UDP.
# Every target applies histogram_quantile() to the 5m rate of
# coredns_dns_response_size_bytes_bucket, restricted to proto="udp" and to the
# instances matched by the $instance template variable.
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 8
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
# NOTE(review): every target in this panel filters proto="udp" and the legend
# is "{{proto}}:NN%", so the tcp:* aliases below presumably never match a
# series here. They are kept unchanged; confirm before removing them.
seriesOverrides:
- alias: udp:50%
yaxis: 1
- alias: tcp:50%
yaxis: 2
- alias: tcp:90%
yaxis: 2
- alias: tcp:99%
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
# Target A: 0.99 quantile.
- expr: 'histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m]))
  by (le,proto)) '
intervalFactor: 2
legendFormat: "{{proto}}:99%"
refId: A
step: 40
# Target B: 0.90 quantile. The instance matcher must be the regex form (=~),
# as in targets A and C: the instance variable of this dashboard sets
# includeAll with allValue ".*", which an equality matcher would compare
# literally and therefore match nothing.
- expr: 'histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m]))
  by (le,proto)) '
intervalFactor: 2
legendFormat: "{{proto}}:90%"
refId: B
step: 40
# Target C: 0.50 quantile (median).
- expr: 'histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="udp"}[5m]))
  by (le,proto)) '
intervalFactor: 2
legendFormat: "{{proto}}:50%"
metric: ''
refId: C
step: 40
thresholds: []
timeFrom:
timeShift:
title: Responses (size, udp)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
# Two y-axes: the first is formatted as bytes, the second as "short".
# Both start at 0 and leave max empty.
yaxes:
- format: bytes
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min: 0
show: true
# Graph panel (id 13): response-size quantiles for TCP.
# Every target applies histogram_quantile() to the 5m rate of
# coredns_dns_response_size_bytes_bucket, restricted to proto="tcp" and to the
# instances matched by the $instance template variable.
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 13
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
# All overrides point at y-axis 1.
# NOTE(review): every target here filters proto="tcp", so the udp:50% alias
# presumably never matches a series in this panel; confirm before removing.
seriesOverrides:
- alias: udp:50%
yaxis: 1
- alias: tcp:50%
yaxis: 1
- alias: tcp:90%
yaxis: 1
- alias: tcp:99%
yaxis: 1
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
# Target A: 0.99 quantile.
- expr: 'histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m]))
  by (le,proto)) '
intervalFactor: 2
legendFormat: "{{proto}}:99%"
refId: A
step: 40
# Target B: 0.90 quantile.
- expr: 'histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m]))
  by (le,proto)) '
intervalFactor: 2
legendFormat: "{{proto}}:90%"
refId: B
step: 40
# Target C: 0.50 quantile (median). Its grouping clause is spelled
# "by (le, proto)"; that differs from targets A and B only by a space.
- expr: 'histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~"$instance",proto="tcp"}[5m]))
  by (le, proto)) '
intervalFactor: 2
legendFormat: "{{proto}}:50%"
metric: ''
refId: C
step: 40
thresholds: []
timeFrom:
timeShift:
title: Responses (size, tcp)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
# Two y-axes: the first is formatted as bytes, the second as "short".
# Both start at 0 and leave max empty.
yaxes:
- format: bytes
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min: 0
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
# Graph panel (id 15): cache size.
# The single target sums coredns_cache_size over the instances matched by
# $instance and groups by the "type" label; the legend shows the type value.
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 15
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
# No rate() here: the metric value is summed as-is.
- expr: sum(coredns_cache_size{instance=~"$instance"}) by (type)
intervalFactor: 2
legendFormat: "{{type}}"
refId: A
step: 40
thresholds: []
timeFrom:
timeShift:
title: Cache (size)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
# Both y-axes use the "short" format, start at 0 and leave max empty.
yaxes:
- format: short
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min: 0
show: true
# Graph panel (id 16): cache hits and misses.
# Despite the "hitrate" title, the targets plot two separate per-second rates
# (5m rate of coredns_cache_hits_total and of coredns_cache_misses_total);
# no hits/(hits+misses) ratio is computed here.
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 16
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
# The alias equals target B's fixed legendFormat ("misses") and assigns that
# series to y-axis 2.
seriesOverrides:
- alias: misses
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
# Target A: hit rate grouped by "type"; legend is "hits:<type>".
- expr: sum(rate(coredns_cache_hits_total{instance=~"$instance"}[5m])) by (type)
intervalFactor: 2
legendFormat: hits:{{type}}
refId: A
step: 40
# Target B: miss rate, also grouped by "type" but with a fixed legend string.
- expr: sum(rate(coredns_cache_misses_total{instance=~"$instance"}[5m])) by (type)
intervalFactor: 2
legendFormat: misses
refId: B
step: 40
thresholds: []
timeFrom:
timeShift:
title: Cache (hitrate)
tooltip:
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
# Both y-axes use the "pps" format, start at 0 and leave max empty.
yaxes:
- format: pps
logBase: 1
max:
min: 0
show: true
- format: pps
logBase: 1
max:
min: 0
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
# Dashboard-level settings that follow the rows of the CoreDNS dashboard.
schemaVersion: 14
style: dark
tags:
- dns
- coredns
# Template variable "instance" (shown as "Instance"); the panel queries
# reference it as $instance.
templating:
list:
# With includeAll enabled, allValue supplies ".*" as the value used for the
# "All" choice, so queries need a regex matcher (=~) on instance to honour it.
- allValue: ".*"
current: {}
datasource: prometheus
hide: 0
includeAll: true
label: Instance
multi: false
name: instance
options: []
# Candidate values come from this query against the prometheus datasource;
# the regex below captures the text between instance=" and the next quote.
query: up{job="coredns"}
refresh: 1
regex: .*instance="(.*?)".*
sort: 0
tagValuesQuery: ''
tags: []
tagsQuery: ''
type: query
useTags: false
# Time range: from now-3h to now.
time:
from: now-3h
to: now
# Time picker configuration: the refresh_intervals and time_options lists.
timepicker:
now: true
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: utc
title: CoreDNS
version: 3
description: A dashboard for the CoreDNS DNS server.