From d34d7ad7630a055cc1c4edb3c88a3c4bd5e1044b Mon Sep 17 00:00:00 2001 From: Ian Wienand Date: Thu, 21 Jun 2018 14:46:32 +1000 Subject: [PATCH] Consolidate nodepool graphs All of these dashboards are the same, and have mostly copied all the same issues with them. This makes updating anything a massive pain. This implements a single dashboard template with a small script to create individual dashboards for each provider and its regions. I have included a range of fixes. The y-axis format has changed in later versions of grafana. The API time tracking is no longer scaled, but we just tell grafana it is in ms and it displays it correctly. The test nodes history graph is moved to the top, as it is probably the most interesting graph (note this splits itself out per region, if multiple regions are selected). Values for "null as zero" are consistently set. Various formatting fixes for the labels are included. Change-Id: I5fbffaec3c82aa1fce0947f771de67edd15f7dfc --- grafana/create-nodepool.sh | 29 +++ grafana/nodepool-citycloud.yaml | 248 ++++++++++++------------ grafana/nodepool-inap.yaml | 211 +++++++++++++-------- grafana/nodepool-limestone.yaml | 209 ++++++++++++-------- grafana/nodepool-linaro.yaml | 209 ++++++++++++-------- grafana/nodepool-ovh.yaml | 241 ++++++++++++----------- grafana/nodepool-packethost.yaml | 209 ++++++++++++-------- grafana/nodepool-rax-experimental.yaml | 177 ----------------- grafana/nodepool-rax.yaml | 252 +++++++++++++------------ grafana/nodepool-vexxhost.yaml | 214 +++++++++++++-------- grafana/nodepool.template | 197 +++++++++++++++++++ grafana/nodepool.yaml | 143 -------------- 12 files changed, 1243 insertions(+), 1096 deletions(-) create mode 100755 grafana/create-nodepool.sh delete mode 100644 grafana/nodepool-rax-experimental.yaml create mode 100644 grafana/nodepool.template delete mode 100644 grafana/nodepool.yaml diff --git a/grafana/create-nodepool.sh b/grafana/create-nodepool.sh new file mode 100755 index 0000000000..469f9547a8 
--- /dev/null +++ b/grafana/create-nodepool.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# +# Creates graphs for nodepool regions from a given provider +# +# Note we are somewhat particular about keeping these separate to +# avoid the idea that we are providing some sort of cross-provider +# benchmark. +# + +function create { + local provider="$1" + local stat_list="$2" + local output_file="$3" + + sed -e "s/%PROVIDER%/${provider}/; " \ + -e "s/%STAT_LIST%/${stat_list}/" \ + -e "s/%OUTPUT_FILE%/${output_file}/" \ + nodepool.template > "${output_file}" +} + +create Rackspace 'rax-*' nodepool-rax.yaml +create Inap 'inap-*' nodepool-inap.yaml +create Limestone 'limestone-*' nodepool-limestone.yaml +create Linaro 'linaro-*' nodepool-linaro.yaml +create OVH 'ovh-*' nodepool-ovh.yaml +create Packethost 'packethost-*' nodepool-packethost.yaml +create Vexxhost 'vexxhost-*' nodepool-vexxhost.yaml +create Citycloud 'citycloud-*' nodepool-citycloud.yaml diff --git a/grafana/nodepool-citycloud.yaml b/grafana/nodepool-citycloud.yaml index 4f4539725a..999fdc73b0 100644 --- a/grafana/nodepool-citycloud.yaml +++ b/grafana/nodepool-citycloud.yaml @@ -1,7 +1,11 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# + dashboard: - title: 'Nodepool: City Cloud' + title: 'Nodepool: Citycloud' templating: - - name: provider + - name: region includeAll: true multi: true query: stats.gauges.nodepool.provider.citycloud-* @@ -9,14 +13,21 @@ dashboard: type: query rows: - title: Description - height: 100px + height: 150px panels: - title: Description content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + Citycloud Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for Citycloud. 
+ + **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template). - If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-citycloud.yaml). type: text + - title: Nodes showTitle: true height: 150px @@ -27,7 +38,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.building) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) type: singlestat valueName: current - title: Ready @@ -36,7 +47,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.ready) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) type: singlestat valueName: current - title: In Use @@ -45,7 +56,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.in-use) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) type: singlestat valueName: current - title: Deleting @@ -54,58 +65,36 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.deleting) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) type: singlestat valueName: current - - title: API Operations - showTitle: true - height: 250px + + - title: Test Nodes + height: 400px panels: - - title: Create Server + - title: Test Node History - $region type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - 
label: "nodes" + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputePostServers.mean, '0.001'), 4), 'citycloud-', '') - - title: Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServersDetail.mean, '0.001'), 4), 'citycloud-', '') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeDeleteServers.mean, '0.001'), 4), 'citycloud-', '') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServers.mean, '0.001'), 4), 'citycloud-', '') - - title: Get Limits - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetLimits.mean, '0.001'), 4), 'citycloud-', '') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + - title: Node Launches showTitle: true height: 250px @@ -113,91 +102,96 @@ dashboard: - title: Ready Node Launch Attempts type: graph span: 4 + lines: false + bars: true nullPointMode: null as zero - leftYAxisLabel: "events / min" + 
yaxes: + - label: "events / min" + - show: false targets: - - target: aliasSub(aliasByNode(summarize(stats_counts.nodepool.launch.provider.$provider.ready, '1m'), 4), 'citycloud-', '') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasSub(aliasByNode(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$provider.error.*), '1m'), 4), 'citycloud-', '') + - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') - title: Time to Ready type: graph span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.launch.provider.$provider.ready.mean, '0.001'), 5), 'citycloud-', '') - - title: Test Nodes (Kna1) + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.citycloud-kna1.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.citycloud-kna1.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.citycloud-kna1.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.citycloud-kna1.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.citycloud-kna1.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.citycloud-kna1.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False - - title: Test Nodes (La1) + - target: 
alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations + showTitle: true + height: 250px + panels: + - title: Create Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4) + - title: Get Server type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.citycloud-la1.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.citycloud-la1.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.citycloud-la1.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.citycloud-la1.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.citycloud-la1.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.citycloud-la1.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False - - title: Test Nodes (Lon1) + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4) + - title: Delete Server type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" - targets: - - target: alias(stats.gauges.nodepool.provider.citycloud-lon1.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.citycloud-lon1.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.citycloud-lon1.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.citycloud-lon1.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.citycloud-lon1.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.citycloud-lon1.max_servers, 
'Max') - seriesOverrides: - - alias: Max - stack: False - - title: Test Nodes (Sto2) - type: graph + lines: true + nullPointMode: connected span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + yaxes: + - format: ms + label: Time + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.citycloud-sto2.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.citycloud-sto2.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.citycloud-sto2.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.citycloud-sto2.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.citycloud-sto2.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.citycloud-sto2.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4) + - title: List Servers + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4) + - title: Get Limits + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4) diff --git a/grafana/nodepool-inap.yaml b/grafana/nodepool-inap.yaml index 6a2d0cd93c..a6ee224f92 100644 --- a/grafana/nodepool-inap.yaml +++ b/grafana/nodepool-inap.yaml @@ -1,7 +1,11 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# + dashboard: - title: 'Nodepool: INAP' + title: 'Nodepool: Inap' templating: - - name: provider + - name: region includeAll: true multi: true query: stats.gauges.nodepool.provider.inap-* @@ -9,14 +13,21 @@ dashboard: type: query rows: - title: Description - height: 100px + height: 150px panels: - title: 
Description content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + Inap Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for Inap. + + **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template). - If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-inap.yaml). type: text + - title: Nodes showTitle: true height: 150px @@ -27,7 +38,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.building) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) type: singlestat valueName: current - title: Ready @@ -36,7 +47,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.ready) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) type: singlestat valueName: current - title: In Use @@ -45,7 +56,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.in-use) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) type: singlestat valueName: current - title: Deleting @@ -54,58 +65,36 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.deleting) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) type: singlestat valueName: current - - title: API Operations - showTitle: true - height: 250px + + - title: Test Nodes + height: 
400px panels: - - title: Create Server + - title: Test Node History - $region type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputePostServers.mean, '0.001'), 4), 'inap-', '') - - title: Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServersDetail.mean, '0.001'), 4), 'inap-', '') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeDeleteServers.mean, '0.001'), 4), 'inap-', '') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServers.mean, '0.001'), 4), 'inap-', '') - - title: Get Limits - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetLimits.mean, '0.001'), 4), 'inap-', '') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + - 
title: Node Launches showTitle: true height: 250px @@ -113,40 +102,96 @@ dashboard: - title: Ready Node Launch Attempts type: graph span: 4 + lines: false + bars: true nullPointMode: null as zero - leftYAxisLabel: "events / min" + yaxes: + - label: "events / min" + - show: false targets: - - target: aliasSub(aliasByNode(summarize(stats_counts.nodepool.launch.provider.$provider.ready, '1m'), 4), 'inap-', '') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasSub(aliasByNode(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$provider.error.*), '1m'), 4), 'inap-', '') + - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') - title: Time to Ready type: graph span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.launch.provider.$provider.ready.mean, '0.001'), 5), 'inap-', '') - - title: Test Nodes (MTL01) + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.inap-mtl01.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.inap-mtl01.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.inap-mtl01.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.inap-mtl01.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.inap-mtl01.nodes.deleting, 'Deleting') - - target: 
alias(stats.gauges.nodepool.provider.inap-mtl01.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False + - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations + showTitle: true + height: 250px + panels: + - title: Create Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4) + - title: Get Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4) + - title: Delete Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4) + - title: List Servers + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4) + - title: Get Limits + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4) diff --git a/grafana/nodepool-limestone.yaml b/grafana/nodepool-limestone.yaml index a161b28293..03d35e56f8 100644 --- a/grafana/nodepool-limestone.yaml +++ b/grafana/nodepool-limestone.yaml @@ -1,7 +1,11 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# + dashboard: title: 'Nodepool: Limestone' templating: - - name: provider + - name: region includeAll: true multi: true query: stats.gauges.nodepool.provider.limestone-* @@ -9,14 +13,21 @@ 
dashboard: type: query rows: - title: Description - height: 100px + height: 150px panels: - title: Description content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + Limestone Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for Limestone. + + **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template). - If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-limestone.yaml). type: text + - title: Nodes showTitle: true height: 150px @@ -27,7 +38,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.building) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) type: singlestat valueName: current - title: Ready @@ -36,7 +47,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.ready) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) type: singlestat valueName: current - title: In Use @@ -45,7 +56,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.in-use) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) type: singlestat valueName: current - title: Deleting @@ -54,58 +65,36 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.deleting) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) type: singlestat 
valueName: current - - title: API Operations - showTitle: true - height: 250px + + - title: Test Nodes + height: 400px panels: - - title: Create Server + - title: Test Node History - $region type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputePostServers.mean, '0.001'), 4), 'limestone-', '') - - title: Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServersDetail.mean, '0.001'), 4), 'limestone-', '') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeDeleteServers.mean, '0.001'), 4), 'limestone-', '') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServers.mean, '0.001'), 4), 'limestone-', '') - - title: Get Limits - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetLimits.mean, '0.001'), 4), 'limestone-', '') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - 
target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + - title: Node Launches showTitle: true height: 250px @@ -113,40 +102,96 @@ dashboard: - title: Ready Node Launch Attempts type: graph span: 4 + lines: false + bars: true nullPointMode: null as zero - leftYAxisLabel: "events / min" + yaxes: + - label: "events / min" + - show: false targets: - - target: aliasSub(aliasByNode(summarize(stats_counts.nodepool.launch.provider.$provider.ready, '1m'), 4), 'limestone-', '') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasSub(aliasByNode(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$provider.error.*), '1m'), 4), 'limestone-', '') + - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') - title: Time to Ready type: graph span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.launch.provider.$provider.ready.mean, '0.001'), 5), 'limestone-', '') - - title: Test Nodes (RegionOne) + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.limestone-regionone.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.limestone-regionone.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.limestone-regionone.nodes.in-use, 'In Use') - - 
target: alias(stats.gauges.nodepool.provider.limestone-regionone.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.limestone-regionone.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.limestone-regionone.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False + - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations + showTitle: true + height: 250px + panels: + - title: Create Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4) + - title: Get Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4) + - title: Delete Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4) + - title: List Servers + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4) + - title: Get Limits + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4) diff --git a/grafana/nodepool-linaro.yaml b/grafana/nodepool-linaro.yaml index fa1efeb260..c88539d328 100644 --- a/grafana/nodepool-linaro.yaml +++ b/grafana/nodepool-linaro.yaml @@ -1,7 +1,11 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# + 
dashboard: title: 'Nodepool: Linaro' templating: - - name: provider + - name: region includeAll: true multi: true query: stats.gauges.nodepool.provider.linaro-* @@ -9,14 +13,21 @@ dashboard: type: query rows: - title: Description - height: 100px + height: 150px panels: - title: Description content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + Linaro Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for Linaro. + + **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template). - If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-linaro.yaml). 
type: text + - title: Nodes showTitle: true height: 150px @@ -27,7 +38,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.building) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) type: singlestat valueName: current - title: Ready @@ -36,7 +47,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.ready) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) type: singlestat valueName: current - title: In Use @@ -45,7 +56,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.in-use) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) type: singlestat valueName: current - title: Deleting @@ -54,58 +65,36 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.deleting) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) type: singlestat valueName: current - - title: API Operations - showTitle: true - height: 250px + + - title: Test Nodes + height: 400px panels: - - title: Create Server + - title: Test Node History - $region type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputePostServers.mean, '0.001'), 4), 'linaro-', '') - - title: Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServersDetail.mean, '0.001'), 4), 'linaro-', '') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: 
aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeDeleteServers.mean, '0.001'), 4), 'linaro-', '') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServers.mean, '0.001'), 4), 'linaro-', '') - - title: Get Limits - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetLimits.mean, '0.001'), 4), 'linaro-', '') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + - title: Node Launches showTitle: true height: 250px @@ -113,40 +102,96 @@ dashboard: - title: Ready Node Launch Attempts type: graph span: 4 + lines: false + bars: true nullPointMode: null as zero - leftYAxisLabel: "events / min" + yaxes: + - label: "events / min" + - show: false targets: - - target: aliasSub(aliasByNode(summarize(stats_counts.nodepool.launch.provider.$provider.ready, '1m'), 4), 'linaro-', '') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasSub(aliasByNode(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$provider.error.*), '1m'), 4), 'linaro-', '') + - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), 
'.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') - title: Time to Ready type: graph span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.launch.provider.$provider.ready.mean, '0.001'), 5), 'linaro-', '') - - title: Test Nodes (MTL01) + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.linaro-cn1.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.linaro-cn1.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.linaro-cn1.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.linaro-cn1.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.linaro-cn1.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.linaro-cn1.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False + - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations + showTitle: true + height: 250px + panels: + - title: Create Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4) + - title: Get Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: 
aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4) + - title: Delete Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4) + - title: List Servers + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4) + - title: Get Limits + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4) diff --git a/grafana/nodepool-ovh.yaml b/grafana/nodepool-ovh.yaml index 5ae44a3a73..29ff41c303 100644 --- a/grafana/nodepool-ovh.yaml +++ b/grafana/nodepool-ovh.yaml @@ -1,15 +1,33 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# + dashboard: title: 'Nodepool: OVH' + templating: + - name: region + includeAll: true + multi: true + query: stats.gauges.nodepool.provider.ovh-* + refresh: true + type: query rows: - title: Description - height: 100px + height: 150px panels: - title: Description content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + OVH Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for OVH. + + **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template).
- If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-ovh.yaml). type: text + - title: Nodes showTitle: true height: 150px @@ -20,7 +38,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.ovh-*.nodes.building) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) type: singlestat valueName: current - title: Ready @@ -29,7 +47,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.ovh-*.nodes.ready) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) type: singlestat valueName: current - title: In Use @@ -38,7 +56,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.ovh-*.nodes.in-use) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) type: singlestat valueName: current - title: Deleting @@ -47,63 +65,36 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.ovh-*.nodes.deleting) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) type: singlestat valueName: current - - title: API Operations - showTitle: true - height: 250px + + - title: Test Nodes + height: 400px panels: - - title: Create Server + - title: Test Node History - $region type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false targets: - - target: alias(scale(stats.timers.nodepool.task.ovh-bhs1.ComputePostServers.mean, '0.001'), 'BHS1') - - target: alias(scale(stats.timers.nodepool.task.ovh-gra1.ComputePostServers.mean, '0.001'), 'GRA1') - - title: Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - 
targets: - - target: alias(scale(stats.timers.nodepool.task.ovh-bhs1.ComputeGetServersDetail.mean, '0.001'), 'BHS1') - - target: alias(scale(stats.timers.nodepool.task.ovh-gra1.ComputeGetServersDetail.mean, '0.001'), 'GRA1') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.ovh-bhs1.ComputeDeleteServers.mean, '0.001'), 'BHS1') - - target: alias(scale(stats.timers.nodepool.task.ovh-gra1.ComputeDeleteServers.mean, '0.001'), 'GRA1') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.ovh-bhs1.ComputeGetServers.mean, '0.001'), 'BHS1') - - target: alias(scale(stats.timers.nodepool.task.ovh-gra1.ComputeGetServers.mean, '0.001'), 'GRA1') - - title: Get Limits - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.ovh-bhs1.ComputeGetLimits.mean, '0.001'), 'BHS1') - - target: alias(scale(stats.timers.nodepool.task.ovh-gra1.ComputeGetLimits.mean, '0.001'), 'GRA1') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + - title: Node Launches showTitle: true height: 250px @@ -111,74 +102,96 @@ dashboard: - title: Ready Node Launch Attempts type: graph span: 4 + lines: false + bars: true nullPointMode: null as zero - leftYAxisLabel: 
"events / min" + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(smartSummarize(stats_counts.nodepool.launch.provider.ovh-bhs1.ready, '1m'), 'BHS1') - - target: alias(smartSummarize(stats_counts.nodepool.launch.provider.ovh-gra1.ready, '1m'), 'GRA1') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.ovh-bhs1.error.*), '1m'), 'BHS1') - - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.ovh-gra1.error.*), '1m'), 'GRA1') + - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') - title: Time to Ready type: graph span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false targets: - - target: alias(scale(stats.timers.nodepool.launch.provider.ovh-bhs1.ready.mean, '0.001'), 'BHS1') - - target: alias(scale(stats.timers.nodepool.launch.provider.ovh-gra1.ready.mean, '0.001'), 'GRA1') - - title: Test Nodes (BHS1) + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.ovh-bhs1.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.ovh-bhs1.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.ovh-bhs1.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.ovh-bhs1.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.ovh-bhs1.nodes.deleting, 
'Deleting') - - target: alias(stats.gauges.nodepool.provider.ovh-bhs1.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False - - title: Test Nodes (GRA1) - type: graph - span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" - targets: - - target: alias(stats.gauges.nodepool.provider.ovh-gra1.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.ovh-gra1.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.ovh-gra1.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.ovh-gra1.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.ovh-gra1.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.ovh-gra1.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False - - title: Images + - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations showTitle: true height: 250px panels: - - title: Image Uploads + - title: Create Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4) + - title: Get Server type: graph span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false targets: - - target: alias(scale(averageSeries(stats.timers.nodepool.image_update.*.ovh-bhs1.mean), '0.001'), 'BHS1') - - target: alias(scale(averageSeries(stats.timers.nodepool.image_update.*.ovh-gra1.mean), '0.001'), 'GRA1') + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4) + - title: Delete Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target:
aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4) + - title: List Servers + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4) + - title: Get Limits + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4) diff --git a/grafana/nodepool-packethost.yaml b/grafana/nodepool-packethost.yaml index f1eb31e93e..70dfb43151 100644 --- a/grafana/nodepool-packethost.yaml +++ b/grafana/nodepool-packethost.yaml @@ -1,7 +1,11 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# + dashboard: title: 'Nodepool: Packethost' templating: - - name: provider + - name: region includeAll: true multi: true query: stats.gauges.nodepool.provider.packethost-* @@ -9,14 +13,21 @@ dashboard: type: query rows: - title: Description - height: 100px + height: 150px panels: - title: Description content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + Packethost Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for Packethost. + + **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template). - If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-packethost.yaml).
type: text + - title: Nodes showTitle: true height: 150px @@ -27,7 +38,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.building) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) type: singlestat valueName: current - title: Ready @@ -36,7 +47,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.ready) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) type: singlestat valueName: current - title: In Use @@ -45,7 +56,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.in-use) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) type: singlestat valueName: current - title: Deleting @@ -54,58 +65,36 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.deleting) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) type: singlestat valueName: current - - title: API Operations - showTitle: true - height: 250px + + - title: Test Nodes + height: 400px panels: - - title: Create Server + - title: Test Node History - $region type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputePostServers.mean, '0.001'), 4), 'packethost-', '') - - title: Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServersDetail.mean, '0.001'), 4), 'packethost-', '') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - 
target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeDeleteServers.mean, '0.001'), 4), 'packethost-', '') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServers.mean, '0.001'), 4), 'packethost-', '') - - title: Get Limits - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetLimits.mean, '0.001'), 4), 'packethost-', '') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + - title: Node Launches showTitle: true height: 250px @@ -113,40 +102,96 @@ dashboard: - title: Ready Node Launch Attempts type: graph span: 4 + lines: false + bars: true nullPointMode: null as zero - leftYAxisLabel: "events / min" + yaxes: + - label: "events / min" + - show: false targets: - - target: aliasSub(aliasByNode(summarize(stats_counts.nodepool.launch.provider.$provider.ready, '1m'), 4), 'packethost-', '') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasSub(aliasByNode(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$provider.error.*), '1m'), 4), 'packethost-', '') + - target: 
aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') - title: Time to Ready type: graph span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.launch.provider.$provider.ready.mean, '0.001'), 5), 'packethost-', '') - - title: Test Nodes (us-west-1) + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.packethost-us-west-1.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.packethost-us-west-1.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.packethost-us-west-1.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.packethost-us-west-1.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.packethost-us-west-1.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.packethost-us-west-1.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False + - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations + showTitle: true + height: 250px + panels: + - title: Create Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4) + - title: Get Server + type: graph + span: 4 + lines: true + nullPointMode: connected 
+ span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4) + - title: Delete Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4) + - title: List Servers + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4) + - title: Get Limits + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4) diff --git a/grafana/nodepool-rax-experimental.yaml b/grafana/nodepool-rax-experimental.yaml deleted file mode 100644 index 932f062e6c..0000000000 --- a/grafana/nodepool-rax-experimental.yaml +++ /dev/null @@ -1,177 +0,0 @@ -dashboard: - title: 'Nodepool: Rackspace Experimental' - templating: - - name: provider - includeAll: true - multi: true - query: stats.gauges.nodepool.provider.rax-* - refresh: true - type: query - rows: - - title: Description - height: 100px - panels: - - title: Description - content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** - - If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-rax-experimental.yaml).
- type: text - - title: Nodes - showTitle: true - height: 150px - panels: - - title: Building - span: 3 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.building) - type: singlestat - valueName: current - - title: Ready - span: 3 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.ready) - type: singlestat - valueName: current - - title: In Use - span: 3 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.in-use) - type: singlestat - valueName: current - - title: Deleting - span: 3 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.deleting) - type: singlestat - valueName: current - - title: API Operations - showTitle: true - height: 250px - panels: - - title: Create Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputePostServers.mean, '0.001'), 4), 'rax-', '') - - title: Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServersDetail.mean, '0.001'), 4), 'rax-', '') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeDeleteServers.mean, '0.001'), 4), 'rax-', '') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServers.mean, '0.001'), 4), 'rax-', '') - - title: Node Launches - showTitle: true - height: 250px - panels: - - title: Ready Node Launch Attempts 
- type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasSub(aliasByNode(smartSummarize(stats_counts.nodepool.launch.provider.$provider.ready, '1m'), 4), 'rax-', '') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasSub(aliasByNode(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$provider.error.*), '1m'), 4), 'rax-', '') - - title: Time to Ready - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.launch.provider.$provider.ready.mean, '0.001'), 5), 'rax-', '') - - title: Test Nodes (DFW) - type: graph - span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" - targets: - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False - - title: Test Nodes (IAD) - type: graph - span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" - targets: - - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.deleting, 'Deleting') - - target: 
alias(stats.gauges.nodepool.provider.rax-iad.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False - - title: Test Nodes (ORD) - type: graph - span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" - targets: - - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.in-use, 'Used') - - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.rax-ord.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False diff --git a/grafana/nodepool-rax.yaml b/grafana/nodepool-rax.yaml index 929e9cebb6..083f25aa77 100644 --- a/grafana/nodepool-rax.yaml +++ b/grafana/nodepool-rax.yaml @@ -1,15 +1,33 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# + dashboard: title: 'Nodepool: Rackspace' + templating: + - name: region + includeAll: true + multi: true + query: stats.gauges.nodepool.provider.rax-* + refresh: true + type: query rows: - title: Description - height: 100px + height: 150px panels: - title: Description content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + Rackspace Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for Rackspace. + + **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template). 
- If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-rax.yaml). type: text + - title: Nodes showTitle: true height: 150px @@ -20,7 +38,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.rax-*.nodes.building) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) type: singlestat valueName: current - title: Ready @@ -29,7 +47,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.rax-*.nodes.ready) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) type: singlestat valueName: current - title: In Use @@ -38,7 +56,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.rax-*.nodes.in-use) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) type: singlestat valueName: current - title: Deleting @@ -47,68 +65,36 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.rax-*.nodes.deleting) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) type: singlestat valueName: current - - title: API Operations - showTitle: true - height: 250px + + - title: Test Nodes + height: 400px panels: - - title: Create Server + - title: Test Node History - $region type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false targets: - - target: alias(scale(stats.timers.nodepool.task.rax-dfw.ComputePostServers.mean, '0.001'), 'DFW') - - target: alias(scale(stats.timers.nodepool.task.rax-iad.ComputePostServers.mean, '0.001'), 'IAD') - - target: alias(scale(stats.timers.nodepool.task.rax-ord.ComputePostServers.mean, '0.001'), 'ORD') - - title: 
Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.rax-dfw.ComputeGetServersDetail.mean, '0.001'), 'DFW') - - target: alias(scale(stats.timers.nodepool.task.rax-iad.ComputeGetServersDetail.mean, '0.001'), 'IAD') - - target: alias(scale(stats.timers.nodepool.task.rax-ord.ComputeGetServersDetail.mean, '0.001'), 'ORD') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.rax-dfw.ComputeDeleteServers.mean, '0.001'), 'DFW') - - target: alias(scale(stats.timers.nodepool.task.rax-iad.ComputeDeleteServers.mean, '0.001'), 'IAD') - - target: alias(scale(stats.timers.nodepool.task.rax-ord.ComputeDeleteServers.mean, '0.001'), 'ORD') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.rax-dfw.ComputeGetServers.mean, '0.001'), 'DFW') - - target: alias(scale(stats.timers.nodepool.task.rax-iad.ComputeGetServers.mean, '0.001'), 'IAD') - - target: alias(scale(stats.timers.nodepool.task.rax-ord.ComputeGetServers.mean, '0.001'), 'ORD') - - title: Get Limits - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.rax-dfw.ComputeGetLimits.mean, '0.001'), 'DFW') - - target: alias(scale(stats.timers.nodepool.task.rax-iad.ComputeGetLimits.mean, '0.001'), 'IAD') - - target: alias(scale(stats.timers.nodepool.task.rax-ord.ComputeGetLimits.mean, '0.001'), 'ORD') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: 
alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + - title: Node Launches showTitle: true height: 250px @@ -116,80 +102,96 @@ dashboard: - title: Ready Node Launch Attempts type: graph span: 4 + lines: false + bars: true nullPointMode: null as zero - leftYAxisLabel: "events / min" + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(smartSummarize(stats_counts.nodepool.launch.provider.rax-dfw.ready, '1m'), 'DFW') - - target: alias(smartSummarize(stats_counts.nodepool.launch.provider.rax-iad.ready, '1m'), 'IAD') - - target: alias(smartSummarize(stats_counts.nodepool.launch.provider.rax-ord.ready, '1m'), 'ORD') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.rax-dfw.error.*), '1m'), 'DFW') - - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.rax-iad.error.*), '1m'), 'IAD') - - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.rax-ord.error.*), '1m'), 'ORD') + - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') - title: Time to Ready type: graph span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false targets: - - target: alias(scale(stats.timers.nodepool.launch.provider.rax-dfw.ready.mean, '0.001'), 'DFW') - - target: alias(scale(stats.timers.nodepool.launch.provider.rax-iad.ready.mean, '0.001'), 'IAD') - - target: 
alias(scale(stats.timers.nodepool.launch.provider.rax-ord.ready.mean, '0.001'), 'ORD') - - title: Test Nodes (DFW) + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False - - title: Test Nodes (IAD) + - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations + showTitle: true + height: 250px + panels: + - title: Create Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4) + - title: Get Server type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.in-use, 'In Use') - - target: 
alias(stats.gauges.nodepool.provider.rax-iad.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.rax-iad.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False - - title: Test Nodes (ORD) + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4) + - title: Delete Server type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.rax-ord.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4) + - title: List Servers + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4) + - title: Get Limits + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4) diff --git a/grafana/nodepool-vexxhost.yaml b/grafana/nodepool-vexxhost.yaml index d6355dd6c3..46802f0864 100644 --- a/grafana/nodepool-vexxhost.yaml +++ b/grafana/nodepool-vexxhost.yaml @@ -1,15 +1,33 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# 
+ dashboard: title: 'Nodepool: Vexxhost' + templating: + - name: region + includeAll: true + multi: true + query: stats.gauges.nodepool.provider.vexxhost-* + refresh: true + type: query rows: - title: Description - height: 100px + height: 150px panels: - title: Description content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + Vexxhost Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for Vexxhost. + + **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template). - If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-vexxhost.yaml). 
type: text + - title: Nodes showTitle: true height: 150px @@ -20,7 +38,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.vexxhost-*.nodes.building) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) type: singlestat valueName: current - title: Ready @@ -29,7 +47,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.vexxhost-*.nodes.ready) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) type: singlestat valueName: current - title: In Use @@ -38,7 +56,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.vexxhost-*.nodes.in-use) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) type: singlestat valueName: current - title: Deleting @@ -47,58 +65,36 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.vexxhost-*.nodes.deleting) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) type: singlestat valueName: current - - title: API Operations - showTitle: true - height: 250px + + - title: Test Nodes + height: 400px panels: - - title: Create Server + - title: Test Node History - $region type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false targets: - - target: alias(scale(stats.timers.nodepool.task.vexxhost-ca-ymq-1.ComputePostServers.mean, '0.001'), 'YMQ') - - title: Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.vexxhost-ca-ymq-1.ComputeGetServersDetail.mean, '0.001'), 'YMQ') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: 
alias(scale(stats.timers.nodepool.task.vexxhost-ca-ymq-1.ComputeDeleteServers.mean, '0.001'), 'YMQ') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.vexxhost-ca-ymq-1.ComputeGetServers.mean, '0.001'), 'YMQ') - - title: Get Limits - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.vexxhost-ca-ymq-1.ComputeGetLimits.mean, '0.001'), 'YMQ') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + - title: Node Launches showTitle: true height: 250px @@ -106,40 +102,96 @@ dashboard: - title: Ready Node Launch Attempts type: graph span: 4 + lines: false + bars: true nullPointMode: null as zero - leftYAxisLabel: "events / min" + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(smartSummarize(stats_counts.nodepool.launch.provider.vexxhost-ca-ymq-1.ready, '1m'), 'YMQ') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.vexxhost-ca-ymq-1.error.*), '1m'), 'YMQ') + - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') - title: Time to Ready type: graph 
span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false targets: - - target: alias(scale(stats.timers.nodepool.launch.provider.vexxhost-ca-ymq-1.ready.mean, '0.001'), 'YMQ') - - title: Test Nodes (YMQ) + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.vexxhost-ca-ymq-1.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.vexxhost-ca-ymq-1.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.vexxhost-ca-ymq-1.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.vexxhost-ca-ymq-1.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.vexxhost-ca-ymq-1.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.vexxhost-ca-ymq-1.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False + - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations + showTitle: true + height: 250px + panels: + - title: Create Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4) + - title: Get Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4) + - title: Delete Server + type: graph + span: 4 + lines: 
true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4) + - title: List Servers + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4) + - title: Get Limits + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4) diff --git a/grafana/nodepool.template b/grafana/nodepool.template new file mode 100644 index 0000000000..c7a48491ba --- /dev/null +++ b/grafana/nodepool.template @@ -0,0 +1,197 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# + +dashboard: + title: 'Nodepool: %PROVIDER%' + templating: + - name: region + includeAll: true + multi: true + query: stats.gauges.nodepool.provider.%STAT_LIST% + refresh: true + type: query + rows: + - title: Description + height: 150px + panels: + - title: Description + content: | + %PROVIDER% Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for %PROVIDER%. + + **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template). 
+ + type: text + + - title: Nodes + showTitle: true + height: 150px + panels: + - title: Building + span: 3 + sparkline: + full: true + show: true + targets: + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) + type: singlestat + valueName: current + - title: Ready + span: 3 + sparkline: + full: true + show: true + targets: + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) + type: singlestat + valueName: current + - title: In Use + span: 3 + sparkline: + full: true + show: true + targets: + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) + type: singlestat + valueName: current + - title: Deleting + span: 3 + sparkline: + full: true + show: true + targets: + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) + type: singlestat + valueName: current + + - title: Test Nodes + height: 400px + panels: + - title: Test Node History - $region + type: graph + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false + targets: + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + + - title: Node Launches + showTitle: true + height: 250px + panels: + - title: Ready Node Launch Attempts + type: graph + span: 4 + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false + targets: + - target: 
aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') + - title: Time to Ready + type: graph + span: 4 + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts + type: graph + span: 4 + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false + targets: + - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations + showTitle: true + height: 250px + panels: + - title: Create Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4) + - title: Get Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4) + - title: Delete Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4) + - title: List Servers + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4) + - title: Get Limits + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4) 
diff --git a/grafana/nodepool.yaml b/grafana/nodepool.yaml deleted file mode 100644 index 3ed09528ce..0000000000 --- a/grafana/nodepool.yaml +++ /dev/null @@ -1,143 +0,0 @@ -dashboard: - title: Nodepool - rows: - - title: Description - height: 100px - panels: - - title: Description - content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** - - If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.yaml). - type: text - - title: Nodes - showTitle: true - height: 150px - panels: - - title: Building - span: 2 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.*.nodes.building) - type: singlestat - valueName: current - - title: Ready - span: 2 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.*.nodes.ready) - type: singlestat - valueName: current - - title: In Use - span: 1 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.*.nodes.in-use) - type: singlestat - valueName: current - - title: Used - span: 1 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.*.nodes.used) - type: singlestat - valueName: current - - title: Deleting - span: 2 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.*.nodes.deleting) - type: singlestat - valueName: current - - title: Hold - span: 2 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.*.nodes.hold) - type: singlestat - valueName: current - - title: Failed - span: 2 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.*.nodes.failed) - type: singlestat - valueName: current - - title: 
Nodepool Images - showTitle: true - height: 320px - panels: - - title: Building Nodes - span: 3 - targets: - - target: aliasByNode(stats.gauges.nodepool.label.*.nodes.building, 4) - type: graph - - title: Ready Nodes - span: 3 - targets: - - target: aliasByNode(stats.gauges.nodepool.label.*.nodes.ready, 4) - type: graph - - title: In-use Nodes - span: 3 - targets: - - target: aliasByNode(stats.gauges.nodepool.label.*.nodes.in-use, 4) - type: graph - - title: Deleting Nodes - span: 3 - targets: - - target: aliasByNode(stats.gauges.nodepool.label.*.nodes.deleting, 4) - type: graph - - title: Node Launches - showTitle: true - height: 250px - panels: - - title: Ready Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasByNode(summarize(stats_counts.nodepool.launch.provider.*.ready, '1m'), 4) - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasByNode(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.*.error.*), '1m'), 4) - - title: Time to Ready - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasByNode(scale(stats.timers.nodepool.launch.provider.*.ready.mean, '0.001'), 5) - - title: Job Runtimes - showTitle: true - height: 250px - panels: - - title: gate-tempest-dsvm-neutron-full - type: graph - span: 6 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(averageSeries(stats.timers.nodepool.job.gate-tempest-dsvm-neutron-full-ubuntu-xenial.master.ubuntu-xenial.runtime.mean), '0.001'), 'Average')