diff --git a/grafana/create-nodepool.sh b/grafana/create-nodepool.sh new file mode 100755 index 0000000000..469f9547a8 --- /dev/null +++ b/grafana/create-nodepool.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# +# Creates graphs for nodepool regions from a given provider +# +# Note we are somewhat particular about keeping these separate to +# avoid the idea that we are providing some sort of cross-provider +# benchmark. +# + +# create PROVIDER STAT_LIST OUTPUT_FILE +# Replaces %PROVIDER%, %STAT_LIST% and %OUTPUT_FILE% in nodepool.template +# and writes the result to OUTPUT_FILE. nodepool.template is read relative +# to the current directory, so run this script from the directory holding it. +function create { + local provider="$1" + local stat_list="$2" + local output_file="$3" + + sed -e "s/%PROVIDER%/${provider}/" \ + -e "s/%STAT_LIST%/${stat_list}/" \ + -e "s/%OUTPUT_FILE%/${output_file}/" \ + nodepool.template > "${output_file}" +} + +create Rackspace 'rax-*' nodepool-rax.yaml +create Inap 'inap-*' nodepool-inap.yaml +create Limestone 'limestone-*' nodepool-limestone.yaml +create Linaro 'linaro-*' nodepool-linaro.yaml +create OVH 'ovh-*' nodepool-ovh.yaml +create Packethost 'packethost-*' nodepool-packethost.yaml +create Vexxhost 'vexxhost-*' nodepool-vexxhost.yaml +create Citycloud 'citycloud-*' nodepool-citycloud.yaml diff --git a/grafana/nodepool-citycloud.yaml b/grafana/nodepool-citycloud.yaml index 4f4539725a..999fdc73b0 100644 --- a/grafana/nodepool-citycloud.yaml +++ b/grafana/nodepool-citycloud.yaml @@ -1,7 +1,11 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# + dashboard: - title: 'Nodepool: City Cloud' + title: 'Nodepool: Citycloud' templating: - - name: provider + - name: region includeAll: true multi: true query: stats.gauges.nodepool.provider.citycloud-* @@ -9,14 +13,21 @@ dashboard: type: query rows: - title: Description - height: 100px + height: 150px panels: - title: Description content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + Citycloud Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for Citycloud. 
+ + **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template). - If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-citycloud.yaml). type: text + - title: Nodes showTitle: true height: 150px @@ -27,7 +38,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.building) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) type: singlestat valueName: current - title: Ready @@ -36,7 +47,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.ready) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) type: singlestat valueName: current - title: In Use @@ -45,7 +56,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.in-use) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) type: singlestat valueName: current - title: Deleting @@ -54,58 +65,36 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.deleting) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) type: singlestat valueName: current - - title: API Operations - showTitle: true - height: 250px + + - title: Test Nodes + height: 400px panels: - - title: Create Server + - title: Test Node History - $region type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - 
label: "nodes" + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputePostServers.mean, '0.001'), 4), 'citycloud-', '') - - title: Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServersDetail.mean, '0.001'), 4), 'citycloud-', '') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeDeleteServers.mean, '0.001'), 4), 'citycloud-', '') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServers.mean, '0.001'), 4), 'citycloud-', '') - - title: Get Limits - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetLimits.mean, '0.001'), 4), 'citycloud-', '') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + - title: Node Launches showTitle: true height: 250px @@ -113,91 +102,96 @@ dashboard: - title: Ready Node Launch Attempts type: graph span: 4 + lines: false + bars: true nullPointMode: null as zero - leftYAxisLabel: "events / min" + 
yaxes: + - label: "events / min" + - show: false targets: - - target: aliasSub(aliasByNode(summarize(stats_counts.nodepool.launch.provider.$provider.ready, '1m'), 4), 'citycloud-', '') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasSub(aliasByNode(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$provider.error.*), '1m'), 4), 'citycloud-', '') + - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') - title: Time to Ready type: graph span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.launch.provider.$provider.ready.mean, '0.001'), 5), 'citycloud-', '') - - title: Test Nodes (Kna1) + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.citycloud-kna1.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.citycloud-kna1.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.citycloud-kna1.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.citycloud-kna1.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.citycloud-kna1.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.citycloud-kna1.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False - - title: Test Nodes (La1) + - target: 
alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations + showTitle: true + height: 250px + panels: + - title: Create Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4) + - title: Get Server type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.citycloud-la1.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.citycloud-la1.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.citycloud-la1.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.citycloud-la1.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.citycloud-la1.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.citycloud-la1.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False - - title: Test Nodes (Lon1) + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4) + - title: Delete Server type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" - targets: - - target: alias(stats.gauges.nodepool.provider.citycloud-lon1.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.citycloud-lon1.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.citycloud-lon1.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.citycloud-lon1.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.citycloud-lon1.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.citycloud-lon1.max_servers,
'Max') - seriesOverrides: - - alias: Max - stack: False - - title: Test Nodes (Sto2) - type: graph + lines: true + nullPointMode: connected span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + yaxes: + - format: ms + label: Time + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.citycloud-sto2.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.citycloud-sto2.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.citycloud-sto2.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.citycloud-sto2.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.citycloud-sto2.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.citycloud-sto2.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4) + - title: List Servers + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4) + - title: Get Limits + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4) diff --git a/grafana/nodepool-inap.yaml b/grafana/nodepool-inap.yaml index 6a2d0cd93c..a6ee224f92 100644 --- a/grafana/nodepool-inap.yaml +++ b/grafana/nodepool-inap.yaml @@ -1,7 +1,11 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# + dashboard: - title: 'Nodepool: INAP' + title: 'Nodepool: Inap' templating: - - name: provider + - name: region includeAll: true multi: true query: stats.gauges.nodepool.provider.inap-* @@ -9,14 +13,21 @@ dashboard: type: query rows: - title: Description - height: 100px + height: 150px panels: - title: 
Description content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + Inap Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for Inap. + + **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template). - If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-inap.yaml). type: text + - title: Nodes showTitle: true height: 150px @@ -27,7 +38,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.building) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) type: singlestat valueName: current - title: Ready @@ -36,7 +47,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.ready) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) type: singlestat valueName: current - title: In Use @@ -45,7 +56,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.in-use) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) type: singlestat valueName: current - title: Deleting @@ -54,58 +65,36 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.deleting) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) type: singlestat valueName: current - - title: API Operations - showTitle: true - height: 250px + + - title: Test Nodes + height: 
400px panels: - - title: Create Server + - title: Test Node History - $region type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputePostServers.mean, '0.001'), 4), 'inap-', '') - - title: Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServersDetail.mean, '0.001'), 4), 'inap-', '') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeDeleteServers.mean, '0.001'), 4), 'inap-', '') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServers.mean, '0.001'), 4), 'inap-', '') - - title: Get Limits - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetLimits.mean, '0.001'), 4), 'inap-', '') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + - 
title: Node Launches showTitle: true height: 250px @@ -113,40 +102,96 @@ dashboard: - title: Ready Node Launch Attempts type: graph span: 4 + lines: false + bars: true nullPointMode: null as zero - leftYAxisLabel: "events / min" + yaxes: + - label: "events / min" + - show: false targets: - - target: aliasSub(aliasByNode(summarize(stats_counts.nodepool.launch.provider.$provider.ready, '1m'), 4), 'inap-', '') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasSub(aliasByNode(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$provider.error.*), '1m'), 4), 'inap-', '') + - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') - title: Time to Ready type: graph span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.launch.provider.$provider.ready.mean, '0.001'), 5), 'inap-', '') - - title: Test Nodes (MTL01) + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.inap-mtl01.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.inap-mtl01.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.inap-mtl01.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.inap-mtl01.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.inap-mtl01.nodes.deleting, 'Deleting') - - target: 
alias(stats.gauges.nodepool.provider.inap-mtl01.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False + - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations + showTitle: true + height: 250px + panels: + - title: Create Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4) + - title: Get Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4) + - title: Delete Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4) + - title: List Servers + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4) + - title: Get Limits + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4) diff --git a/grafana/nodepool-limestone.yaml b/grafana/nodepool-limestone.yaml index a161b28293..03d35e56f8 100644 --- a/grafana/nodepool-limestone.yaml +++ b/grafana/nodepool-limestone.yaml @@ -1,7 +1,11 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# + dashboard: title: 'Nodepool: Limestone' templating: - - name: provider + - name: region includeAll: true multi: true query: stats.gauges.nodepool.provider.limestone-* @@ -9,14 +13,21 @@ 
dashboard: type: query rows: - title: Description - height: 100px + height: 150px panels: - title: Description content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + Limestone Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for Limestone. + + **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template). - If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-limestone.yaml). type: text + - title: Nodes showTitle: true height: 150px @@ -27,7 +38,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.building) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) type: singlestat valueName: current - title: Ready @@ -36,7 +47,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.ready) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) type: singlestat valueName: current - title: In Use @@ -45,7 +56,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.in-use) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) type: singlestat valueName: current - title: Deleting @@ -54,58 +65,36 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.deleting) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) type: singlestat 
valueName: current - - title: API Operations - showTitle: true - height: 250px + + - title: Test Nodes + height: 400px panels: - - title: Create Server + - title: Test Node History - $region type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputePostServers.mean, '0.001'), 4), 'limestone-', '') - - title: Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServersDetail.mean, '0.001'), 4), 'limestone-', '') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeDeleteServers.mean, '0.001'), 4), 'limestone-', '') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServers.mean, '0.001'), 4), 'limestone-', '') - - title: Get Limits - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetLimits.mean, '0.001'), 4), 'limestone-', '') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - 
target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + - title: Node Launches showTitle: true height: 250px @@ -113,40 +102,96 @@ dashboard: - title: Ready Node Launch Attempts type: graph span: 4 + lines: false + bars: true nullPointMode: null as zero - leftYAxisLabel: "events / min" + yaxes: + - label: "events / min" + - show: false targets: - - target: aliasSub(aliasByNode(summarize(stats_counts.nodepool.launch.provider.$provider.ready, '1m'), 4), 'limestone-', '') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasSub(aliasByNode(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$provider.error.*), '1m'), 4), 'limestone-', '') + - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') - title: Time to Ready type: graph span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.launch.provider.$provider.ready.mean, '0.001'), 5), 'limestone-', '') - - title: Test Nodes (RegionOne) + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.limestone-regionone.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.limestone-regionone.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.limestone-regionone.nodes.in-use, 'In Use') - - 
target: alias(stats.gauges.nodepool.provider.limestone-regionone.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.limestone-regionone.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.limestone-regionone.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False + - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations + showTitle: true + height: 250px + panels: + - title: Create Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4) + - title: Get Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4) + - title: Delete Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4) + - title: List Servers + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4) + - title: Get Limits + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4) diff --git a/grafana/nodepool-linaro.yaml b/grafana/nodepool-linaro.yaml index fa1efeb260..c88539d328 100644 --- a/grafana/nodepool-linaro.yaml +++ b/grafana/nodepool-linaro.yaml @@ -1,7 +1,11 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# + 
dashboard: title: 'Nodepool: Linaro' templating: - - name: provider + - name: region includeAll: true multi: true query: stats.gauges.nodepool.provider.linaro-* @@ -9,14 +13,21 @@ dashboard: type: query rows: - title: Description - height: 100px + height: 150px panels: - title: Description content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + Linaro Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for Linaro. + + **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template). - If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-linaro.yaml). 
type: text + - title: Nodes showTitle: true height: 150px @@ -27,7 +38,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.building) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) type: singlestat valueName: current - title: Ready @@ -36,7 +47,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.ready) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) type: singlestat valueName: current - title: In Use @@ -45,7 +56,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.in-use) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) type: singlestat valueName: current - title: Deleting @@ -54,58 +65,36 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.deleting) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) type: singlestat valueName: current - - title: API Operations - showTitle: true - height: 250px + + - title: Test Nodes + height: 400px panels: - - title: Create Server + - title: Test Node History - $region type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputePostServers.mean, '0.001'), 4), 'linaro-', '') - - title: Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServersDetail.mean, '0.001'), 4), 'linaro-', '') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: 
aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeDeleteServers.mean, '0.001'), 4), 'linaro-', '') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServers.mean, '0.001'), 4), 'linaro-', '') - - title: Get Limits - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetLimits.mean, '0.001'), 4), 'linaro-', '') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + - title: Node Launches showTitle: true height: 250px @@ -113,40 +102,96 @@ dashboard: - title: Ready Node Launch Attempts type: graph span: 4 + lines: false + bars: true nullPointMode: null as zero - leftYAxisLabel: "events / min" + yaxes: + - label: "events / min" + - show: false targets: - - target: aliasSub(aliasByNode(summarize(stats_counts.nodepool.launch.provider.$provider.ready, '1m'), 4), 'linaro-', '') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasSub(aliasByNode(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$provider.error.*), '1m'), 4), 'linaro-', '') + - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), 
'.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') - title: Time to Ready type: graph span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.launch.provider.$provider.ready.mean, '0.001'), 5), 'linaro-', '') - - title: Test Nodes (MTL01) + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.linaro-cn1.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.linaro-cn1.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.linaro-cn1.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.linaro-cn1.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.linaro-cn1.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.linaro-cn1.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False + - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations + showTitle: true + height: 250px + panels: + - title: Create Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4) + - title: Get Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: 
aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4) + - title: Delete Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4) + - title: List Servers + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4) + - title: Get Limits + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4) diff --git a/grafana/nodepool-ovh.yaml b/grafana/nodepool-ovh.yaml index 5ae44a3a73..29ff41c303 100644 --- a/grafana/nodepool-ovh.yaml +++ b/grafana/nodepool-ovh.yaml @@ -1,15 +1,33 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# + dashboard: title: 'Nodepool: OVH' + templating: + - name: region + includeAll: true + multi: true + query: stats.gauges.nodepool.provider.ovh-* + refresh: true + type: query rows: - title: Description - height: 100px + height: 150px panels: - title: Description content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + OVH Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for OVH. + + **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template). 
- If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-ovh.yaml). type: text + - title: Nodes showTitle: true height: 150px @@ -20,7 +38,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.ovh-*.nodes.building) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) type: singlestat valueName: current - title: Ready @@ -29,7 +47,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.ovh-*.nodes.ready) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) type: singlestat valueName: current - title: In Use @@ -38,7 +56,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.ovh-*.nodes.in-use) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) type: singlestat valueName: current - title: Deleting @@ -47,63 +65,36 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.ovh-*.nodes.deleting) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) type: singlestat valueName: current - - title: API Operations - showTitle: true - height: 250px + + - title: Test Nodes + height: 400px panels: - - title: Create Server + - title: Test Node History - $region type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false targets: - - target: alias(scale(stats.timers.nodepool.task.ovh-bhs1.ComputePostServers.mean, '0.001'), 'BHS1') - - target: alias(scale(stats.timers.nodepool.task.ovh-gra1.ComputePostServers.mean, '0.001'), 'GRA1') - - title: Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - 
targets: - - target: alias(scale(stats.timers.nodepool.task.ovh-bhs1.ComputeGetServersDetail.mean, '0.001'), 'BHS1') - - target: alias(scale(stats.timers.nodepool.task.ovh-gra1.ComputeGetServersDetail.mean, '0.001'), 'GRA1') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.ovh-bhs1.ComputeDeleteServers.mean, '0.001'), 'BHS1') - - target: alias(scale(stats.timers.nodepool.task.ovh-gra1.ComputeDeleteServers.mean, '0.001'), 'GRA1') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.ovh-bhs1.ComputeGetServers.mean, '0.001'), 'BHS1') - - target: alias(scale(stats.timers.nodepool.task.ovh-gra1.ComputeGetServers.mean, '0.001'), 'GRA1') - - title: Get Limits - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.ovh-bhs1.ComputeGetLimits.mean, '0.001'), 'BHS1') - - target: alias(scale(stats.timers.nodepool.task.ovh-gra1.ComputeGetLimits.mean, '0.001'), 'GRA1') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + - title: Node Launches showTitle: true height: 250px @@ -111,74 +102,96 @@ dashboard: - title: Ready Node Launch Attempts type: graph span: 4 + lines: false + bars: true nullPointMode: null as zero - leftYAxisLabel: 
"events / min" + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(smartSummarize(stats_counts.nodepool.launch.provider.ovh-bhs1.ready, '1m'), 'BHS1') - - target: alias(smartSummarize(stats_counts.nodepool.launch.provider.ovh-gra1.ready, '1m'), 'GRA1') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.ovh-bhs1.error.*), '1m'), 'BHS1') - - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.ovh-gra1.error.*), '1m'), 'GRA1') + - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') - title: Time to Ready type: graph span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false targets: - - target: alias(scale(stats.timers.nodepool.launch.provider.ovh-bhs1.ready.mean, '0.001'), 'BHS1') - - target: alias(scale(stats.timers.nodepool.launch.provider.ovh-gra1.ready.mean, '0.001'), 'GRA1') - - title: Test Nodes (BHS1) + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.ovh-bhs1.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.ovh-bhs1.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.ovh-bhs1.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.ovh-bhs1.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.ovh-bhs1.nodes.deleting, 
'Deleting') - - target: alias(stats.gauges.nodepool.provider.ovh-bhs1.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False - - title: Test Nodes (GRA1) - type: graph - span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" - targets: - - target: alias(stats.gauges.nodepool.provider.ovh-gra1.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.ovh-gra1.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.ovh-gra1.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.ovh-gra1.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.ovh-gra1.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.ovh-gra1.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False - - title: Images + - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations showTitle: true height: 250px panels: - - title: Image Uploads + - title: Create Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4) + - title: Get Server type: graph span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false targets: - - target: alias(scale(averageSeries(stats.timers.nodepool.image_update.*.ovh-bhs1.mean), '0.001'), 'BHS1') - - target: alias(scale(averageSeries(stats.timers.nodepool.image_update.*.ovh-gra1.mean), '0.001'), 'GRA1') + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4) + - title: Delete Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: 
aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4) + - title: List Servers + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4) + - title: Get Limits + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4) diff --git a/grafana/nodepool-packethost.yaml b/grafana/nodepool-packethost.yaml index f1eb31e93e..70dfb43151 100644 --- a/grafana/nodepool-packethost.yaml +++ b/grafana/nodepool-packethost.yaml @@ -1,7 +1,11 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# + dashboard: title: 'Nodepool: Packethost' templating: - - name: provider + - name: region includeAll: true multi: true query: stats.gauges.nodepool.provider.packethost-* @@ -9,14 +13,21 @@ dashboard: type: query rows: - title: Description - height: 100px + height: 150px panels: - title: Description content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + Packethost Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for Packethost. + + **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template). - If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-packethost.yaml). 
type: text + - title: Nodes showTitle: true height: 150px @@ -27,7 +38,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.building) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) type: singlestat valueName: current - title: Ready @@ -36,7 +47,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.ready) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) type: singlestat valueName: current - title: In Use @@ -45,7 +56,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.in-use) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) type: singlestat valueName: current - title: Deleting @@ -54,58 +65,36 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.deleting) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) type: singlestat valueName: current - - title: API Operations - showTitle: true - height: 250px + + - title: Test Nodes + height: 400px panels: - - title: Create Server + - title: Test Node History - $region type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputePostServers.mean, '0.001'), 4), 'packethost-', '') - - title: Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServersDetail.mean, '0.001'), 4), 'packethost-', '') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - 
target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeDeleteServers.mean, '0.001'), 4), 'packethost-', '') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServers.mean, '0.001'), 4), 'packethost-', '') - - title: Get Limits - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetLimits.mean, '0.001'), 4), 'packethost-', '') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + - title: Node Launches showTitle: true height: 250px @@ -113,40 +102,96 @@ dashboard: - title: Ready Node Launch Attempts type: graph span: 4 + lines: false + bars: true nullPointMode: null as zero - leftYAxisLabel: "events / min" + yaxes: + - label: "events / min" + - show: false targets: - - target: aliasSub(aliasByNode(summarize(stats_counts.nodepool.launch.provider.$provider.ready, '1m'), 4), 'packethost-', '') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasSub(aliasByNode(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$provider.error.*), '1m'), 4), 'packethost-', '') + - target: 
aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') - title: Time to Ready type: graph span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.launch.provider.$provider.ready.mean, '0.001'), 5), 'packethost-', '') - - title: Test Nodes (us-west-1) + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.packethost-us-west-1.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.packethost-us-west-1.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.packethost-us-west-1.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.packethost-us-west-1.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.packethost-us-west-1.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.packethost-us-west-1.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False + - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations + showTitle: true + height: 250px + panels: + - title: Create Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4) + - title: Get Server + type: graph + span: 4 + lines: true + nullPointMode: connected 
+ span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4) + - title: Delete Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4) + - title: List Servers + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4) + - title: Get Limits + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4) diff --git a/grafana/nodepool-rax-experimental.yaml b/grafana/nodepool-rax-experimental.yaml deleted file mode 100644 index 932f062e6c..0000000000 --- a/grafana/nodepool-rax-experimental.yaml +++ /dev/null @@ -1,177 +0,0 @@ -dashboard: - title: 'Nodepool: Rackspace Experimental' - templating: - - name: provider - includeAll: true - multi: true - query: stats.gauges.nodepool.provider.rax-* - refresh: true - type: query - rows: - - title: Description - height: 100px - panels: - - title: Description - content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** - - If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-rax-experimental.yaml). 
- type: text - - title: Nodes - showTitle: true - height: 150px - panels: - - title: Building - span: 3 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.building) - type: singlestat - valueName: current - - title: Ready - span: 3 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.ready) - type: singlestat - valueName: current - - title: In Use - span: 3 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.in-use) - type: singlestat - valueName: current - - title: Deleting - span: 3 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.deleting) - type: singlestat - valueName: current - - title: API Operations - showTitle: true - height: 250px - panels: - - title: Create Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputePostServers.mean, '0.001'), 4), 'rax-', '') - - title: Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServersDetail.mean, '0.001'), 4), 'rax-', '') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeDeleteServers.mean, '0.001'), 4), 'rax-', '') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServers.mean, '0.001'), 4), 'rax-', '') - - title: Node Launches - showTitle: true - height: 250px - panels: - - title: Ready Node Launch Attempts 
- type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasSub(aliasByNode(smartSummarize(stats_counts.nodepool.launch.provider.$provider.ready, '1m'), 4), 'rax-', '') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasSub(aliasByNode(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$provider.error.*), '1m'), 4), 'rax-', '') - - title: Time to Ready - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.launch.provider.$provider.ready.mean, '0.001'), 5), 'rax-', '') - - title: Test Nodes (DFW) - type: graph - span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" - targets: - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False - - title: Test Nodes (IAD) - type: graph - span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" - targets: - - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.deleting, 'Deleting') - - target: 
alias(stats.gauges.nodepool.provider.rax-iad.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False - - title: Test Nodes (ORD) - type: graph - span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" - targets: - - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.in-use, 'Used') - - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.rax-ord.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False diff --git a/grafana/nodepool-rax.yaml b/grafana/nodepool-rax.yaml index 929e9cebb6..083f25aa77 100644 --- a/grafana/nodepool-rax.yaml +++ b/grafana/nodepool-rax.yaml @@ -1,15 +1,33 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# + dashboard: title: 'Nodepool: Rackspace' + templating: + - name: region + includeAll: true + multi: true + query: stats.gauges.nodepool.provider.rax-* + refresh: true + type: query rows: - title: Description - height: 100px + height: 150px panels: - title: Description content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + Rackspace Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for Rackspace. + + **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template). 
- If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-rax.yaml). type: text + - title: Nodes showTitle: true height: 150px @@ -20,7 +38,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.rax-*.nodes.building) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) type: singlestat valueName: current - title: Ready @@ -29,7 +47,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.rax-*.nodes.ready) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) type: singlestat valueName: current - title: In Use @@ -38,7 +56,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.rax-*.nodes.in-use) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) type: singlestat valueName: current - title: Deleting @@ -47,68 +65,36 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.rax-*.nodes.deleting) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) type: singlestat valueName: current - - title: API Operations - showTitle: true - height: 250px + + - title: Test Nodes + height: 400px panels: - - title: Create Server + - title: Test Node History - $region type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false targets: - - target: alias(scale(stats.timers.nodepool.task.rax-dfw.ComputePostServers.mean, '0.001'), 'DFW') - - target: alias(scale(stats.timers.nodepool.task.rax-iad.ComputePostServers.mean, '0.001'), 'IAD') - - target: alias(scale(stats.timers.nodepool.task.rax-ord.ComputePostServers.mean, '0.001'), 'ORD') - - title: 
Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.rax-dfw.ComputeGetServersDetail.mean, '0.001'), 'DFW') - - target: alias(scale(stats.timers.nodepool.task.rax-iad.ComputeGetServersDetail.mean, '0.001'), 'IAD') - - target: alias(scale(stats.timers.nodepool.task.rax-ord.ComputeGetServersDetail.mean, '0.001'), 'ORD') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.rax-dfw.ComputeDeleteServers.mean, '0.001'), 'DFW') - - target: alias(scale(stats.timers.nodepool.task.rax-iad.ComputeDeleteServers.mean, '0.001'), 'IAD') - - target: alias(scale(stats.timers.nodepool.task.rax-ord.ComputeDeleteServers.mean, '0.001'), 'ORD') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.rax-dfw.ComputeGetServers.mean, '0.001'), 'DFW') - - target: alias(scale(stats.timers.nodepool.task.rax-iad.ComputeGetServers.mean, '0.001'), 'IAD') - - target: alias(scale(stats.timers.nodepool.task.rax-ord.ComputeGetServers.mean, '0.001'), 'ORD') - - title: Get Limits - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.rax-dfw.ComputeGetLimits.mean, '0.001'), 'DFW') - - target: alias(scale(stats.timers.nodepool.task.rax-iad.ComputeGetLimits.mean, '0.001'), 'IAD') - - target: alias(scale(stats.timers.nodepool.task.rax-ord.ComputeGetLimits.mean, '0.001'), 'ORD') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: 
alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + - title: Node Launches showTitle: true height: 250px @@ -116,80 +102,96 @@ dashboard: - title: Ready Node Launch Attempts type: graph span: 4 + lines: false + bars: true nullPointMode: null as zero - leftYAxisLabel: "events / min" + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(smartSummarize(stats_counts.nodepool.launch.provider.rax-dfw.ready, '1m'), 'DFW') - - target: alias(smartSummarize(stats_counts.nodepool.launch.provider.rax-iad.ready, '1m'), 'IAD') - - target: alias(smartSummarize(stats_counts.nodepool.launch.provider.rax-ord.ready, '1m'), 'ORD') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.rax-dfw.error.*), '1m'), 'DFW') - - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.rax-iad.error.*), '1m'), 'IAD') - - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.rax-ord.error.*), '1m'), 'ORD') + - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') - title: Time to Ready type: graph span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false targets: - - target: alias(scale(stats.timers.nodepool.launch.provider.rax-dfw.ready.mean, '0.001'), 'DFW') - - target: alias(scale(stats.timers.nodepool.launch.provider.rax-iad.ready.mean, '0.001'), 'IAD') - - target: 
alias(scale(stats.timers.nodepool.launch.provider.rax-ord.ready.mean, '0.001'), 'ORD') - - title: Test Nodes (DFW) + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.in-use, 'In Use') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.used, 'Used') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.deleting, 'Deleting') - - target: alias(stats.gauges.nodepool.provider.rax-dfw.max_servers, 'Max') - seriesOverrides: - - alias: Max - stack: False - - title: Test Nodes (IAD) + - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations + showTitle: true + height: 250px + panels: + - title: Create Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4) + - title: Get Server type: graph span: 4 - stack: true - tooltip: - value_type: individual - leftYAxisLabel: "nodes" + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false targets: - - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.building, 'Building') - - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.ready, 'Available') - - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.in-use, 'In Use') - - target: 
alias(stats.gauges.nodepool.provider.rax-iad.nodes.used, 'Used')
-            - target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.deleting, 'Deleting')
-            - target: alias(stats.gauges.nodepool.provider.rax-iad.max_servers, 'Max')
-          seriesOverrides:
-            - alias: Max
-              stack: False
-        - title: Test Nodes (ORD)
+            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4)
+        - title: Delete Server
           type: graph
           span: 4
-          stack: true
-          tooltip:
-            value_type: individual
-          leftYAxisLabel: "nodes"
+          lines: true
+          nullPointMode: connected
+          span: 4
+          yaxes:
+            - format: ms
+              label: Time
+            - show: false
           targets:
-            - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.building, 'Building')
-            - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.ready, 'Available')
-            - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.in-use, 'In Use')
-            - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.used, 'Used')
-            - target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.deleting, 'Deleting')
-            - target: alias(stats.gauges.nodepool.provider.rax-ord.max_servers, 'Max')
-          seriesOverrides:
-            - alias: Max
-              stack: False
+            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4)
+        - title: List Servers
+          type: graph
+          lines: true
+          nullPointMode: connected
+          span: 4
+          yaxes:
+            - format: ms
+              label: Time
+            - show: false
+          targets:
+            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4)
+        - title: Get Limits
+          type: graph
+          lines: true
+          nullPointMode: connected
+          span: 4
+          yaxes:
+            - format: ms
+              label: Time
+            - show: false
+          targets:
+            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4)
diff --git a/grafana/nodepool-vexxhost.yaml b/grafana/nodepool-vexxhost.yaml
index d6355dd6c3..46802f0864 100644
--- a/grafana/nodepool-vexxhost.yaml
+++ b/grafana/nodepool-vexxhost.yaml
@@ -1,15 +1,33 @@
+#
+# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh
+#
+ dashboard: title: 'Nodepool: Vexxhost' + templating: + - name: region + includeAll: true + multi: true + query: stats.gauges.nodepool.provider.vexxhost-* + refresh: true + type: query rows: - title: Description - height: 100px + height: 150px panels: - title: Description content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + Vexxhost Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for Vexxhost. + + **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template). - If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-vexxhost.yaml). 
type: text + - title: Nodes showTitle: true height: 150px @@ -20,7 +38,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.vexxhost-*.nodes.building) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) type: singlestat valueName: current - title: Ready @@ -29,7 +47,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.vexxhost-*.nodes.ready) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) type: singlestat valueName: current - title: In Use @@ -38,7 +56,7 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.vexxhost-*.nodes.in-use) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) type: singlestat valueName: current - title: Deleting @@ -47,58 +65,36 @@ dashboard: full: true show: true targets: - - target: sumSeries(stats.gauges.nodepool.provider.vexxhost-*.nodes.deleting) + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) type: singlestat valueName: current - - title: API Operations - showTitle: true - height: 250px + + - title: Test Nodes + height: 400px panels: - - title: Create Server + - title: Test Node History - $region type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false targets: - - target: alias(scale(stats.timers.nodepool.task.vexxhost-ca-ymq-1.ComputePostServers.mean, '0.001'), 'YMQ') - - title: Get Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.vexxhost-ca-ymq-1.ComputeGetServersDetail.mean, '0.001'), 'YMQ') - - title: Delete Server - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: 
alias(scale(stats.timers.nodepool.task.vexxhost-ca-ymq-1.ComputeDeleteServers.mean, '0.001'), 'YMQ') - - title: List Servers - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.vexxhost-ca-ymq-1.ComputeGetServers.mean, '0.001'), 'YMQ') - - title: Get Limits - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(stats.timers.nodepool.task.vexxhost-ca-ymq-1.ComputeGetLimits.mean, '0.001'), 'YMQ') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + - title: Node Launches showTitle: true height: 250px @@ -106,40 +102,96 @@ dashboard: - title: Ready Node Launch Attempts type: graph span: 4 + lines: false + bars: true nullPointMode: null as zero - leftYAxisLabel: "events / min" + yaxes: + - label: "events / min" + - show: false targets: - - target: alias(smartSummarize(stats_counts.nodepool.launch.provider.vexxhost-ca-ymq-1.ready, '1m'), 'YMQ') - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.vexxhost-ca-ymq-1.error.*), '1m'), 'YMQ') + - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') - title: Time to Ready type: graph 
span: 4
-          leftYAxisLabel: "time"
-          y_formats:
-            - s
-            - none
+          lines: false
+          bars: true
+          nullPointMode: null as zero
+          yaxes:
+            - label: "time"
+              format: ms
+            - show: false
           targets:
-            - target: alias(scale(stats.timers.nodepool.launch.provider.vexxhost-ca-ymq-1.ready.mean, '0.001'), 'YMQ')
-        - title: Test Nodes (YMQ)
+            - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5)
+        - title: Error Node Launch Attempts
           type: graph
           span: 4
-          stack: true
-          tooltip:
-            value_type: individual
-          leftYAxisLabel: "nodes"
+          lines: false
+          bars: true
+          nullPointMode: null as zero
+          yaxes:
+            - label: "events / min"
+            - show: false
           targets:
-            - target: alias(stats.gauges.nodepool.provider.vexxhost-ca-ymq-1.nodes.building, 'Building')
-            - target: alias(stats.gauges.nodepool.provider.vexxhost-ca-ymq-1.nodes.ready, 'Available')
-            - target: alias(stats.gauges.nodepool.provider.vexxhost-ca-ymq-1.nodes.in-use, 'In Use')
-            - target: alias(stats.gauges.nodepool.provider.vexxhost-ca-ymq-1.nodes.used, 'Used')
-            - target: alias(stats.gauges.nodepool.provider.vexxhost-ca-ymq-1.nodes.deleting, 'Deleting')
-            - target: alias(stats.gauges.nodepool.provider.vexxhost-ca-ymq-1.max_servers, 'Max')
-          seriesOverrides:
-            - alias: Max
-              stack: False
+            - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors")
+
+    - title: API Operations
+      showTitle: true
+      height: 250px
+      panels:
+        - title: Create Server
+          type: graph
+          lines: true
+          nullPointMode: connected
+          span: 4
+          yaxes:
+            - format: ms
+              label: Time
+            - show: false
+          targets:
+            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4)
+        - title: Get Server
+          type: graph
+          span: 4
+          lines: true
+          nullPointMode: connected
+          span: 4
+          yaxes:
+            - format: ms
+              label: Time
+            - show: false
+          targets:
+            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4)
+        - title: Delete Server
+          type: graph
+          span: 4
+          lines:
true
+          nullPointMode: connected
+          span: 4
+          yaxes:
+            - format: ms
+              label: Time
+            - show: false
+          targets:
+            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4)
+        - title: List Servers
+          type: graph
+          lines: true
+          nullPointMode: connected
+          span: 4
+          yaxes:
+            - format: ms
+              label: Time
+            - show: false
+          targets:
+            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4)
+        - title: Get Limits
+          type: graph
+          lines: true
+          nullPointMode: connected
+          span: 4
+          yaxes:
+            - format: ms
+              label: Time
+            - show: false
+          targets:
+            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4)
diff --git a/grafana/nodepool.template b/grafana/nodepool.template
new file mode 100644
index 0000000000..c7a48491ba
--- /dev/null
+++ b/grafana/nodepool.template
@@ -0,0 +1,197 @@
+#
+# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh
+#
+
+dashboard:
+  title: 'Nodepool: %PROVIDER%'
+  templating:
+    - name: region
+      includeAll: true
+      multi: true
+      query: stats.gauges.nodepool.provider.%STAT_LIST%
+      refresh: true
+      type: query
+  rows:
+    - title: Description
+      height: 150px
+      panels:
+        - title: Description
+          content: |
+            %PROVIDER% Nodepool Status
+            ==========================
+
+            This dashboard monitors the status of the nodepool environment for %PROVIDER%.
+
+            **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).**
+            If you would like to make changes to this dashboard, please see the template in the `grafana` directory in
+            [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template).
+ + type: text + + - title: Nodes + showTitle: true + height: 150px + panels: + - title: Building + span: 3 + sparkline: + full: true + show: true + targets: + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) + type: singlestat + valueName: current + - title: Ready + span: 3 + sparkline: + full: true + show: true + targets: + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) + type: singlestat + valueName: current + - title: In Use + span: 3 + sparkline: + full: true + show: true + targets: + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) + type: singlestat + valueName: current + - title: Deleting + span: 3 + sparkline: + full: true + show: true + targets: + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) + type: singlestat + valueName: current + + - title: Test Nodes + height: 400px + panels: + - title: Test Node History - $region + type: graph + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false + targets: + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + + - title: Node Launches + showTitle: true + height: 250px + panels: + - title: Ready Node Launch Attempts + type: graph + span: 4 + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false + targets: + - target: 
aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1')
+        - title: Time to Ready
+          type: graph
+          span: 4
+          lines: false
+          bars: true
+          nullPointMode: null as zero
+          yaxes:
+            - label: "time"
+              format: ms
+            - show: false
+          targets:
+            - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5)
+        - title: Error Node Launch Attempts
+          type: graph
+          span: 4
+          lines: false
+          bars: true
+          nullPointMode: null as zero
+          yaxes:
+            - label: "events / min"
+            - show: false
+          targets:
+            - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors")
+
+    - title: API Operations
+      showTitle: true
+      height: 250px
+      panels:
+        - title: Create Server
+          type: graph
+          lines: true
+          nullPointMode: connected
+          span: 4
+          yaxes:
+            - format: ms
+              label: Time
+            - show: false
+          targets:
+            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4)
+        - title: Get Server
+          type: graph
+          span: 4
+          lines: true
+          nullPointMode: connected
+          span: 4
+          yaxes:
+            - format: ms
+              label: Time
+            - show: false
+          targets:
+            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4)
+        - title: Delete Server
+          type: graph
+          span: 4
+          lines: true
+          nullPointMode: connected
+          span: 4
+          yaxes:
+            - format: ms
+              label: Time
+            - show: false
+          targets:
+            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4)
+        - title: List Servers
+          type: graph
+          lines: true
+          nullPointMode: connected
+          span: 4
+          yaxes:
+            - format: ms
+              label: Time
+            - show: false
+          targets:
+            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4)
+        - title: Get Limits
+          type: graph
+          lines: true
+          nullPointMode: connected
+          span: 4
+          yaxes:
+            - format: ms
+              label: Time
+            - show: false
+          targets:
+            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4)
diff --git a/grafana/nodepool.yaml b/grafana/nodepool.yaml deleted file mode 100644 index 3ed09528ce..0000000000 --- a/grafana/nodepool.yaml +++ /dev/null @@ -1,143 +0,0 @@ -dashboard: - title: Nodepool - rows: - - title: Description - height: 100px - panels: - - title: Description - content: | - **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).** - - If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.yaml). - type: text - - title: Nodes - showTitle: true - height: 150px - panels: - - title: Building - span: 2 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.*.nodes.building) - type: singlestat - valueName: current - - title: Ready - span: 2 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.*.nodes.ready) - type: singlestat - valueName: current - - title: In Use - span: 1 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.*.nodes.in-use) - type: singlestat - valueName: current - - title: Used - span: 1 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.*.nodes.used) - type: singlestat - valueName: current - - title: Deleting - span: 2 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.*.nodes.deleting) - type: singlestat - valueName: current - - title: Hold - span: 2 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.*.nodes.hold) - type: singlestat - valueName: current - - title: Failed - span: 2 - sparkline: - full: true - show: true - targets: - - target: sumSeries(stats.gauges.nodepool.provider.*.nodes.failed) - type: singlestat - valueName: current - - title: 
Nodepool Images - showTitle: true - height: 320px - panels: - - title: Building Nodes - span: 3 - targets: - - target: aliasByNode(stats.gauges.nodepool.label.*.nodes.building, 4) - type: graph - - title: Ready Nodes - span: 3 - targets: - - target: aliasByNode(stats.gauges.nodepool.label.*.nodes.ready, 4) - type: graph - - title: In-use Nodes - span: 3 - targets: - - target: aliasByNode(stats.gauges.nodepool.label.*.nodes.in-use, 4) - type: graph - - title: Deleting Nodes - span: 3 - targets: - - target: aliasByNode(stats.gauges.nodepool.label.*.nodes.deleting, 4) - type: graph - - title: Node Launches - showTitle: true - height: 250px - panels: - - title: Ready Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasByNode(summarize(stats_counts.nodepool.launch.provider.*.ready, '1m'), 4) - - title: Error Node Launch Attempts - type: graph - span: 4 - nullPointMode: null as zero - leftYAxisLabel: "events / min" - targets: - - target: aliasByNode(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.*.error.*), '1m'), 4) - - title: Time to Ready - type: graph - span: 4 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: aliasByNode(scale(stats.timers.nodepool.launch.provider.*.ready.mean, '0.001'), 5) - - title: Job Runtimes - showTitle: true - height: 250px - panels: - - title: gate-tempest-dsvm-neutron-full - type: graph - span: 6 - leftYAxisLabel: "time" - y_formats: - - s - - none - targets: - - target: alias(scale(averageSeries(stats.timers.nodepool.job.gate-tempest-dsvm-neutron-full-ubuntu-xenial.master.ubuntu-xenial.runtime.mean), '0.001'), 'Average')