From 8e7b15905bb72839b58ff5725c7f4c6a1c00165a Mon Sep 17 00:00:00 2001 From: Jeremy Stanley Date: Fri, 6 Sep 2024 12:43:59 +0000 Subject: [PATCH] Add RackSpace Flex Nodepool dashboard in Grafana We're starting to increase utilization in this provider, so it'll be useful to have easier ways to monitor things there. Change-Id: I58d6b301c314c95b3e1a7abd0d4404a42f3e7e86 --- grafana/create-nodepool.sh | 1 + grafana/nodepool-raxflex.yaml | 252 ++++++++++++++++++++++++++++++++++ 2 files changed, 253 insertions(+) create mode 100644 grafana/nodepool-raxflex.yaml diff --git a/grafana/create-nodepool.sh b/grafana/create-nodepool.sh index 7389e0f7bb..b1a988546b 100755 --- a/grafana/create-nodepool.sh +++ b/grafana/create-nodepool.sh @@ -20,6 +20,7 @@ function create { } create Rackspace 'rax-*' nodepool-rax.yaml +create Rackspace-Flex 'raxflex-*' nodepool-raxflex.yaml create OVH 'ovh-*' nodepool-ovh.yaml create Vexxhost 'vexxhost-*' nodepool-vexxhost.yaml create OSUOSL 'osuosl-*' nodepool-osuosl.yaml diff --git a/grafana/nodepool-raxflex.yaml b/grafana/nodepool-raxflex.yaml new file mode 100644 index 0000000000..2972d5e3a7 --- /dev/null +++ b/grafana/nodepool-raxflex.yaml @@ -0,0 +1,252 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# + +dashboard: + title: 'Nodepool: Rackspace-Flex' + templating: + - name: region + includeAll: true + multi: true + query: stats.gauges.nodepool.provider.raxflex-* + refresh: 1 + type: query + rows: + - title: Description + height: 150px + panels: + - title: Description + content: | + Rackspace-Flex Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for Rackspace-Flex. + + **This dashboard is managed by [Grafyaml](https://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://opendev.org/openstack/project-config/src/branch/master/grafana/nodepool.template). + + type: text + + - title: Nodes + showTitle: true + height: 150px + panels: + - title: Building + span: 3 + sparkline: + full: true + show: true + targets: + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) + type: singlestat + valueName: current + - title: Ready + span: 3 + sparkline: + full: true + show: true + targets: + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) + type: singlestat + valueName: current + - title: In Use + span: 3 + sparkline: + full: true + show: true + targets: + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) + type: singlestat + valueName: current + - title: Deleting + span: 3 + sparkline: + full: true + show: true + targets: + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) + type: singlestat + valueName: current + + - title: Test Nodes + height: 400px + panels: + - title: Test Node History - $region + type: graph + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false + targets: + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + + - title: Node Launches + showTitle: true + height: 250px + panels: + - title: Ready Node Launch Attempts + type: graph + span: 4 + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false + targets: + - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') + - title: Time to Ready + type: graph + span: 4 + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts + type: graph + span: 4 + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false + targets: + - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations + showTitle: true + height: 250px + panels: + - title: POST Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.compute.POST.servers.*.mean, 4) + - title: GET Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.compute.GET.server.*.mean, 4) + - title: DELETE Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.compute.DELETE.server.*.mean, 4) + - title: GET Servers Details + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.compute.GET.servers_detail.*.mean, 4) + - title: GET Limits + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.compute.GET.limits.*.mean, 4) + - title: GET Flavors + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.compute.GET.flavors_detail.*.mean, 4) + - title: GET network floatingips + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.network.GET.floatingips.*.mean, 4) + - title: GET networks + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.network.GET.networks.*.mean, 4) + - title: GET network ports + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.network.GET.ports.*.mean, 4) + - title: GET network subnets + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.network.GET.subnets.*.mean, 4)