project-config/grafana/nodepool-osuosl.yaml
Clark Boylan 4ef6ea4d10 Fix nodepool API ops Grafana graphs
openstacksdk statsd records for API operations are not response code
specific. This change adds a glob to the statsd record path to include
all return code response metrics in our API graphs. I believe this
change may have come with the openstacksdk 0.103.0 update. We also
update paths for servers POST, server details, and flavor details as
they have changed.

Note the network info is empty which is why we don't get graphs for
them, but the paths appear correct. I think this may be beacuse we don't
need to query network info in any of our clouds currently.

While we are at it we stop updating the airship and inap cloud graphs
since those should be cleaned up and this keeps the review overhead
smaller.

Change-Id: I5a6b80118afaf3b7782a1d1c131787f208583799
2022-12-08 15:42:45 -08:00

253 lines
7.8 KiB
YAML

#
# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh
#
dashboard:
title: 'Nodepool: OSUOSL'
templating:
- name: region
includeAll: true
multi: true
query: stats.gauges.nodepool.provider.osuosl-*
refresh: 1
type: query
rows:
- title: Description
height: 150px
panels:
- title: Description
content: |
OSUOSL Nodepool Status
==========================
This dashboard monitors the status of the nodepool environment for OSUOSL.
**This dashboard is managed by [Grafyaml](https://docs.openstack.org/infra/system-config/grafyaml.html).**
If you would like to make changes to this dashboard, please see the template in the `grafana` directory in
[project-config](https://opendev.org/openstack/project-config/src/branch/master/grafana/nodepool.template).
type: text
- title: Nodes
showTitle: true
height: 150px
panels:
- title: Building
span: 3
sparkline:
full: true
show: true
targets:
- target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building)
type: singlestat
valueName: current
- title: Ready
span: 3
sparkline:
full: true
show: true
targets:
- target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready)
type: singlestat
valueName: current
- title: In Use
span: 3
sparkline:
full: true
show: true
targets:
- target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use)
type: singlestat
valueName: current
- title: Deleting
span: 3
sparkline:
full: true
show: true
targets:
- target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting)
type: singlestat
valueName: current
- title: Test Nodes
height: 400px
panels:
- title: Test Node History - $region
type: graph
span: 12
stack: true
repeat: region
minSpan: 4
tooltip:
value_type: individual
yaxes:
- label: "nodes"
- show: false
targets:
- target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building')
- target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available')
- target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use')
- target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used')
- target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting')
- target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max')
seriesOverrides:
- alias: Max
stack: False
- title: Node Launches
showTitle: true
height: 250px
panels:
- title: Ready Node Launch Attempts
type: graph
span: 4
lines: false
bars: true
nullPointMode: null as zero
yaxes:
- label: "events / min"
- show: false
targets:
- target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1')
- title: Time to Ready
type: graph
span: 4
lines: false
bars: true
nullPointMode: null as zero
yaxes:
- label: "time"
format: ms
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5)
- title: Error Node Launch Attempts
type: graph
span: 4
lines: false
bars: true
nullPointMode: null as zero
yaxes:
- label: "events / min"
- show: false
targets:
- target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors")
- title: API Operations
showTitle: true
height: 250px
panels:
- title: POST Server
type: graph
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.compute.POST.servers.*.mean, 4)
- title: GET Server
type: graph
span: 4
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.compute.GET.server.*.mean, 4)
- title: DELETE Server
type: graph
span: 4
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.compute.DELETE.server.*.mean, 4)
- title: GET Servers Details
type: graph
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.compute.GET.servers_detail.*.mean, 4)
- title: GET Limits
type: graph
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.compute.GET.limits.*.mean, 4)
- title: GET Flavors
type: graph
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.compute.GET.flavors_detail.*.mean, 4)
- title: GET network floatingips
type: graph
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.network.GET.floatingips.*.mean, 4)
- title: GET networks
type: graph
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.network.GET.networks.*.mean, 4)
- title: GET network ports
type: graph
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.network.GET.ports.*.mean, 4)
- title: GET network subnets
type: graph
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.network.GET.subnets.*.mean, 4)