d34d7ad763
All of these dashboards are the same, and most have copied the same issues from one another. This makes updating anything a massive pain. This implements a single dashboard template with a small script to create individual dashboards for each provider and its regions. I have included a range of fixes. The y-axis format has changed in later versions of Grafana. The API time tracking is no longer scaled; we just tell Grafana it is in ms and it displays it correctly. The test nodes history graph is moved to the top, as it is probably the most interesting graph (note this splits itself out per region if multiple regions are selected). Values for "null as zero" are consistently set. Various formatting fixes for the labels are included. Change-Id: I5fbffaec3c82aa1fce0947f771de67edd15f7dfc
198 lines
6.1 KiB
YAML
198 lines
6.1 KiB
YAML
#
# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh
#
# NOTE(review): per the note above, this file appears to be produced from a
# template, so any change made here should be mirrored in that template or
# it may be overwritten the next time the script runs.
#

dashboard:
  title: 'Nodepool: OVH'

  # "region" is a multi-select template variable filled from the Graphite
  # query below; every panel target references it as $region.
  templating:
    - name: region
      includeAll: true
      multi: true
      query: stats.gauges.nodepool.provider.ovh-*
      refresh: true
      type: query

  rows:
    # Static text panel describing the dashboard and where to edit it.
    - title: Description
      height: 150px
      panels:
        - title: Description
          content: |
            OVH Nodepool Status
            ==========================

            This dashboard monitors the status of the nodepool environment for OVH.

            **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).**
            If you would like to make changes to this dashboard, please see the template in the `grafana` directory in
            [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template).

          type: text

    # Current node counts per state, summed over the selected region(s).
    - title: Nodes
      showTitle: true
      height: 150px
      panels:
        - title: Building
          span: 3
          sparkline:
            full: true
            show: true
          targets:
            - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building)
          type: singlestat
          valueName: current
        - title: Ready
          span: 3
          sparkline:
            full: true
            show: true
          targets:
            - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready)
          type: singlestat
          valueName: current
        - title: In Use
          span: 3
          sparkline:
            full: true
            show: true
          targets:
            - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use)
          type: singlestat
          valueName: current
        - title: Deleting
          span: 3
          sparkline:
            full: true
            show: true
          targets:
            - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting)
          type: singlestat
          valueName: current

    # Stacked history of node states; "repeat: region" is set so the panel
    # is repeated for the values of the region variable.
    - title: Test Nodes
      height: 400px
      panels:
        - title: Test Node History - $region
          type: graph
          span: 12
          stack: true
          repeat: region
          minSpan: 4
          tooltip:
            value_type: individual
          yaxes:
            - label: "nodes"
            - show: false
          targets:
            - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building')
            - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available')
            - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use')
            - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used')
            - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting')
            - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max')
          seriesOverrides:
            # "Max" is excluded from the stack so it is drawn on its own
            # instead of being added on top of the node-state series.
            - alias: Max
              stack: false

    # Launch outcomes, summarized into one-minute buckets, and launch time.
    - title: Node Launches
      showTitle: true
      height: 250px
      panels:
        - title: Ready Node Launch Attempts
          type: graph
          span: 4
          lines: false
          bars: true
          nullPointMode: null as zero
          yaxes:
            - label: "events / min"
            - show: false
          targets:
            - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1')
        - title: Time to Ready
          type: graph
          span: 4
          lines: false
          bars: true
          nullPointMode: null as zero
          yaxes:
            - label: "time"
              format: ms
            - show: false
          targets:
            - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5)
        - title: Error Node Launch Attempts
          type: graph
          span: 4
          lines: false
          bars: true
          nullPointMode: null as zero
          yaxes:
            - label: "events / min"
            - show: false
          targets:
            - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors")

    # Mean duration of individual API calls; the y-axis format is "ms".
    #
    # Each panel must query its own task timer. Previously every panel here
    # queried ComputePostServers (the Create Server timer), so all five
    # graphs showed the same series.
    # NOTE(review): the task names other than ComputePostServers follow the
    # same Compute<Verb><Resource> pattern but are not shown elsewhere in
    # this file - confirm they exist under stats.timers.nodepool.task.* in
    # Graphite.
    - title: API Operations
      showTitle: true
      height: 250px
      panels:
        - title: Create Server
          type: graph
          span: 4
          lines: true
          nullPointMode: connected
          yaxes:
            - format: ms
              label: Time
            - show: false
          targets:
            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4)
        - title: Get Server
          type: graph
          span: 4
          lines: true
          nullPointMode: connected
          yaxes:
            - format: ms
              label: Time
            - show: false
          targets:
            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4)
        - title: Delete Server
          type: graph
          span: 4
          lines: true
          nullPointMode: connected
          yaxes:
            - format: ms
              label: Time
            - show: false
          targets:
            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4)
        - title: List Servers
          type: graph
          span: 4
          lines: true
          nullPointMode: connected
          yaxes:
            - format: ms
              label: Time
            - show: false
          targets:
            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4)
        - title: Get Limits
          type: graph
          span: 4
          lines: true
          nullPointMode: connected
          yaxes:
            - format: ms
              label: Time
            - show: false
          targets:
            - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4)