project-config/grafana/nodepool-rax.yaml
David Moreau-Simard a281b06c27
Update Nodepool graphite metric names
Following the update to Zuul v3 some things changed:
- nodes.delete became nodes.deleting
- nodes.used became nodes.in-use but nodes.used is still relevant
  as it's the status between 'in-use' and 'deleting'
- Add a panel for displaying failed nodes

Change-Id: I240d082115bd9078e45984d8fcff212a4e40e842
Depends-On: I6a89752d74ed7424267c3af3937ad01fb4bb8f86
2017-10-02 10:31:52 -04:00

200 lines
8.5 KiB
YAML

dashboard:
title: 'Nodepool: Rackspace'
rows:
- title: Description
height: 100px
panels:
- title: Description
content: |
**This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).**
If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-rax.yaml).
type: text
- title: Nodes
showTitle: true
height: 150px
panels:
- title: Building
span: 3
sparkline:
full: true
show: true
targets:
- target: sumSeries(stats.gauges.nodepool.provider.rax-*.nodes.building)
type: singlestat
valueName: current
- title: Ready
span: 3
sparkline:
full: true
show: true
targets:
- target: sumSeries(stats.gauges.nodepool.provider.rax-*.nodes.ready)
type: singlestat
valueName: current
- title: In Use
span: 3
sparkline:
full: true
show: true
targets:
- target: sumSeries(stats.gauges.nodepool.provider.rax-*.nodes.in-use)
type: singlestat
valueName: current
- title: Deleting
span: 3
sparkline:
full: true
show: true
targets:
- target: sumSeries(stats.gauges.nodepool.provider.rax-*.nodes.deleting)
type: singlestat
valueName: current
- title: API Operations
showTitle: true
height: 250px
panels:
- title: Create Server
type: graph
span: 4
leftYAxisLabel: "time"
y_formats:
- s
- none
targets:
- target: alias(scale(stats.timers.nodepool.task.rax-dfw.ComputePostServers.mean, '0.001'), 'DFW')
- target: alias(scale(stats.timers.nodepool.task.rax-iad.ComputePostServers.mean, '0.001'), 'IAD')
- target: alias(scale(stats.timers.nodepool.task.rax-ord.ComputePostServers.mean, '0.001'), 'ORD')
- title: Get Server
type: graph
span: 4
leftYAxisLabel: "time"
y_formats:
- s
- none
targets:
- target: alias(scale(stats.timers.nodepool.task.rax-dfw.ComputeGetServersDetail.mean, '0.001'), 'DFW')
- target: alias(scale(stats.timers.nodepool.task.rax-iad.ComputeGetServersDetail.mean, '0.001'), 'IAD')
- target: alias(scale(stats.timers.nodepool.task.rax-ord.ComputeGetServersDetail.mean, '0.001'), 'ORD')
- title: Delete Server
type: graph
span: 4
leftYAxisLabel: "time"
y_formats:
- s
- none
targets:
- target: alias(scale(stats.timers.nodepool.task.rax-dfw.ComputeDeleteServers.mean, '0.001'), 'DFW')
- target: alias(scale(stats.timers.nodepool.task.rax-iad.ComputeDeleteServers.mean, '0.001'), 'IAD')
- target: alias(scale(stats.timers.nodepool.task.rax-ord.ComputeDeleteServers.mean, '0.001'), 'ORD')
- title: List Servers
type: graph
span: 4
leftYAxisLabel: "time"
y_formats:
- s
- none
targets:
- target: alias(scale(stats.timers.nodepool.task.rax-dfw.ComputeGetServers.mean, '0.001'), 'DFW')
- target: alias(scale(stats.timers.nodepool.task.rax-iad.ComputeGetServers.mean, '0.001'), 'IAD')
- target: alias(scale(stats.timers.nodepool.task.rax-ord.ComputeGetServers.mean, '0.001'), 'ORD')
- title: Node Launches
showTitle: true
height: 250px
panels:
- title: Ready Node Launch Attempts
type: graph
span: 4
nullPointMode: null as zero
leftYAxisLabel: "events / min"
targets:
- target: alias(smartSummarize(stats_counts.nodepool.launch.provider.rax-dfw.ready, '1m'), 'DFW')
- target: alias(smartSummarize(stats_counts.nodepool.launch.provider.rax-iad.ready, '1m'), 'IAD')
- target: alias(smartSummarize(stats_counts.nodepool.launch.provider.rax-ord.ready, '1m'), 'ORD')
- title: Error Node Launch Attempts
type: graph
span: 4
nullPointMode: null as zero
leftYAxisLabel: "events / min"
targets:
- target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.rax-dfw.error.*), '1m'), 'DFW')
- target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.rax-iad.error.*), '1m'), 'IAD')
- target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.rax-ord.error.*), '1m'), 'ORD')
- title: Time to Ready
type: graph
span: 4
leftYAxisLabel: "time"
y_formats:
- s
- none
targets:
- target: alias(scale(stats.timers.nodepool.launch.provider.rax-dfw.ready.mean, '0.001'), 'DFW')
- target: alias(scale(stats.timers.nodepool.launch.provider.rax-iad.ready.mean, '0.001'), 'IAD')
- target: alias(scale(stats.timers.nodepool.launch.provider.rax-ord.ready.mean, '0.001'), 'ORD')
- title: Test Nodes (DFW)
type: graph
span: 4
stack: true
tooltip:
value_type: individual
leftYAxisLabel: "nodes"
targets:
- target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.building, 'Building')
- target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.ready, 'Available')
- target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.in-use, 'In Use')
- target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.used, 'Used')
- target: alias(stats.gauges.nodepool.provider.rax-dfw.nodes.deleting, 'Deleting')
- target: alias(stats.gauges.nodepool.provider.rax-dfw.max_servers, 'Max')
seriesOverrides:
- alias: Max
stack: False
- title: Test Nodes (IAD)
type: graph
span: 4
stack: true
tooltip:
value_type: individual
leftYAxisLabel: "nodes"
targets:
- target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.building, 'Building')
- target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.ready, 'Available')
- target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.in-use, 'In Use')
- target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.used, 'Used')
- target: alias(stats.gauges.nodepool.provider.rax-iad.nodes.deleting, 'Deleting')
- target: alias(stats.gauges.nodepool.provider.rax-iad.max_servers, 'Max')
seriesOverrides:
- alias: Max
stack: False
- title: Test Nodes (ORD)
type: graph
span: 4
stack: true
tooltip:
value_type: individual
leftYAxisLabel: "nodes"
targets:
- target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.building, 'Building')
- target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.ready, 'Available')
- target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.in-use, 'In Use')
- target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.used, 'Used')
- target: alias(stats.gauges.nodepool.provider.rax-ord.nodes.deleting, 'Deleting')
- target: alias(stats.gauges.nodepool.provider.rax-ord.max_servers, 'Max')
seriesOverrides:
- alias: Max
stack: False
- title: Job Runtimes
showTitle: true
height: 250px
panels:
- title: gate-tempest-dsvm-neutron-full
type: graph
span: 6
leftYAxisLabel: "time"
y_formats:
- s
- none
targets:
- target: alias(scale(stats.timers.nodepool.job.gate-tempest-dsvm-neutron-full-ubuntu-xenial.master.ubuntu-xenial.rax-dfw.runtime.mean, '0.001'), 'DFW')
- target: alias(scale(stats.timers.nodepool.job.gate-tempest-dsvm-neutron-full-ubuntu-xenial.master.ubuntu-xenial.rax-iad.runtime.mean, '0.001'), 'IAD')
- target: alias(scale(stats.timers.nodepool.job.gate-tempest-dsvm-neutron-full-ubuntu-xenial.master.ubuntu-xenial.rax-ord.runtime.mean, '0.001'), 'ORD')