project-config/grafana/nodepool-infracloud.yaml
David Moreau-Simard a281b06c27
Update Nodepool graphite metric names
Following the update to Zuul v3 some things changed:
- nodes.delete became nodes.deleting
- nodes.used became nodes.in-use but nodes.used is still relevant
  as it's the status between 'in-use' and 'deleting'
- Add a panel for displaying failed nodes

Change-Id: I240d082115bd9078e45984d8fcff212a4e40e842
Depends-On: I6a89752d74ed7424267c3af3937ad01fb4bb8f86
2017-10-02 10:31:52 -04:00

175 lines
6.6 KiB
YAML

# Grafyaml definition of the "Nodepool: Infra Cloud" Grafana dashboard.
# Every series is read from Graphite under the stats.gauges.nodepool,
# stats.timers.nodepool or stats_counts.nodepool prefixes.
dashboard:
  title: 'Nodepool: Infra Cloud'
  templating:
    # $provider is populated from the Graphite query below. "multi" and
    # "includeAll" allow several (or all) matching providers to be selected.
    - name: provider
      includeAll: true
      multi: true
      query: stats.gauges.nodepool.provider.infracloud-*
      refresh: true
      type: query
  rows:
    # Static text panel telling editors where this dashboard is maintained.
    - title: Description
      height: 100px
      panels:
        - title: Description
          content: |
            **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).**
            If you would like to make changes to this dashboard, please see the grafana directory in [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool-infracloud.yaml).
          type: text

    # One singlestat per node state. sumSeries() folds every selected
    # $provider into a single value, so each panel shows a combined count.
    - title: Nodes
      showTitle: true
      height: 150px
      panels:
        - title: Building
          span: 3
          sparkline:
            full: true
            show: true
          targets:
            - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.building)
          type: singlestat
          valueName: current
        - title: Ready
          span: 3
          sparkline:
            full: true
            show: true
          targets:
            - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.ready)
          type: singlestat
          valueName: current
        - title: In Use
          span: 3
          sparkline:
            full: true
            show: true
          targets:
            - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.in-use)
          type: singlestat
          valueName: current
        - title: Deleting
          span: 3
          sparkline:
            full: true
            show: true
          targets:
            - target: sumSeries(stats.gauges.nodepool.provider.$provider.nodes.deleting)
          type: singlestat
          valueName: current

    # Mean of the per-provider nodepool task timers. aliasByNode(..., 4) uses
    # the provider path segment as the legend and aliasSub() strips its
    # 'infracloud-' prefix. Values are scaled by 0.001 and drawn with the 's'
    # axis format (presumably milliseconds -> seconds; confirm the timer unit).
    - title: API Operations
      showTitle: true
      height: 250px
      panels:
        - title: Create Server
          type: graph
          span: 4
          leftYAxisLabel: "time"
          y_formats:
            - s
            - none
          targets:
            - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputePostServers.mean, '0.001'), 4), 'infracloud-', '')
        - title: Get Server
          type: graph
          span: 4
          leftYAxisLabel: "time"
          y_formats:
            - s
            - none
          targets:
            - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServersDetail.mean, '0.001'), 4), 'infracloud-', '')
        - title: Delete Server
          type: graph
          span: 4
          leftYAxisLabel: "time"
          y_formats:
            - s
            - none
          targets:
            - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeDeleteServers.mean, '0.001'), 4), 'infracloud-', '')
        - title: List Servers
          type: graph
          span: 4
          leftYAxisLabel: "time"
          y_formats:
            - s
            - none
          targets:
            - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.task.$provider.ComputeGetServers.mean, '0.001'), 4), 'infracloud-', '')

    # Launch counters summarised into 1-minute buckets, launch timing, and a
    # stacked per-state node breakdown for each of the two named providers.
    - title: Node Launches
      showTitle: true
      height: 250px
      panels:
        - title: Ready Node Launch Attempts
          type: graph
          span: 4
          # Quoted so the value is unambiguously the string, never a YAML null.
          nullPointMode: 'null as zero'
          leftYAxisLabel: "events / min"
          targets:
            - target: aliasSub(aliasByNode(summarize(stats_counts.nodepool.launch.provider.$provider.ready, '1m'), 4), 'infracloud-', '')
        - title: Error Node Launch Attempts
          type: graph
          span: 4
          nullPointMode: 'null as zero'
          leftYAxisLabel: "events / min"
          targets:
            # sumSeries() adds together every series under error.* before
            # the result is summarised per minute.
            - target: aliasSub(aliasByNode(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$provider.error.*), '1m'), 4), 'infracloud-', '')
        - title: Time to Ready
          type: graph
          span: 4
          leftYAxisLabel: "time"
          y_formats:
            - s
            - none
          targets:
            # The provider name is path segment 5 here (not 4) because this
            # metric path has an extra 'provider' segment after 'launch'.
            - target: aliasSub(aliasByNode(scale(stats.timers.nodepool.launch.provider.$provider.ready.mean, '0.001'), 5), 'infracloud-', '')
        - title: Test Nodes (Chocolate)
          type: graph
          span: 4
          stack: true
          tooltip:
            value_type: individual
          leftYAxisLabel: "nodes"
          targets:
            - target: alias(stats.gauges.nodepool.provider.infracloud-chocolate.nodes.building, 'Building')
            - target: alias(stats.gauges.nodepool.provider.infracloud-chocolate.nodes.ready, 'Available')
            - target: alias(stats.gauges.nodepool.provider.infracloud-chocolate.nodes.in-use, 'In Use')
            - target: alias(stats.gauges.nodepool.provider.infracloud-chocolate.nodes.used, 'Used')
            - target: alias(stats.gauges.nodepool.provider.infracloud-chocolate.nodes.deleting, 'Deleting')
            - target: alias(stats.gauges.nodepool.provider.infracloud-chocolate.max_servers, 'Max')
          seriesOverrides:
            # 'Max' is excluded from the stack so it is not added on top of
            # the per-state series.
            - alias: Max
              stack: false
        - title: Test Nodes (Vanilla)
          type: graph
          span: 4
          stack: true
          tooltip:
            value_type: individual
          leftYAxisLabel: "nodes"
          targets:
            - target: alias(stats.gauges.nodepool.provider.infracloud-vanilla.nodes.building, 'Building')
            - target: alias(stats.gauges.nodepool.provider.infracloud-vanilla.nodes.ready, 'Available')
            - target: alias(stats.gauges.nodepool.provider.infracloud-vanilla.nodes.in-use, 'In Use')
            - target: alias(stats.gauges.nodepool.provider.infracloud-vanilla.nodes.used, 'Used')
            - target: alias(stats.gauges.nodepool.provider.infracloud-vanilla.nodes.deleting, 'Deleting')
            - target: alias(stats.gauges.nodepool.provider.infracloud-vanilla.max_servers, 'Max')
          seriesOverrides:
            # 'Max' is excluded from the stack so it is not added on top of
            # the per-state series.
            - alias: Max
              stack: false

    # Mean runtime timer of a single named job, one series per provider.
    - title: Job Runtimes
      showTitle: true
      height: 250px
      panels:
        - title: gate-tempest-dsvm-neutron-full
          type: graph
          span: 6
          leftYAxisLabel: "time"
          y_formats:
            - s
            - none
          targets:
            - target: alias(scale(stats.timers.nodepool.job.gate-tempest-dsvm-neutron-full-ubuntu-xenial.master.ubuntu-xenial.infracloud-chocolate.runtime.mean, '0.001'), 'chocolate')
            - target: alias(scale(stats.timers.nodepool.job.gate-tempest-dsvm-neutron-full-ubuntu-xenial.master.ubuntu-xenial.infracloud-vanilla.runtime.mean, '0.001'), 'vanilla')