Add OpenMetal to Nodepool and Grafana

This is essentially reverting commits bd15ddc and cb4b99b which were
the final stages of winding down and cleaning up the old InMotion
cloud which OpenMetal has replaced, with the cloud name updated (but
region kept the same) and grafyaml data regenerated. It stops short
of actually booting nodes in the new environment until we have a
chance to spot check things once images get uploaded.

Since this is re-adding diskimages to nl02, I refrained from
including centos-8-stream, which is in the process of being removed,
so that we don't unnecessarily upload images we're not planning to
boot.

Change-Id: If8e9b7105b4c7a13e87ebb4f6c985e821c30a842
This commit is contained in:
Jeremy Stanley 2024-06-13 20:13:23 +00:00
parent cea490f471
commit 28ae631642
4 changed files with 349 additions and 3 deletions

View File

@ -24,3 +24,4 @@ create Linaro 'linaro-*' nodepool-linaro.yaml
create OVH 'ovh-*' nodepool-ovh.yaml
create Vexxhost 'vexxhost-*' nodepool-vexxhost.yaml
create OSUOSL 'osuosl-*' nodepool-osuosl.yaml
create OpenMetal 'openmetal-*' nodepool-openmetal.yaml

View File

@ -0,0 +1,252 @@
#
# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh
#
dashboard:
title: 'Nodepool: OpenMetal'
templating:
- name: region
includeAll: true
multi: true
query: stats.gauges.nodepool.provider.openmetal-*
refresh: 1
type: query
rows:
- title: Description
height: 150px
panels:
- title: Description
content: |
OpenMetal Nodepool Status
==========================
This dashboard monitors the status of the nodepool environment for OpenMetal.
**This dashboard is managed by [Grafyaml](https://docs.openstack.org/infra/system-config/grafyaml.html).**
If you would like to make changes to this dashboard, please see the template in the `grafana` directory in
[project-config](https://opendev.org/openstack/project-config/src/branch/master/grafana/nodepool.template).
type: text
- title: Nodes
showTitle: true
height: 150px
panels:
- title: Building
span: 3
sparkline:
full: true
show: true
targets:
- target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building)
type: singlestat
valueName: current
- title: Ready
span: 3
sparkline:
full: true
show: true
targets:
- target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready)
type: singlestat
valueName: current
- title: In Use
span: 3
sparkline:
full: true
show: true
targets:
- target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use)
type: singlestat
valueName: current
- title: Deleting
span: 3
sparkline:
full: true
show: true
targets:
- target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting)
type: singlestat
valueName: current
- title: Test Nodes
height: 400px
panels:
# Stacked graph of node counts by state, repeated once per selected
# $region value (repeat: region).
- title: Test Node History - $region
type: graph
span: 12
stack: true
repeat: region
minSpan: 4
tooltip:
value_type: individual
yaxes:
- label: "nodes"
- show: false
targets:
- target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building')
- target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available')
- target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use')
- target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used')
- target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting')
- target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max')
# The 'Max' series is excluded from stacking so it is not added on top
# of the per-state series.
seriesOverrides:
- alias: Max
stack: false
- title: Node Launches
showTitle: true
height: 250px
panels:
- title: Ready Node Launch Attempts
type: graph
span: 4
lines: false
bars: true
nullPointMode: null as zero
yaxes:
- label: "events / min"
- show: false
targets:
- target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1')
- title: Time to Ready
type: graph
span: 4
lines: false
bars: true
nullPointMode: null as zero
yaxes:
- label: "time"
format: ms
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5)
- title: Error Node Launch Attempts
type: graph
span: 4
lines: false
bars: true
nullPointMode: null as zero
yaxes:
- label: "events / min"
- show: false
targets:
- target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors")
- title: API Operations
showTitle: true
height: 250px
panels:
- title: POST Server
type: graph
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.compute.POST.servers.*.mean, 4)
# Mean timer for the compute "GET server" nodepool task, per $region,
# displayed in milliseconds.
- title: GET Server
type: graph
lines: true
nullPointMode: connected
# span is declared exactly once; a repeated mapping key is invalid YAML
# and parsers that tolerate it silently keep only the last value.
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.compute.GET.server.*.mean, 4)
# Mean timer for the compute "DELETE server" nodepool task, per $region,
# displayed in milliseconds.
- title: DELETE Server
type: graph
lines: true
nullPointMode: connected
# span is declared exactly once; a repeated mapping key is invalid YAML
# and parsers that tolerate it silently keep only the last value.
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.compute.DELETE.server.*.mean, 4)
- title: GET Servers Details
type: graph
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.compute.GET.servers_detail.*.mean, 4)
- title: GET Limits
type: graph
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.compute.GET.limits.*.mean, 4)
- title: GET Flavors
type: graph
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.compute.GET.flavors_detail.*.mean, 4)
- title: GET network floatingips
type: graph
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.network.GET.floatingips.*.mean, 4)
- title: GET networks
type: graph
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.network.GET.networks.*.mean, 4)
- title: GET network ports
type: graph
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.network.GET.ports.*.mean, 4)
- title: GET network subnets
type: graph
lines: true
nullPointMode: connected
span: 4
yaxes:
- format: ms
label: Time
- show: false
targets:
- target: aliasByNode(stats.timers.nodepool.task.$region.network.GET.subnets.*.mean, 4)

View File

@ -15,7 +15,6 @@ zookeeper-servers:
# nl01.o.o will manage min-ready of our labels. If nl01.o.o is ever disabled,
# another launcher will need to assume this logic.
labels:
- name: centos-8-stream
- name: centos-9-stream
- name: debian-bookworm
- name: debian-bullseye
@ -29,10 +28,96 @@ labels:
- name: ubuntu-xenial
- name: openEuler-22-03-LTS
providers: []
providers:
- name: openmetal-iad3
region-name: 'iad3'
# Changing this cloud value impacts things like mirror names in jobs.
# It should be changed carefully.
cloud: openmetal
boot-timeout: 120
launch-timeout: 300
rate: 0.01
diskimages: &provider_diskimages
- name: centos-9-stream
config-drive: true
- name: debian-bookworm
config-drive: true
- name: debian-bullseye
config-drive: true
- name: gentoo-17-0-systemd
config-drive: true
- name: rockylinux-8
config-drive: true
- name: rockylinux-9
config-drive: true
- name: ubuntu-bionic
config-drive: true
- name: ubuntu-focal
config-drive: true
- name: ubuntu-jammy
config-drive: true
- name: ubuntu-noble
config-drive: true
- name: ubuntu-xenial
config-drive: true
- name: openEuler-22-03-LTS
config-drive: true
# Single pool for the openmetal-iad3 provider. Every label below uses the
# same flavor, min-ram and key-name and maps to the diskimage of the
# same name.
pools:
- name: main
# Deliberately 0 for now: per the commit message, this change stops
# short of booting nodes here until uploaded images have been spot
# checked.
max-servers: 0
# NOTE(review): the provider diskimages list includes rockylinux-8 and
# rockylinux-9, but no label below boots them - confirm this is
# intentional before raising max-servers.
labels:
- name: centos-9-stream
min-ram: 8000
flavor-name: 'opendev-zuul'
diskimage: centos-9-stream
key-name: infra-root-keys-2024-04-08
- name: debian-bookworm
min-ram: 8000
flavor-name: 'opendev-zuul'
diskimage: debian-bookworm
key-name: infra-root-keys-2024-04-08
- name: debian-bullseye
min-ram: 8000
flavor-name: 'opendev-zuul'
diskimage: debian-bullseye
key-name: infra-root-keys-2024-04-08
- name: gentoo-17-0-systemd
min-ram: 8000
flavor-name: 'opendev-zuul'
diskimage: gentoo-17-0-systemd
key-name: infra-root-keys-2024-04-08
- name: ubuntu-bionic
min-ram: 8000
flavor-name: 'opendev-zuul'
diskimage: ubuntu-bionic
key-name: infra-root-keys-2024-04-08
- name: ubuntu-focal
min-ram: 8000
flavor-name: 'opendev-zuul'
diskimage: ubuntu-focal
key-name: infra-root-keys-2024-04-08
- name: ubuntu-jammy
min-ram: 8000
flavor-name: 'opendev-zuul'
diskimage: ubuntu-jammy
key-name: infra-root-keys-2024-04-08
- name: ubuntu-noble
min-ram: 8000
flavor-name: 'opendev-zuul'
diskimage: ubuntu-noble
key-name: infra-root-keys-2024-04-08
- name: ubuntu-xenial
min-ram: 8000
flavor-name: 'opendev-zuul'
diskimage: ubuntu-xenial
key-name: infra-root-keys-2024-04-08
- name: openEuler-22-03-LTS
min-ram: 8000
flavor-name: 'opendev-zuul'
diskimage: openEuler-22-03-LTS
key-name: infra-root-keys-2024-04-08
diskimages:
- name: centos-8-stream
- name: centos-9-stream
- name: debian-bookworm
- name: debian-bullseye

View File

@ -120,6 +120,14 @@ providers:
image-upload-timeout: 21600
diskimages: *provider_diskimages
# Builder-side entry for the OpenMetal iad3 region; it reuses the shared
# provider_diskimages list via the YAML alias below.
- name: openmetal-iad3
region-name: 'iad3'
# NOTE(review): the launcher config for this provider warns that changing
# the cloud value impacts things like mirror names in jobs - keep the
# two definitions in sync.
cloud: openmetal
rate: 0.25
# 6 hours
image-upload-timeout: 21600
diskimages: *provider_diskimages
diskimages:
- name: base
abstract: True