From 28ae63164225a914a89cc7a057f422d7b9ca1685 Mon Sep 17 00:00:00 2001 From: Jeremy Stanley Date: Thu, 13 Jun 2024 20:13:23 +0000 Subject: [PATCH] Add OpenMetal to Nodepool and Grafana This is essentially reverting commits bd15ddc and cb4b99b which were the final stages of winding down and cleaning up the old InMotion cloud which OpenMetal has replaced, with the cloud name updated (but region kept the same) and grafyaml data regenerated. It stops short of actually booting nodes in the new environment until we have a chance to spot check things once images get uploaded. Since this is re-adding diskimages back to nl02, I refrained from including centos-8-stream which is in the process of being removed, so that we don't unnecessarily upload images we're not planning to boot. Change-Id: If8e9b7105b4c7a13e87ebb4f6c985e821c30a842 --- grafana/create-nodepool.sh | 1 + grafana/nodepool-openmetal.yaml | 252 ++++++++++++++++++++++++++++++++ nodepool/nl02.opendev.org.yaml | 91 +++++++++++- nodepool/nodepool.yaml | 8 + 4 files changed, 349 insertions(+), 3 deletions(-) create mode 100644 grafana/nodepool-openmetal.yaml diff --git a/grafana/create-nodepool.sh b/grafana/create-nodepool.sh index d839fed1dc..09d694f3ac 100755 --- a/grafana/create-nodepool.sh +++ b/grafana/create-nodepool.sh @@ -24,3 +24,4 @@ create Linaro 'linaro-*' nodepool-linaro.yaml create OVH 'ovh-*' nodepool-ovh.yaml create Vexxhost 'vexxhost-*' nodepool-vexxhost.yaml create OSUOSL 'osuosl-*' nodepool-osuosl.yaml +create OpenMetal 'openmetal-*' nodepool-openmetal.yaml diff --git a/grafana/nodepool-openmetal.yaml b/grafana/nodepool-openmetal.yaml new file mode 100644 index 0000000000..f4dccbfe52 --- /dev/null +++ b/grafana/nodepool-openmetal.yaml @@ -0,0 +1,252 @@ +# +# NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh +# + +dashboard: + title: 'Nodepool: OpenMetal' + templating: + - name: region + includeAll: true + multi: true + query: stats.gauges.nodepool.provider.openmetal-* + refresh: 1 + type:
query + rows: + - title: Description + height: 150px + panels: + - title: Description + content: | + OpenMetal Nodepool Status + ========================== + + This dashboard monitors the status of the nodepool environment for OpenMetal. + + **This dashboard is managed by [Grafyaml](https://docs.openstack.org/infra/system-config/grafyaml.html).** + If you would like to make changes to this dashboard, please see the template in the `grafana` directory in + [project-config](https://opendev.org/openstack/project-config/src/branch/master/grafana/nodepool.template). + + type: text + + - title: Nodes + showTitle: true + height: 150px + panels: + - title: Building + span: 3 + sparkline: + full: true + show: true + targets: + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building) + type: singlestat + valueName: current + - title: Ready + span: 3 + sparkline: + full: true + show: true + targets: + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready) + type: singlestat + valueName: current + - title: In Use + span: 3 + sparkline: + full: true + show: true + targets: + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use) + type: singlestat + valueName: current + - title: Deleting + span: 3 + sparkline: + full: true + show: true + targets: + - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting) + type: singlestat + valueName: current + + - title: Test Nodes + height: 400px + panels: + - title: Test Node History - $region + type: graph + span: 12 + stack: true + repeat: region + minSpan: 4 + tooltip: + value_type: individual + yaxes: + - label: "nodes" + - show: false + targets: + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use') + - target: 
alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting') + - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max') + seriesOverrides: + - alias: Max + stack: False + + + - title: Node Launches + showTitle: true + height: 250px + panels: + - title: Ready Node Launch Attempts + type: graph + span: 4 + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false + targets: + - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1') + - title: Time to Ready + type: graph + span: 4 + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "time" + format: ms + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5) + - title: Error Node Launch Attempts + type: graph + span: 4 + lines: false + bars: true + nullPointMode: null as zero + yaxes: + - label: "events / min" + - show: false + targets: + - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors") + + - title: API Operations + showTitle: true + height: 250px + panels: + - title: POST Server + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.compute.POST.servers.*.mean, 4) + - title: GET Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.compute.GET.server.*.mean, 4) + - title: DELETE Server + type: graph + span: 4 + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false 
+ targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.compute.DELETE.server.*.mean, 4) + - title: GET Servers Details + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.compute.GET.servers_detail.*.mean, 4) + - title: GET Limits + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.compute.GET.limits.*.mean, 4) + - title: GET Flavors + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.compute.GET.flavors_detail.*.mean, 4) + - title: GET network floatingips + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.network.GET.floatingips.*.mean, 4) + - title: GET networks + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.network.GET.networks.*.mean, 4) + - title: GET network ports + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.network.GET.ports.*.mean, 4) + - title: GET network subnets + type: graph + lines: true + nullPointMode: connected + span: 4 + yaxes: + - format: ms + label: Time + - show: false + targets: + - target: aliasByNode(stats.timers.nodepool.task.$region.network.GET.subnets.*.mean, 4) diff --git a/nodepool/nl02.opendev.org.yaml b/nodepool/nl02.opendev.org.yaml index 92f705bbe2..96e8d244be 100644 --- 
a/nodepool/nl02.opendev.org.yaml +++ b/nodepool/nl02.opendev.org.yaml @@ -15,7 +15,6 @@ zookeeper-servers: # nl01.o.o will manage min-ready of our labels. If nl01.o.o is ever disabled, # another launcher will need to assume this logic. labels: - - name: centos-8-stream - name: centos-9-stream - name: debian-bookworm - name: debian-bullseye @@ -29,10 +28,96 @@ labels: - name: ubuntu-xenial - name: openEuler-22-03-LTS -providers: [] +providers: + - name: openmetal-iad3 + region-name: 'iad3' + # Changing this cloud value impacts things like mirror names in jobs. + # It should be changed carefully. + cloud: openmetal + boot-timeout: 120 + launch-timeout: 300 + rate: 0.01 + diskimages: &provider_diskimages + - name: centos-9-stream + config-drive: true + - name: debian-bookworm + config-drive: true + - name: debian-bullseye + config-drive: true + - name: gentoo-17-0-systemd + config-drive: true + - name: rockylinux-8 + config-drive: true + - name: rockylinux-9 + config-drive: true + - name: ubuntu-bionic + config-drive: true + - name: ubuntu-focal + config-drive: true + - name: ubuntu-jammy + config-drive: true + - name: ubuntu-noble + config-drive: true + - name: ubuntu-xenial + config-drive: true + - name: openEuler-22-03-LTS + config-drive: true + pools: + - name: main + max-servers: 0 + labels: + - name: centos-9-stream + min-ram: 8000 + flavor-name: 'opendev-zuul' + diskimage: centos-9-stream + key-name: infra-root-keys-2024-04-08 + - name: debian-bookworm + min-ram: 8000 + flavor-name: 'opendev-zuul' + diskimage: debian-bookworm + key-name: infra-root-keys-2024-04-08 + - name: debian-bullseye + min-ram: 8000 + flavor-name: 'opendev-zuul' + diskimage: debian-bullseye + key-name: infra-root-keys-2024-04-08 + - name: gentoo-17-0-systemd + min-ram: 8000 + flavor-name: 'opendev-zuul' + diskimage: gentoo-17-0-systemd + key-name: infra-root-keys-2024-04-08 + - name: ubuntu-bionic + min-ram: 8000 + flavor-name: 'opendev-zuul' + diskimage: ubuntu-bionic + key-name: 
infra-root-keys-2024-04-08 + - name: ubuntu-focal + min-ram: 8000 + flavor-name: 'opendev-zuul' + diskimage: ubuntu-focal + key-name: infra-root-keys-2024-04-08 + - name: ubuntu-jammy + min-ram: 8000 + flavor-name: 'opendev-zuul' + diskimage: ubuntu-jammy + key-name: infra-root-keys-2024-04-08 + - name: ubuntu-noble + min-ram: 8000 + flavor-name: 'opendev-zuul' + diskimage: ubuntu-noble + key-name: infra-root-keys-2024-04-08 + - name: ubuntu-xenial + min-ram: 8000 + flavor-name: 'opendev-zuul' + diskimage: ubuntu-xenial + key-name: infra-root-keys-2024-04-08 + - name: openEuler-22-03-LTS + min-ram: 8000 + flavor-name: 'opendev-zuul' + diskimage: openEuler-22-03-LTS + key-name: infra-root-keys-2024-04-08 diskimages: - - name: centos-8-stream - name: centos-9-stream - name: debian-bookworm - name: debian-bullseye diff --git a/nodepool/nodepool.yaml b/nodepool/nodepool.yaml index 16ce4831f6..dd49f90d8e 100644 --- a/nodepool/nodepool.yaml +++ b/nodepool/nodepool.yaml @@ -120,6 +120,14 @@ providers: image-upload-timeout: 21600 diskimages: *provider_diskimages + - name: openmetal-iad3 + region-name: 'iad3' + cloud: openmetal + rate: 0.25 + # 6 hours + image-upload-timeout: 21600 + diskimages: *provider_diskimages + diskimages: - name: base abstract: True