From b5a67794712397ed8c615888ce879a4992296ab9 Mon Sep 17 00:00:00 2001
From: Robert Church
Date: Wed, 14 Aug 2024 12:27:34 -0500
Subject: [PATCH] Add floating monitor helm chart

This change adds a new helm chart to be deployed on AIO-DX systems when:
- the optional ceph DRBD filesystem is deployed (via controllerfs API)
- the floating monitor is enabled (via application helm plugin)

The chart starts a deployment that is based on the standard ceph
monitor deployment generated by the Rook Operator. Changes to the
deployment include:
- enabling overrides for the cluster FSID and static public address
- setting the monitor name to 'mon-float'
- adding an init container to prevent startup if the filesystem mount
  is not a DRBD device
- adding an init container to wipe any existing monitor DB if this
  deployment was NOT previously running

Orchestration of this monitor is managed by helm install and delete
hooks. These hooks perform the following activities:
- pre-install:
  - Wait for the cephcluster to be 'Ready'
  - Disable the Rook operator from managing monitors
  - Patch the monitor endpoints to include the floating monitor
  - Patch the config secrets to include the floating monitor
  - Wait for the fixed monitors to absorb the changes
  - Extract the cluster FSID and populate a configmap for input to the
    floating monitor deployment
  - Save the current health state of the cluster. If the state is
    HEALTH_OK, the post-install will look to achieve that state again
    prior to completion
- post-install:
  - Wait for the floating monitor deployment to be 'available'
  - If the cluster was previously HEALTH_OK, wait for a
    HEALTH_WARN -> HEALTH_OK transition as the monitor joins the
    cluster
  - Save the state of the floating monitor as enabled so that
    subsequent restarts of the floating monitor do not remove the
    existing monitor DB on init
- pre-delete:
  - Wait for the cluster to be 'Ready'
  - Save the current health state of the cluster. If the state is
    HEALTH_OK, the post-delete will look to achieve that state again
    prior to completion
- post-delete:
  - Wait for the floating monitor pod to be deleted
  - Patch the monitor endpoints to remove the floating monitor
  - Patch the config secrets to remove the floating monitor
  - Restart the monitor deployments to pick up the changes
  - Re-enable the Rook Operator to manage the monitors
  - Patch the cephcluster to perform monitor failovers in 5s to allow
    the Operator to "clean up" evidence of the floating monitor quickly
  - Restore the default failover behavior by patching the cephcluster
    and setting the value back to 600s
  - If the cluster was previously HEALTH_OK, wait for a
    HEALTH_WARN -> HEALTH_OK transition as the floating monitor is
    removed from the cluster

Test Plan:
PASS - Pkg build + ISO generation
PASS - Successful AIO-DX Installation
PASS - Initial Rook deployment without floating monitor.
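For reference, the manual add/remove exercised in the next test item used
commands of roughly the following shape; the release name and the packaged
chart path under /usr/lib/helm are illustrative, not mandated by this change:

    # Add the floating monitor (runs the pre-/post-install hooks)
    helm upgrade --install rook-ceph-floating-monitor \
        /usr/lib/helm/rook-ceph-floating-monitor-*.tgz -n rook-ceph

    # Remove the floating monitor (runs the pre-/post-delete hooks)
    helm delete rook-ceph-floating-monitor -n rook-ceph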
PASS - manually add/remove a floating monitor via helm commands (upgrade --install & delete) PASS - IPv4/IPv6 lab testing Change-Id: I16f6a6bc9b14801d379a4f3f5210d29ee72195ee Depends-On: https://review.opendev.org/c/starlingx/stx-puppet/+/926375 Story: 2011066 Task: 50836 Signed-off-by: Robert Church --- debian_pkg_dirs | 3 +- .../debian/deb_folder/changelog | 5 + .../debian/deb_folder/control | 16 + .../debian/deb_folder/copyright | 41 ++ .../rook-ceph-floating-monitor-helm.install | 1 + .../debian/deb_folder/rules | 28 + .../debian/deb_folder/source/format | 1 + .../debian/meta_data.yaml | 10 + .../rook-ceph-floating-monitor-helm/Makefile | 41 ++ .../rook-ceph-floating-monitor/.helmignore | 22 + .../rook-ceph-floating-monitor/Chart.yaml | 10 + .../templates/deployment.yaml | 511 ++++++++++++++++++ .../templates/post-delete-job.yaml | 222 ++++++++ .../templates/post-install-job.yaml | 148 +++++ .../templates/pre-delete-job.yaml | 159 ++++++ .../templates/pre-install-job.yaml | 175 ++++++ .../templates/service.yaml | 53 ++ .../rook-ceph-floating-monitor/values.yaml | 32 ++ 18 files changed, 1477 insertions(+), 1 deletion(-) create mode 100644 helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/changelog create mode 100644 helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/control create mode 100644 helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/copyright create mode 100644 helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/rook-ceph-floating-monitor-helm.install create mode 100755 helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/rules create mode 100644 helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/source/format create mode 100644 helm-charts/custom/rook-ceph-floating-monitor-helm/debian/meta_data.yaml create mode 100644 helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/Makefile create mode 100644 helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/.helmignore create mode 100644 helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/Chart.yaml create mode 100644 helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/deployment.yaml create mode 100644 helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/post-delete-job.yaml create mode 100644 helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/post-install-job.yaml create mode 100644 helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/pre-delete-job.yaml create mode 100644 helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/pre-install-job.yaml create mode 100644 helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/service.yaml create mode 100644 helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/values.yaml diff --git a/debian_pkg_dirs b/debian_pkg_dirs index 8b649fd..b462dbb 100644 --- a/debian_pkg_dirs +++ b/debian_pkg_dirs @@ -1,4 +1,5 @@ -helm-charts/upstream/rook-ceph-helm 
+helm-charts/custom/rook-ceph-floating-monitor-helm helm-charts/custom/rook-ceph-provisioner-helm +helm-charts/upstream/rook-ceph-helm python3-k8sapp-rook-ceph stx-rook-ceph-helm diff --git a/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/changelog b/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/changelog new file mode 100644 index 0000000..68a3150 --- /dev/null +++ b/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/changelog @@ -0,0 +1,5 @@ +rook-ceph-floating-monitor-helm (1.0-0) unstable; urgency=medium + + * Initial release. + + -- Robert Church Wed, 31 Jul 2024 00:00:01 +0000 diff --git a/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/control b/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/control new file mode 100644 index 0000000..3311414 --- /dev/null +++ b/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/control @@ -0,0 +1,16 @@ +Source: rook-ceph-floating-monitor-helm +Section: libs +Priority: optional +Maintainer: StarlingX Developers +Build-Depends: debhelper-compat (= 13), + helm, +Standards-Version: 4.5.1 +Homepage: https://www.starlingx.io + +Package: rook-ceph-floating-monitor-helm +Section: libs +Architecture: any +Depends: ${misc:Depends} +Description: StarlingX Platform Rook Ceph floating monitor helm chart + This package contains cluster resources for a DRBD based floating monitor + deployment. diff --git a/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/copyright b/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/copyright new file mode 100644 index 0000000..41c3068 --- /dev/null +++ b/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/copyright @@ -0,0 +1,41 @@ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: rook-ceph-floating-monitor-helm +Source: https://opendev.org/starlingx/platform-armada-app/ + +Files: * +Copyright: (c) 2024 Wind River Systems, Inc +License: Apache-2 + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + . + https://www.apache.org/licenses/LICENSE-2.0 + . + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + . + On Debian-based systems the full text of the Apache version 2.0 license + can be found in `/usr/share/common-licenses/Apache-2.0'. + +# If you want to use GPL v2 or later for the /debian/* files use +# the following clauses, or change it to suit. Delete these two lines +Files: debian/* +Copyright: 2024 Wind River Systems, Inc +License: Apache-2 + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + . + https://www.apache.org/licenses/LICENSE-2.0 + . + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + . 
+ On Debian-based systems the full text of the Apache version 2.0 license + can be found in `/usr/share/common-licenses/Apache-2.0'. diff --git a/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/rook-ceph-floating-monitor-helm.install b/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/rook-ceph-floating-monitor-helm.install new file mode 100644 index 0000000..8a0c6de --- /dev/null +++ b/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/rook-ceph-floating-monitor-helm.install @@ -0,0 +1 @@ +usr/lib/helm/* diff --git a/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/rules b/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/rules new file mode 100755 index 0000000..716d8c0 --- /dev/null +++ b/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/rules @@ -0,0 +1,28 @@ +#!/usr/bin/make -f +# export DH_VERBOSE = 1 + +export ROOT = debian/tmp +export APP_FOLDER = $(ROOT)/usr/lib/helm + + +export DEB_VERSION = $(shell dpkg-parsechangelog | egrep '^Version:' | cut -f 2 -d ' ') +export PATCH_VERSION = $(shell echo $(DEB_VERSION) | cut -f 4 -d '.') +export CHART_BASE_VERSION = $(shell echo $(DEB_VERSION) | sed 's/-/./' | cut -d '.' -f 1-2) +export CHART_VERSION = $(CHART_BASE_VERSION).${PATCH_VERSION} + +%: + dh $@ + +override_dh_auto_build: + # Stage the chart for building + mkdir -p build + mv Makefile rook-ceph-floating-monitor build + + # Build the chart + cd build && make CHART_VERSION=$(CHART_VERSION) rook-ceph-floating-monitor + +override_dh_auto_install: + install -d -m 755 $(APP_FOLDER) + install -p -D -m 755 build/rook-ceph-floating-monitor*.tgz $(APP_FOLDER) + +override_dh_auto_test: diff --git a/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/source/format b/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/source/format new file mode 100644 index 0000000..163aaf8 --- /dev/null +++ b/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/deb_folder/source/format @@ -0,0 +1 @@ +3.0 (quilt) diff --git a/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/meta_data.yaml b/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/meta_data.yaml new file mode 100644 index 0000000..6b52759 --- /dev/null +++ b/helm-charts/custom/rook-ceph-floating-monitor-helm/debian/meta_data.yaml @@ -0,0 +1,10 @@ +--- +debname: rook-ceph-floating-monitor-helm +debver: 1.0-0 +src_path: rook-ceph-floating-monitor-helm +revision: + dist: $STX_DIST + PKG_GITREVCOUNT: true + GITREVCOUNT: + SRC_DIR: ${MY_REPO}/stx/app-rook-ceph/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor + BASE_SRCREV: c6c693d51cdc6daa4eafe34ccab5ce35496bf516 diff --git a/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/Makefile b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/Makefile new file mode 100644 index 0000000..a9b44e4 --- /dev/null +++ b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/Makefile @@ -0,0 +1,41 @@ +# +# Copyright 2017 The Openstack-Helm Authors. +# +# Copyright (c) 2024 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# It's necessary to set this because some environments don't link sh -> bash. 
+SHELL := /bin/bash +TASK := build + +EXCLUDES := helm-toolkit doc tests tools logs tmp +CHARTS := helm-toolkit $(filter-out $(EXCLUDES), $(patsubst %/.,%,$(wildcard */.))) + +.PHONY: $(EXCLUDES) $(CHARTS) + +all: $(CHARTS) + +$(CHARTS): + @if [ -d $@ ]; then \ + echo; \ + echo "===== Processing [$@] chart ====="; \ + make $(TASK)-$@; \ + fi + +init-%: + if [ -f $*/Makefile ]; then make -C $*; fi + +lint-%: init-% + if [ -d $* ]; then helm lint $*; fi + +build-%: lint-% + if [ -d $* ]; then helm package --version $(CHART_VERSION) $*; fi + +clean: + @echo "Clean all build artifacts" + rm -f */templates/_partials.tpl */templates/_globals.tpl + rm -rf */charts */tmpcharts + +%: + @: diff --git a/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/.helmignore b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/.helmignore new file mode 100644 index 0000000..50af031 --- /dev/null +++ b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/Chart.yaml b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/Chart.yaml new file mode 100644 index 0000000..ae727d7 --- /dev/null +++ b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/Chart.yaml @@ -0,0 +1,10 @@ +# +# Copyright (c) 2024 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: v1 +appVersion: "1.1" +description: A Helm chart for Kubernetes +name: rook-ceph-floating-monitor +version: 1.0.0 diff --git a/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/deployment.yaml b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/deployment.yaml new file mode 100644 index 0000000..c0cc590 --- /dev/null +++ b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/deployment.yaml @@ -0,0 +1,511 @@ +{{/* +# +# Copyright (c) 2024 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# +*/}} + +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + ceph.rook.io/do-not-reconcile: "" + app.kubernetes.io/managed-by: rook-ceph-operator + app: rook-ceph-mon + app.kubernetes.io/component: cephclusters.ceph.rook.io + app.kubernetes.io/instance: float + app.kubernetes.io/name: ceph-mon + app.kubernetes.io/part-of: rook-ceph + app.starlingx.io/component: platform + ceph-version: 18.2.2-0 + ceph_daemon_id: float + ceph_daemon_type: mon + mon: float + mon_cluster: rook-ceph + rook-version: v1.13.7 + rook.io/operator-namespace: rook-ceph + rook_cluster: rook-ceph + name: rook-ceph-mon-float + namespace: rook-ceph +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: rook-ceph-mon + ceph_daemon_id: float + mon: float + mon_cluster: rook-ceph + rook_cluster: rook-ceph + strategy: + type: Recreate + template: + metadata: + labels: + ceph.rook.io/do-not-reconcile: "" + app.kubernetes.io/managed-by: rook-ceph-operator + app: rook-ceph-mon + app.kubernetes.io/component: cephclusters.ceph.rook.io + app.kubernetes.io/instance: float + app.kubernetes.io/name: ceph-mon + app.kubernetes.io/part-of: rook-ceph + app.starlingx.io/component: platform + ceph_daemon_id: float + ceph_daemon_type: mon + mon: float + mon_cluster: rook-ceph + rook.io/operator-namespace: rook-ceph + rook_cluster: rook-ceph + name: rook-ceph-mon-float + namespace: rook-ceph + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: ceph-mon-float-placement + operator: In + values: + - enabled + containers: + - args: + - --fsid=$(ROOK_CEPH_FSID) + - --keyring=/etc/ceph/keyring-store/keyring + - --default-log-to-stderr=true + - --default-err-to-stderr=true + - --default-mon-cluster-log-to-stderr=true + - '--default-log-stderr-prefix=debug ' + - --default-log-to-file=false + - --default-mon-cluster-log-to-file=false + - --mon-host=$(ROOK_CEPH_MON_HOST) + - --mon-initial-members=$(ROOK_CEPH_MON_INITIAL_MEMBERS) + - --id=float + - --setuser=ceph + - --setgroup=ceph + - --ms-bind-ipv4=true + - --ms-bind-ipv6=false + - --foreground + - --public-addr={{ $.Values.config.ip_address }} + - --setuser-match-path=/var/lib/ceph/mon/ceph-float/store.db + - --public-bind-addr=$(ROOK_POD_IP) + command: + - ceph-mon + env: + - name: CONTAINER_IMAGE + value: {{ .Values.images.tags.ceph | quote }} + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: POD_MEMORY_LIMIT + valueFrom: + resourceFieldRef: + divisor: "0" + resource: limits.memory + - name: POD_MEMORY_REQUEST + valueFrom: + resourceFieldRef: + divisor: "0" + resource: requests.memory + - name: POD_CPU_LIMIT + valueFrom: + resourceFieldRef: + divisor: "1" + resource: limits.cpu + - name: POD_CPU_REQUEST + valueFrom: + resourceFieldRef: + divisor: "0" + resource: requests.cpu + - name: CEPH_USE_RANDOM_NONCE + value: "true" + - name: ROOK_MSGR2 + value: msgr2_false_encryption_false_compression_false + - name: ROOK_CEPH_MON_HOST + valueFrom: + secretKeyRef: + key: mon_host + name: rook-ceph-config + - name: ROOK_CEPH_MON_INITIAL_MEMBERS + valueFrom: + secretKeyRef: + key: mon_initial_members + name: rook-ceph-config + - name: ROOK_POD_IP + valueFrom: + fieldRef: + apiVersion: 
v1 + fieldPath: status.podIP + - name: ROOK_CEPH_FSID + valueFrom: + configMapKeyRef: + name: rook-ceph-mon-float-inputs + key: fsid + image: {{ .Values.images.tags.ceph | quote }} + imagePullPolicy: IfNotPresent + livenessProbe: + exec: + command: + - env + - -i + - sh + - -c + - "\noutp=\"$(ceph --admin-daemon /run/ceph/ceph-mon.float.asok mon_status + 2>&1)\"\nrc=$?\nif [ $rc -ne 0 ]; then\n\techo \"ceph daemon health + check failed with the following output:\"\n\techo \"$outp\" | sed -e + 's/^/> /g'\n\texit $rc\nfi\n" + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 5 + name: mon + ports: + - containerPort: 3300 + name: tcp-msgr2 + protocol: TCP + - containerPort: 6789 + name: tcp-msgr1 + protocol: TCP + resources: + limits: + memory: 2Gi + requests: + cpu: "0" + memory: "0" + securityContext: + privileged: false + startupProbe: + exec: + command: + - env + - -i + - sh + - -c + - "\noutp=\"$(ceph --admin-daemon /run/ceph/ceph-mon.float.asok mon_status + 2>&1)\"\nrc=$?\nif [ $rc -ne 0 ]; then\n\techo \"ceph daemon health + check failed with the following output:\"\n\techo \"$outp\" | sed -e + 's/^/> /g'\n\texit $rc\nfi\n" + failureThreshold: 6 + initialDelaySeconds: 10 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 5 + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /etc/ceph + name: rook-config-override + readOnly: true + - mountPath: /run/ceph + name: ceph-daemons-sock-dir + - mountPath: /etc/ceph/keyring-store/ + name: rook-ceph-mons-keyring + readOnly: true + - mountPath: /var/log/ceph + name: rook-ceph-log + - mountPath: /var/lib/ceph/crash + name: rook-ceph-crash + - mountPath: /var/lib/ceph/mon/ceph-float + name: ceph-daemon-data + workingDir: /var/log/ceph + - command: + - /bin/bash + - -x + - -e + - -m + - -c + - "\nCEPH_CLIENT_ID=ceph-mon.float\nPERIODICITY=daily\nLOG_ROTATE_CEPH_FILE=/etc/logrotate.d/ceph\nLOG_MAX_SIZE=500M\nROTATE=7\n\n# + edit the logrotate file to only rotate a specific daemon log\n# otherwise + we will logrotate log files without reloading certain daemons\n# this might + happen when multiple daemons run on the same machine\nsed -i \"s|*.log|$CEPH_CLIENT_ID.log|\" + \"$LOG_ROTATE_CEPH_FILE\"\n\n# replace default daily with given user input\nsed + --in-place \"s/daily/$PERIODICITY/g\" \"$LOG_ROTATE_CEPH_FILE\"\n\n# replace + rotate count, default 7 for all ceph daemons other than rbd-mirror\nsed + --in-place \"s/rotate 7/rotate $ROTATE/g\" \"$LOG_ROTATE_CEPH_FILE\"\n\nif + [ \"$LOG_MAX_SIZE\" != \"0\" ]; then\n\t# adding maxsize $LOG_MAX_SIZE at + the 4th line of the logrotate config file with 4 spaces to maintain indentation\n\tsed + --in-place \"4i \\ \\ \\ \\ maxsize $LOG_MAX_SIZE\" \"$LOG_ROTATE_CEPH_FILE\"\nfi\n\nwhile + true; do\n\t# we don't force the logrorate but we let the logrotate binary + handle the rotation based on user's input for periodicity and size\n\tlogrotate + --verbose \"$LOG_ROTATE_CEPH_FILE\"\n\tsleep 15m\ndone\n" + image: {{ .Values.images.tags.ceph | quote }} + imagePullPolicy: IfNotPresent + name: log-collector + resources: + limits: + memory: 1Gi + requests: + cpu: "0" + memory: "0" + securityContext: + privileged: false + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + tty: true + volumeMounts: + - mountPath: /etc/ceph + name: rook-config-override + readOnly: true + - mountPath: /run/ceph + name: ceph-daemons-sock-dir + - mountPath: /var/log/ceph + name: 
rook-ceph-log + - mountPath: /var/lib/ceph/crash + name: rook-ceph-crash + dnsPolicy: ClusterFirst + initContainers: + - args: + - -c + - "mountpoint -d /var/lib/ceph/mon/ceph-float | grep -q ^147" + command: + - bash + image: {{ .Values.images.tags.ceph | quote }} + imagePullPolicy: IfNotPresent + name: verify-drbd-mount + resources: + limits: + memory: 2Gi + requests: + cpu: "0" + memory: "0" + securityContext: + privileged: false + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /var/lib/ceph/mon/ceph-float + name: ceph-daemon-data + - args: + - -c + - '[ "${WAS_DEPLOYED}" == "false" ] && rm -fr /var/lib/ceph/mon/ceph-float/* || true' + command: + - bash + image: {{ .Values.images.tags.ceph | quote }} + imagePullPolicy: IfNotPresent + name: wipe-old-mon-db + env: + - name: WAS_DEPLOYED + valueFrom: + configMapKeyRef: + name: rook-ceph-mon-float-inputs + key: enabled + resources: + limits: + memory: 2Gi + requests: + cpu: "0" + memory: "0" + securityContext: + privileged: false + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /var/lib/ceph/mon/ceph-float + name: ceph-daemon-data + - args: + - --verbose + - --recursive + - ceph:ceph + - /var/log/ceph + - /var/lib/ceph/crash + - /run/ceph + - /var/lib/ceph/mon/ceph-float + command: + - chown + image: {{ .Values.images.tags.ceph | quote }} + imagePullPolicy: IfNotPresent + name: chown-container-data-dir + resources: + limits: + memory: 2Gi + requests: + cpu: "0" + memory: "0" + securityContext: + privileged: false + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /etc/ceph + name: rook-config-override + readOnly: true + - mountPath: /run/ceph + name: ceph-daemons-sock-dir + - mountPath: /etc/ceph/keyring-store/ + name: rook-ceph-mons-keyring + readOnly: true + - mountPath: /var/log/ceph + name: rook-ceph-log + - mountPath: /var/lib/ceph/crash + name: rook-ceph-crash + - mountPath: /var/lib/ceph/mon/ceph-float + name: ceph-daemon-data + - args: + - --fsid=$(ROOK_CEPH_FSID) + - --keyring=/etc/ceph/keyring-store/keyring + - --default-log-to-stderr=true + - --default-err-to-stderr=true + - --default-mon-cluster-log-to-stderr=true + - '--default-log-stderr-prefix=debug ' + - --default-log-to-file=false + - --default-mon-cluster-log-to-file=false + - --mon-host=$(ROOK_CEPH_MON_HOST) + - --mon-initial-members=$(ROOK_CEPH_MON_INITIAL_MEMBERS) + - --id=float + - --setuser=ceph + - --setgroup=ceph + - --ms-bind-ipv4=true + - --ms-bind-ipv6=false + - --public-addr=10.96.0.255 + - --mkfs + command: + - ceph-mon + env: + - name: CONTAINER_IMAGE + value: {{ .Values.images.tags.ceph | quote }} + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: POD_MEMORY_LIMIT + valueFrom: + resourceFieldRef: + divisor: "0" + resource: limits.memory + - name: POD_MEMORY_REQUEST + valueFrom: + resourceFieldRef: + divisor: "0" + resource: requests.memory + - name: POD_CPU_LIMIT + valueFrom: + resourceFieldRef: + divisor: "1" + resource: limits.cpu + - name: POD_CPU_REQUEST + valueFrom: + resourceFieldRef: + divisor: "0" + resource: requests.cpu + - name: CEPH_USE_RANDOM_NONCE + value: "true" + - name: ROOK_MSGR2 + value:
msgr2_false_encryption_false_compression_false + - name: ROOK_CEPH_MON_HOST + valueFrom: + secretKeyRef: + key: mon_host + name: rook-ceph-config + - name: ROOK_CEPH_MON_INITIAL_MEMBERS + valueFrom: + secretKeyRef: + key: mon_initial_members + name: rook-ceph-config + - name: ROOK_CEPH_FSID + valueFrom: + configMapKeyRef: + name: rook-ceph-mon-float-inputs + key: fsid + image: {{ .Values.images.tags.ceph | quote }} + imagePullPolicy: IfNotPresent + name: init-mon-fs + resources: + limits: + memory: 2Gi + requests: + cpu: "0" + memory: "0" + securityContext: + privileged: false + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /etc/ceph + name: rook-config-override + readOnly: true + - mountPath: /run/ceph + name: ceph-daemons-sock-dir + - mountPath: /etc/ceph/keyring-store/ + name: rook-ceph-mons-keyring + readOnly: true + - mountPath: /var/log/ceph + name: rook-ceph-log + - mountPath: /var/lib/ceph/crash + name: rook-ceph-crash + - mountPath: /var/lib/ceph/mon/ceph-float + name: ceph-daemon-data + nodeSelector: + node-role.kubernetes.io/control-plane: "" + priorityClassName: system-node-critical + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + shareProcessNamespace: true + terminationGracePeriodSeconds: 30 + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/master + operator: Exists + - effect: NoSchedule + key: node-role.kubernetes.io/control-plane + operator: Exists + - effect: NoExecute + key: services + operator: Exists + volumes: + - name: rook-config-override + projected: + defaultMode: 420 + sources: + - configMap: + items: + - key: config + mode: 292 + path: ceph.conf + name: rook-config-override + - name: rook-ceph-mons-keyring + secret: + defaultMode: 420 + secretName: rook-ceph-mons-keyring + - hostPath: + path: /var/lib/ceph/mon-float/exporter + type: DirectoryOrCreate + name: ceph-daemons-sock-dir + - hostPath: + path: /var/lib/ceph/mon-float/rook-ceph/log + type: "" + name: rook-ceph-log + - hostPath: + path: /var/lib/ceph/mon-float/rook-ceph/crash + type: "" + name: rook-ceph-crash + - hostPath: + path: /var/lib/ceph/mon-float/mon-float + type: "" + name: ceph-daemon-data diff --git a/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/post-delete-job.yaml b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/post-delete-job.yaml new file mode 100644 index 0000000..f3e98a1 --- /dev/null +++ b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/post-delete-job.yaml @@ -0,0 +1,222 @@ +{{/* +# +# Copyright (c) 2024 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +*/}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: rook-mon-float-config-post-delete + namespace: {{ $.Release.Namespace }} + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": "post-delete" +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} + "helm.sh/hook-weight": "-5" +data: + rook_mon_float.sh: |- + #!/bin/bash + if [ "${MANAGED_REMOVAL}" != "true" ]; then + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Cluster teardown. 
Skipping managed removal" + exit 0 + fi + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Wait for floating monitor pod to be deleted..." + kubectl wait --for=delete -n rook-ceph pod -l "app=rook-ceph-mon,mon=float" --timeout=60s + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Patch the secret to remove the floating monitor: rook-ceph-config ..." + kubectl get secret -n rook-ceph rook-ceph-config --template={{ "{{.data.mon_initial_members}}" }} | base64 -d -w0 | sed s/,float$// | base64 -w0 | xargs -i kubectl patch secret -n rook-ceph rook-ceph-config -p "{\"data\":{\"mon_initial_members\":\"{}\"}}" +{{- if eq $.Values.config.ip_family "IPv4"}} + kubectl get secret -n rook-ceph rook-ceph-config --template={{ "{{.data.mon_host}}" }} | base64 -d -w0 | sed s/,\\[v2:${IP_ADDRESS}.*\\]$// | base64 -w0 | xargs -i kubectl patch secret -n rook-ceph rook-ceph-config -p "{\"data\":{\"mon_host\":\"{}\"}}" +{{- else }} + kubectl get secret -n rook-ceph rook-ceph-config --template={{ "{{.data.mon_host}}" }} | base64 -d -w0 | sed s/,\\[v2:\\${IP_ADDRESS}.*\\]$// | base64 -w0 | xargs -i kubectl patch secret -n rook-ceph rook-ceph-config -p "{\"data\":{\"mon_host\":\"{}\"}}" +{{- end }} + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Patch the endpoints to remove the floating monitor: rook-ceph-mon-endpoints ..." +{{- if eq $.Values.config.ip_family "IPv4"}} + kubectl get cm -n rook-ceph rook-ceph-mon-endpoints --template={{ "{{.data.data}}" }} | sed s/,float=${IP_ADDRESS}:6789$// | xargs -i kubectl patch cm -n rook-ceph rook-ceph-mon-endpoints -p "{\"data\":{\"data\":\"{}\"}}" +{{- else }} + kubectl get cm -n rook-ceph rook-ceph-mon-endpoints --template={{ "{{.data.data}}" }} | sed s/,float=\\${IP_ADDRESS}:6789$// | xargs -i kubectl patch cm -n rook-ceph rook-ceph-mon-endpoints -p "{\"data\":{\"data\":\"{}\"}}" +{{- end }} + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Rollout restart the monitor deployments..." + kubectl rollout restart deployment -n rook-ceph --selector=app=rook-ceph-mon + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Wait for deployments to be available..." + kubectl wait --for=condition=available deployment --all=true -n rook-ceph -lapp=rook-ceph-mon --timeout=60s + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Wait for monitor pods to be ready..." + kubectl wait --for=condition=ready pod --all=true -n rook-ceph -lapp=rook-ceph-mon --timeout=60s + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Re-enable the operator to track monitors..." + kubectl label deployment -n rook-ceph -l app=rook-ceph-mon ceph.rook.io/do-not-reconcile- + + TIMEOUT=$(kubectl get -n rook-ceph cephcluster/${CLUSTER_CRD} --output=jsonpath='{.spec.healthCheck.daemonHealth.mon.timeout}') + if [ "${TIMEOUT}" != "5s" ]; then + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Patch the cluster to set the monitor failover to 5s ... (will restart operator)" + kubectl patch -n rook-ceph cephcluster/${CLUSTER_CRD} --type='merge' -p '{"spec":{"healthCheck":{"daemonHealth":{"mon":{"timeout": "5s", "interval": "10s"}}}}}' + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Wait for the cluster state: Progressing..." + kubectl wait -n rook-ceph cephcluster/${CLUSTER_CRD} --timeout=60s --for=jsonpath='{.status.phase}'=Progressing + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Wait for the cluster state: Ready..." + kubectl wait -n rook-ceph cephcluster/${CLUSTER_CRD} --timeout=60s --for=jsonpath='{.status.phase}'=Ready + + if [ "${WAS_DEPLOYED}" == "true" ]; then + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Wait for cluster HEALTH_WARN..."
+ kubectl wait -n rook-ceph cephcluster/${CLUSTER_CRD} --timeout=60s --for=jsonpath='{.status.ceph.health}'=HEALTH_WARN + fi + fi + + if [ "${INITIAL_CLUSTER_HEALTH}" == "HEALTH_OK" ]; then + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Wait for failover cleanup and cluster HEALTH_OK..." + # kubectl get --output=jsonpath='{.status.ceph.health}' -n rook-ceph cephcluster/rook-ceph + kubectl wait -n rook-ceph cephcluster/${CLUSTER_CRD} --timeout=240s --for=jsonpath='{.status.ceph.health}'=HEALTH_OK + fi + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Patch the cluster to set the monitor failover to 600s ... (will restart operator)" + kubectl patch -n rook-ceph cephcluster/${CLUSTER_CRD} --type='merge' -p '{"spec":{"healthCheck":{"daemonHealth":{"mon":{"timeout": "600s", "interval": "45s"}}}}}' + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Wait for the cluster state: Progressing..." + kubectl wait -n rook-ceph cephcluster/${CLUSTER_CRD} --timeout=60s --for=jsonpath='{.status.phase}'=Progressing + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Wait for the cluster state: Ready..." + kubectl wait -n rook-ceph cephcluster/${CLUSTER_CRD} --timeout=60s --for=jsonpath='{.status.phase}'=Ready + + if [ "${INITIAL_CLUSTER_HEALTH}" == "HEALTH_OK" ]; then + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Wait for failover cleanup and cluster HEALTH_OK..." + kubectl wait -n rook-ceph cephcluster/${CLUSTER_CRD} --timeout=60s --for=jsonpath='{.status.ceph.health}'=HEALTH_OK + fi + + # TODO: Cleanup cm/rook-ceph-mon-float-inputs? + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Done with floating monitor removal..." + exit 0 +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: rook-ceph-mon-float + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": "post-delete" +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} + "helm.sh/hook-weight": "-5" +rules: +- apiGroups: [""] + resources: ["pods"] + verbs: ["list", "watch"] +- apiGroups: [""] + resources: ["deployments"] + verbs: ["watch"] +- apiGroups: [""] + resources: ["configmaps", "secrets"] + verbs: ["get", "patch"] +- apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["list", "patch", "watch"] +- apiGroups: ["ceph.rook.io"] + resources: ["cephclusters"] + verbs: ["get", "list", "patch", "watch"] +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: rook-ceph-mon-float + namespace: {{ .Release.Namespace }} + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": "post-delete" +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} + "helm.sh/hook-weight": "-5" +imagePullSecrets: + - name: default-registry-key +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: rook-ceph-mon-float + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": "post-delete" +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} + "helm.sh/hook-weight": "-5" +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: rook-ceph-mon-float +subjects: +- kind: ServiceAccount + name: rook-ceph-mon-float + namespace: {{ .Release.Namespace }} +--- 
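+# For reference, the hook scripts in this chart add/strip trailing 'float'
+# entries of roughly the following shape (the fixed monitor names and
+# addresses below are illustrative only; 10.96.0.16 is the chart's default
+# config.ip_address):
+#
+#   cm/rook-ceph-mon-endpoints, .data.data:
+#     a=192.168.204.2:6789,b=192.168.204.3:6789,float=10.96.0.16:6789
+#   secret/rook-ceph-config, .data.mon_initial_members (base64-decoded):
+#     a,b,float
+#   secret/rook-ceph-config, .data.mon_host (base64-decoded):
+#     ...,[v2:10.96.0.16:3300,v1:10.96.0.16:6789]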
+apiVersion: batch/v1 +kind: Job +metadata: + name: "mon-float-post-delete" + namespace: {{ $.Release.Namespace }} + labels: + app: "{{ $.Chart.Name }}" + app.starlingx.io/component: {{ index $.Values "app.starlingx.io/component" }} + chart: "{{ $.Chart.Name }}-{{ $.Chart.AppVersion }}" + release: {{ $.Release.Name }} + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": "post-delete" +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} +spec: + backoffLimit: 5 + activeDeadlineSeconds: 600 + template: + spec: + serviceAccountName: rook-ceph-mon-float + containers: + - name: "post-delete" + image: {{ $.Values.images.tags.kubectl | quote }} + imagePullPolicy: IfNotPresent + command: ["/bin/bash", "/tmp/mnt/rook_mon_float.sh"] + env: + - name: CLUSTER_CRD + value: {{ $.Values.config.clusterCrd }} + - name: IP_ADDRESS + value: {{ squote $.Values.config.ip_address }} + - name: WAS_DEPLOYED + valueFrom: + configMapKeyRef: + name: rook-ceph-mon-float-inputs + key: enabled + - name: INITIAL_CLUSTER_HEALTH + valueFrom: + configMapKeyRef: + name: rook-ceph-mon-float-inputs + key: health + - name: MANAGED_REMOVAL + valueFrom: + configMapKeyRef: + name: rook-ceph-mon-float-inputs + key: managed_remove + volumeMounts: + - name: rook-mon-float-config-post-delete + mountPath: /tmp/mnt + restartPolicy: Never + volumes: + - name: rook-mon-float-config-post-delete + configMap: + name: rook-mon-float-config-post-delete diff --git a/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/post-install-job.yaml b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/post-install-job.yaml new file mode 100644 index 0000000..39f1cc2 --- /dev/null +++ b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/post-install-job.yaml @@ -0,0 +1,148 @@ +{{/* +# +# Copyright (c) 2024 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +*/}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: rook-mon-float-config-post-install + namespace: {{ $.Release.Namespace }} + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": post-install +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} + "helm.sh/hook-weight": "-5" +data: + rook_mon_float.sh: |- + #!/bin/bash + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Wait for floating monitor deployment to be available..." + kubectl wait --for=condition=available deployment --all=true -n rook-ceph -lapp=rook-ceph-mon,mon=float + + # The cluster may not have been in a HEALTH_OK state when the floating + # monitor was added. If so, wait for that state to be achieved again. + if [ "${INITIAL_CLUSTER_HEALTH}" == "HEALTH_OK" ]; then + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Wait for cluster HEALTH_WARN as floating mon joins the quorum..." + kubectl wait --for=jsonpath='{.status.ceph.health}'=HEALTH_WARN -n rook-ceph cephcluster/rook-ceph --timeout=30s + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Wait for cluster HEALTH_OK..."
+ kubectl wait --for=jsonpath='{.status.ceph.health}'=HEALTH_OK -n rook-ceph cephcluster/rook-ceph --timeout=60s + fi + + # Populate data for floating monitor deployment + kubectl patch cm -n rook-ceph rook-ceph-mon-float-inputs -p "{\"data\":{\"enabled\":\"true\"}}" + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Done. Floating monitor started..." + exit 0 +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: rook-ceph-mon-float + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": post-install +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} + "helm.sh/hook-weight": "-5" +rules: +- apiGroups: [""] + resources: ["configmaps"] + verbs: ["get","patch"] +- apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["list", "watch"] +- apiGroups: ["ceph.rook.io"] + resources: ["cephclusters"] + verbs: ["get", "list", "watch"] +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: rook-ceph-mon-float + namespace: {{ .Release.Namespace }} + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": post-install +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} + "helm.sh/hook-weight": "-5" +imagePullSecrets: + - name: default-registry-key +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: rook-ceph-mon-float + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": post-install +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} + "helm.sh/hook-weight": "-5" +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: rook-ceph-mon-float +subjects: +- kind: ServiceAccount + name: rook-ceph-mon-float + namespace: {{ .Release.Namespace }} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: "mon-float-post-install" + namespace: {{ $.Release.Namespace }} + labels: + app: "{{ $.Chart.Name }}" + app.starlingx.io/component: {{ index $.Values "app.starlingx.io/component" }} + chart: "{{ $.Chart.Name }}-{{ $.Chart.AppVersion }}" + release: {{ $.Release.Name }} + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": post-install +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} +spec: + backoffLimit: 5 + activeDeadlineSeconds: 120 + template: + spec: + serviceAccountName: rook-ceph-mon-float + containers: + - name: post-install + image: {{ $.Values.images.tags.kubectl | quote }} + imagePullPolicy: IfNotPresent + command: ["/bin/bash", "/tmp/mnt/rook_mon_float.sh"] + env: + - name: CLUSTER_CRD + value: {{ $.Values.config.clusterCrd }} + - name: IP_ADDRESS + value: {{ squote $.Values.config.ip_address }} + - name: INITIAL_CLUSTER_HEALTH + valueFrom: + configMapKeyRef: + name: rook-ceph-mon-float-inputs + key: health + volumeMounts: + - name: rook-mon-float-config-post-install + mountPath: /tmp/mnt + restartPolicy: Never + volumes: + - name: rook-mon-float-config-post-install + configMap: + name: rook-mon-float-config-post-install diff --git 
a/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/pre-delete-job.yaml b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/pre-delete-job.yaml new file mode 100644 index 0000000..73ea5d4 --- /dev/null +++ b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/pre-delete-job.yaml @@ -0,0 +1,159 @@ +{{/* +# +# Copyright (c) 2024 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +*/}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: rook-mon-float-config-pre-delete + namespace: {{ $.Release.Namespace }} + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": "pre-delete" +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} + "helm.sh/hook-weight": "-5" +data: + rook_mon_float.sh: |- + #!/bin/bash + if [ "${MANAGED_REMOVAL}" != "true" ]; then + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Cluster teardown. Skipping managed removal" + exit 0 + fi + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Wait for cluster (${CLUSTER_CRD}) status: Ready..." + kubectl wait -n rook-ceph cephcluster/${CLUSTER_CRD} --for=jsonpath='{.status.phase}'=Ready + while true; do kubectl wait -n rook-ceph cephcluster/${CLUSTER_CRD} --timeout=60s --for=jsonpath='{.status.phase}'=Ready > /dev/null 2>&1 && break; sleep 1; done + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Cluster (${CLUSTER_CRD}) is Ready" + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Get the cluster health state" + INITIAL_CLUSTER_HEALTH=$(kubectl get --output=jsonpath='{.status.ceph.health}' -n rook-ceph cephcluster/rook-ceph) + + # Populate data for floating monitor removal + kubectl patch cm -n rook-ceph rook-ceph-mon-float-inputs -p "{\"data\":{\"health\":\"${INITIAL_CLUSTER_HEALTH}\"}}" + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Done with pre-delete tasks for the floating monitor removal..."
+ exit 0 +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: rook-ceph-mon-float + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": "pre-delete" +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} + "helm.sh/hook-weight": "-5" +rules: +- apiGroups: [""] + resources: ["pods"] + verbs: ["list", "watch"] +- apiGroups: [""] + resources: ["deployments"] + verbs: ["watch"] +- apiGroups: [""] + resources: ["configmaps", "secrets"] + verbs: ["get", "patch"] +- apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["list", "patch", "watch"] +- apiGroups: ["ceph.rook.io"] + resources: ["cephclusters"] + verbs: ["get", "list", "patch", "watch"] +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: rook-ceph-mon-float + namespace: {{ .Release.Namespace }} + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": "pre-delete" +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} + "helm.sh/hook-weight": "-5" +imagePullSecrets: + - name: default-registry-key +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: rook-ceph-mon-float + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": "pre-delete" +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} + "helm.sh/hook-weight": "-5" +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: rook-ceph-mon-float +subjects: +- kind: ServiceAccount + name: rook-ceph-mon-float + namespace: {{ .Release.Namespace }} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: "mon-float-pre-delete" + namespace: {{ $.Release.Namespace }} + labels: + app: "{{ $.Chart.Name }}" + app.starlingx.io/component: {{ index $.Values "app.starlingx.io/component" }} + chart: "{{ $.Chart.Name }}-{{ $.Chart.AppVersion }}" + release: {{ $.Release.Name }} + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": "pre-delete" +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} +spec: + backoffLimit: 5 + activeDeadlineSeconds: 120 + template: + spec: + serviceAccountName: rook-ceph-mon-float + containers: + - name: "pre-delete" + image: {{ $.Values.images.tags.kubectl | quote }} + imagePullPolicy: IfNotPresent + command: ["/bin/bash", "/tmp/mnt/rook_mon_float.sh"] + env: + - name: CLUSTER_CRD + value: {{ $.Values.config.clusterCrd }} + - name: IP_ADDRESS + value: {{ squote $.Values.config.ip_address }} + - name: WAS_DEPLOYED + valueFrom: + configMapKeyRef: + name: rook-ceph-mon-float-inputs + key: enabled + - name: MANAGED_REMOVAL + valueFrom: + configMapKeyRef: + name: rook-ceph-mon-float-inputs + key: managed_remove + volumeMounts: + - name: rook-mon-float-config-pre-delete + mountPath: /tmp/mnt + restartPolicy: Never + volumes: + - name: rook-mon-float-config-pre-delete + configMap: + name: rook-mon-float-config-pre-delete diff --git 
a/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/pre-install-job.yaml b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/pre-install-job.yaml new file mode 100644 index 0000000..94b0c07 --- /dev/null +++ b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/pre-install-job.yaml @@ -0,0 +1,175 @@ +{{/* +# +# Copyright (c) 2024 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +*/}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: rook-mon-float-config-pre-install + namespace: {{ $.Release.Namespace }} + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": "pre-install" +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} + "helm.sh/hook-weight": "-5" +data: + rook_mon_float.sh: |- + #!/bin/bash + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Wait for cluster (${CLUSTER_CRD}) status: Ready..." + kubectl wait -n rook-ceph cephcluster/${CLUSTER_CRD} --for=jsonpath='{.status.phase}'=Ready + while true; do kubectl wait -n rook-ceph cephcluster/${CLUSTER_CRD} --timeout=60s --for=jsonpath='{.status.phase}'=Ready > /dev/null 2>&1 && break; sleep 1; done + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Cluster (${CLUSTER_CRD}) is Ready" + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Get the cluster health state" + INITIAL_CLUSTER_HEALTH=$(kubectl get --output=jsonpath='{.status.ceph.health}' -n rook-ceph cephcluster/rook-ceph) + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Disable the operator..." + kubectl label deployment -n rook-ceph -l app=rook-ceph-mon ceph.rook.io/do-not-reconcile="" + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Patch the endpoints to add the floating monitor: rook-ceph-mon-endpoints ..." + kubectl get cm -n rook-ceph rook-ceph-mon-endpoints --template={{ "{{.data.data}}" }} | sed s/$/,float=${IP_ADDRESS}:6789/ | xargs -i kubectl patch cm -n rook-ceph rook-ceph-mon-endpoints -p "{\"data\":{\"data\":\"{}\"}}" + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Patch the secret to add the floating monitor: rook-ceph-config ..." + kubectl get secret -n rook-ceph rook-ceph-config --template={{ "{{.data.mon_host}}" }} | base64 -d -w0 | sed s/$/,[v2:${IP_ADDRESS}:3300,v1:${IP_ADDRESS}:6789]/ | base64 -w0 | xargs -i kubectl patch secret -n rook-ceph rook-ceph-config -p "{\"data\":{\"mon_host\":\"{}\"}}" + kubectl get secret -n rook-ceph rook-ceph-config --template={{ "{{.data.mon_initial_members}}" }} | base64 -d -w0 | sed s/$/,float/ | base64 -w0 | xargs -i kubectl patch secret -n rook-ceph rook-ceph-config -p "{\"data\":{\"mon_initial_members\":\"{}\"}}" + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Rollout restart the monitor deployments..." + kubectl rollout restart deployment -n rook-ceph --selector=app=rook-ceph-mon + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Wait for deployments to be available..." 
+ while true; do kubectl wait --for=condition=available deployment --all=true -n rook-ceph -lapp=rook-ceph-mon --timeout=60s > /dev/null 2>&1 && break; sleep 1; done + + # Populate data for floating monitor deployment + FSID="" + while [ -z ${FSID} ]; do + FSID=$(kubectl get -n rook-ceph cephcluster/rook-ceph --template={{ "{{.status.ceph.fsid}}" }}) + sleep 1 + done + kubectl patch cm -n rook-ceph rook-ceph-mon-float-inputs -p "{\"data\":{\"fsid\":\"${FSID}\", \"enabled\":\"false\", \"health\":\"${INITIAL_CLUSTER_HEALTH}\"}}" + + echo "$(date +%Y-%m-%d\ %H:%M:%S.000): Done with pre-install tasks for floating monitor inclusion..." + exit 0 +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: rook-ceph-mon-float-inputs + namespace: {{ $.Release.Namespace }} + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": "pre-install" + "helm.sh/hook-weight": "-5" +data: + enabled: "false" + fsid: none + health: HEALTH_OK + managed_remove: {{ $.Values.config.managed_remove | quote }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: rook-ceph-mon-float + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": "pre-install" +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} + "helm.sh/hook-weight": "-5" +rules: +- apiGroups: [""] + resources: ["configmaps", "secrets"] + verbs: ["get","patch"] +- apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["list", "patch", "watch"] +- apiGroups: ["ceph.rook.io"] + resources: ["cephclusters"] + verbs: ["get", "list", "watch"] +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: rook-ceph-mon-float + namespace: {{ .Release.Namespace }} + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": "pre-install" +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} + "helm.sh/hook-weight": "-5" +imagePullSecrets: + - name: default-registry-key +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: rook-ceph-mon-float + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": "pre-install" +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} + "helm.sh/hook-weight": "-5" +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: rook-ceph-mon-float +subjects: +- kind: ServiceAccount + name: rook-ceph-mon-float + namespace: {{ .Release.Namespace }} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: "mon-float-pre-install" + namespace: {{ $.Release.Namespace }} + labels: + app: "{{ $.Chart.Name }}" + app.starlingx.io/component: {{ index $.Values "app.starlingx.io/component" }} + chart: "{{ $.Chart.Name }}-{{ $.Chart.AppVersion }}" + release: {{ $.Release.Name }} + annotations: + "meta.helm.sh/release-name": {{ $.Release.Name }} + "meta.helm.sh/release-namespace": {{ $.Release.Namespace }} + "helm.sh/hook": "pre-install" +{{- if eq .Values.config.debug "off" }} + "helm.sh/hook-delete-policy": "before-hook-creation","hook-succeeded" +{{- end }} +spec: + backoffLimit: 5 + activeDeadlineSeconds: 120 
+ template: + spec: + serviceAccountName: rook-ceph-mon-float + containers: + - name: "pre-install" + image: {{ $.Values.images.tags.kubectl | quote }} + imagePullPolicy: IfNotPresent + command: ["/bin/bash", "/tmp/mnt/rook_mon_float.sh"] + env: + - name: CLUSTER_CRD + value: {{ $.Values.config.clusterCrd }} + - name: IP_ADDRESS + value: {{ squote $.Values.config.ip_address }} + volumeMounts: + - name: rook-mon-float-config-pre-install + mountPath: /tmp/mnt + restartPolicy: Never + volumes: + - name: rook-mon-float-config-pre-install + configMap: + name: rook-mon-float-config-pre-install diff --git a/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/service.yaml b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/service.yaml new file mode 100644 index 0000000..f29d5fc --- /dev/null +++ b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/templates/service.yaml @@ -0,0 +1,53 @@ +{{/* +# +# Copyright (c) 2024 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +*/}} + +apiVersion: v1 +kind: Service +metadata: + labels: + ceph.rook.io/do-not-reconcile: "" + app.kubernetes.io/managed-by: rook-ceph-operator + app: rook-ceph-mon + app.kubernetes.io/component: cephclusters.ceph.rook.io + app.kubernetes.io/created-by: rook-ceph-operator + app.kubernetes.io/instance: float + app.kubernetes.io/name: ceph-mon + app.kubernetes.io/part-of: rook-ceph + ceph_daemon_id: float + ceph_daemon_type: mon + mon: float + mon_cluster: rook-ceph + rook.io/operator-namespace: rook-ceph + rook_cluster: rook-ceph + name: rook-ceph-mon-float + namespace: rook-ceph +spec: + clusterIP: {{ trimAll "[]" $.Values.config.ip_address }} + clusterIPs: + - {{ trimAll "[]" $.Values.config.ip_address }} + internalTrafficPolicy: Cluster + ipFamilies: + - {{ $.Values.config.ip_family }} + ipFamilyPolicy: SingleStack + ports: + - name: tcp-msgr1 + port: 6789 + protocol: TCP + targetPort: 6789 + - name: tcp-msgr2 + port: 3300 + protocol: TCP + targetPort: 3300 + selector: + app: rook-ceph-mon + ceph_daemon_id: float + mon: float + mon_cluster: rook-ceph + rook_cluster: rook-ceph + sessionAffinity: None + type: ClusterIP diff --git a/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/values.yaml b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/values.yaml new file mode 100644 index 0000000..a88973d --- /dev/null +++ b/helm-charts/custom/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor-helm/rook-ceph-floating-monitor/values.yaml @@ -0,0 +1,32 @@ +# +# Copyright (c) 2024 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +app.starlingx.io/component: platform + +images: + tags: + ceph: quay.io/ceph/ceph:v18.2.2 + kubectl: docker.io/bitnami/kubectl:1.29 + +config: + # Cluster CRD name to which the floating monitor should be added + clusterCrd: rook-ceph + + # keep (on)/remove (off) successful jobs run as helm hooks + debug: "on" + + # Static IP in cluster service subnet + # IPV4 + ip_family: IPv4 + ip_address: '10.96.0.16' + + # IPV6 + #ip_address: '[aefd:207::16]' + #ip_family: IPv6 + + # On helm delete, orchestrate handoff back to Rook Operator (true) or skip + # orchestration and just remove the deployment (false) + managed_remove: true
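For an IPv6 cluster service subnet, the defaults above can be overridden at
install time. A minimal sketch (the release name, chart path, and example
address mirror the commented values above and are assumptions):

    helm upgrade --install rook-ceph-floating-monitor \
        /usr/lib/helm/rook-ceph-floating-monitor-*.tgz -n rook-ceph \
        --set config.ip_family=IPv6 \
        --set config.ip_address='[aefd:207::16]'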