Test job for legacy OSH Ceph to Rook migration

At the moment the recommended way of managing Ceph clusters
is the Rook-Ceph operator. However, some users still rely on
the legacy OSH Ceph* charts. Since Ceph is a critical part of
the infrastructure, we suggest a migration procedure, and this
PR adds a job to test it.

Change-Id: I837c8707b9fa45ff4350641920649188be1ce8da
Vladimir Kozhukalov 2024-06-20 03:13:18 -05:00
parent bc45596483
commit 1d34fbba2a
21 changed files with 977 additions and 20 deletions

View File

@@ -15,6 +15,6 @@ apiVersion: v1
appVersion: v1.0.0
description: OpenStack-Helm Ceph Mon
name: ceph-mon
version: 0.1.34
version: 0.1.35
home: https://github.com/ceph/ceph
...

View File

@@ -91,6 +91,7 @@ EOF
}
#create_kube_storage_key <ceph_key> <kube_secret_name>
create_kube_storage_key ${CEPH_CLIENT_KEY} ${CEPH_STORAGECLASS_ADMIN_SECRET_NAME}
create_kube_storage_key ${CEPH_CLIENT_KEY} ${CEPH_STORAGECLASS_ADMIN_SECRET_NAME_NODE}
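# The second call above (judging by the secret name variable) stores the same
# admin key in a separate secret intended for the CSI node plugin; it pairs
# with the adminSecretNameNode value introduced in this change.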
{{ else }}

View File

@@ -96,6 +96,8 @@ spec:
value: {{ .Values.secrets.keyrings.admin }}
- name: CEPH_STORAGECLASS_ADMIN_SECRET_NAME
value: {{ .Values.storageclass.rbd.parameters.adminSecretName }}
- name: CEPH_STORAGECLASS_ADMIN_SECRET_NAME_NODE
value: {{ .Values.storageclass.rbd.parameters.adminSecretNameNode }}
command:
- /tmp/keys-storage-keyring-manager.sh
volumeMounts:

View File

@@ -436,6 +436,7 @@ storageclass:
rbd:
parameters:
adminSecretName: pvc-ceph-conf-combined-storageclass
adminSecretNameNode: pvc-ceph-conf-combined-storageclass
cephfs:
provision_storage_class: true
provisioner: ceph.com/cephfs

View File

@@ -15,6 +15,6 @@ apiVersion: v1
appVersion: v1.0.0
description: OpenStack-Helm Ceph Provisioner
name: ceph-provisioners
version: 0.1.29
version: 0.1.30
home: https://github.com/ceph/ceph
...

View File

@@ -109,7 +109,7 @@ spec:
- "--nodeserver=true"
- "--endpoint=$(CSI_ENDPOINT)"
- "--v=0"
- "--drivername=$(DEPLOYMENT_NAMESPACE).rbd.csi.ceph.com"
- "--drivername={{ $envAll.Values.storageclass.csi_rbd.provisioner }}"
- "--pidlimit=-1"
env:
- name: DEPLOYMENT_NAMESPACE

View File

@@ -231,7 +231,7 @@ spec:
- "--controllerserver=true"
- "--endpoint=$(CSI_ENDPOINT)"
- "--v=0"
- "--drivername=$(DEPLOYMENT_NAMESPACE).rbd.csi.ceph.com"
- "--drivername={{ $envAll.Values.storageclass.csi_rbd.provisioner }}"
- "--pidlimit=-1"
env:
- name: DEPLOYMENT_NAMESPACE

View File

@@ -0,0 +1,11 @@
---
- hosts: all
tasks:
- name: Put keys to .ssh/authorized_keys
lineinfile:
path: /home/zuul/.ssh/authorized_keys
state: present
line: "{{ item }}"
loop:
- "ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBMyM6sgu/Xgg+VaLJX5c6gy6ynYX7pO7XNobnKotYRulcEkmiLprvLSg+WP25VDAcSoif3rek3qiVnEYh6R2/Go= vlad@russell"
...

View File

@@ -35,4 +35,5 @@ ceph-mon:
- 0.1.32 Update Rook to 1.12.5 and Ceph to 18.2.0
- 0.1.33 Update Ceph images to Jammy and Reef 18.2.1
- 0.1.34 Update Ceph images to patched 18.2.2 and restore debian-reef repo
- 0.1.35 Use separate secrets for CSI plugin and CSI provisioner
...

View File

@@ -29,4 +29,5 @@ ceph-provisioners:
- 0.1.27 Update Rook to 1.12.5 and Ceph to 18.2.0
- 0.1.28 Update Ceph images to Jammy and Reef 18.2.1
- 0.1.29 Update Ceph images to patched 18.2.2 and restore debian-reef repo
- 0.1.30 Specify CSI drivername in values.yaml
...

View File

@@ -14,8 +14,10 @@
set -xe
: ${OSH_INFRA_HELM_REPO:="../openstack-helm-infra"}
: ${OSH_INFRA_PATH:="../openstack-helm-infra"}
#NOTE: Deploy command
: ${OSH_EXTRA_HELM_ARGS:=""}
tee /tmp/ceph-openstack-config.yaml <<EOF
endpoints:
ceph_mon:
@@ -36,9 +38,9 @@ conf:
enabled: false
EOF
: ${OSH_INFRA_EXTRA_HELM_ARGS_CEPH_NS_ACTIVATE:="$(helm osh get-values-overrides -c ceph-provisioners ${FEATURES})"}
: ${OSH_INFRA_EXTRA_HELM_ARGS_CEPH_NS_ACTIVATE:="$(helm osh get-values-overrides ${DOWNLOAD_OVERRIDES:-} -p ${OSH_INFRA_PATH} -c ceph-provisioners ${FEATURES})"}
helm upgrade --install ceph-openstack-config ./ceph-provisioners \
helm upgrade --install ceph-openstack-config ${OSH_INFRA_HELM_REPO}/ceph-provisioners \
--namespace=openstack \
--values=/tmp/ceph-openstack-config.yaml \
${OSH_INFRA_EXTRA_HELM_ARGS} \

View File

@@ -22,7 +22,6 @@ set -xe
NUMBER_OF_OSDS="$(kubectl get nodes -l ceph-osd=enabled --no-headers | wc -l)"
#NOTE: Deploy command
: ${OSH_EXTRA_HELM_ARGS:=""}
[ -s /tmp/ceph-fs-uuid.txt ] || uuidgen > /tmp/ceph-fs-uuid.txt
CEPH_FS_ID="$(cat /tmp/ceph-fs-uuid.txt)"
#NOTE(portdirect): to use RBD devices with Ubuntu kernels < 4.5 this

View File

@@ -0,0 +1,198 @@
#!/bin/bash
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
set -xe
: ${CEPH_OSD_DATA_DEVICE:="/dev/loop100"}
: ${POD_NETWORK_CIDR:="10.244.0.0/24"}
: ${OSH_INFRA_HELM_REPO:="../openstack-helm-infra"}
: ${OSH_INFRA_PATH:="../openstack-helm-infra"}
NUMBER_OF_OSDS="$(kubectl get nodes -l ceph-osd=enabled --no-headers | wc -l)"
#NOTE: Deploy command
[ -s /tmp/ceph-fs-uuid.txt ] || uuidgen > /tmp/ceph-fs-uuid.txt
CEPH_FS_ID="$(cat /tmp/ceph-fs-uuid.txt)"
#NOTE(portdirect): to use RBD devices with Ubuntu kernels < 4.5 this
# should be set to 'hammer'
. /etc/os-release
if [ "x${ID}" == "xcentos" ] || \
([ "x${ID}" == "xubuntu" ] && \
dpkg --compare-versions "$(uname -r)" "lt" "4.5"); then
CRUSH_TUNABLES=hammer
else
CRUSH_TUNABLES=null
fi
# Most PV fields are immutable, and in the case of the CSI RBD plugin they
# refer to the secrets that were used by the RBD provisioner and RBD attacher.
# These fields can not be updated later.
# So for testing purposes we assume the legacy Ceph cluster is deployed with
# the following secret names for the CSI plugin:
# - rook-csi-rbd-provisioner
# - rook-csi-rbd-node
# These are exactly the secret names Rook uses by default for the CSI plugin,
# so after the migration the PVs will be adopted by the new Rook Ceph cluster.
#
# Alternatively, if we deploy the legacy Ceph cluster with the default values,
# we could later force Rook to use the same CSI secret names as the legacy
# cluster. For example, the secret name pvc-ceph-conf-combined-storageclass
# is used by default in the legacy charts.
#
# The same applies to the CSI provisioner drivername option. For testing we
# deploy the legacy cluster with the drivername set to rook-ceph.rbd.csi.ceph.com
# while the default value is ceph.rbd.csi.ceph.com.
# This is also for the sake of a smooth adoption of PVs.
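# For illustration (assuming kubectl is configured; the cluster may not have
# any RBD-backed PVs yet), this read-only check prints, for each PV, the CSI
# driver name and the node-stage secret it references, i.e. the immutable
# fields that have to match after the migration:
kubectl get pv -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.csi.driver}{"\t"}{.spec.csi.nodeStageSecretRef.name}{"\n"}{end}' || true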
tee /tmp/ceph.yaml <<EOF
endpoints:
ceph_mon:
namespace: ceph
port:
mon:
default: 6789
ceph_mgr:
namespace: ceph
port:
mgr:
default: 7000
metrics:
default: 9283
network:
public: "${POD_NETWORK_CIDR}"
cluster: "${POD_NETWORK_CIDR}"
port:
mon: 6789
rgw: 8088
mgr: 7000
deployment:
storage_secrets: true
ceph: true
csi_rbd_provisioner: true
client_secrets: false
rgw_keystone_user_and_endpoints: false
bootstrap:
enabled: true
conf:
rgw_ks:
enabled: false
ceph:
global:
fsid: ${CEPH_FS_ID}
mon_addr: :6789
mon_allow_pool_size_one: true
osd_pool_default_size: 1
osd:
osd_crush_chooseleaf_type: 0
pool:
crush:
tunables: ${CRUSH_TUNABLES}
target:
osd: ${NUMBER_OF_OSDS}
final_osd: ${NUMBER_OF_OSDS}
pg_per_osd: 100
default:
crush_rule: same_host
spec:
# Health metrics pool
- name: .mgr
application: mgr_devicehealth
replication: 1
percent_total_data: 5
# RBD pool
- name: rbd
application: rbd
replication: 1
percent_total_data: 40
storage:
osd:
- data:
type: bluestore
location: ${CEPH_OSD_DATA_DEVICE}
# block_db:
# location: ${CEPH_OSD_DB_WAL_DEVICE}
# size: "5GB"
# block_wal:
# location: ${CEPH_OSD_DB_WAL_DEVICE}
# size: "2GB"
storageclass:
rbd:
parameters:
adminSecretName: rook-csi-rbd-provisioner
adminSecretNameNode: rook-csi-rbd-node
csi_rbd:
provisioner: rook-ceph.rbd.csi.ceph.com
parameters:
clusterID: ceph
csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
csi.storage.k8s.io/controller-expand-secret-namespace: ceph
csi.storage.k8s.io/fstype: ext4
csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
csi.storage.k8s.io/node-stage-secret-namespace: ceph
csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
csi.storage.k8s.io/provisioner-secret-namespace: ceph
pool: rbd
imageFeatures: layering
imageFormat: "2"
adminId: null
adminSecretName: rook-csi-rbd-provisioner
adminSecretNamespace: ceph
userId: null
userSecretName: null
pod:
replicas:
mds: 1
mgr: 1
rgw: 1
cephfs_provisioner: 1
rbd_provisioner: 1
csi_rbd_provisioner: 1
jobs:
ceph_defragosds:
# Execute every 15 minutes for gates
cron: "*/15 * * * *"
history:
# Number of successful job to keep
successJob: 1
# Number of failed job to keep
failJob: 1
concurrency:
# Skip new job if previous job still active
execPolicy: Forbid
startingDeadlineSecs: 60
manifests:
job_bootstrap: false
EOF
for CHART in ceph-mon ceph-osd ceph-client ceph-provisioners; do
helm upgrade --install --create-namespace ${CHART} ${OSH_INFRA_HELM_REPO}/${CHART} \
--namespace=ceph \
--values=/tmp/ceph.yaml \
${OSH_INFRA_EXTRA_HELM_ARGS} \
${OSH_INFRA_EXTRA_HELM_ARGS_CEPH_DEPLOY:-$(helm osh get-values-overrides ${DOWNLOAD_OVERRIDES:-} -p ${OSH_INFRA_PATH} -c ${CHART} ${FEATURES})}
#NOTE: Wait for deploy
helm osh wait-for-pods ceph
#NOTE: Validate deploy
MON_POD=$(kubectl get pods \
--namespace=ceph \
--selector="application=ceph" \
--selector="component=mon" \
--no-headers | awk '{ print $1; exit }')
kubectl exec -n ceph ${MON_POD} -- ceph -s
done

View File

@@ -0,0 +1,29 @@
#!/bin/bash
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
set -xe
: ${NAMESPACE:=openstack}
# Now we are ready to scale the stateful applications back up
# and use the same PVs provisioned earlier by the legacy Ceph cluster
kubectl -n ${NAMESPACE} scale statefulset mariadb-server --replicas=1
kubectl -n ${NAMESPACE} scale statefulset rabbitmq-rabbitmq --replicas=1
sleep 30
helm osh wait-for-pods ${NAMESPACE}
kubectl -n ${NAMESPACE} get po
kubectl -n ${NAMESPACE} get pvc
kubectl get pv -o yaml

View File

@@ -0,0 +1,34 @@
#!/bin/bash
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
set -xe
: ${NAMESPACE:=openstack}
# Before the migration we have to scale down all the stateful applications
# so that the PVs provisioned by Ceph are not attached to any pods
kubectl -n ${NAMESPACE} scale statefulset mariadb-server --replicas=0
kubectl -n ${NAMESPACE} scale statefulset rabbitmq-rabbitmq --replicas=0
sleep 30
helm osh wait-for-pods ${NAMESPACE}
kubectl -n ${NAMESPACE} get po
kubectl -n ${NAMESPACE} get pvc
kubectl get pv -o yaml
# Delete CSI secrets so Rook can deploy them from scratch
kubectl -n ceph delete secret rook-csi-rbd-provisioner
kubectl -n ceph delete secret rook-csi-rbd-node
kubectl -n ceph get secret

View File

@@ -25,8 +25,8 @@ ROOK_RELEASE=${ROOK_RELEASE:-1.13.7}
CEPH_RELEASE=${CEPH_RELEASE:-18.2.2}
ROOK_CEPH_NAMESPACE=${ROOK_CEPH_NAMESPACE:-rook-ceph}
CEPH_NAMESPACE=${CEPH_NAMESPACE:-ceph}
ROOK_OPERATOR_YAML=${ROOK_OPERATOR_YAML:-rook-operator.yaml}
ROOK_CEPH_YAML=${ROOK_CEPH_YAML:-rook-ceph.yaml}
ROOK_OPERATOR_YAML=${ROOK_OPERATOR_YAML:-/tmp/rook-operator.yaml}
ROOK_CEPH_YAML=${ROOK_CEPH_YAML:-/tmp/rook-ceph.yaml}
# Return a list of unique status strings for the pods of a specified application
# (pods that share a status produce a single entry)
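# A rough sketch of such a helper, assuming the naming conventions used in this
# script (the actual implementation is not shown in this excerpt):
#   app_status() {
#     kubectl -n ${CEPH_NAMESPACE} get pods -l app="$1" --no-headers \
#       -o custom-columns=STATUS:.status.phase | sort -u | xargs
#   }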
@@ -39,14 +39,18 @@ function wait_for_initial_rook_deployment() {
set +x
echo "Waiting for initial Rook Ceph cluster deployment..."
# Here in the while clause we have to check this
# if monitoring is enabled
# $(app_status rook-ceph-exporter)" != "Running"
# The initial deployment can't deploy OSDs or RGW
while [[ "$(app_status rook-ceph-mon)" != "Running" || \
"$(app_status rook-ceph-mgr)" != "Running" || \
"$(app_status rook-ceph-mds)" != "Running" || \
"$(app_status rook-ceph-tools)" != "Running" || \
"$(app_status rook-ceph-exporter)" != "Running" || \
"$(app_status rook-ceph-osd-prepare)" != "Succeeded" ]]
do
echo "Waiting for INITIAL Rook Ceph deployment ..."
sleep 5
done
set -x
@@ -57,16 +61,20 @@ function wait_for_full_rook_deployment() {
set +x
echo "Waiting for full Rook Ceph cluster deployment..."
# Here in the while clause we have to check this
# if monitoring is enabled
# $(app_status rook-ceph-exporter)" != "Running"
# Look for everything from the initial deployment plus OSDs and RGW
while [[ "$(app_status rook-ceph-mon)" != "Running" || \
"$(app_status rook-ceph-mgr)" != "Running" || \
"$(app_status rook-ceph-mds)" != "Running" || \
"$(app_status rook-ceph-tools)" != "Running" || \
"$(app_status rook-ceph-exporter)" != "Running" || \
"$(app_status rook-ceph-osd-prepare)" != "Succeeded" || \
"$(app_status rook-ceph-osd)" != "Running" || \
"$(app_status rook-ceph-rgw)" != "Running" ]]
do
echo "Waiting for FULL Rook Ceph deployment ..."
sleep 5
done
set -x
@@ -178,7 +186,8 @@ export MON_HOST_IP=$(kubectl get nodes -o json | jq -r '.items[] | select(.metad
# Shut down the Rook operator, delete the rook-ceph deployments, and get the new rook-ceph-mon IP address
kubectl -n ${ROOK_CEPH_NAMESPACE} scale deploy rook-ceph-operator --replicas=0
kubectl -n ${CEPH_NAMESPACE} get deploy -o json | jq -r '.items[] | select(.metadata.name != "rook-ceph-tools") | .metadata.name' | xargs kubectl -n ${CEPH_NAMESPACE} delete deploy
MON_IP=$(kubectl -n ${CEPH_NAMESPACE} get service rook-ceph-mon-a -o json | jq -r '.spec.clusterIP')
#MON_IP=$(kubectl -n ${CEPH_NAMESPACE} get service rook-ceph-mon-a -o json | jq -r '.spec.clusterIP')
MON_IP=$(kubectl -n ${CEPH_NAMESPACE} get cm rook-ceph-mon-endpoints -o jsonpath='{.data.data}' | sed 's/.=//g' | awk -F: '{print $1}')
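# For reference, the data field of the rook-ceph-mon-endpoints configmap is
# assumed to look like "a=10.96.12.34:6789", so stripping the "<id>=" prefix and
# the ":<port>" suffix leaves the bare mon IP, e.g.:
#   echo "a=10.96.12.34:6789" | sed 's/.=//g' | awk -F: '{print $1}'   # -> 10.96.12.34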
wait_for_terminate
# Download the old mon store and update its key to the new one

View File

@@ -0,0 +1,621 @@
#!/bin/bash
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
set -xe
ROOK_RELEASE=v1.13.3
: ${CEPH_OSD_DATA_DEVICE:="/dev/loop100"}
tee /tmp/rook-operator.yaml <<EOF
image:
repository: rook/ceph
tag: ${ROOK_RELEASE}
pullPolicy: IfNotPresent
crds:
enabled: true
resources:
limits:
cpu: 200m
memory: 256Mi
requests:
cpu: 100m
memory: 128Mi
nodeSelector: {}
tolerations: []
unreachableNodeTolerationSeconds: 5
currentNamespaceOnly: false
annotations: {}
logLevel: INFO
rbacEnable: true
pspEnable: false
priorityClassName:
allowLoopDevices: true
csi:
enableRbdDriver: true
enableCephfsDriver: false
enableGrpcMetrics: false
enableCSIHostNetwork: true
enableCephfsSnapshotter: true
enableNFSSnapshotter: true
enableRBDSnapshotter: true
enablePluginSelinuxHostMount: false
enableCSIEncryption: false
pluginPriorityClassName: system-node-critical
provisionerPriorityClassName: system-cluster-critical
rbdFSGroupPolicy: "File"
cephFSFSGroupPolicy: "File"
nfsFSGroupPolicy: "File"
enableOMAPGenerator: false
cephFSKernelMountOptions:
enableMetadata: false
provisionerReplicas: 1
clusterName: ceph
logLevel: 0
sidecarLogLevel:
rbdPluginUpdateStrategy:
rbdPluginUpdateStrategyMaxUnavailable:
cephFSPluginUpdateStrategy:
nfsPluginUpdateStrategy:
grpcTimeoutInSeconds: 150
allowUnsupportedVersion: false
csiRBDPluginVolume:
csiRBDPluginVolumeMount:
csiCephFSPluginVolume:
csiCephFSPluginVolumeMount:
csiRBDProvisionerResource: |
- name : csi-provisioner
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 128Mi
cpu: 100m
- name : csi-resizer
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 128Mi
cpu: 100m
- name : csi-attacher
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 128Mi
cpu: 100m
- name : csi-snapshotter
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 128Mi
cpu: 100m
- name : csi-rbdplugin
resource:
requests:
memory: 128Mi
cpu: 250m
limits:
memory: 128Mi
cpu: 250m
- name : csi-omap-generator
resource:
requests:
memory: 128Mi
cpu: 250m
limits:
memory: 128Mi
cpu: 250m
- name : liveness-prometheus
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 128Mi
cpu: 100m
csiRBDPluginResource: |
- name : driver-registrar
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 128Mi
cpu: 100m
- name : csi-rbdplugin
resource:
requests:
memory: 128Mi
cpu: 250m
limits:
memory: 128Mi
cpu: 250m
- name : liveness-prometheus
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 256Mi
cpu: 100m
csiCephFSProvisionerResource: |
- name : csi-provisioner
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 128Mi
cpu: 200m
- name : csi-resizer
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 128Mi
cpu: 200m
- name : csi-attacher
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 128Mi
cpu: 200m
- name : csi-snapshotter
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 128Mi
cpu: 200m
- name : csi-cephfsplugin
resource:
requests:
memory: 128Mi
cpu: 250m
limits:
memory: 128Mi
cpu: 250m
- name : liveness-prometheus
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 128Mi
cpu: 100m
csiCephFSPluginResource: |
- name : driver-registrar
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 128Mi
cpu: 100m
- name : csi-cephfsplugin
resource:
requests:
memory: 128Mi
cpu: 250m
limits:
memory: 128Mi
cpu: 250m
- name : liveness-prometheus
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 128Mi
cpu: 100m
csiNFSProvisionerResource: |
- name : csi-provisioner
resource:
requests:
memory: 128Mi
cpu: 100m
limits:
memory: 128Mi
cpu: 200m
- name : csi-nfsplugin
resource:
requests:
memory: 128Mi
cpu: 250m
limits:
memory: 128Mi
cpu: 250m
- name : csi-attacher
resource:
requests:
memory: 128Mi
cpu: 250m
limits:
memory: 128Mi
cpu: 250m
csiNFSPluginResource: |
- name : driver-registrar
resource:
requests:
memory: 128Mi
cpu: 50m
limits:
memory: 128Mi
cpu: 100m
- name : csi-nfsplugin
resource:
requests:
memory: 128Mi
cpu: 250m
limits:
memory: 128Mi
cpu: 250m
provisionerTolerations:
provisionerNodeAffinity: #key1=value1,value2; key2=value3
pluginTolerations:
pluginNodeAffinity: # key1=value1,value2; key2=value3
enableLiveness: false
cephfsGrpcMetricsPort:
cephfsLivenessMetricsPort:
rbdGrpcMetricsPort:
csiAddonsPort:
forceCephFSKernelClient: true
rbdLivenessMetricsPort:
kubeletDirPath:
cephcsi:
image:
registrar:
image:
provisioner:
image:
snapshotter:
image:
attacher:
image:
resizer:
image:
imagePullPolicy: IfNotPresent
cephfsPodLabels: #"key1=value1,key2=value2"
nfsPodLabels: #"key1=value1,key2=value2"
rbdPodLabels: #"key1=value1,key2=value2"
csiAddons:
enabled: false
image: "quay.io/csiaddons/k8s-sidecar:v0.5.0"
nfs:
enabled: false
topology:
enabled: false
domainLabels:
readAffinity:
enabled: false
crushLocationLabels:
cephFSAttachRequired: true
rbdAttachRequired: true
nfsAttachRequired: true
enableDiscoveryDaemon: false
cephCommandsTimeoutSeconds: "15"
useOperatorHostNetwork:
discover:
toleration:
tolerationKey:
tolerations:
nodeAffinity: # key1=value1,value2; key2=value3
podLabels: # "key1=value1,key2=value2"
resources:
disableAdmissionController: true
hostpathRequiresPrivileged: false
disableDeviceHotplug: false
discoverDaemonUdev:
imagePullSecrets:
enableOBCWatchOperatorNamespace: true
admissionController:
EOF
tee /tmp/rook-ceph.yaml <<EOF
operatorNamespace: rook-ceph
clusterName: ceph
kubeVersion:
configOverride: |
[global]
mon_allow_pool_delete = true
mon_allow_pool_size_one = true
osd_pool_default_size = 1
osd_pool_default_min_size = 1
mon_warn_on_pool_no_redundancy = false
auth_allow_insecure_global_id_reclaim = false
toolbox:
enabled: true
tolerations: []
affinity: {}
resources:
limits:
cpu: "100m"
memory: "64Mi"
requests:
cpu: "100m"
memory: "64Mi"
priorityClassName:
monitoring:
enabled: false
metricsDisabled: true
createPrometheusRules: false
rulesNamespaceOverride:
prometheusRule:
labels: {}
annotations: {}
pspEnable: false
cephClusterSpec:
cephVersion:
image: quay.io/ceph/ceph:v18.2.0
allowUnsupported: false
dataDirHostPath: /var/lib/rook
skipUpgradeChecks: false
continueUpgradeAfterChecksEvenIfNotHealthy: false
waitTimeoutForHealthyOSDInMinutes: 10
mon:
count: 1
allowMultiplePerNode: false
mgr:
count: 1
allowMultiplePerNode: false
modules:
- name: pg_autoscaler
enabled: true
- name: rook
enabled: true
- name: nfs
enabled: false
dashboard:
enabled: true
ssl: false
network:
connections:
encryption:
enabled: false
compression:
enabled: false
requireMsgr2: false
provider: host
crashCollector:
disable: true
logCollector:
enabled: true
periodicity: daily # one of: hourly, daily, weekly, monthly
maxLogSize: 500M # SUFFIX may be 'M' or 'G'. Must be at least 1M.
cleanupPolicy:
confirmation: ""
sanitizeDisks:
method: quick
dataSource: zero
iteration: 1
allowUninstallWithVolumes: false
monitoring:
enabled: false
metricsDisabled: true
resources:
mgr:
limits:
cpu: "250m"
memory: "512Mi"
requests:
cpu: "250m"
memory: "5Mi"
mon:
limits:
cpu: "250m"
memory: "256Mi"
requests:
cpu: "250m"
memory: "128Mi"
osd:
limits:
cpu: "500m"
memory: "2Gi"
requests:
cpu: "500m"
memory: "1Gi"
prepareosd:
requests:
cpu: "500m"
memory: "50Mi"
mgr-sidecar:
limits:
cpu: "200m"
memory: "50Mi"
requests:
cpu: "100m"
memory: "5Mi"
crashcollector:
limits:
cpu: "200m"
memory: "60Mi"
requests:
cpu: "100m"
memory: "60Mi"
logcollector:
limits:
cpu: "200m"
memory: "1Gi"
requests:
cpu: "100m"
memory: "100Mi"
cleanup:
limits:
cpu: "250m"
memory: "1Gi"
requests:
cpu: "250m"
memory: "100Mi"
removeOSDsIfOutAndSafeToRemove: false
priorityClassNames:
mon: system-node-critical
osd: system-node-critical
mgr: system-cluster-critical
storage: # cluster level storage configuration and selection
useAllNodes: true
useAllDevices: false
devices:
- name: "${CEPH_OSD_DATA_DEVICE}"
config:
databaseSizeMB: "5120"
walSizeMB: "2048"
disruptionManagement:
managePodBudgets: true
osdMaintenanceTimeout: 30
pgHealthCheckTimeout: 0
healthCheck:
daemonHealth:
mon:
disabled: false
interval: 45s
osd:
disabled: false
interval: 60s
status:
disabled: false
interval: 60s
livenessProbe:
mon:
disabled: false
mgr:
disabled: false
osd:
disabled: false
ingress:
dashboard:
annotations:
nginx.ingress.kubernetes.io/rewrite-target: /ceph-dashboard/$2
host:
name: dashboard.example.com
path: "/ceph-dashboard(/|$)(.*)"
ingressClassName: nginx
cephBlockPools:
- name: rbd
namespace: ceph
spec:
failureDomain: host
replicated:
size: 1
storageClass:
enabled: true
name: general
isDefault: true
reclaimPolicy: Delete
allowVolumeExpansion: true
volumeBindingMode: "Immediate"
mountOptions: []
allowedTopologies: []
parameters:
imageFormat: "2"
imageFeatures: layering
csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
csi.storage.k8s.io/provisioner-secret-namespace: "{{ .Release.Namespace }}"
csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
csi.storage.k8s.io/controller-expand-secret-namespace: "{{ .Release.Namespace }}"
csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
csi.storage.k8s.io/node-stage-secret-namespace: "{{ .Release.Namespace }}"
csi.storage.k8s.io/fstype: ext4
cephFileSystems:
- name: cephfs
namespace: ceph
spec:
metadataPool:
replicated:
size: 1
dataPools:
- failureDomain: host
replicated:
size: 1
name: data
metadataServer:
activeCount: 1
activeStandby: false
resources:
limits:
cpu: "250m"
memory: "50Mi"
requests:
cpu: "250m"
memory: "10Mi"
priorityClassName: system-cluster-critical
storageClass:
enabled: true
isDefault: false
name: ceph-filesystem
pool: data0
reclaimPolicy: Delete
allowVolumeExpansion: true
volumeBindingMode: "Immediate"
mountOptions: []
parameters:
csi.storage.k8s.io/provisioner-secret-name: rook-csi-cephfs-provisioner
csi.storage.k8s.io/provisioner-secret-namespace: "{{ .Release.Namespace }}"
csi.storage.k8s.io/controller-expand-secret-name: rook-csi-cephfs-provisioner
csi.storage.k8s.io/controller-expand-secret-namespace: "{{ .Release.Namespace }}"
csi.storage.k8s.io/node-stage-secret-name: rook-csi-cephfs-node
csi.storage.k8s.io/node-stage-secret-namespace: "{{ .Release.Namespace }}"
csi.storage.k8s.io/fstype: ext4
cephBlockPoolsVolumeSnapshotClass:
enabled: false
name: general
isDefault: false
deletionPolicy: Delete
annotations: {}
labels: {}
parameters: {}
cephObjectStores:
- name: default
namespace: ceph
spec:
allowUsersInNamespaces:
- "*"
metadataPool:
failureDomain: host
replicated:
size: 1
dataPool:
failureDomain: host
replicated:
size: 1
preservePoolsOnDelete: true
gateway:
port: 8080
resources:
limits:
cpu: "500m"
memory: "128Mi"
requests:
cpu: "500m"
memory: "32Mi"
instances: 1
priorityClassName: system-cluster-critical
storageClass:
enabled: true
name: ceph-bucket
reclaimPolicy: Delete
volumeBindingMode: "Immediate"
parameters:
region: us-east-1
EOF
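# A minimal sketch (assumption: the upstream Rook Helm charts from
# https://charts.rook.io/release are used; the exact install commands are not
# shown in this excerpt) of how the two values files above are typically consumed:
#   helm repo add rook-release https://charts.rook.io/release
#   helm upgrade --install rook-ceph rook-release/rook-ceph \
#     --namespace rook-ceph --create-namespace -f /tmp/rook-operator.yaml
#   helm upgrade --install rook-ceph-cluster rook-release/rook-ceph-cluster \
#     --namespace ceph --create-namespace -f /tmp/rook-ceph.yaml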

View File

@@ -18,17 +18,18 @@ set -xe
: ${OSH_INFRA_HELM_REPO:="../openstack-helm-infra"}
: ${OSH_INFRA_PATH:="../openstack-helm-infra"}
: ${OSH_INFRA_EXTRA_HELM_ARGS_RABBITMQ:="$(helm osh get-values-overrides ${DOWNLOAD_OVERRIDES:-} -p ${OSH_INFRA_PATH} -c rabbitmq ${FEATURES})"}
: ${NAMESPACE:=openstack}
#NOTE: Deploy command
helm upgrade --install rabbitmq ${OSH_INFRA_HELM_REPO}/rabbitmq \
--namespace=openstack \
--set volume.enabled=false \
--namespace=${NAMESPACE} \
--set pod.replicas.server=1 \
--timeout=600s \
${OSH_INFRA_EXTRA_HELM_ARGS} \
${VOLUME_HELM_ARGS:="--set volume.enabled=false"} \
${OSH_INFRA_EXTRA_HELM_ARGS:=} \
${OSH_INFRA_EXTRA_HELM_ARGS_RABBITMQ}
#NOTE: Wait for deploy
helm osh wait-for-pods openstack
helm osh wait-for-pods ${NAMESPACE}
helm test rabbitmq --namespace openstack

View File

@@ -23,12 +23,12 @@ set -xe
#NOTE: Deploy command
helm upgrade --install mariadb ./mariadb \
--namespace=${NAMESPACE} \
--set monitoring.prometheus.enabled=true \
${OSH_INFRA_EXTRA_HELM_ARGS} \
${MONITORING_HELM_ARGS:="--set monitoring.prometheus.enabled=true"} \
${OSH_INFRA_EXTRA_HELM_ARGS:=} \
${OSH_INFRA_EXTRA_HELM_ARGS_MARIADB}
#NOTE: Wait for deploy
helm osh wait-for-pods osh-infra
helm osh wait-for-pods ${NAMESPACE}
if [ "x${RUN_HELM_TESTS}" != "xno" ]; then
# Delete the test pod if it still exists

View File

@@ -339,4 +339,50 @@
- ^memcached/.*
- ^libvirt/.*
- ^openvswitch/.*
- job:
name: openstack-helm-infra-ceph-migrate
description: |
This job tests the migration procedure from
a Ceph cluster managed by the legacy OSH ceph* charts
to a Ceph cluster managed by the Rook-Ceph operator.
parent: openstack-helm-infra-deploy
nodeset: openstack-helm-3nodes-ubuntu_jammy
timeout: 10800
pre-run:
- playbooks/prepare-hosts.yaml
- playbooks/mount-volumes.yaml
- playbooks/inject-keys.yaml
files:
- ^helm-toolkit/.*
- ^roles/.*
- ^ceph.*
- ^tools/deployment/ceph/.*
vars:
osh_params:
openstack_release: "2024.1"
container_distro_name: ubuntu
container_distro_version: jammy
gate_scripts:
- ./tools/deployment/common/prepare-k8s.sh
- ./tools/deployment/common/prepare-charts.sh
- ./tools/deployment/common/ingress.sh
# Deploy Ceph cluster using legacy OSH charts
- ./tools/deployment/ceph/ceph_legacy.sh
# Deploy stateful applications
- |
export NAMESPACE=openstack
export MONITORING_HELM_ARGS=" "
export OSH_INFRA_EXTRA_HELM_ARGS="--set pod.replicas.server=1 ${OSH_INFRA_EXTRA_HELM_ARGS}"
export RUN_HELM_TESTS=no
./tools/deployment/db/mariadb.sh
- |
export NAMESPACE=openstack
export VOLUME_HELM_ARGS=" "
./tools/deployment/common/rabbitmq.sh
# Migrate legacy Ceph to Rook
- ./tools/deployment/ceph/migrate-before.sh
- ./tools/deployment/ceph/migrate-values.sh
- ./tools/deployment/ceph/migrate-to-rook-ceph.sh
- ./tools/deployment/ceph/migrate-after.sh
...

View File

@@ -33,6 +33,7 @@
- openstack-helm-infra-tls-2024-1-ubuntu_jammy
- openstack-helm-infra-compute-kit-dpdk-2023-2-ubuntu_jammy
- openstack-helm-infra-keystone-cilium-2024-1-ubuntu_jammy
- openstack-helm-infra-ceph-migrate
gate:
jobs:
- openstack-helm-lint