[rook-ceph] Add a script to migrate Ceph clusters to Rook

This change adds a deployment script that can be used to migrate a Ceph cluster deployed with the legacy openstack-helm-infra Ceph charts to Rook. This process is disruptive. The Ceph cluster goes down and comes back up multiple times during the migration, but the end result is a Rook-deployed Ceph cluster with the original cluster FSID and all OSD data intact. Change-Id: Ied8ff94f25cd792a9be9f889bb6fdabc45a57f2e
2024-03-20 12:08:42 -06:00 · 2024-03-20 12:08:42 -06:00 · 1e84d3f714
commit 1e84d3f714
parent cbbeebb5a1
1 changed files with 253 additions and 0 deletions
--- a/tools/deployment/ceph/migrate-to-rook-ceph.sh
+++ b/tools/deployment/ceph/migrate-to-rook-ceph.sh
@@ -0,0 +1,253 @@
+#!/bin/bash
+
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+# Trace every command so that each step of the migration shows up in the output
+set -x
+
+# These variables can be set prior to running the script to deploy a specific
+# Ceph release using a specific Rook release. The namespaces for the Rook
+# operator and the Ceph cluster may also be set, along with the YAML definition
+# files that should be used for the Rook operator and Ceph cluster Helm charts.
+# The default values deploy the Rook operator in the rook-ceph namespace and
+# the Ceph cluster in the ceph namespace using rook-operator.yaml and
+# rook-ceph.yaml in the current directory.
+ROOK_RELEASE=${ROOK_RELEASE:-1.13.7}
+CEPH_RELEASE=${CEPH_RELEASE:-18.2.2}
+ROOK_CEPH_NAMESPACE=${ROOK_CEPH_NAMESPACE:-rook-ceph}
+# NOTE: CEPH_NAMESPACE is the documented name. The misspelled CEPH_NAMESPCE is
+# still honored as a fallback because it is the name this script used to read.
+CEPH_NAMESPACE=${CEPH_NAMESPACE:-${CEPH_NAMESPCE:-ceph}}
+ROOK_OPERATOR_YAML=${ROOK_OPERATOR_YAML:-rook-operator.yaml}
+ROOK_CEPH_YAML=${ROOK_CEPH_YAML:-rook-ceph.yaml}
+
+# Return a list of unique status strings for pods for a specified application
+# (Pods with the same status will return a single status)
+function app_status() {
+  kubectl -n ${CEPH_NAMESPACE} get pods -l app=${1} -o json | jq -r '.items[].status.phase' | sort | uniq
+}
+
+# Block until the first-pass Rook Ceph deployment has settled
+# Polls app_status every 5 seconds until the mon, mgr, mds, tools, and exporter
+# apps report only "Running" and the osd-prepare app reports only "Succeeded".
+# There is no timeout. Command tracing is turned off while polling and turned
+# back on before returning.
+function wait_for_initial_rook_deployment() {
+  set +x
+  echo "Waiting for initial Rook Ceph cluster deployment..."
+
+  # The initial deployment can't deploy OSDs or RGW
+  local -a wanted=(
+    "rook-ceph-mon=Running"
+    "rook-ceph-mgr=Running"
+    "rook-ceph-mds=Running"
+    "rook-ceph-tools=Running"
+    "rook-ceph-exporter=Running"
+    "rook-ceph-osd-prepare=Succeeded"
+  )
+  local entry settled
+
+  while true
+  do
+    settled=yes
+    for entry in "${wanted[@]}"
+    do
+      # Each entry is "<app>=<phase>"; stop at the first app that is not there yet
+      if [[ "$(app_status "${entry%%=*}")" != "${entry#*=}" ]]
+      then
+        settled=no
+        break
+      fi
+    done
+    if [[ "${settled}" == "yes" ]]
+    then
+      break
+    fi
+    sleep 5
+  done
+  set -x
+}
+
+# Block until the complete Rook Ceph deployment, including OSDs and RGW, is up
+# Polls app_status every 5 seconds until every listed app reports only its
+# expected phase. There is no timeout. Command tracing is turned off while
+# polling and turned back on before returning.
+function wait_for_full_rook_deployment() {
+  set +x
+  echo "Waiting for full Rook Ceph cluster deployment..."
+
+  # Look for everything from the initial deployment plus OSDs and RGW
+  # apps[i] must report exactly phases[i]
+  local -a apps=(rook-ceph-mon rook-ceph-mgr rook-ceph-mds rook-ceph-tools rook-ceph-exporter rook-ceph-osd-prepare rook-ceph-osd rook-ceph-rgw)
+  local -a phases=(Running Running Running Running Running Succeeded Running Running)
+  local idx
+  local pending=1
+
+  while (( pending ))
+  do
+    pending=0
+    for idx in "${!apps[@]}"
+    do
+      if [[ "$(app_status "${apps[idx]}")" != "${phases[idx]}" ]]
+      then
+        # First app that is not ready: pause, then start the checks over
+        pending=1
+        sleep 5
+        break
+      fi
+    done
+  done
+  set -x
+}
+
+# Function to wait for all pods except rook-ceph-tools to terminate
+# Globals:   CEPH_NAMESPACE (read)
+# Polls every 5 seconds until at most one line of the "kubectl get pods"
+# listing for the namespace contains "Running" (the one allowed line is meant
+# to be the rook-ceph-tools pod). There is no timeout. Command tracing is
+# turned off while polling and turned back on before returning.
+function wait_for_terminate() {
+  set +x
+  echo "Waiting for pods to terminate..."
+
+  local running
+  while true
+  do
+    # grep -c prints 0 when nothing matches, so the comparison always sees a number
+    running="$(kubectl -n "${CEPH_NAMESPACE}" get pods | grep -c "Running")"
+    if [[ "${running}" -le 1 ]]
+    then
+      break
+    fi
+    sleep 5
+  done
+  set -x
+}
+
+# Function to wait for the expected Ceph daemons and then for a HEALTH_OK state
+# Globals:   MON_COUNT, MGR_COUNT, OSD_COUNT (read)
+# Arguments: $1 - namespace that holds the Ceph cluster
+#            $2 - name of a pod in that namespace that can run the ceph CLI
+# Outputs:   progress messages on stdout, timeout warnings on stderr, and the
+#            output of "ceph status" at the end
+# Returns:   0 if both waits finished in time, 1 if either one timed out
+function wait_for_health_checks() {
+  # Locals keep this function from overwriting the script-wide CEPH_NAMESPACE
+  local namespace="${1}"
+  local client_pod="${2}"
+  local retry
+  local status=0
+  local daemons_up=no
+  local health_ok=no
+  set +x
+  echo "Waiting for the Ceph cluster to reach HEALTH_OK with all of the expected resources..."
+
+  # Time out each loop after ~15 minutes
+  for retry in {0..180}
+  do
+    # NOTE(review): the mgr count is read from the "unknown" bucket of
+    # "ceph mgr count-metadata name"; presumably mgr metadata has no "name"
+    # field so every mgr is counted there - confirm for the Ceph release in use.
+    if [[ $(kubectl -n "${namespace}" exec "${client_pod}" -- ceph mon stat -f json | jq -r '.quorum[].name' | wc -l) -eq "${MON_COUNT}" &&
+          $(kubectl -n "${namespace}" exec "${client_pod}" -- ceph mgr count-metadata name | jq '.unknown') -eq "${MGR_COUNT}" &&
+          $(kubectl -n "${namespace}" exec "${client_pod}" -- ceph osd stat -f json | jq '.num_up_osds') -eq "${OSD_COUNT}" ]]
+    then
+      daemons_up=yes
+      break
+    fi
+    sleep 5
+  done
+
+  if [[ "${daemons_up}" != "yes" ]]
+  then
+    echo "WARNING: timed out waiting for ${MON_COUNT} mons, ${MGR_COUNT} mgrs, and ${OSD_COUNT} OSDs" >&2
+    status=1
+  fi
+
+  for retry in {0..180}
+  do
+    if [[ "$(kubectl -n "${namespace}" exec "${client_pod}" -- ceph health)" == "HEALTH_OK" ]]
+    then
+      health_ok=yes
+      break
+    fi
+    sleep 5
+  done
+
+  if [[ "${health_ok}" != "yes" ]]
+  then
+    echo "WARNING: timed out waiting for the Ceph cluster to report HEALTH_OK" >&2
+    status=1
+  fi
+
+  # The final status is printed whether or not the waits succeeded
+  kubectl -n "${namespace}" exec "${client_pod}" -- ceph status
+  set -x
+  return "${status}"
+}
+
+# Save a legacy ceph-mon host and the existing cluster FSID for later
+export MON_POD=$(kubectl -n ${CEPH_NAMESPACE} get pods -l component=mon -o json | jq -r '.items[0].metadata.name')
+export FSID=$(kubectl -n ${CEPH_NAMESPACE} exec ${MON_POD} -- ceph fsid)
+export OLD_MON_HOST=$(kubectl -n ${CEPH_NAMESPACE} get pods -l component=mon -o json | jq -r '.items[0].spec.nodeName')
+export OLD_MON_HOST_IP=$(kubectl get nodes -o json | jq -r '.items[] | select(.metadata.name == env.OLD_MON_HOST) | .status.addresses | .[] | select(.type == "InternalIP") | .address')
+export MON_COUNT=$(kubectl -n ${CEPH_NAMESPACE} get pods -l component=mon -o json | jq '.items | length')
+export MGR_COUNT=$(kubectl -n ${CEPH_NAMESPACE} get pods -l component=mgr -o json | jq '.items | length')
+export OSD_COUNT=$(kubectl -n ${CEPH_NAMESPACE} get pods -l component=osd -o json | jq '.items | length')
+
+# Rename CephFS pools to match the expected names for Rook CephFS
+# For every filesystem the metadata pool is expected to be "<fs>-metadata" and
+# the first data pool "<fs>-data".
+FS_SPEC="$(kubectl -n "${CEPH_NAMESPACE}" exec "${MON_POD}" -- ceph fs ls -f json 2> /dev/null)"
+# The JSON is fed to jq quoted so that its brackets are never subject to word
+# splitting or filename expansion.
+for fs in $(jq -r '.[].name' <<< "${FS_SPEC}")
+do
+  EXPECTED_METADATA_POOL="${fs}-metadata"
+  # The filesystem name is passed with --arg instead of being spliced into the filter text
+  METADATA_POOL="$(jq -r --arg fs "${fs}" '.[] | select(.name == $fs) | .metadata_pool' <<< "${FS_SPEC}")"
+
+  if [[ "${METADATA_POOL}" != "${EXPECTED_METADATA_POOL}" ]]
+  then
+    kubectl -n "${CEPH_NAMESPACE}" exec "${MON_POD}" -- ceph osd pool rename "${METADATA_POOL}" "${EXPECTED_METADATA_POOL}"
+  fi
+
+  EXPECTED_DATA_POOL="${fs}-data"
+  # NOTE: Only one data pool must have the expected name. Only the first one is
+  # checked here. If it is renamed and another pool with the same name already
+  # exists, the rename will fail and there is no further action needed.
+  DATA_POOL="$(jq -r --arg fs "${fs}" '.[] | select(.name == $fs) | .data_pools[0]' <<< "${FS_SPEC}")"
+
+  if [[ "${DATA_POOL}" != "${EXPECTED_DATA_POOL}" ]]
+  then
+    kubectl -n "${CEPH_NAMESPACE}" exec "${MON_POD}" -- ceph osd pool rename "${DATA_POOL}" "${EXPECTED_DATA_POOL}"
+  fi
+done
+
+# Destroy resources in the Ceph namespace, delete Helm charts, and remove Ceph-related node labels
+for resource in cj deploy ds service job
+do
+  # xargs -r skips the delete when no object of this kind exists, instead of
+  # running "kubectl delete <kind>" without any names
+  kubectl -n "${CEPH_NAMESPACE}" get "${resource}" -o json \
+    | jq -r '.items[].metadata.name' \
+    | xargs -r kubectl -n "${CEPH_NAMESPACE}" delete "${resource}"
+done
+for release in ceph-provisioners ceph-client ceph-mon ceph-osd
+do
+  helm -n "${CEPH_NAMESPACE}" delete "${release}"
+done
+for node in $(kubectl get nodes -o json | jq -r '.items[].metadata.name')
+do
+  # A trailing "-" after a label key asks kubectl to remove that label
+  kubectl label node "${node}" ceph-mds- ceph-mgr- ceph-mon- ceph-osd- ceph-rgw-
+done
+
+# Use rook-helm to deploy a new Ceph cluster
+# The operator and cluster namespaces come from ROOK_CEPH_NAMESPACE and
+# CEPH_NAMESPACE so that the charts are installed where the rest of this
+# script looks for them.
+helm repo add rook-release https://charts.rook.io/release
+helm install --create-namespace --namespace "${ROOK_CEPH_NAMESPACE}" rook-ceph rook-release/rook-ceph \
+  --version "${ROOK_RELEASE}" -f "${ROOK_OPERATOR_YAML}"
+helm upgrade --install --create-namespace --namespace "${CEPH_NAMESPACE}" rook-ceph-cluster \
+  --set operatorNamespace="${ROOK_CEPH_NAMESPACE}" rook-release/rook-ceph-cluster \
+  --version "${ROOK_RELEASE}" -f "${ROOK_CEPH_YAML}"
+wait_for_initial_rook_deployment
+
+# Retrieve the keyring from the new mon pod and save its host for further work
+export MON_POD=$(kubectl -n ${CEPH_NAMESPACE} get pods -l app=rook-ceph-mon -o json | jq -r '.items[0].metadata.name')
+kubectl -n ${CEPH_NAMESPACE} exec ${MON_POD} -- cat /etc/ceph/keyring-store/keyring > /tmp/mon-a.keyring
+export MON_HOST=$(kubectl -n ${CEPH_NAMESPACE} get pods -l app=rook-ceph-mon -o json | jq -r '.items[0].spec.nodeName')
+export MON_HOST_IP=$(kubectl get nodes -o json | jq -r '.items[] | select(.metadata.name == env.MON_HOST) | .status.addresses | .[] | select(.type == "InternalIP") | .address')
+
+# Shut down the Rook operator, delete the rook-ceph deployments, and get the new rook-ceph-mon IP address
+kubectl -n "${ROOK_CEPH_NAMESPACE}" scale deploy rook-ceph-operator --replicas=0
+# Every deployment except rook-ceph-tools is deleted; xargs -r skips the
+# delete when no other deployment exists
+kubectl -n "${CEPH_NAMESPACE}" get deploy -o json \
+  | jq -r '.items[] | select(.metadata.name != "rook-ceph-tools") | .metadata.name' \
+  | xargs -r kubectl -n "${CEPH_NAMESPACE}" delete deploy
+MON_IP="$(kubectl -n "${CEPH_NAMESPACE}" get service rook-ceph-mon-a -o json | jq -r '.spec.clusterIP')"
+wait_for_terminate
+
+# Download the old mon store and update its key to the new one
+ssh "${MON_HOST_IP}" "sudo rm -rf /var/lib/rook/mon-a/data"
+ssh "${OLD_MON_HOST_IP}" "sudo chmod -R a+rX /var/lib/openstack-helm/ceph/mon/mon/ceph-${OLD_MON_HOST}"
+# Clear copies left in /tmp by an earlier run; otherwise scp would merge into
+# the old copy and mv would nest the store inside the existing /tmp/mon-a
+rm -rf "/tmp/ceph-${OLD_MON_HOST}" /tmp/mon-a
+scp -rp "${OLD_MON_HOST_IP}:/var/lib/openstack-helm/ceph/mon/mon/ceph-${OLD_MON_HOST}" /tmp
+mv "/tmp/ceph-${OLD_MON_HOST}" /tmp/mon-a
+# Only the [mon.] header and the two lines after it are taken from the new keyring
+grep -A2 "\[mon\.\]" /tmp/mon-a.keyring > /tmp/mon-a/keyring
+
+# Generate a script to rewrite the monmap in the old mon store
+# The here-document is unquoted on purpose: ${MON_IP} is filled in now, while
+# \$3 is escaped so that awk still sees $3 when the generated script runs.
+# The generated script extracts the monmap, removes every mon listed in it,
+# adds mon "a" at the new address, injects the result, and deletes itself.
+cat > /tmp/mon-a/fix-monmap.sh <<EOF
+#!/bin/bash
+touch /etc/ceph/ceph.conf
+cd /var/lib/rook
+ceph-mon --extract-monmap monmap --mon-data mon-a/data
+monmaptool --print monmap | awk '/mon\./{print \$3}' | cut -d. -f2 | xargs -I{} monmaptool --rm {} monmap
+monmaptool --addv a [v2:${MON_IP}:3300,v1:${MON_IP}:6789] monmap
+ceph-mon --inject-monmap monmap --mon-data mon-a/data
+rm monmap
+rm mon-a/data/fix-monmap.sh
+EOF
+chmod +x /tmp/mon-a/fix-monmap.sh
+
+# Upload the mon store and script to the new mon host and run the script
+scp -rp /tmp/mon-a ${MON_HOST_IP}:/tmp
+ssh ${MON_HOST_IP} "sudo mv /tmp/mon-a /var/lib/rook/mon-a"
+ssh ${MON_HOST_IP} "sudo mv /var/lib/rook/mon-a/mon-a /var/lib/rook/mon-a/data"
+ssh ${MON_HOST_IP} "docker run --rm -v /var/lib/rook:/var/lib/rook quay.io/ceph/ceph:v${CEPH_RELEASE} /var/lib/rook/mon-a/data/fix-monmap.sh"
+
+# Write the old cluster FSID to the rook-ceph-mon secret, disable authentication, and revive the Rook operator
+kubectl -n ${CEPH_NAMESPACE} get secret rook-ceph-mon -o json | jq --arg fsid "$(echo -n ${FSID} | base64)" '.data.fsid = $fsid' | kubectl apply -f -
+kubectl -n ${CEPH_NAMESPACE} get cm rook-config-override -o yaml | \
+sed '/\[global\]/a \ \ \ \ auth_supported = none' | \
+sed '/\[global\]/a \ \ \ \ auth_client_required = none' | \
+sed '/\[global\]/a \ \ \ \ auth_service_required = none' | \
+sed '/\[global\]/a \ \ \ \ auth_cluster_required = none' | \
+kubectl apply -f -
+kubectl -n ${ROOK_CEPH_NAMESPACE} scale deploy rook-ceph-operator --replicas=1
+wait_for_full_rook_deployment
+
+# Write the new mon key to the rook-ceph-tools pod and import it for authentication
+TOOLS_POD="$(kubectl -n "${CEPH_NAMESPACE}" get pods -l app=rook-ceph-tools -o json | jq -r '.items[0].metadata.name')"
+# Tracing is paused while the admin key appears on a command line so that the
+# key is not copied into the trace output
+set +x
+CLIENT_KEY="$(grep -A1 "\[client\.admin\]" /tmp/mon-a.keyring | awk '/key/{print $3}')"
+kubectl -n "${CEPH_NAMESPACE}" exec "${TOOLS_POD}" -- bash -c "echo -e '[client.admin]' > /tmp/keyring"
+kubectl -n "${CEPH_NAMESPACE}" exec "${TOOLS_POD}" -- bash -c "echo -e \"        key = ${CLIENT_KEY}\" >> /tmp/keyring"
+set -x
+# One "allow *" capability line per daemon type, in the original order
+for daemon in mds mon osd mgr
+do
+  kubectl -n "${CEPH_NAMESPACE}" exec "${TOOLS_POD}" -- bash -c "echo -e '        caps ${daemon} = \"allow *\"' >> /tmp/keyring"
+done
+kubectl -n "${CEPH_NAMESPACE}" exec "${TOOLS_POD}" -- ceph auth import -i /tmp/keyring
+kubectl -n "${CEPH_NAMESPACE}" exec "${TOOLS_POD}" -- rm /tmp/keyring
+
+# Remove the auth config options to re-enable authentication
+# A single sed pass drops the four indented "auth_* = none" lines from the
+# rook-config-override ConfigMap before it is applied again
+kubectl -n "${CEPH_NAMESPACE}" get cm rook-config-override -o yaml \
+  | sed \
+      -e '/    auth_cluster_required = none/d' \
+      -e '/    auth_service_required = none/d' \
+      -e '/    auth_client_required = none/d' \
+      -e '/    auth_supported = none/d' \
+  | kubectl apply -f -
+
+# Restart the Rook operator and Ceph cluster with the new config
+# The operator is scaled to zero first, every deployment except
+# rook-ceph-tools is deleted, and the operator is then scaled back up.
+kubectl -n "${ROOK_CEPH_NAMESPACE}" scale deploy rook-ceph-operator --replicas=0
+# xargs -r skips the delete when no other deployment exists
+kubectl -n "${CEPH_NAMESPACE}" get deploy -o json \
+  | jq -r '.items[] | select(.metadata.name != "rook-ceph-tools") | .metadata.name' \
+  | xargs -r kubectl -n "${CEPH_NAMESPACE}" delete deploy
+wait_for_terminate
+kubectl -n "${ROOK_CEPH_NAMESPACE}" scale deploy rook-ceph-operator --replicas=1
+wait_for_full_rook_deployment
+
+# Scale the mon and mgr deployments to original replica counts
+# The counts recorded from the legacy cluster are passed to jq as JSON numbers
+# with --argjson instead of being spliced into the filter text
+kubectl -n "${CEPH_NAMESPACE}" get cephcluster ceph -o json \
+  | jq --argjson mon "${MON_COUNT}" --argjson mgr "${MGR_COUNT}" \
+      '.spec.mon.count = $mon | .spec.mgr.count = $mgr' \
+  | kubectl apply -f -
+wait_for_health_checks "${CEPH_NAMESPACE}" "${TOOLS_POD}"