[rook-ceph] Add a script to migrate Ceph clusters to Rook
This change adds a deployment script that can be used to migrate a Ceph cluster deployed with the legacy openstack-helm-infra Ceph charts to Rook. This process is disruptive: the Ceph cluster goes down and comes back up multiple times during the migration, but the end result is a Rook-deployed Ceph cluster with the original cluster FSID and all OSD data intact.

Change-Id: Ied8ff94f25cd792a9be9f889bb6fdabc45a57f2e
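A minimal sketch of how the script might be invoked, assuming the Rook operator and Ceph cluster chart overrides live in the current directory (the values shown are the script's defaults and are hypothetical for any given deployment):

    ROOK_RELEASE=1.13.7 CEPH_RELEASE=18.2.2 \
    ROOK_CEPH_NAMESPACE=rook-ceph CEPH_NAMESPACE=ceph \
    ROOK_OPERATOR_YAML=rook-operator.yaml ROOK_CEPH_YAML=rook-ceph.yaml \
    ./tools/deployment/ceph/migrate-to-rook-ceph.sh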
parent cbbeebb5a1
commit 1e84d3f714

tools/deployment/ceph/migrate-to-rook-ceph.sh (new executable file, 253 lines)
@@ -0,0 +1,253 @@
#!/bin/bash

# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

set -x

# These variables can be set prior to running the script to deploy a specific
# Ceph release using a specific Rook release. The namespaces for the Rook
# operator and the Ceph cluster may also be set, along with the YAML definition
# files that should be used for the Rook operator and Ceph cluster Helm charts.
# The default values deploy the Rook operator in the rook-ceph namespace and
# the Ceph cluster in the ceph namespace using rook-operator.yaml and
# rook-ceph.yaml in the current directory.
ROOK_RELEASE=${ROOK_RELEASE:-1.13.7}
CEPH_RELEASE=${CEPH_RELEASE:-18.2.2}
ROOK_CEPH_NAMESPACE=${ROOK_CEPH_NAMESPACE:-rook-ceph}
CEPH_NAMESPACE=${CEPH_NAMESPACE:-ceph}
ROOK_OPERATOR_YAML=${ROOK_OPERATOR_YAML:-rook-operator.yaml}
ROOK_CEPH_YAML=${ROOK_CEPH_YAML:-rook-ceph.yaml}

# Return a list of unique status strings for pods for a specified application
# (Pods with the same status will return a single status)
function app_status() {
  kubectl -n ${CEPH_NAMESPACE} get pods -l app=${1} -o json | jq -r '.items[].status.phase' | sort | uniq
}
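# NOTE: app_status prints one line per distinct pod phase for the given
# application label, so a single "Running" (or "Succeeded") result means every
# pod for that application is in that phase; a mix of phases produces multiple
# lines and will not match the single-value comparisons in the wait functions
# below.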

# Function to wait for the initial Rook Ceph deployment to complete
function wait_for_initial_rook_deployment() {
  set +x
  echo "Waiting for initial Rook Ceph cluster deployment..."

  # The initial deployment can't deploy OSDs or RGW
  while [[ "$(app_status rook-ceph-mon)" != "Running" || \
          "$(app_status rook-ceph-mgr)" != "Running" || \
          "$(app_status rook-ceph-mds)" != "Running" || \
          "$(app_status rook-ceph-tools)" != "Running" || \
          "$(app_status rook-ceph-exporter)" != "Running" || \
          "$(app_status rook-ceph-osd-prepare)" != "Succeeded" ]]
  do
    sleep 5
  done
  set -x
}

# Function to wait for a full cluster deployment
function wait_for_full_rook_deployment() {
  set +x
  echo "Waiting for full Rook Ceph cluster deployment..."

  # Look for everything from the initial deployment plus OSDs and RGW
  while [[ "$(app_status rook-ceph-mon)" != "Running" || \
          "$(app_status rook-ceph-mgr)" != "Running" || \
          "$(app_status rook-ceph-mds)" != "Running" || \
          "$(app_status rook-ceph-tools)" != "Running" || \
          "$(app_status rook-ceph-exporter)" != "Running" || \
          "$(app_status rook-ceph-osd-prepare)" != "Succeeded" || \
          "$(app_status rook-ceph-osd)" != "Running" || \
          "$(app_status rook-ceph-rgw)" != "Running" ]]
  do
    sleep 5
  done
  set -x
}

# Function to wait for all pods except rook-ceph-tools to terminate
function wait_for_terminate() {
  set +x
  echo "Waiting for pods to terminate..."

  while [[ $(kubectl -n ${CEPH_NAMESPACE} get pods | grep -c "Running") -gt 1 ]]
  do
    sleep 5
  done
  set -x
}

# Function to wait for Ceph to reach a HEALTH_OK state
function wait_for_health_checks() {
  CEPH_NAMESPACE=${1}
  CLIENT_POD=${2}
  set +x
  echo "Waiting for the Ceph cluster to reach HEALTH_OK with all of the expected resources..."

  # Time out each loop after ~15 minutes
  for retry in {0..180}
  do
    if [[ $(kubectl -n ${CEPH_NAMESPACE} exec ${CLIENT_POD} -- ceph mon stat -f json | jq -r '.quorum[].name' | wc -l) -eq ${MON_COUNT} &&
          $(kubectl -n ${CEPH_NAMESPACE} exec ${CLIENT_POD} -- ceph mgr count-metadata name | jq '.unknown') -eq ${MGR_COUNT} &&
          $(kubectl -n ${CEPH_NAMESPACE} exec ${CLIENT_POD} -- ceph osd stat -f json | jq '.num_up_osds') -eq ${OSD_COUNT} ]]
    then
      break
    fi
    sleep 5
  done

  for retry in {0..180}
  do
    if [[ "$(kubectl -n ${CEPH_NAMESPACE} exec ${CLIENT_POD} -- ceph health)" == "HEALTH_OK" ]]
    then
      break
    fi
    sleep 5
  done

  kubectl -n ${CEPH_NAMESPACE} exec ${CLIENT_POD} -- ceph status
  set -x
}

# Save a legacy ceph-mon host and the existing cluster FSID for later
export MON_POD=$(kubectl -n ${CEPH_NAMESPACE} get pods -l component=mon -o json | jq -r '.items[0].metadata.name')
export FSID=$(kubectl -n ${CEPH_NAMESPACE} exec ${MON_POD} -- ceph fsid)
export OLD_MON_HOST=$(kubectl -n ${CEPH_NAMESPACE} get pods -l component=mon -o json | jq -r '.items[0].spec.nodeName')
export OLD_MON_HOST_IP=$(kubectl get nodes -o json | jq -r '.items[] | select(.metadata.name == env.OLD_MON_HOST) | .status.addresses | .[] | select(.type == "InternalIP") | .address')
export MON_COUNT=$(kubectl -n ${CEPH_NAMESPACE} get pods -l component=mon -o json | jq '.items | length')
export MGR_COUNT=$(kubectl -n ${CEPH_NAMESPACE} get pods -l component=mgr -o json | jq '.items | length')
export OSD_COUNT=$(kubectl -n ${CEPH_NAMESPACE} get pods -l component=osd -o json | jq '.items | length')
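# NOTE: The legacy mon host and its IP are saved because that mon's store is
# copied into the new Rook mon-a later in the script, and the mon/mgr/OSD
# counts are used for the final scaling step and the closing health checks.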

# Rename CephFS pools to match the expected names for Rook CephFS
FS_SPEC="$(kubectl -n ${CEPH_NAMESPACE} exec ${MON_POD} -- ceph fs ls -f json 2> /dev/null)"
for fs in $(echo $FS_SPEC | jq -r '.[].name')
do
  EXPECTED_METADATA_POOL="${fs}-metadata"
  METADATA_POOL=$(echo ${FS_SPEC} | jq -r ".[] | select(.name==\"${fs}\") | .metadata_pool")

  if [[ "${METADATA_POOL}" != "${EXPECTED_METADATA_POOL}" ]]
  then
    kubectl -n ${CEPH_NAMESPACE} exec ${MON_POD} -- ceph osd pool rename ${METADATA_POOL} ${EXPECTED_METADATA_POOL}
  fi

  EXPECTED_DATA_POOL="${fs}-data"
  # NOTE: Only one data pool must have the expected name. Only the first one is
  # checked here. If it is renamed and another pool with the same name already
  # exists, the rename will fail and there is no further action needed.
  DATA_POOL=$(echo ${FS_SPEC} | jq -r ".[] | select(.name==\"${fs}\") | .data_pools[0]")

  if [[ "${DATA_POOL}" != "${EXPECTED_DATA_POOL}" ]]
  then
    kubectl -n ${CEPH_NAMESPACE} exec ${MON_POD} -- ceph osd pool rename ${DATA_POOL} ${EXPECTED_DATA_POOL}
  fi
done
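# NOTE: This assumes the CephFS definition in ${ROOK_CEPH_YAML} uses the
# "<filesystem>-metadata" / "<filesystem>-data" naming convention, e.g. a
# filesystem named "cephfs" backed by cephfs-metadata and cephfs-data.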

# Destroy resources in the Ceph namespace, delete Helm charts, and remove Ceph-related node labels
for resource in cj deploy ds service job
do
  kubectl -n ${CEPH_NAMESPACE} get ${resource} -o json | jq -r '.items[].metadata.name' | xargs kubectl -n ${CEPH_NAMESPACE} delete ${resource}
done
helm -n ${CEPH_NAMESPACE} delete ceph-provisioners
helm -n ${CEPH_NAMESPACE} delete ceph-client
helm -n ${CEPH_NAMESPACE} delete ceph-mon
helm -n ${CEPH_NAMESPACE} delete ceph-osd
for node in $(kubectl get nodes -o json | jq -r '.items[].metadata.name' | xargs)
do
  kubectl label node ${node} ceph-mds- ceph-mgr- ceph-mon- ceph-osd- ceph-rgw-
done
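# NOTE: This is the disruptive part of the migration: the legacy Ceph cluster
# goes down here and service is not restored until the Rook-deployed cluster
# comes up with the original mon store later in the script.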

# Use the Rook Helm charts to deploy a new Ceph cluster
helm repo add rook-release https://charts.rook.io/release
helm install --create-namespace --namespace ${ROOK_CEPH_NAMESPACE} rook-ceph rook-release/rook-ceph --version ${ROOK_RELEASE} -f ${ROOK_OPERATOR_YAML}
helm upgrade --install --create-namespace --namespace ${CEPH_NAMESPACE} rook-ceph-cluster --set operatorNamespace=${ROOK_CEPH_NAMESPACE} rook-release/rook-ceph-cluster --version ${ROOK_RELEASE} -f ${ROOK_CEPH_YAML}
wait_for_initial_rook_deployment

# Retrieve the keyring from the new mon pod and save its host for further work
export MON_POD=$(kubectl -n ${CEPH_NAMESPACE} get pods -l app=rook-ceph-mon -o json | jq -r '.items[0].metadata.name')
kubectl -n ${CEPH_NAMESPACE} exec ${MON_POD} -- cat /etc/ceph/keyring-store/keyring > /tmp/mon-a.keyring
export MON_HOST=$(kubectl -n ${CEPH_NAMESPACE} get pods -l app=rook-ceph-mon -o json | jq -r '.items[0].spec.nodeName')
export MON_HOST_IP=$(kubectl get nodes -o json | jq -r '.items[] | select(.metadata.name == env.MON_HOST) | .status.addresses | .[] | select(.type == "InternalIP") | .address')

# Shut down the Rook operator, delete the rook-ceph deployments, and get the new rook-ceph-mon IP address
kubectl -n ${ROOK_CEPH_NAMESPACE} scale deploy rook-ceph-operator --replicas=0
kubectl -n ${CEPH_NAMESPACE} get deploy -o json | jq -r '.items[] | select(.metadata.name != "rook-ceph-tools") | .metadata.name' | xargs kubectl -n ${CEPH_NAMESPACE} delete deploy
MON_IP=$(kubectl -n ${CEPH_NAMESPACE} get service rook-ceph-mon-a -o json | jq -r '.spec.clusterIP')
wait_for_terminate
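# NOTE: The operator is scaled down and the Ceph deployments are removed so
# that nothing is running against /var/lib/rook/mon-a while its data directory
# is replaced with the legacy mon store below.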

# Download the old mon store and update its key to the new one
ssh ${MON_HOST_IP} "sudo rm -rf /var/lib/rook/mon-a/data"
ssh ${OLD_MON_HOST_IP} "sudo chmod -R a+rX /var/lib/openstack-helm/ceph/mon/mon/ceph-${OLD_MON_HOST}"
scp -rp ${OLD_MON_HOST_IP}:/var/lib/openstack-helm/ceph/mon/mon/ceph-${OLD_MON_HOST} /tmp
mv /tmp/ceph-${OLD_MON_HOST} /tmp/mon-a
grep -A2 "\[mon\.\]" /tmp/mon-a.keyring > /tmp/mon-a/keyring

# Generate a script to rewrite the monmap in the old mon store
cat > /tmp/mon-a/fix-monmap.sh <<EOF
#!/bin/bash
touch /etc/ceph/ceph.conf
cd /var/lib/rook
ceph-mon --extract-monmap monmap --mon-data mon-a/data
monmaptool --print monmap | awk '/mon\./{print \$3}' | cut -d. -f2 | xargs -I{} monmaptool --rm {} monmap
monmaptool --addv a [v2:$(echo ${MON_IP}):3300,v1:$(echo ${MON_IP}):6789] monmap
ceph-mon --inject-monmap monmap --mon-data mon-a/data
rm monmap
rm mon-a/data/fix-monmap.sh
EOF
chmod +x /tmp/mon-a/fix-monmap.sh
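# The generated script strips every existing mon entry from the extracted
# monmap and re-adds a single mon "a" at the new rook-ceph-mon-a service IP,
# so the legacy mon store comes up as the sole mon of the Rook cluster.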

# Upload the mon store and script to the new mon host and run the script
scp -rp /tmp/mon-a ${MON_HOST_IP}:/tmp
ssh ${MON_HOST_IP} "sudo mv /tmp/mon-a /var/lib/rook/mon-a"
ssh ${MON_HOST_IP} "sudo mv /var/lib/rook/mon-a/mon-a /var/lib/rook/mon-a/data"
ssh ${MON_HOST_IP} "docker run --rm -v /var/lib/rook:/var/lib/rook quay.io/ceph/ceph:v${CEPH_RELEASE} /var/lib/rook/mon-a/data/fix-monmap.sh"

# Write the old cluster FSID to the rook-ceph-mon secret, disable authentication, and revive the Rook operator
kubectl -n ${CEPH_NAMESPACE} get secret rook-ceph-mon -o json | jq --arg fsid "$(echo -n ${FSID} | base64)" '.data.fsid = $fsid' | kubectl apply -f -
kubectl -n ${CEPH_NAMESPACE} get cm rook-config-override -o yaml | \
  sed '/\[global\]/a \ \ \ \ auth_supported = none' | \
  sed '/\[global\]/a \ \ \ \ auth_client_required = none' | \
  sed '/\[global\]/a \ \ \ \ auth_service_required = none' | \
  sed '/\[global\]/a \ \ \ \ auth_cluster_required = none' | \
  kubectl apply -f -
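# NOTE: Authentication is disabled while the cluster comes back up because the
# auth database now comes from the legacy mon store; the Rook-generated admin
# key is imported below before these overrides are removed again.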
kubectl -n ${ROOK_CEPH_NAMESPACE} scale deploy rook-ceph-operator --replicas=1
wait_for_full_rook_deployment

# Write the new mon key to the rook-ceph-tools pod and import it for authentication
TOOLS_POD=$(kubectl -n ${CEPH_NAMESPACE} get pods -l app=rook-ceph-tools -o json | jq -r '.items[0].metadata.name')
CLIENT_KEY=$(grep -A1 "\[client\.admin\]" /tmp/mon-a.keyring | awk '/key/{print $3}')
kubectl -n ${CEPH_NAMESPACE} exec ${TOOLS_POD} -- bash -c "echo -e '[client.admin]' > /tmp/keyring"
kubectl -n ${CEPH_NAMESPACE} exec ${TOOLS_POD} -- bash -c "echo -e \" key = ${CLIENT_KEY}\" >> /tmp/keyring"
kubectl -n ${CEPH_NAMESPACE} exec ${TOOLS_POD} -- bash -c "echo -e ' caps mds = \"allow *\"' >> /tmp/keyring"
kubectl -n ${CEPH_NAMESPACE} exec ${TOOLS_POD} -- bash -c "echo -e ' caps mon = \"allow *\"' >> /tmp/keyring"
kubectl -n ${CEPH_NAMESPACE} exec ${TOOLS_POD} -- bash -c "echo -e ' caps osd = \"allow *\"' >> /tmp/keyring"
kubectl -n ${CEPH_NAMESPACE} exec ${TOOLS_POD} -- bash -c "echo -e ' caps mgr = \"allow *\"' >> /tmp/keyring"
kubectl -n ${CEPH_NAMESPACE} exec ${TOOLS_POD} -- ceph auth import -i /tmp/keyring
kubectl -n ${CEPH_NAMESPACE} exec ${TOOLS_POD} -- rm /tmp/keyring
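# NOTE: Importing this key makes the admin credentials generated by Rook for
# the new deployment (and stored in its Kubernetes secrets) valid against the
# auth database carried over from the legacy cluster, so authentication can be
# re-enabled.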

# Remove the auth config options to re-enable authentication
kubectl -n ${CEPH_NAMESPACE} get cm rook-config-override -o yaml | \
  sed '/ auth_cluster_required = none/d' | \
  sed '/ auth_service_required = none/d' | \
  sed '/ auth_client_required = none/d' | \
  sed '/ auth_supported = none/d' | \
  kubectl apply -f -

# Restart the Rook operator and Ceph cluster with the new config
kubectl -n ${ROOK_CEPH_NAMESPACE} scale deploy rook-ceph-operator --replicas=0
kubectl -n ${CEPH_NAMESPACE} get deploy -o json | jq -r '.items[] | select(.metadata.name != "rook-ceph-tools") | .metadata.name' | xargs kubectl -n ${CEPH_NAMESPACE} delete deploy
wait_for_terminate
kubectl -n ${ROOK_CEPH_NAMESPACE} scale deploy rook-ceph-operator --replicas=1
wait_for_full_rook_deployment

# Scale the mon and mgr deployments to original replica counts
kubectl -n ${CEPH_NAMESPACE} get cephcluster ceph -o json | \
  jq ".spec.mon.count = ${MON_COUNT} | .spec.mgr.count = ${MGR_COUNT}" | \
  kubectl apply -f -
wait_for_health_checks ${CEPH_NAMESPACE} ${TOOLS_POD}