[ceph-osd] Retry CRUSH map creation for an OSD

This updates the logic to retry creating the CRUSH map entry for an OSD
when the ceph-mon service is down for a short while.

Change-Id: Idffb189f0749a68a348cc0451daca5dec6796716
Chinasubbareddy Mallavarapu 2019-09-26 13:07:35 -05:00
parent 5c97d5bc2a
commit eee6b51cb3
1 changed file with 14 additions and 6 deletions
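For reference, the retry pattern this change introduces can be sketched in isolation as below. The 6-retry / 10-second-sleep values mirror the patch (roughly a minute of waiting in total); the wrapper name retry_cmd and the sample health command are only illustrative and are not part of the change.

#!/bin/bash
# Illustrative sketch of the retry loop added by this patch: run the wrapped
# command, and on failure retry up to 6 more times with a 10s pause between
# attempts, giving a briefly-down ceph-mon time to come back.
retry_cmd() {
  local cnt=0
  until "$@" || [ ${cnt} -ge 6 ]; do
    sleep 10
    ((cnt++))
  done
}

# Illustrative usage: poll cluster health until the monitors respond.
retry_cmd ceph --cluster "${CLUSTER:-ceph}" health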


@@ -61,10 +61,18 @@ function is_available {
   command -v $@ &>/dev/null
 }
 
+function ceph_cmd_retry() {
+  cnt=0
+  until "ceph" "$@" || [ $cnt -ge 6 ]; do
+    sleep 10
+    ((cnt++))
+  done
+}
+
 function crush_create_or_move {
   local crush_location=${1}
-  ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
-    osd crush create-or-move -- "${OSD_ID}" "${OSD_WEIGHT}" ${crush_location} || true
+  ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+    osd crush create-or-move -- "${OSD_ID}" "${OSD_WEIGHT}" ${crush_location}
 }
 
 function crush_add_and_move {
@@ -72,15 +80,15 @@ function crush_add_and_move {
   local crush_failure_domain_name=${2}
   local crush_location=$(echo "root=default ${crush_failure_domain_type}=${crush_failure_domain_name} host=${HOSTNAME}")
   crush_create_or_move "${crush_location}"
-  local crush_failure_domain_location_check=$(ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" osd find ${OSD_ID} | grep "${crush_failure_domain_type}" | awk -F '"' '{print $4}')
+  local crush_failure_domain_location_check=$(ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" osd find ${OSD_ID} | grep "${crush_failure_domain_type}" | awk -F '"' '{print $4}')
   if [ "x${crush_failure_domain_location_check}" != "x${crush_failure_domain_name}" ]; then
     # NOTE(supamatt): Manually move the buckets for previously configured CRUSH configurations
     # as create-or-move may not appropriately move them.
-    ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+    ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
       osd crush add-bucket "${crush_failure_domain_name}" "${crush_failure_domain_type}" || true
-    ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+    ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
       osd crush move "${crush_failure_domain_name}" root=default || true
-    ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+    ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
       osd crush move "${HOSTNAME}" "${crush_failure_domain_type}=${crush_failure_domain_name}" || true
   fi
 }
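For context, a hypothetical invocation of the patched helper; the environment variables and failure-domain values below are purely illustrative (in the ceph-osd chart they are normally set by its scripts rather than exported by hand).

# Hypothetical values; the chart's scripts set these in practice.
export CLUSTER="ceph" OSD_ID="3" OSD_WEIGHT="1.0"
export OSD_KEYRING="/var/lib/ceph/osd/ceph-3/keyring" HOSTNAME="osd-host-1"

# Place osd.3 under a "rack" failure domain named "rack1". Every ceph call
# inside crush_add_and_move now goes through ceph_cmd_retry, so a ceph-mon
# that is down for a short while no longer makes the placement fail outright.
crush_add_and_move "rack" "rack1"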