Merge "[CEPH] Fix race conditions with OSD POD initialization"

This commit is contained in:
Zuul 2018-12-24 22:48:53 +00:00 committed by Gerrit Code Review
commit 5cca3e74d4
2 changed files with 43 additions and 32 deletions

View File

@ -50,6 +50,13 @@ else
export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION}) export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION})
fi fi
# Re-probe the partitions of ${OSD_DEVICE}, then block until udev has
# finished handling its currently queued events.
# Globals: OSD_DEVICE (read)
# Returns: exit status of `udevadm settle`
udev_settle() {
  local -r settle_timeout=600
  partprobe "${OSD_DEVICE}"
  # wait for the udev event queue to drain; give up after the timeout (seconds)
  udevadm settle --timeout="${settle_timeout}"
}
# Calculate proper device names, given a device and partition number # Calculate proper device names, given a device and partition number
function dev_part { function dev_part {
local OSD_DEVICE=${1} local OSD_DEVICE=${1}
@ -121,46 +128,41 @@ function osd_disk_prepare {
fi fi
fi fi
udev_settle
# then search for some ceph metadata on the disk # then search for some ceph metadata on the disk
if [[ "$(parted --script ${OSD_DEVICE} print | egrep '^ 1.*ceph data')" ]]; then if [[ "$(parted --script ${OSD_DEVICE} print | egrep '^ 1.*ceph data')" ]]; then
if [[ ${OSD_FORCE_ZAP} -eq 1 ]]; then if [[ ${OSD_FORCE_ZAP} -eq 1 ]]; then
if [ -b "${OSD_DEVICE}1" ]; then if [ -b "${OSD_DEVICE}1" ]; then
local fs=`lsblk -fn ${OSD_DEVICE}1` local cephFSID=`ceph-conf --lookup fsid`
if [ ! -z "${fs}" ]; then if [ ! -z "${cephFSID}" ]; then
local cephFSID=`ceph-conf --lookup fsid` local tmpmnt=`mktemp -d`
if [ ! -z "${cephFSID}" ]; then mount ${OSD_DEVICE}1 ${tmpmnt}
local tmpmnt=`mktemp -d` if [ -f "${tmpmnt}/ceph_fsid" ]; then
mount ${OSD_DEVICE}1 ${tmpmnt} osdFSID=`cat "${tmpmnt}/ceph_fsid"`
if [ -f "${tmpmnt}/ceph_fsid" ]; then umount ${tmpmnt}
osdFSID=`cat "${tmpmnt}/ceph_fsid"` if [ ${osdFSID} != ${cephFSID} ]; then
umount ${tmpmnt} echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster."
if [ ${osdFSID} != ${cephFSID} ]; then echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}"
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster."
echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}"
echo "Because OSD_FORCE_ZAP was set, we will zap this device."
ceph-disk -v zap ${OSD_DEVICE}
else
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster."
echo "OSD_FORCE_ZAP is set, but will be ignored and the device will not be zapped."
echo "Moving on, trying to activate the OSD now."
return
fi
else
umount ${tmpmnt}
echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID."
echo "Because OSD_FORCE_ZAP was set, we will zap this device." echo "Because OSD_FORCE_ZAP was set, we will zap this device."
ceph-disk -v zap ${OSD_DEVICE} ceph-disk -v zap ${OSD_DEVICE}
else
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster."
echo "OSD_FORCE_ZAP is set, but will be ignored and the device will not be zapped."
echo "Moving on, trying to activate the OSD now."
return
fi fi
else else
echo "Unable to determine the FSID of the current cluster." umount ${tmpmnt}
echo "OSD_FORCE_ZAP is set, but this OSD will not be zapped." echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID."
echo "Moving on, trying to activate the OSD now." echo "Because OSD_FORCE_ZAP was set, we will zap this device."
return ceph-disk -v zap ${OSD_DEVICE}
fi fi
else else
echo "It looks like ${OSD_DEVICE} has a ceph data partition but no filesystem." echo "Unable to determine the FSID of the current cluster."
echo "Because OSD_FORCE_ZAP was set, we will zap this device." echo "OSD_FORCE_ZAP is set, but this OSD will not be zapped."
ceph-disk -v zap ${OSD_DEVICE} echo "Moving on, trying to activate the OSD now."
return
fi fi
else else
echo "parted says ${OSD_DEVICE}1 should exist, but we do not see it." echo "parted says ${OSD_DEVICE}1 should exist, but we do not see it."
@ -225,8 +227,7 @@ function osd_disk_prepare {
ceph-disk -v prepare ${CLI_OPTS} --journal-uuid ${OSD_JOURNAL_UUID} ${OSD_DEVICE} ${OSD_JOURNAL} ceph-disk -v prepare ${CLI_OPTS} --journal-uuid ${OSD_JOURNAL_UUID} ${OSD_DEVICE} ${OSD_JOURNAL}
# watch the udev event queue, and exit if all current events are handled udev_settle
udevadm settle --timeout=600
} }
if ! [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then if ! [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then

View File

@ -79,6 +79,9 @@ spec:
- name: pod-var-lib-ceph - name: pod-var-lib-ceph
mountPath: /var/lib/ceph mountPath: /var/lib/ceph
readOnly: false readOnly: false
- name: pod-var-lib-ceph-tmp
mountPath: /var/lib/ceph/tmp
readOnly: false
- name: pod-run - name: pod-run
mountPath: /run mountPath: /run
readOnly: false readOnly: false
@ -160,6 +163,9 @@ spec:
- name: pod-var-lib-ceph - name: pod-var-lib-ceph
mountPath: /var/lib/ceph mountPath: /var/lib/ceph
readOnly: false readOnly: false
# Mount the pod-var-lib-ceph-tmp volume read-write at /var/lib/ceph/tmp.
# The key must be spelled `readOnly` (lower-case r), as on the sibling mounts.
- name: pod-var-lib-ceph-tmp
  mountPath: /var/lib/ceph/tmp
  readOnly: false
- name: pod-run - name: pod-run
mountPath: /run mountPath: /run
readOnly: false readOnly: false
@ -288,6 +294,10 @@ spec:
path: /run/lvm path: /run/lvm
- name: pod-var-lib-ceph - name: pod-var-lib-ceph
emptyDir: {} emptyDir: {}
- name: pod-var-lib-ceph-tmp
hostPath:
path: /var/lib/openstack-helm/ceph/var-tmp
type: DirectoryOrCreate
- name: pod-var-log - name: pod-var-log
hostPath: hostPath:
path: {{ print "/var/log/ceph/" $envAll.Release.Name }} path: {{ print "/var/log/ceph/" $envAll.Release.Name }}