[ceph-osd] Synchronization audit for the ceph-volume osd-init script
There are race conditions in the ceph-volume osd-init script that occasionally cause deployment and OSD restart failures. This change resolves those races and stabilizes the script when multiple instances run simultaneously on the same host.

Change-Id: I79407059fa20fb51c6840717a083a8dc616ba410
This commit is contained in:
parent
3a2d0f83b4
commit
16b72c1e22
@ -15,6 +15,6 @@ apiVersion: v1
|
||||
appVersion: v1.0.0
|
||||
description: OpenStack-Helm Ceph OSD
|
||||
name: ceph-osd
|
||||
version: 0.1.6
|
||||
version: 0.1.7
|
||||
home: https://github.com/ceph/ceph
|
||||
...
|
||||
|
@ -76,15 +76,15 @@ function ceph_cmd_retry() {
|
||||
|
||||
# Run "$@" under an exclusive host-wide lock so that concurrent osd-init
# instances on the same host do not race on shared LVM/device state.
# Usage: locked <command> [args...]
# Returns: the exit status of the wrapped command.
function locked() {
  local rc
  exec {lock_fd}>/var/lib/ceph/tmp/init-osd.lock || exit 1
  # Fail hard if the lock cannot be acquired within 10 minutes; silently
  # running the command unlocked would reintroduce the race this lock
  # exists to prevent.
  flock -w 600 --verbose "${lock_fd}" &> /dev/null || exit 1
  "$@"
  rc=$?
  # Preserve the wrapped command's status: don't let flock -u clobber it.
  flock -u "${lock_fd}" &> /dev/null
  return ${rc}
}
|
||||
# Run "$@" under the global osd-init lock (distinct from the per-operation
# lock used by locked()) to serialize whole-device preparation steps across
# concurrent osd-init instances on the same host.
# Usage: global_locked <command> [args...]
# Returns: the exit status of the wrapped command.
function global_locked() {
  local rc
  exec {global_lock_fd}>/var/lib/ceph/tmp/init-osd-global.lock || exit 1
  # Fail hard on lock-acquisition timeout (10 minutes); proceeding without
  # the lock would reintroduce the race this lock exists to prevent.
  flock -w 600 --verbose "${global_lock_fd}" &> /dev/null || exit 1
  "$@"
  rc=$?
  # Preserve the wrapped command's status: don't let flock -u clobber it.
  flock -u "${global_lock_fd}" &> /dev/null
  return ${rc}
}
|
||||
|
||||
function crush_create_or_move {
|
||||
@ -248,7 +248,7 @@ function disk_zap {
|
||||
locked lvremove -y ${logical_volume}
|
||||
fi
|
||||
done
|
||||
local volume_group=$(pvdisplay -ddd -v ${device} | grep "VG Name" | awk '/ceph/{print $3}' | grep "ceph")
|
||||
local volume_group=$(locked pvdisplay -ddd -v ${device} | grep "VG Name" | awk '/ceph/{print $3}' | grep "ceph")
|
||||
if [[ ${volume_group} ]]; then
|
||||
vgremove -y ${volume_group}
|
||||
pvremove -y ${device}
|
||||
@ -260,13 +260,21 @@ function disk_zap {
|
||||
dd if=/dev/zero of=${device} bs=1M count=200
|
||||
}
|
||||
|
||||
# Refresh the LVM metadata caches and then rescan PVs/VGs/LVs.
# Callers must hold the host-wide lock (run via `locked lvm_scan`) so the
# cache rebuild and rescan happen atomically with respect to other
# osd-init instances, preventing unexpected cache states.
function lvm_scan {
  local scanner
  # First pass rebuilds each scanner's cache, second pass rescans.
  for scanner in pvscan vgscan lvscan; do
    "${scanner}" --cache
  done
  for scanner in pvscan vgscan lvscan; do
    "${scanner}"
  done
}
|
||||
|
||||
function udev_settle {
|
||||
osd_devices="${OSD_DEVICE}"
|
||||
udevadm settle --timeout=600
|
||||
partprobe "${OSD_DEVICE}"
|
||||
locked pvscan --cache
|
||||
locked vgscan --cache
|
||||
locked lvscan --cache
|
||||
locked lvm_scan
|
||||
if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then
|
||||
if [ ! -z "$BLOCK_DB" ]; then
|
||||
osd_devices="${osd_devices}\|${BLOCK_DB}"
|
||||
@ -353,7 +361,7 @@ function get_lv_size_from_device {
|
||||
device="$1"
|
||||
logical_volume="$(get_lv_from_device ${device})"
|
||||
|
||||
lvs ${logical_volume} -o LV_SIZE --noheadings --units k --nosuffix | xargs | cut -d'.' -f1
|
||||
locked lvs ${logical_volume} -o LV_SIZE --noheadings --units k --nosuffix | xargs | cut -d'.' -f1
|
||||
}
|
||||
|
||||
# Helper function to get the crush weight for an osd device
|
||||
@ -427,12 +435,12 @@ function get_lvm_path_from_device {
|
||||
select="$1"
|
||||
|
||||
options="--noheadings -o lv_dm_path"
|
||||
pvs ${options} -S "${select}" | tr -d ' '
|
||||
locked pvs ${options} -S "${select}" | tr -d ' '
|
||||
}
|
||||
|
||||
function get_vg_name_from_device {
|
||||
device="$1"
|
||||
pv_uuid=$(pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}')
|
||||
pv_uuid=$(locked pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}')
|
||||
|
||||
if [[ "${pv_uuid}" ]]; then
|
||||
echo "ceph-vg-${pv_uuid}"
|
||||
@ -442,7 +450,7 @@ function get_vg_name_from_device {
|
||||
function get_lv_name_from_device {
|
||||
device="$1"
|
||||
device_type="$2"
|
||||
pv_uuid=$(pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}')
|
||||
pv_uuid=$(locked pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}')
|
||||
|
||||
if [[ "${pv_uuid}" ]]; then
|
||||
echo "ceph-${device_type}-${pv_uuid}"
|
||||
|
@ -67,7 +67,7 @@ function rename_lvs {
|
||||
local lv_tag=$(get_lvm_tag_from_device ${data_disk} ceph.db_uuid)
|
||||
|
||||
if [[ "${lv_tag}" ]]; then
|
||||
local lv_device=$(lvdisplay | grep -B4 "${lv_tag}" | awk '/LV Path/{print $3}')
|
||||
local lv_device=$(locked lvdisplay | grep -B4 "${lv_tag}" | awk '/LV Path/{print $3}')
|
||||
|
||||
if [[ "${lv_device}" ]]; then
|
||||
local db_vg=$(echo ${lv_device} | awk -F "/" '{print $3}')
|
||||
@ -84,7 +84,7 @@ function rename_lvs {
|
||||
lv_tag=$(get_lvm_tag_from_device ${data_disk} ceph.wal_uuid)
|
||||
|
||||
if [[ "${lv_tag}" ]]; then
|
||||
local lv_device=$(lvdisplay | grep -B4 "${lv_tag}" | awk '/LV Path/{print $3}')
|
||||
local lv_device=$(locked lvdisplay | grep -B4 "${lv_tag}" | awk '/LV Path/{print $3}')
|
||||
|
||||
if [[ "${lv_device}" ]]; then
|
||||
local wal_vg=$(echo ${lv_device} | awk -F "/" '{print $3}')
|
||||
@ -104,10 +104,10 @@ function rename_lvs {
|
||||
# renaming should be completed prior to calling this
|
||||
function update_lv_tags {
|
||||
local data_disk=$1
|
||||
local pv_uuid=$(pvdisplay -ddd -v ${data_disk} | awk '/PV UUID/{print $3}')
|
||||
local pv_uuid=$(locked pvdisplay -ddd -v ${data_disk} | awk '/PV UUID/{print $3}')
|
||||
|
||||
if [[ "${pv_uuid}" ]]; then
|
||||
local volumes="$(lvs --no-headings | grep -e "${pv_uuid}")"
|
||||
local volumes="$(locked lvs --no-headings | grep -e "${pv_uuid}")"
|
||||
local block_device db_device wal_device vg_name
|
||||
local old_block_device old_db_device old_wal_device
|
||||
|
||||
@ -188,7 +188,7 @@ function prep_device {
|
||||
udev_settle
|
||||
vg_name=$(get_vg_name_from_device ${BLOCK_DEVICE})
|
||||
lv_name=$(get_lv_name_from_device ${data_disk} ${device_type})
|
||||
VG=$(vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]')
|
||||
VG=$(locked vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]')
|
||||
if [[ $VG ]]; then
|
||||
DEVICE_OSD_ID=$(get_osd_id_from_volume "/dev/${vg_name}/${lv_name}")
|
||||
CEPH_LVM_PREPARE=1
|
||||
@ -211,7 +211,7 @@ function prep_device {
|
||||
VG=$(get_vg_name_from_device ${BLOCK_DEVICE})
|
||||
locked vgrename "ceph-vg-${random_uuid}" "${VG}"
|
||||
fi
|
||||
logical_volume=$(lvs --noheadings -o lv_name -S "lv_name=${lv_name}" | tr -d '[:space:]')
|
||||
logical_volume=$(locked lvs --noheadings -o lv_name -S "lv_name=${lv_name}" | tr -d '[:space:]')
|
||||
if [[ $logical_volume != "${lv_name}" ]]; then
|
||||
locked lvcreate -L "${BLOCK_DEVICE_SIZE}" -n "${lv_name}" "${VG}"
|
||||
fi
|
||||
@ -399,7 +399,7 @@ function osd_disk_prepare {
|
||||
OSD_VG=${vg_name}
|
||||
fi
|
||||
lv_name=$(get_lv_name_from_device ${OSD_DEVICE} lv)
|
||||
if [[ ! "$(lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then
|
||||
if [[ ! "$(locked lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then
|
||||
lvcreate --yes -l 100%FREE -n ${lv_name} ${OSD_VG}
|
||||
fi
|
||||
OSD_LV=${OSD_VG}/${lv_name}
|
||||
@ -424,7 +424,7 @@ function osd_disk_prepare {
|
||||
global_locked prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
|
||||
fi
|
||||
else
|
||||
if pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then
|
||||
if locked pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then
|
||||
CEPH_LVM_PREPARE=0
|
||||
fi
|
||||
fi
|
||||
|
Loading…
Reference in New Issue
Block a user