Merge "[ceph-osd] Alias synchronized commands and fix descriptor leak"

This commit is contained in:
Zuul 2020-12-16 20:51:51 +00:00 committed by Gerrit Code Review
commit 81f928544b
5 changed files with 142 additions and 58 deletions

View File

@ -15,6 +15,6 @@ apiVersion: v1
appVersion: v1.0.0
description: OpenStack-Helm Ceph OSD
name: ceph-osd
version: 0.1.13
version: 0.1.14
home: https://github.com/ceph/ceph
...

View File

@ -150,3 +150,6 @@ exec /usr/bin/ceph-osd \
--setuser ceph \
--setgroup disk & echo $! > /run/ceph-osd.pid
wait
# Clean up resources held by the common script
common_cleanup

View File

@ -111,3 +111,6 @@ exec /usr/bin/ceph-osd \
--setuser ceph \
--setgroup disk & echo $! > /run/ceph-osd.pid
wait
# Clean up resources held by the common script
common_cleanup

View File

@ -15,6 +15,9 @@ limitations under the License.
*/}}
set -ex
shopt -s expand_aliases
export lock_fd=''
export ALREADY_LOCKED=0
export PS4='+${BASH_SOURCE:+$(basename ${BASH_SOURCE}):${LINENO}:}${FUNCNAME:+${FUNCNAME}():} '
: "${CRUSH_LOCATION:=root=default host=${HOSTNAME}}"
@ -25,6 +28,85 @@ export PS4='+${BASH_SOURCE:+$(basename ${BASH_SOURCE}):${LINENO}:}${FUNCNAME:+${
: "${OSD_JOURNAL_SIZE:=$(awk '/^osd_journal_size/{print $3}' ${CEPH_CONF}.template)}"
: "${OSD_WEIGHT:=1.0}"
# Obtain the global lock on /var/lib/ceph/tmp/init-osd.lock.
# Opens the lock file descriptor on first use (it is reused for the
# lifetime of the script and closed by common_cleanup) and waits up
# to 600 seconds for the lock.
function lock() {
  # Open a file descriptor for the lock file if there isn't one already
  if [[ -z "${lock_fd}" ]]; then
    exec {lock_fd}>/var/lib/ceph/tmp/init-osd.lock || exit 1
  fi
  # Check the flock status: previously a timeout was silently ignored and
  # ALREADY_LOCKED was set anyway, letting the caller proceed unsynchronized.
  if ! flock -w 600 "${lock_fd}" &> /dev/null; then
    echo "ERROR: timed out waiting for /var/lib/ceph/tmp/init-osd.lock" >&2
    exit 1
  fi
  ALREADY_LOCKED=1
}
# Release the global lock on /var/lib/ceph/tmp/init-osd.lock.
# Safe to call even if lock() was never invoked: an empty lock_fd is
# skipped rather than passed to flock (which would fail).
function unlock() {
  # Only release if the lock file descriptor was actually opened
  if [[ -n "${lock_fd}" ]]; then
    flock -u "${lock_fd}" &> /dev/null
  fi
  ALREADY_LOCKED=0
}
# "Destructor" for common.sh, must be called by scripts that source this one
function common_cleanup() {
# Close the file descriptor for the lock file
if [[ ! -z "${lock_fd}" ]]; then
if [[ ${ALREADY_LOCKED} -ne 0 ]]; then
unlock
fi
eval "exec ${lock_fd}>&-"
fi
}
# Run a command within the global synchronization lock.
# The lock is re-entrant: if this script already holds it, the command
# runs directly, avoiding self-deadlock when synchronized helpers call
# other synchronized commands. Returns the command's exit status.
function locked() {
  local LOCK_SCOPE=0
  local rc=0
  # Allow locks to be re-entrant to avoid deadlocks
  if [[ ${ALREADY_LOCKED} -eq 0 ]]; then
    lock
    LOCK_SCOPE=1
  fi
  # Execute the synchronized command, capturing its exit status so it
  # is not masked by the unlock below (previously failures were lost).
  "$@" || rc=$?
  # Only unlock if the lock was obtained in this scope
  if [[ ${LOCK_SCOPE} -ne 0 ]]; then
    unlock
  fi
  return ${rc}
}
# Alias commands that interact with disks so they are always synchronized
alias dmsetup='locked dmsetup'
alias pvs='locked pvs'
alias vgs='locked vgs'
alias lvs='locked lvs'
alias pvdisplay='locked pvdisplay'
alias vgdisplay='locked vgdisplay'
alias lvdisplay='locked lvdisplay'
alias pvcreate='locked pvcreate'
alias vgcreate='locked vgcreate'
alias lvcreate='locked lvcreate'
alias pvremove='locked pvremove'
alias vgremove='locked vgremove'
alias lvremove='locked lvremove'
alias pvrename='locked pvrename'
alias vgrename='locked vgrename'
alias lvrename='locked lvrename'
alias pvchange='locked pvchange'
alias vgchange='locked vgchange'
alias lvchange='locked lvchange'
alias pvscan='locked pvscan'
alias vgscan='locked vgscan'
alias lvscan='locked lvscan'
alias lvm_scan='locked lvm_scan'
alias partprobe='locked partprobe'
alias ceph-volume='locked ceph-volume'
alias disk_zap='locked disk_zap'
alias zap_extra_partitions='locked zap_extra_partitions'
alias udev_settle='locked udev_settle'
alias wipefs='locked wipefs'
alias sgdisk='locked sgdisk'
alias dd='locked dd'
eval CRUSH_FAILURE_DOMAIN_TYPE=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain"]))')
eval CRUSH_FAILURE_DOMAIN_NAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_name"]))')
eval CRUSH_FAILURE_DOMAIN_NAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_name"]))')
@ -74,19 +156,6 @@ function ceph_cmd_retry() {
done
}
# NOTE(review): pre-change implementation (removed by this commit).
# It opened a fresh descriptor into {lock_fd} on EVERY call and never
# closed it — this is the descriptor leak the commit message refers to.
# Superseded by the re-entrant locked()/lock()/unlock() trio above.
function locked() {
exec {lock_fd}>/var/lib/ceph/tmp/init-osd.lock || exit 1
flock -w 600 --verbose "${lock_fd}" &> /dev/null
"$@"
flock -u "${lock_fd}" &> /dev/null
}
# NOTE(review): pre-change implementation (removed by this commit).
# Same leaked-descriptor pattern as the old locked(), but against a
# separate /var/lib/ceph/tmp/init-osd-global.lock file; its former
# call sites now use the shared locked() wrapper instead.
function global_locked() {
exec {global_lock_fd}>/var/lib/ceph/tmp/init-osd-global.lock || exit 1
flock -w 600 --verbose "${global_lock_fd}" &> /dev/null
"$@"
flock -u "${global_lock_fd}" &> /dev/null
}
function crush_create_or_move {
local crush_location=${1}
ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
@ -242,13 +311,13 @@ function disk_zap {
dmsetup remove ${dm_device}
fi
done
local logical_volumes=$(locked lvdisplay | grep "LV Path" | grep "$device_filter" | awk '/ceph/{print $3}' | tr '\n' ' ')
local logical_volumes=$(lvdisplay | grep "LV Path" | grep "$device_filter" | awk '/ceph/{print $3}' | tr '\n' ' ')
for logical_volume in ${logical_volumes}; do
if [[ ! -z ${logical_volume} ]]; then
locked lvremove -y ${logical_volume}
lvremove -y ${logical_volume}
fi
done
local volume_group=$(locked pvdisplay -ddd -v ${device} | grep "VG Name" | awk '/ceph/{print $3}' | grep "ceph")
local volume_group=$(pvdisplay -ddd -v ${device} | grep "VG Name" | awk '/ceph/{print $3}' | grep "ceph")
if [[ ${volume_group} ]]; then
vgremove -y ${volume_group}
pvremove -y ${device}
@ -274,7 +343,7 @@ function udev_settle {
osd_devices="${OSD_DEVICE}"
udevadm settle --timeout=600
partprobe "${OSD_DEVICE}"
locked lvm_scan
lvm_scan
if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then
if [ ! -z "$BLOCK_DB" ]; then
osd_devices="${osd_devices}\|${BLOCK_DB}"
@ -282,9 +351,9 @@ function udev_settle {
local block_db="$BLOCK_DB"
local db_vg="$(echo $block_db | cut -d'/' -f1)"
if [ ! -z "$db_vg" ]; then
block_db=$(locked pvdisplay -ddd -v | grep -B1 "$db_vg" | awk '/PV Name/{print $3}')
block_db=$(pvdisplay -ddd -v | grep -B1 "$db_vg" | awk '/PV Name/{print $3}')
fi
locked partprobe "${block_db}"
partprobe "${block_db}"
fi
if [ ! -z "$BLOCK_WAL" ] && [ "$BLOCK_WAL" != "$BLOCK_DB" ]; then
osd_devices="${osd_devices}\|${BLOCK_WAL}"
@ -292,9 +361,9 @@ function udev_settle {
local block_wal="$BLOCK_WAL"
local wal_vg="$(echo $block_wal | cut -d'/' -f1)"
if [ ! -z "$wal_vg" ]; then
block_wal=$(locked pvdisplay -ddd -v | grep -B1 "$wal_vg" | awk '/PV Name/{print $3}')
block_wal=$(pvdisplay -ddd -v | grep -B1 "$wal_vg" | awk '/PV Name/{print $3}')
fi
locked partprobe "${block_wal}"
partprobe "${block_wal}"
fi
else
if [ "x$JOURNAL_TYPE" == "xblock-logical" ] && [ ! -z "$OSD_JOURNAL" ]; then
@ -302,7 +371,7 @@ function udev_settle {
if [ ! -z "$OSD_JOURNAL" ]; then
local JDEV=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g')
osd_devices="${osd_devices}\|${JDEV}"
locked partprobe "${JDEV}"
partprobe "${JDEV}"
fi
fi
fi
@ -328,7 +397,7 @@ function udev_settle {
function get_lv_from_device {
device="$1"
locked pvdisplay -ddd -v -m ${device} | awk '/Logical volume/{print $3}'
pvdisplay -ddd -v -m ${device} | awk '/Logical volume/{print $3}'
}
# Helper function to get an lvm tag from a logical volume
@ -341,7 +410,7 @@ function get_lvm_tag_from_volume {
echo
else
# Get and return the specified tag from the logical volume
locked lvs -o lv_tags ${logical_volume} | tr ',' '\n' | grep ${tag} | cut -d'=' -f2
lvs -o lv_tags ${logical_volume} | tr ',' '\n' | grep ${tag} | cut -d'=' -f2
fi
}
@ -361,7 +430,7 @@ function get_lv_size_from_device {
device="$1"
logical_volume="$(get_lv_from_device ${device})"
locked lvs ${logical_volume} -o LV_SIZE --noheadings --units k --nosuffix | xargs | cut -d'.' -f1
lvs ${logical_volume} -o LV_SIZE --noheadings --units k --nosuffix | xargs | cut -d'.' -f1
}
# Helper function to get the crush weight for an osd device
@ -435,12 +504,12 @@ function get_lvm_path_from_device {
select="$1"
options="--noheadings -o lv_dm_path"
locked pvs ${options} -S "${select}" | tr -d ' '
pvs ${options} -S "${select}" | tr -d ' '
}
function get_vg_name_from_device {
device="$1"
pv_uuid=$(locked pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}')
pv_uuid=$(pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}')
if [[ "${pv_uuid}" ]]; then
echo "ceph-vg-${pv_uuid}"
@ -450,7 +519,7 @@ function get_vg_name_from_device {
function get_lv_name_from_device {
device="$1"
device_type="$2"
pv_uuid=$(locked pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}')
pv_uuid=$(pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}')
if [[ "${pv_uuid}" ]]; then
echo "ceph-${device_type}-${pv_uuid}"

View File

@ -38,36 +38,42 @@ else
export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION})
fi
# Set up aliases for functions that require disk synchronization
alias rename_vg='locked rename_vg'
alias rename_lvs='locked rename_lvs'
alias update_lv_tags='locked update_lv_tags'
alias prep_device='locked prep_device'
# Renames a single VG if necessary
function rename_vg {
local physical_disk=$1
local old_vg_name=$(locked pvdisplay -ddd -v ${physical_disk} | awk '/VG Name/{print $3}')
local old_vg_name=$(pvdisplay -ddd -v ${physical_disk} | awk '/VG Name/{print $3}')
local vg_name=$(get_vg_name_from_device ${physical_disk})
if [[ "${old_vg_name}" ]] && [[ "${vg_name}" != "${old_vg_name}" ]]; then
locked vgrename ${old_vg_name} ${vg_name}
vgrename ${old_vg_name} ${vg_name}
fi
}
# Renames all LVs associated with an OSD as necessary
function rename_lvs {
local data_disk=$1
local vg_name=$(locked pvdisplay -ddd -v ${data_disk} | awk '/VG Name/{print $3}')
local vg_name=$(pvdisplay -ddd -v ${data_disk} | awk '/VG Name/{print $3}')
if [[ "${vg_name}" ]]; then
# Rename the OSD volume if necessary
local old_lv_name=$(locked lvdisplay ${vg_name} | awk '/LV Name/{print $3}')
local old_lv_name=$(lvdisplay ${vg_name} | awk '/LV Name/{print $3}')
local lv_name=$(get_lv_name_from_device ${data_disk} lv)
if [[ "${old_lv_name}" ]] && [[ "${lv_name}" != "${old_lv_name}" ]]; then
locked lvrename ${vg_name} ${old_lv_name} ${lv_name}
lvrename ${vg_name} ${old_lv_name} ${lv_name}
fi
# Rename the OSD's block.db volume if necessary, referenced by UUID
local lv_tag=$(get_lvm_tag_from_device ${data_disk} ceph.db_uuid)
if [[ "${lv_tag}" ]]; then
local lv_device=$(locked lvdisplay | grep -B4 "${lv_tag}" | awk '/LV Path/{print $3}')
local lv_device=$(lvdisplay | grep -B4 "${lv_tag}" | awk '/LV Path/{print $3}')
if [[ "${lv_device}" ]]; then
local db_vg=$(echo ${lv_device} | awk -F "/" '{print $3}')
@ -75,7 +81,7 @@ function rename_lvs {
local db_name=$(get_lv_name_from_device ${data_disk} db)
if [[ "${old_lv_name}" ]] && [[ "${db_name}" != "${old_lv_name}" ]]; then
locked lvrename ${db_vg} ${old_lv_name} ${db_name}
lvrename ${db_vg} ${old_lv_name} ${db_name}
fi
fi
fi
@ -84,7 +90,7 @@ function rename_lvs {
lv_tag=$(get_lvm_tag_from_device ${data_disk} ceph.wal_uuid)
if [[ "${lv_tag}" ]]; then
local lv_device=$(locked lvdisplay | grep -B4 "${lv_tag}" | awk '/LV Path/{print $3}')
local lv_device=$(lvdisplay | grep -B4 "${lv_tag}" | awk '/LV Path/{print $3}')
if [[ "${lv_device}" ]]; then
local wal_vg=$(echo ${lv_device} | awk -F "/" '{print $3}')
@ -92,7 +98,7 @@ function rename_lvs {
local wal_name=$(get_lv_name_from_device ${data_disk} wal)
if [[ "${old_lv_name}" ]] && [[ "${wal_name}" != "${old_lv_name}" ]]; then
locked lvrename ${wal_vg} ${old_lv_name} ${wal_name}
lvrename ${wal_vg} ${old_lv_name} ${wal_name}
fi
fi
fi
@ -104,10 +110,10 @@ function rename_lvs {
# renaming should be completed prior to calling this
function update_lv_tags {
local data_disk=$1
local pv_uuid=$(locked pvdisplay -ddd -v ${data_disk} | awk '/PV UUID/{print $3}')
local pv_uuid=$(pvdisplay -ddd -v ${data_disk} | awk '/PV UUID/{print $3}')
if [[ "${pv_uuid}" ]]; then
local volumes="$(locked lvs --no-headings | grep -e "${pv_uuid}")"
local volumes="$(lvs --no-headings | grep -e "${pv_uuid}")"
local block_device db_device wal_device vg_name
local old_block_device old_db_device old_wal_device
@ -131,21 +137,21 @@ function update_lv_tags {
while read lv vg other_stuff; do
if [[ "${block_device}" ]]; then
if [[ "${old_block_device}" ]]; then
locked lvchange --deltag "ceph.block_device=${old_block_device}" /dev/${vg}/${lv}
lvchange --deltag "ceph.block_device=${old_block_device}" /dev/${vg}/${lv}
fi
locked lvchange --addtag "ceph.block_device=${block_device}" /dev/${vg}/${lv}
lvchange --addtag "ceph.block_device=${block_device}" /dev/${vg}/${lv}
fi
if [[ "${db_device}" ]]; then
if [[ "${old_db_device}" ]]; then
locked lvchange --deltag "ceph.db_device=${old_db_device}" /dev/${vg}/${lv}
lvchange --deltag "ceph.db_device=${old_db_device}" /dev/${vg}/${lv}
fi
locked lvchange --addtag "ceph.db_device=${db_device}" /dev/${vg}/${lv}
lvchange --addtag "ceph.db_device=${db_device}" /dev/${vg}/${lv}
fi
if [[ "${wal_device}" ]]; then
if [[ "${old_wal_device}" ]]; then
locked lvchange --deltag "ceph.wal_device=${old_wal_device}" /dev/${vg}/${lv}
lvchange --deltag "ceph.wal_device=${old_wal_device}" /dev/${vg}/${lv}
fi
locked lvchange --addtag "ceph.wal_device=${wal_device}" /dev/${vg}/${lv}
lvchange --addtag "ceph.wal_device=${wal_device}" /dev/${vg}/${lv}
fi
done <<< ${volumes}
fi
@ -188,7 +194,7 @@ function prep_device {
udev_settle
vg_name=$(get_vg_name_from_device ${BLOCK_DEVICE})
lv_name=$(get_lv_name_from_device ${data_disk} ${device_type})
VG=$(locked vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]')
VG=$(vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]')
if [[ $VG ]]; then
DEVICE_OSD_ID=$(get_osd_id_from_volume "/dev/${vg_name}/${lv_name}")
CEPH_LVM_PREPARE=1
@ -207,13 +213,13 @@ function prep_device {
CEPH_LVM_PREPARE=1
fi
random_uuid=$(uuidgen)
locked vgcreate "ceph-vg-${random_uuid}" "${BLOCK_DEVICE}"
vgcreate "ceph-vg-${random_uuid}" "${BLOCK_DEVICE}"
VG=$(get_vg_name_from_device ${BLOCK_DEVICE})
locked vgrename "ceph-vg-${random_uuid}" "${VG}"
vgrename "ceph-vg-${random_uuid}" "${VG}"
fi
logical_volume=$(locked lvs --noheadings -o lv_name -S "lv_name=${lv_name}" | tr -d '[:space:]')
logical_volume=$(lvs --noheadings -o lv_name -S "lv_name=${lv_name}" | tr -d '[:space:]')
if [[ $logical_volume != "${lv_name}" ]]; then
locked lvcreate -L "${BLOCK_DEVICE_SIZE}" -n "${lv_name}" "${VG}"
lvcreate -L "${BLOCK_DEVICE_SIZE}" -n "${lv_name}" "${VG}"
fi
if [[ "${device_type}" == "db" ]]; then
BLOCK_DB="${VG}/${lv_name}"
@ -399,7 +405,7 @@ function osd_disk_prepare {
OSD_VG=${vg_name}
fi
lv_name=$(get_lv_name_from_device ${OSD_DEVICE} lv)
if [[ ! "$(locked lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then
if [[ ! "$(lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then
lvcreate --yes -l 100%FREE -n ${lv_name} ${OSD_VG}
fi
OSD_LV=${OSD_VG}/${lv_name}
@ -416,15 +422,15 @@ function osd_disk_prepare {
block_wal_string=$(echo ${BLOCK_WAL} | awk -F "/" '{print $2 "-" $3}')
fi
if [[ ${BLOCK_DB} && ${BLOCK_WAL} ]]; then
global_locked prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
global_locked prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
elif [[ -z ${BLOCK_DB} && ${BLOCK_WAL} ]]; then
global_locked prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
elif [[ ${BLOCK_DB} && -z ${BLOCK_WAL} ]]; then
global_locked prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
fi
else
if locked pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then
if pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then
CEPH_LVM_PREPARE=0
fi
fi
@ -451,7 +457,7 @@ function osd_disk_prepare {
fi
if [[ CEPH_LVM_PREPARE -eq 1 ]]; then
locked ceph-volume lvm -v prepare ${CLI_OPTS}
ceph-volume lvm -v prepare ${CLI_OPTS}
udev_settle
fi
}
@ -502,3 +508,6 @@ function osd_journal_prepare {
if ! [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then
osd_disk_prepare
fi
# Clean up resources held by the common script
common_cleanup