Refactor Ceph OSD Init Scripts - Second PS

1) Removed some remaining unsupported ceph-disk related code.
2) Refactored the code that determines when a disk should be
   zapped. Now there will be only one place where disk_zap is
   called.
3) Refactored the code that determines when LVM prepare should
   be called.
4) Improved the logging within the OSD init files.

Change-Id: I194c82985f1f71b30d172f9e41438fa814500601
This commit is contained in:
Parsons, Cliff (cp769u) 2021-04-13 20:20:22 +00:00
parent 948e07e151
commit 17d9fe4de9
8 changed files with 345 additions and 380 deletions

View File

@ -15,6 +15,6 @@ apiVersion: v1
appVersion: v1.0.0
description: OpenStack-Helm Ceph OSD
name: ceph-osd
version: 0.1.21
version: 0.1.22
home: https://github.com/ceph/ceph
...

View File

@ -43,45 +43,32 @@ if [[ ! -b "${OSD_DEVICE}" ]]; then
exit 1
fi
CEPH_DISK_OPTIONS=""
ACTIVATE_OPTIONS=""
CEPH_OSD_OPTIONS=""
udev_settle
OSD_ID=$(ceph-volume inventory ${OSD_DEVICE} | grep "osd id" | awk '{print $3}')
simple_activate=0
if [[ -z ${OSD_ID} ]]; then
echo "Looks like ceph-disk has been used earlier to activate the OSD."
tmpmnt=$(mktemp -d)
mount ${OSD_DEVICE}1 ${tmpmnt}
OSD_ID=$(cat ${tmpmnt}/whoami)
umount ${tmpmnt}
simple_activate=1
echo "OSD_ID not found from device ${OSD_DEVICE}"
exit 1
fi
OSD_FSID=$(ceph-volume inventory ${OSD_DEVICE} | grep "osd fsid" | awk '{print $3}')
if [[ -z ${OSD_FSID} ]]; then
echo "Looks like ceph-disk has been used earlier to activate the OSD."
tmpmnt=$(mktemp -d)
mount ${OSD_DEVICE}1 ${tmpmnt}
OSD_FSID=$(cat ${tmpmnt}/fsid)
umount ${tmpmnt}
simple_activate=1
echo "OSD_FSID not found from device ${OSD_DEVICE}"
exit 1
fi
OSD_PATH="${OSD_PATH_BASE}-${OSD_ID}"
OSD_KEYRING="${OSD_PATH}/keyring"
mkdir -p ${OSD_PATH}
if [[ ${simple_activate} -eq 1 ]]; then
ceph-volume simple activate --no-systemd ${OSD_ID} ${OSD_FSID}
else
ceph-volume lvm -v \
--setuser ceph \
--setgroup disk \
activate ${CEPH_DISK_OPTIONS} \
--auto-detect-objectstore \
--no-systemd ${OSD_ID} ${OSD_FSID}
fi
ceph-volume lvm -v \
--setuser ceph \
--setgroup disk \
activate ${ACTIVATE_OPTIONS} \
--auto-detect-objectstore \
--no-systemd ${OSD_ID} ${OSD_FSID}
# NOTE(stevetaylor): Set the OSD's crush weight (use noin flag to prevent rebalancing if necessary)
OSD_WEIGHT=$(get_osd_crush_weight_from_device ${OSD_DEVICE})

View File

@ -32,60 +32,47 @@ if [[ ! -b "${OSD_DEVICE}" ]]; then
exit 1
fi
CEPH_DISK_OPTIONS=""
ACTIVATE_OPTIONS=""
CEPH_OSD_OPTIONS=""
udev_settle
OSD_ID=$(get_osd_id_from_device ${OSD_DEVICE})
simple_activate=0
if [[ -z ${OSD_ID} ]]; then
echo "Looks like ceph-disk has been used earlier to activate the OSD."
tmpmnt=$(mktemp -d)
mount ${OSD_DEVICE}1 ${tmpmnt}
OSD_ID=$(cat ${tmpmnt}/whoami)
umount ${tmpmnt}
simple_activate=1
echo "OSD_ID not found from device ${OSD_DEVICE}"
exit 1
fi
OSD_FSID=$(get_osd_fsid_from_device ${OSD_DEVICE})
if [[ -z ${OSD_FSID} ]]; then
echo "Looks like ceph-disk has been used earlier to activate the OSD."
tmpmnt=$(mktemp -d)
mount ${OSD_DEVICE}1 ${tmpmnt}
OSD_FSID=$(cat ${tmpmnt}/fsid)
umount ${tmpmnt}
simple_activate=1
echo "OSD_FSID not found from device ${OSD_DEVICE}"
exit 1
fi
OSD_PATH="${OSD_PATH_BASE}-${OSD_ID}"
OSD_KEYRING="${OSD_PATH}/keyring"
mkdir -p ${OSD_PATH}
if [[ ${simple_activate} -eq 1 ]]; then
ceph-volume simple activate --no-systemd ${OSD_ID} ${OSD_FSID}
else
ceph-volume lvm -v \
--setuser ceph \
--setgroup disk \
activate ${CEPH_DISK_OPTIONS} \
--auto-detect-objectstore \
--no-systemd ${OSD_ID} ${OSD_FSID}
# Cross check the db and wal symlinks if missed
DB_DEV=$(get_osd_db_device_from_device ${OSD_DEVICE})
if [[ ! -z ${DB_DEV} ]]; then
if [[ ! -h /var/lib/ceph/osd/ceph-${OSD_ID}/block.db ]]; then
ln -snf ${DB_DEV} /var/lib/ceph/osd/ceph-${OSD_ID}/block.db
chown -h ceph:ceph ${DB_DEV}
chown -h ceph:ceph /var/lib/ceph/osd/ceph-${OSD_ID}/block.db
fi
ceph-volume lvm -v \
--setuser ceph \
--setgroup disk \
activate ${ACTIVATE_OPTIONS} \
--auto-detect-objectstore \
--no-systemd ${OSD_ID} ${OSD_FSID}
# Cross check the db and wal symlinks if missed
DB_DEV=$(get_osd_db_device_from_device ${OSD_DEVICE})
if [[ ! -z ${DB_DEV} ]]; then
if [[ ! -h /var/lib/ceph/osd/ceph-${OSD_ID}/block.db ]]; then
ln -snf ${DB_DEV} /var/lib/ceph/osd/ceph-${OSD_ID}/block.db
chown -h ceph:ceph ${DB_DEV}
chown -h ceph:ceph /var/lib/ceph/osd/ceph-${OSD_ID}/block.db
fi
WAL_DEV=$(get_osd_wal_device_from_device ${OSD_DEVICE})
if [[ ! -z ${WAL_DEV} ]]; then
if [[ ! -h /var/lib/ceph/osd/ceph-${OSD_ID}/block.wal ]]; then
ln -snf ${WAL_DEV} /var/lib/ceph/osd/ceph-${OSD_ID}/block.wal
chown -h ceph:ceph ${WAL_DEV}
chown -h ceph:ceph /var/lib/ceph/osd/ceph-${OSD_ID}/block.wal
fi
fi
WAL_DEV=$(get_osd_wal_device_from_device ${OSD_DEVICE})
if [[ ! -z ${WAL_DEV} ]]; then
if [[ ! -h /var/lib/ceph/osd/ceph-${OSD_ID}/block.wal ]]; then
ln -snf ${WAL_DEV} /var/lib/ceph/osd/ceph-${OSD_ID}/block.wal
chown -h ceph:ceph ${WAL_DEV}
chown -h ceph:ceph /var/lib/ceph/osd/ceph-${OSD_ID}/block.wal
fi
fi

View File

@ -310,7 +310,7 @@ function zap_extra_partitions {
}
function disk_zap {
# Run all the commands that ceph-disk zap uses to clear a disk
# Run all the commands to clear a disk
local device=${1}
local dm_devices=$(get_dm_devices_from_osd_device "${device}" | xargs)
for dm_device in ${dm_devices}; do

View File

@ -28,169 +28,113 @@ else
export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION})
fi
function osd_disk_prepare {
if [[ -z "${OSD_DEVICE}" ]]; then
echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
exit 1
# Check OSD FSID and journalling metadata
# Returns 1 if the disk should be zapped; 0 otherwise.
function check_osd_metadata {
local ceph_fsid=$1
retcode=0
local tmpmnt=$(mktemp -d)
mount ${DM_DEV} ${tmpmnt}
if [ "x$JOURNAL_TYPE" != "xdirectory" ]; then
if [ -f "${tmpmnt}/whoami" ]; then
OSD_JOURNAL_DISK=$(readlink -f "${tmpmnt}/journal")
local osd_id=$(cat "${tmpmnt}/whoami")
if [ ! -b "${OSD_JOURNAL_DISK}" ]; then
OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL})
local jdev=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g')
if [ ${jdev} == ${OSD_JOURNAL} ]; then
echo "OSD Init: It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL}."
echo "OSD Init: Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it."
rm -rf ${tmpmnt}/ceph_fsid
else
echo "OSD Init: It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL_DISK}."
echo "OSD Init: Because OSD_FORCE_REPAIR is set and paritions are manually defined, we will"
echo "OSD Init: attempt to recreate the missing journal device partitions."
osd_journal_create ${OSD_JOURNAL}
ln -sf /dev/disk/by-partuuid/${OSD_JOURNAL_UUID} ${tmpmnt}/journal
echo ${OSD_JOURNAL_UUID} | tee ${tmpmnt}/journal_uuid
chown ceph. ${OSD_JOURNAL}
# During OSD start we will format the journal and set the fsid
touch ${tmpmnt}/run_mkjournal
fi
fi
else
echo "OSD Init: It looks like ${OSD_DEVICE} has a ceph data partition but is missing it's metadata."
echo "OSD Init: The device may contain inconsistent metadata or be corrupted."
echo "OSD Init: Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it."
rm -rf ${tmpmnt}/ceph_fsid
fi
fi
if [[ ! -b "${OSD_DEVICE}" ]]; then
echo "ERROR- The device pointed by OSD_DEVICE ($OSD_DEVICE) doesn't exist !"
exit 1
fi
if [ -f "${tmpmnt}/ceph_fsid" ]; then
local osd_fsid=$(cat "${tmpmnt}/ceph_fsid")
if [ ! -e $OSD_BOOTSTRAP_KEYRING ]; then
echo "ERROR- $OSD_BOOTSTRAP_KEYRING must exist. You can extract it from your current monitor by running 'ceph auth get client.bootstrap-osd -o $OSD_BOOTSTRAP_KEYRING'"
exit 1
if [ ${osd_fsid} != ${ceph_fsid} ]; then
echo "OSD Init: ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster."
echo "OSD Init: The OSD FSID is ${osd_fsid} while this cluster is ${ceph_fsid}"
echo "OSD Init: Because OSD_FORCE_REPAIR was set, we will zap this device."
ZAP_EXTRA_PARTITIONS=${tmpmnt}
retcode=1
else
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster."
echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped."
echo "Moving on, trying to activate the OSD now."
fi
else
echo "OSD Init: ${OSD_DEVICE} has a ceph data partition but no FSID."
echo "OSD Init: Because OSD_FORCE_REPAIR was set, we will zap this device."
ZAP_EXTRA_PARTITIONS=${tmpmnt}
retcode=1
fi
timeout 10 ceph ${CLI_OPTS} --name client.bootstrap-osd --keyring $OSD_BOOTSTRAP_KEYRING health || exit 1
umount ${tmpmnt}
return ${retcode}
}
#search for some ceph metadata on the disk based on the status of the disk/lvm in filestore
CEPH_DISK_USED=0
CEPH_LVM_PREPARE=1
udev_settle
OSD_ID=$(get_osd_id_from_device ${OSD_DEVICE})
OSD_FSID=$(get_cluster_fsid_from_device ${OSD_DEVICE})
CLUSTER_FSID=$(ceph-conf --lookup fsid)
DISK_ZAPPED=0
function determine_what_needs_zapping {
if [[ ! -z ${OSD_ID} ]]; then
DM_NUM=$(dmsetup ls | grep $(lsblk -J ${OSD_DEVICE} | jq -r '.blockdevices[].children[].name') | awk '{print $2}' | cut -d':' -f2 | cut -d')' -f1)
DM_DEV="/dev/dm-"${DM_NUM}
local dm_num=$(dmsetup ls | grep $(lsblk -J ${OSD_DEVICE} | jq -r '.blockdevices[].children[].name') | awk '{print $2}' | cut -d':' -f2 | cut -d')' -f1)
DM_DEV="/dev/dm-"${dm_num}
elif [[ $(sgdisk --print ${OSD_DEVICE} | grep "F800") ]]; then
DM_DEV=${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
CEPH_DISK_USED=1
# Ceph-disk was used to initialize the disk, but this is not supported
echo "OSD Init: ceph-disk was used to initialize the disk, but this is no longer supported"
exit 1
else
if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
echo "It looks like ${OSD_DEVICE} isn't consistent, however OSD_FORCE_REPAIR is enabled so we are zapping the device anyway"
disk_zap ${OSD_DEVICE}
DISK_ZAPPED=1
echo "OSD Init: It looks like ${OSD_DEVICE} isn't consistent, however OSD_FORCE_REPAIR is enabled so we are zapping the device anyway"
ZAP_DEVICE=1
else
echo "Regarding parted, device ${OSD_DEVICE} is inconsistent/broken/weird."
echo "It would be too dangerous to destroy it without any notification."
echo "Please set OSD_FORCE_REPAIR to '1' if you really want to zap this disk."
echo "OSD Init: Regarding parted, device ${OSD_DEVICE} is inconsistent/broken/weird."
echo "OSD Init: It would be too dangerous to destroy it without any notification."
echo "OSD Init: Please set OSD_FORCE_REPAIR to '1' if you really want to zap this disk."
exit 1
fi
fi
if [ ${OSD_FORCE_REPAIR} -eq 1 ] && [ ! -z ${DM_DEV} ]; then
if [ -b $DM_DEV ]; then
local cephFSID=$(ceph-conf --lookup fsid)
if [ ! -z "${cephFSID}" ]; then
local tmpmnt=$(mktemp -d)
mount ${DM_DEV} ${tmpmnt}
if [ "x$JOURNAL_TYPE" != "xdirectory" ]; then
if [ -f "${tmpmnt}/whoami" ]; then
OSD_JOURNAL_DISK=$(readlink -f "${tmpmnt}/journal")
local osd_id=$(cat "${tmpmnt}/whoami")
if [ ! -b "${OSD_JOURNAL_DISK}" ]; then
OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL})
local jdev=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g')
if [ ${jdev} == ${OSD_JOURNAL} ]; then
echo "It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL}."
echo "Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it."
rm -rf ${tmpmnt}/ceph_fsid
else
echo "It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL_DISK}."
echo "Because OSD_FORCE_REPAIR is set and paritions are manually defined, we will"
echo "attempt to recreate the missing journal device partitions."
osd_journal_create ${OSD_JOURNAL}
ln -sf /dev/disk/by-partuuid/${OSD_JOURNAL_UUID} ${tmpmnt}/journal
echo ${OSD_JOURNAL_UUID} | tee ${tmpmnt}/journal_uuid
chown ceph. ${OSD_JOURNAL}
# During OSD start we will format the journal and set the fsid
touch ${tmpmnt}/run_mkjournal
fi
fi
else
echo "It looks like ${OSD_DEVICE} has a ceph data partition but is missing it's metadata."
echo "The device may contain inconsistent metadata or be corrupted."
echo "Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it."
rm -rf ${tmpmnt}/ceph_fsid
fi
fi
if [ -f "${tmpmnt}/ceph_fsid" ]; then
osdFSID=$(cat "${tmpmnt}/ceph_fsid")
if [ ${osdFSID} != ${cephFSID} ]; then
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster."
echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}"
echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
zap_extra_partitions ${tmpmnt}
umount ${tmpmnt}
disk_zap ${OSD_DEVICE}
else
umount ${tmpmnt}
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster."
echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped."
echo "Moving on, trying to activate the OSD now."
fi
else
echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID."
echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
zap_extra_partitions ${tmpmnt}
umount ${tmpmnt}
disk_zap ${OSD_DEVICE}
local ceph_fsid=$(ceph-conf --lookup fsid)
if [ ! -z "${ceph_fsid}" ]; then
# Check the OSD metadata and zap the disk if necessary
if [[ $(check_osd_metadata ${ceph_fsid}) -eq 1 ]]; then
echo "OSD Init: ${OSD_DEVICE} needs to be zapped..."
ZAP_DEVICE=1
fi
else
echo "Unable to determine the FSID of the current cluster."
echo "OSD_FORCE_REPAIR is set, but this OSD will not be zapped."
echo "Moving on, trying to activate the OSD now."
return
fi
else
echo "parted says ${DM_DEV} should exist, but we do not see it."
echo "We will ignore OSD_FORCE_REPAIR and try to use the device as-is"
echo "Moving on, trying to activate the OSD now."
return
fi
else
echo "INFO- It looks like ${OSD_DEVICE} is an OSD LVM"
echo "Moving on, trying to prepare and activate the OSD LVM now."
fi
if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
udev_settle
CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE}"
ceph-volume simple scan --force ${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
elif [[ ${CEPH_LVM_PREPARE} -eq 1 ]] || [[ ${DISK_ZAPPED} -eq 1 ]]; then
udev_settle
vg_name=$(get_vg_name_from_device ${OSD_DEVICE})
if [[ "${vg_name}" ]]; then
OSD_VG=${vg_name}
else
random_uuid=$(uuidgen)
vgcreate ceph-vg-${random_uuid} ${OSD_DEVICE}
vg_name=$(get_vg_name_from_device ${OSD_DEVICE})
vgrename ceph-vg-${random_uuid} ${vg_name}
OSD_VG=${vg_name}
fi
lv_name=$(get_lv_name_from_device ${OSD_DEVICE} lv)
if [[ ! "$(lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then
lvcreate --yes -l 100%FREE -n ${lv_name} ${OSD_VG}
fi
OSD_LV=${OSD_VG}/${lv_name}
CLI_OPTS="${CLI_OPTS} --data ${OSD_LV}"
CEPH_LVM_PREPARE=1
udev_settle
fi
if [ ${CEPH_DISK_USED} -eq 0 ] ; then
if pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then
CEPH_LVM_PREPARE=0
fi
fi
osd_journal_prepare
CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE} --journal ${OSD_JOURNAL}"
udev_settle
if [ ! -z "$DEVICE_CLASS" ]; then
CLI_OPTS="${CLI_OPTS} --crush-device-class ${DEVICE_CLASS}"
fi
if [[ ${CEPH_LVM_PREPARE} -eq 1 ]]; then
ceph-volume lvm -v prepare ${CLI_OPTS}
udev_settle
fi
}
function osd_journal_create {
@ -205,7 +149,7 @@ function osd_journal_create {
OSD_JOURNAL=$(dev_part ${jdev} ${osd_journal_partition})
udev_settle
else
echo "The backing device ${jdev} for ${OSD_JOURNAL} does not exist on this system."
echo "OSD Init: The backing device ${jdev} for ${OSD_JOURNAL} does not exist on this system."
exit 1
fi
}
@ -235,3 +179,36 @@ function osd_journal_prepare {
fi
CLI_OPTS="${CLI_OPTS} --filestore"
}
# Prepare a filestore OSD disk with ceph-volume.
#
# Globals read:    CEPH_LVM_PREPARE, DISK_ZAPPED, OSD_DEVICE, OSD_JOURNAL,
#                  DEVICE_CLASS, RESULTING_LV (set by create_lv_if_needed)
# Globals written: CLI_OPTS, CEPH_LVM_PREPARE, RESULTING_VG, RESULTING_LV
# Arguments:       none
#
# "ceph-volume lvm prepare" is only run when CEPH_LVM_PREPARE is still 1 at
# the end of the function.
function osd_disk_prepare {
  if [[ ${CEPH_LVM_PREPARE} -eq 1 ]] || [[ ${DISK_ZAPPED} -eq 1 ]]; then
    udev_settle
    # The helpers report their results through RESULTING_VG / RESULTING_LV.
    RESULTING_VG=""; RESULTING_LV="";
    create_vg_if_needed "${OSD_DEVICE}"
    create_lv_if_needed "${OSD_DEVICE}" "${RESULTING_VG}" "--yes -l 100%FREE"

    CLI_OPTS="${CLI_OPTS} --data ${RESULTING_LV}"
    CEPH_LVM_PREPARE=1
    udev_settle
  fi

  # A physical volume that already belongs to a VG whose name contains "ceph"
  # is treated as already prepared. grep also prints the matching VG name.
  if pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then
    echo "OSD Init: Device is already set up. LVM prepare does not need to be called."
    CEPH_LVM_PREPARE=0
  fi

  osd_journal_prepare
  # NOTE(review): when the first branch above ran, CLI_OPTS already carries
  # "--data <vg>/<lv>"; this appends a second --data option. Confirm which one
  # ceph-volume honours before relying on it.
  CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE} --journal ${OSD_JOURNAL}"
  udev_settle

  if [ ! -z "$DEVICE_CLASS" ]; then
    CLI_OPTS="${CLI_OPTS} --crush-device-class ${DEVICE_CLASS}"
  fi

  if [[ ${CEPH_LVM_PREPARE} -eq 1 ]]; then
    # Log the command exactly as it is executed on the next line.
    echo "OSD Init: Calling ceph-volume lvm -v prepare ${CLI_OPTS}"
    # CLI_OPTS is left unquoted on purpose so it splits into separate options.
    ceph-volume lvm -v prepare ${CLI_OPTS}
    udev_settle
  fi
}

View File

@ -18,156 +18,139 @@ set -ex
export OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION})
export OSD_BLUESTORE=1
alias prep_device='locked prep_device'
function osd_disk_prepare {
if [[ -z "${OSD_DEVICE}" ]]; then
echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
exit 1
fi
function check_block_device_for_zap {
local block_device=$1
local device_type=$2
if [[ ! -b "${OSD_DEVICE}" ]]; then
echo "ERROR- The device pointed by OSD_DEVICE ($OSD_DEVICE) doesn't exist !"
exit 1
fi
if [ ! -e $OSD_BOOTSTRAP_KEYRING ]; then
echo "ERROR- $OSD_BOOTSTRAP_KEYRING must exist. You can extract it from your current monitor by running 'ceph auth get client.bootstrap-osd -o $OSD_BOOTSTRAP_KEYRING'"
exit 1
fi
timeout 10 ceph ${CLI_OPTS} --name client.bootstrap-osd --keyring $OSD_BOOTSTRAP_KEYRING health || exit 1
#search for some ceph metadata on the disk based on the status of the disk/lvm in filestore
CEPH_DISK_USED=0
CEPH_LVM_PREPARE=1
udev_settle
OSD_ID=$(get_osd_id_from_device ${OSD_DEVICE})
OSD_FSID=$(get_cluster_fsid_from_device ${OSD_DEVICE})
CLUSTER_FSID=$(ceph-conf --lookup fsid)
DISK_ZAPPED=0
if [[ ! -z "${OSD_FSID}" ]]; then
if [[ "${OSD_FSID}" == "${CLUSTER_FSID}" ]]; then
if [[ ! -z "${OSD_ID}" ]]; then
if ceph --name client.bootstrap-osd --keyring $OSD_BOOTSTRAP_KEYRING osd ls |grep -w ${OSD_ID}; then
echo "Running bluestore mode and ${OSD_DEVICE} already bootstrapped"
if [[ ${block_device} ]]; then
local vg_name=$(get_vg_name_from_device ${block_device})
local lv_name=$(get_lv_name_from_device ${OSD_DEVICE} ${device_type})
local vg=$(vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]')
if [[ "${vg}" ]]; then
local device_osd_id=$(get_osd_id_from_volume "/dev/${vg_name}/${lv_name}")
CEPH_LVM_PREPARE=1
if [[ -n "${device_osd_id}" ]] && [[ -n "${OSD_ID}" ]]; then
if [[ "${device_osd_id}" == "${OSD_ID}" ]]; then
echo "OSD Init: OSD ID matches the OSD ID already on the data volume. LVM prepare does not need to be called."
CEPH_LVM_PREPARE=0
elif [[ $OSD_FORCE_REPAIR -eq 1 ]]; then
else
echo "OSD Init: OSD ID does match the OSD ID on the data volume. Device needs to be zapped."
ZAP_DEVICE=1
fi
fi
# Check if this device (db or wal) has no associated data volume
local logical_volumes="$(lvs --noheadings -o lv_name ${vg} | xargs)"
for volume in ${logical_volumes}; do
local data_volume=$(echo ${volume} | sed -E -e 's/-db-|-wal-/-lv-/g')
if [[ -z $(lvs --noheadings -o lv_name -S "lv_name=${data_volume}") ]]; then
# DB or WAL volume without a corresponding data volume, remove it
lvremove -y /dev/${vg}/${volume}
echo "OSD Init: LV /dev/${vg}/${volume} was removed as it did not have a data volume."
fi
done
else
if [[ "${vg_name}" ]]; then
local logical_devices=$(get_dm_devices_from_osd_device "${OSD_DEVICE}")
local device_filter=$(echo "${vg_name}" | sed 's/-/--/g')
local logical_devices=$(echo "${logical_devices}" | grep "${device_filter}" | xargs)
if [[ "$logical_devices" ]]; then
echo "OSD Init: No VG resources found with name ${vg_name}. Device needs to be zapped."
ZAP_DEVICE=1
fi
fi
fi
fi
}
function determine_what_needs_zapping {
local osd_fsid=$(get_cluster_fsid_from_device ${OSD_DEVICE})
local cluster_fsid=$(ceph-conf --lookup fsid)
# If the OSD FSID is defined within the device, check if we're already bootstrapped.
if [[ ! -z "${osd_fsid}" ]]; then
# Check if the OSD FSID is the same as the cluster FSID. If so, then we're
# already bootstrapped; otherwise, this is an old disk and needs to
# be zapped.
if [[ "${osd_fsid}" == "${cluster_fsid}" ]]; then
if [[ ! -z "${OSD_ID}" ]]; then
# Check to see what needs to be done to prepare the disk. If the OSD
# ID is in the Ceph OSD list, then LVM prepare does not need to be done.
if ceph --name client.bootstrap-osd --keyring $OSD_BOOTSTRAP_KEYRING osd ls |grep -w ${OSD_ID}; then
echo "OSD Init: Running bluestore mode and ${OSD_DEVICE} already bootstrapped. LVM prepare does not need to be called."
CEPH_LVM_PREPARE=0
elif [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
echo "OSD initialized for this cluster, but OSD ID not found in the cluster, reinitializing"
else
echo "OSD initialized for this cluster, but OSD ID not found in the cluster"
fi
fi
else
echo "OSD initialized for a different cluster, zapping it"
disk_zap ${OSD_DEVICE}
udev_settle
echo "OSD Init: OSD FSID ${osd_fsid} initialized for a different cluster. It needs to be zapped."
ZAP_DEVICE=1
fi
elif [[ $(sgdisk --print ${OSD_DEVICE} | grep "F800") ]]; then
DM_DEV=${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
CEPH_DISK_USED=1
else
if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
echo "${OSD_DEVICE} isn't clean, zapping it because OSD_FORCE_REPAIR is enabled"
disk_zap ${OSD_DEVICE}
else
echo "${OSD_DEVICE} isn't clean, but OSD_FORCE_REPAIR isn't enabled."
echo "Please set OSD_FORCE_REPAIR to '1' if you want to zap this disk."
exit 1
fi
fi
# Ceph-disk was used to initialize the disk, but this is not supported
echo "ceph-disk was used to initialize the disk, but this is no longer supported"
exit 1
fi
if [ ${OSD_FORCE_REPAIR} -eq 1 ] && [ ! -z ${DM_DEV} ]; then
if [ -b $DM_DEV ]; then
local cephFSID=$(ceph-conf --lookup fsid)
if [ ! -z "${cephFSID}" ]; then
local tmpmnt=$(mktemp -d)
mount ${DM_DEV} ${tmpmnt}
if [ -f "${tmpmnt}/ceph_fsid" ]; then
osdFSID=$(cat "${tmpmnt}/ceph_fsid")
if [ ${osdFSID} != ${cephFSID} ]; then
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster."
echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}"
echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
zap_extra_partitions ${tmpmnt}
umount ${tmpmnt}
disk_zap ${OSD_DEVICE}
else
umount ${tmpmnt}
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster."
echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped."
echo "Moving on, trying to activate the OSD now."
fi
else
echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID."
echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
zap_extra_partitions ${tmpmnt}
umount ${tmpmnt}
disk_zap ${OSD_DEVICE}
fi
else
echo "Unable to determine the FSID of the current cluster."
echo "OSD_FORCE_REPAIR is set, but this OSD will not be zapped."
echo "Moving on, trying to activate the OSD now."
return
fi
else
echo "parted says ${DM_DEV} should exist, but we do not see it."
echo "We will ignore OSD_FORCE_REPAIR and try to use the device as-is"
echo "Moving on, trying to activate the OSD now."
return
fi
else
echo "INFO- It looks like ${OSD_DEVICE} is an OSD LVM"
echo "Moving on, trying to prepare and activate the OSD LVM now."
fi
check_block_device_for_zap "${BLOCK_DB}" db
check_block_device_for_zap "${BLOCK_WAL}" wal
if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
# Zapping extra partitions isn't done for bluestore
ZAP_EXTRA_PARTITIONS=0
}
# Make sure the volume group and logical volume backing a block.db or
# block.wal device exist, then publish the resulting "<vg>/<lv>" name.
#
# Arguments:
#   $1 - block device that hosts the DB/WAL volume
#   $2 - size handed to create_lv_if_needed as "-L <size>"
#   $3 - device type; "db" updates BLOCK_DB, "wal" updates BLOCK_WAL
# Globals read:    OSD_DEVICE
# Globals written: RESULTING_VG, RESULTING_LV, BLOCK_DB or BLOCK_WAL
function prep_device {
  local block_device=$1
  local block_device_size=$2
  local device_type=$3
  local vg_name lv_name vg device_osd_id logical_devices logical_volume

  # Reset the out-variables the create_* helpers report through.
  RESULTING_VG=""
  RESULTING_LV=""
  udev_settle

  vg_name=$(get_vg_name_from_device ${block_device})
  lv_name=$(get_lv_name_from_device ${OSD_DEVICE} ${device_type})

  # Look the expected VG up by name; empty output means it does not exist yet.
  vg=$(vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]')
  if [[ ! "${vg}" ]]; then
    create_vg_if_needed "${block_device}"
    vg=${RESULTING_VG}
  fi
  udev_settle

  create_lv_if_needed "${block_device}" "${vg}" "-L ${block_device_size}" "${lv_name}"

  # Hand the volume back through the global matching the device type.
  case "${device_type}" in
    db)
      BLOCK_DB=${RESULTING_LV}
      ;;
    wal)
      BLOCK_WAL=${RESULTING_LV}
      ;;
  esac
  udev_settle
}
function osd_disk_prepare {
if [[ ${CEPH_LVM_PREPARE} -eq 1 ]] || [[ ${DISK_ZAPPED} -eq 1 ]]; then
udev_settle
CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE}"
ceph-volume simple scan --force ${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
elif [[ ${CEPH_LVM_PREPARE} -eq 1 ]] || [[ ${DISK_ZAPPED} -eq 1 ]]; then
udev_settle
vg_name=$(get_vg_name_from_device ${OSD_DEVICE})
if [[ "${vg_name}" ]]; then
OSD_VG=${vg_name}
else
random_uuid=$(uuidgen)
vgcreate ceph-vg-${random_uuid} ${OSD_DEVICE}
vg_name=$(get_vg_name_from_device ${OSD_DEVICE})
vgrename ceph-vg-${random_uuid} ${vg_name}
OSD_VG=${vg_name}
fi
lv_name=$(get_lv_name_from_device ${OSD_DEVICE} lv)
if [[ ! "$(lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then
lvcreate --yes -l 100%FREE -n ${lv_name} ${OSD_VG}
fi
OSD_LV=${OSD_VG}/${lv_name}
CLI_OPTS="${CLI_OPTS} --data ${OSD_LV}"
RESULTING_VG=""; RESULTING_LV="";
create_vg_if_needed "${OSD_DEVICE}"
create_lv_if_needed "${OSD_DEVICE}" "${RESULTING_VG}" "--yes -l 100%FREE"
CLI_OPTS="${CLI_OPTS} --data ${RESULTING_LV}"
CEPH_LVM_PREPARE=1
udev_settle
fi
if [ ${CEPH_DISK_USED} -eq 0 ]; then
if [[ ${BLOCK_DB} ]]; then
block_db_string=$(echo ${BLOCK_DB} | awk -F "/" '{print $2 "-" $3}')
fi
if [[ ${BLOCK_WAL} ]]; then
block_wal_string=$(echo ${BLOCK_WAL} | awk -F "/" '{print $2 "-" $3}')
fi
if [[ ${BLOCK_DB} && ${BLOCK_WAL} ]]; then
prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
elif [[ -z ${BLOCK_DB} && ${BLOCK_WAL} ]]; then
prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
elif [[ ${BLOCK_DB} && -z ${BLOCK_WAL} ]]; then
prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
fi
else
if pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then
CEPH_LVM_PREPARE=0
fi
if [[ ${BLOCK_DB} && ${BLOCK_WAL} ]]; then
prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
elif [[ -z ${BLOCK_DB} && ${BLOCK_WAL} ]]; then
prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
elif [[ ${BLOCK_DB} && -z ${BLOCK_WAL} ]]; then
prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
fi
CLI_OPTS="${CLI_OPTS} --bluestore"
@ -185,6 +168,7 @@ function osd_disk_prepare {
fi
if [[ ${CEPH_LVM_PREPARE} -eq 1 ]]; then
echo "OSD Init: Calling ceph-volume lvm-v prepare ${CLI_OPTS}"
ceph-volume lvm -v prepare ${CLI_OPTS}
udev_settle
fi

View File

@ -16,17 +16,17 @@ limitations under the License.
set -ex
: "${OSD_FORCE_REPAIR:=0}"
source /tmp/osd-common-ceph-volume.sh
source /tmp/init-ceph-volume-helper-${STORAGE_TYPE}.sh
: "${OSD_FORCE_REPAIR:=0}"
# Set up aliases for functions that require disk synchronization
alias rename_vg='locked rename_vg'
alias rename_lvs='locked rename_lvs'
alias update_lv_tags='locked update_lv_tags'
alias prep_device='locked prep_device'
# Renames a single VG if necessary
function rename_vg {
@ -36,6 +36,7 @@ function rename_vg {
if [[ "${old_vg_name}" ]] && [[ "${vg_name}" != "${old_vg_name}" ]]; then
vgrename ${old_vg_name} ${vg_name}
echo "OSD Init: Renamed volume group ${old_vg_name} to ${vg_name}."
fi
}
@ -51,6 +52,7 @@ function rename_lvs {
if [[ "${old_lv_name}" ]] && [[ "${lv_name}" != "${old_lv_name}" ]]; then
lvrename ${vg_name} ${old_lv_name} ${lv_name}
echo "OSD Init: Renamed logical volume ${old_lv_name} (from group ${vg_name}) to ${lv_name}."
fi
# Rename the OSD's block.db volume if necessary, referenced by UUID
@ -66,6 +68,7 @@ function rename_lvs {
if [[ "${old_lv_name}" ]] && [[ "${db_name}" != "${old_lv_name}" ]]; then
lvrename ${db_vg} ${old_lv_name} ${db_name}
echo "OSD Init: Renamed DB logical volume ${old_lv_name} (from group ${db_vg}) to ${db_name}."
fi
fi
fi
@ -83,6 +86,7 @@ function rename_lvs {
if [[ "${old_lv_name}" ]] && [[ "${wal_name}" != "${old_lv_name}" ]]; then
lvrename ${wal_vg} ${old_lv_name} ${wal_name}
echo "OSD Init: Renamed WAL logical volume ${old_lv_name} (from group ${wal_vg}) to ${wal_name}."
fi
fi
fi
@ -124,80 +128,84 @@ function update_lv_tags {
lvchange --deltag "ceph.block_device=${old_block_device}" /dev/${vg}/${lv}
fi
lvchange --addtag "ceph.block_device=${block_device}" /dev/${vg}/${lv}
echo "OSD Init: Updated lv tags for data volume ${block_device}."
fi
if [[ "${db_device}" ]]; then
if [[ "${old_db_device}" ]]; then
lvchange --deltag "ceph.db_device=${old_db_device}" /dev/${vg}/${lv}
fi
lvchange --addtag "ceph.db_device=${db_device}" /dev/${vg}/${lv}
echo "OSD Init: Updated lv tags for DB volume ${db_device}."
fi
if [[ "${wal_device}" ]]; then
if [[ "${old_wal_device}" ]]; then
lvchange --deltag "ceph.wal_device=${old_wal_device}" /dev/${vg}/${lv}
fi
lvchange --addtag "ceph.wal_device=${wal_device}" /dev/${vg}/${lv}
echo "OSD Init: Updated lv tags for WAL volume ${wal_device}."
fi
done <<< ${volumes}
fi
}
function prep_device {
local BLOCK_DEVICE=$1
local BLOCK_DEVICE_SIZE=$2
local device_type=$3
local data_disk=$4
local vg_name lv_name VG DEVICE_OSD_ID logical_devices logical_volume
udev_settle
vg_name=$(get_vg_name_from_device ${BLOCK_DEVICE})
lv_name=$(get_lv_name_from_device ${data_disk} ${device_type})
VG=$(vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]')
if [[ "${VG}" ]]; then
DEVICE_OSD_ID=$(get_osd_id_from_volume "/dev/${vg_name}/${lv_name}")
CEPH_LVM_PREPARE=1
if [[ -n "${DEVICE_OSD_ID}" ]] && [[ -n "${OSD_ID}" ]]; then
if [[ "${DEVICE_OSD_ID}" == "${OSD_ID}" ]]; then
CEPH_LVM_PREPARE=0
else
disk_zap "${OSD_DEVICE}"
fi
fi
logical_volumes="$(lvs --noheadings -o lv_name ${VG} | xargs)"
for volume in ${logical_volumes}; do
data_volume=$(echo ${volume} | sed -E -e 's/-db-|-wal-/-lv-/g')
if [[ -z $(lvs --noheadings -o lv_name -S "lv_name=${data_volume}") ]]; then
# DB or WAL volume without a corresponding data volume, remove it
lvremove -y /dev/${VG}/${volume}
fi
done
else
if [[ "${vg_name}" ]]; then
logical_devices=$(get_dm_devices_from_osd_device "${data_disk}")
device_filter=$(echo "${vg_name}" | sed 's/-/--/g')
logical_devices=$(echo "${logical_devices}" | grep "${device_filter}" | xargs)
if [[ "$logical_devices" ]]; then
dmsetup remove $logical_devices
disk_zap "${OSD_DEVICE}"
CEPH_LVM_PREPARE=1
fi
fi
random_uuid=$(uuidgen)
vgcreate "ceph-vg-${random_uuid}" "${BLOCK_DEVICE}"
VG=$(get_vg_name_from_device ${BLOCK_DEVICE})
vgrename "ceph-vg-${random_uuid}" "${VG}"
function create_vg_if_needed {
local bl_device=$1
local vg_name=$(get_vg_name_from_device ${bl_device})
if [[ -z "${vg_name}" ]]; then
local random_uuid=$(uuidgen)
vgcreate ceph-vg-${random_uuid} ${bl_device}
vg_name=$(get_vg_name_from_device ${bl_device})
vgrename ceph-vg-${random_uuid} ${vg_name}
echo "OSD Init: Created volume group ${vg_name} for device ${bl_device}."
fi
udev_settle
logical_volume=$(lvs --noheadings -o lv_name -S "lv_name=${lv_name}" | tr -d '[:space:]')
if [[ $logical_volume != "${lv_name}" ]]; then
lvcreate -L "${BLOCK_DEVICE_SIZE}" -n "${lv_name}" "${VG}"
RESULTING_VG=${vg_name}
}
function create_lv_if_needed {
local bl_device=$1
local vg_name=$2
local options=$3
local lv_name=${4:-$(get_lv_name_from_device ${bl_device} lv)}
if [[ ! "$(lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then
lvcreate ${options} -n ${lv_name} ${vg_name}
echo "OSD Init: Created logical volume ${lv_name} in group ${vg_name} for device ${bl_device}."
fi
if [[ "${device_type}" == "db" ]]; then
BLOCK_DB="${VG}/${lv_name}"
elif [[ "${device_type}" == "wal" ]]; then
BLOCK_WAL="${VG}/${lv_name}"
RESULTING_LV=${vg_name}/${lv_name}
}
# Verify the prerequisites for preparing an OSD disk.
#
# Globals read: OSD_DEVICE, OSD_BOOTSTRAP_KEYRING, CLI_OPTS
# Arguments:    none
#
# Exits the script with status 1 when OSD_DEVICE is empty or not a block
# device, when the bootstrap keyring path is empty or missing, or when the
# "ceph ... health" call does not succeed within 10 seconds.
function osd_disk_prechecks {
  if [[ -z "${OSD_DEVICE}" ]]; then
    echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
    exit 1
  fi

  if [[ ! -b "${OSD_DEVICE}" ]]; then
    echo "ERROR- The device pointed by OSD_DEVICE ($OSD_DEVICE) doesn't exist !"
    exit 1
  fi

  # The path must be quoted: an unquoted empty value collapses the test to
  # "[ ! -e ]", which is false, so the guard would be skipped entirely.
  if [[ ! -e "${OSD_BOOTSTRAP_KEYRING}" ]]; then
    echo "ERROR- $OSD_BOOTSTRAP_KEYRING must exist. You can extract it from your current monitor by running 'ceph auth get client.bootstrap-osd -o $OSD_BOOTSTRAP_KEYRING'"
    exit 1
  fi

  # CLI_OPTS is left unquoted on purpose so it splits into separate options.
  timeout 10 ceph ${CLI_OPTS} --name client.bootstrap-osd --keyring "${OSD_BOOTSTRAP_KEYRING}" health || exit 1
}
# Zap the OSD device, first zapping any extra partitions that were requested.
#
# Globals read:    ZAP_EXTRA_PARTITIONS, OSD_DEVICE
# Globals written: DISK_ZAPPED (set to 1)
# Arguments:       none
function perform_zap {
  # Both an empty value and the literal "0" mean there is nothing extra to zap:
  # the main program initializes the variable to "" and the bluestore helper
  # assigns 0 to disable this step.
  if [[ -n "${ZAP_EXTRA_PARTITIONS}" && "${ZAP_EXTRA_PARTITIONS}" != "0" ]]; then
    # This is used for filestore/blockstore only
    echo "OSD Init: Zapping extra partitions ${ZAP_EXTRA_PARTITIONS}"
    zap_extra_partitions "${ZAP_EXTRA_PARTITIONS}"
  fi

  echo "OSD Init: Zapping device ${OSD_DEVICE}..."
  disk_zap "${OSD_DEVICE}"
  DISK_ZAPPED=1
  udev_settle
}
#######################################################################
# Main program
#######################################################################
@ -213,11 +221,13 @@ if [[ "${STORAGE_TYPE}" != "directory" ]]; then
rename_vg ${OSD_DEVICE}
fi
# Rename block DB device VG next
if [[ "${BLOCK_DB}" ]]; then
BLOCK_DB=$(readlink -f ${BLOCK_DB})
rename_vg ${BLOCK_DB}
fi
# Rename block WAL device VG next
if [[ "${BLOCK_WAL}" ]]; then
BLOCK_WAL=$(readlink -f ${BLOCK_WAL})
rename_vg ${BLOCK_WAL}
@ -232,6 +242,25 @@ if [[ "${STORAGE_TYPE}" != "directory" ]]; then
# Settle LVM changes again after any changes have been made
udev_settle
# Check to make sure we have what we need to continue.
# osd_disk_prechecks exits the script with status 1 if OSD_DEVICE, the
# bootstrap keyring, or the cluster health check is not usable.
osd_disk_prechecks
# Initialize some important global variables
# CEPH_LVM_PREPARE: 1 means "ceph-volume lvm prepare" still has to be run.
CEPH_LVM_PREPARE=1
OSD_ID=$(get_osd_id_from_device ${OSD_DEVICE})
# DISK_ZAPPED is set to 1 by perform_zap once the device has been zapped.
DISK_ZAPPED=0
# ZAP_DEVICE is set to 1 by determine_what_needs_zapping when a zap is needed.
ZAP_DEVICE=0
# ZAP_EXTRA_PARTITIONS is passed by perform_zap to zap_extra_partitions.
ZAP_EXTRA_PARTITIONS=""
# The disk may need to be zapped or some LVs may need to be deleted before
# moving on with the disk preparation.
# determine_what_needs_zapping comes from the storage-type helper script
# sourced near the top of this file.
determine_what_needs_zapping
# disk_zap is only reached through perform_zap, so this is the single place
# where the device gets zapped.
if [[ ${ZAP_DEVICE} -eq 1 ]]; then
perform_zap
fi
# Prepare the disk for use
osd_disk_prepare
# Clean up resources held by the common script

View File

@ -22,4 +22,5 @@ ceph-osd:
- 0.1.19 Update rbac api version
- 0.1.20 Update directory-based OSD deployment for image changes
- 0.1.21 Refactor Ceph OSD Init Scripts - First PS
- 0.1.22 Refactor Ceph OSD Init Scripts - Second PS
...