Refactor Ceph OSD Init Scripts - Second PS
1) Removed some remaining unsupported ceph-disk-related code.
2) Refactored the code that determines when a disk should be zapped. Now there will be only one place where disk_zap is called.
3) Refactored the code that determines when LVM prepare should be called.
4) Improved the logging within the OSD init files.

Change-Id: I194c82985f1f71b30d172f9e41438fa814500601
This commit is contained in:
parent
948e07e151
commit
17d9fe4de9
|
@ -15,6 +15,6 @@ apiVersion: v1
|
||||||
appVersion: v1.0.0
|
appVersion: v1.0.0
|
||||||
description: OpenStack-Helm Ceph OSD
|
description: OpenStack-Helm Ceph OSD
|
||||||
name: ceph-osd
|
name: ceph-osd
|
||||||
version: 0.1.21
|
version: 0.1.22
|
||||||
home: https://github.com/ceph/ceph
|
home: https://github.com/ceph/ceph
|
||||||
...
|
...
|
||||||
|
|
|
@ -43,45 +43,32 @@ if [[ ! -b "${OSD_DEVICE}" ]]; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
CEPH_DISK_OPTIONS=""
|
ACTIVATE_OPTIONS=""
|
||||||
CEPH_OSD_OPTIONS=""
|
CEPH_OSD_OPTIONS=""
|
||||||
|
|
||||||
udev_settle
|
udev_settle
|
||||||
|
|
||||||
OSD_ID=$(ceph-volume inventory ${OSD_DEVICE} | grep "osd id" | awk '{print $3}')
|
OSD_ID=$(ceph-volume inventory ${OSD_DEVICE} | grep "osd id" | awk '{print $3}')
|
||||||
simple_activate=0
|
|
||||||
if [[ -z ${OSD_ID} ]]; then
|
if [[ -z ${OSD_ID} ]]; then
|
||||||
echo "Looks like ceph-disk has been used earlier to activate the OSD."
|
echo "OSD_ID not found from device ${OSD_DEVICE}"
|
||||||
tmpmnt=$(mktemp -d)
|
exit 1
|
||||||
mount ${OSD_DEVICE}1 ${tmpmnt}
|
|
||||||
OSD_ID=$(cat ${tmpmnt}/whoami)
|
|
||||||
umount ${tmpmnt}
|
|
||||||
simple_activate=1
|
|
||||||
fi
|
fi
|
||||||
OSD_FSID=$(ceph-volume inventory ${OSD_DEVICE} | grep "osd fsid" | awk '{print $3}')
|
OSD_FSID=$(ceph-volume inventory ${OSD_DEVICE} | grep "osd fsid" | awk '{print $3}')
|
||||||
if [[ -z ${OSD_FSID} ]]; then
|
if [[ -z ${OSD_FSID} ]]; then
|
||||||
echo "Looks like ceph-disk has been used earlier to activate the OSD."
|
echo "OSD_FSID not found from device ${OSD_DEVICE}"
|
||||||
tmpmnt=$(mktemp -d)
|
exit 1
|
||||||
mount ${OSD_DEVICE}1 ${tmpmnt}
|
|
||||||
OSD_FSID=$(cat ${tmpmnt}/fsid)
|
|
||||||
umount ${tmpmnt}
|
|
||||||
simple_activate=1
|
|
||||||
fi
|
fi
|
||||||
OSD_PATH="${OSD_PATH_BASE}-${OSD_ID}"
|
OSD_PATH="${OSD_PATH_BASE}-${OSD_ID}"
|
||||||
OSD_KEYRING="${OSD_PATH}/keyring"
|
OSD_KEYRING="${OSD_PATH}/keyring"
|
||||||
|
|
||||||
mkdir -p ${OSD_PATH}
|
mkdir -p ${OSD_PATH}
|
||||||
|
|
||||||
if [[ ${simple_activate} -eq 1 ]]; then
|
ceph-volume lvm -v \
|
||||||
ceph-volume simple activate --no-systemd ${OSD_ID} ${OSD_FSID}
|
--setuser ceph \
|
||||||
else
|
--setgroup disk \
|
||||||
ceph-volume lvm -v \
|
activate ${ACTIVATE_OPTIONS} \
|
||||||
--setuser ceph \
|
--auto-detect-objectstore \
|
||||||
--setgroup disk \
|
--no-systemd ${OSD_ID} ${OSD_FSID}
|
||||||
activate ${CEPH_DISK_OPTIONS} \
|
|
||||||
--auto-detect-objectstore \
|
|
||||||
--no-systemd ${OSD_ID} ${OSD_FSID}
|
|
||||||
fi
|
|
||||||
|
|
||||||
# NOTE(stevetaylor): Set the OSD's crush weight (use noin flag to prevent rebalancing if necessary)
|
# NOTE(stevetaylor): Set the OSD's crush weight (use noin flag to prevent rebalancing if necessary)
|
||||||
OSD_WEIGHT=$(get_osd_crush_weight_from_device ${OSD_DEVICE})
|
OSD_WEIGHT=$(get_osd_crush_weight_from_device ${OSD_DEVICE})
|
||||||
|
|
|
@ -32,60 +32,47 @@ if [[ ! -b "${OSD_DEVICE}" ]]; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
CEPH_DISK_OPTIONS=""
|
ACTIVATE_OPTIONS=""
|
||||||
CEPH_OSD_OPTIONS=""
|
CEPH_OSD_OPTIONS=""
|
||||||
|
|
||||||
udev_settle
|
udev_settle
|
||||||
|
|
||||||
OSD_ID=$(get_osd_id_from_device ${OSD_DEVICE})
|
OSD_ID=$(get_osd_id_from_device ${OSD_DEVICE})
|
||||||
simple_activate=0
|
|
||||||
if [[ -z ${OSD_ID} ]]; then
|
if [[ -z ${OSD_ID} ]]; then
|
||||||
echo "Looks like ceph-disk has been used earlier to activate the OSD."
|
echo "OSD_ID not found from device ${OSD_DEVICE}"
|
||||||
tmpmnt=$(mktemp -d)
|
exit 1
|
||||||
mount ${OSD_DEVICE}1 ${tmpmnt}
|
|
||||||
OSD_ID=$(cat ${tmpmnt}/whoami)
|
|
||||||
umount ${tmpmnt}
|
|
||||||
simple_activate=1
|
|
||||||
fi
|
fi
|
||||||
OSD_FSID=$(get_osd_fsid_from_device ${OSD_DEVICE})
|
OSD_FSID=$(get_osd_fsid_from_device ${OSD_DEVICE})
|
||||||
if [[ -z ${OSD_FSID} ]]; then
|
if [[ -z ${OSD_FSID} ]]; then
|
||||||
echo "Looks like ceph-disk has been used earlier to activate the OSD."
|
echo "OSD_FSID not found from device ${OSD_DEVICE}"
|
||||||
tmpmnt=$(mktemp -d)
|
exit 1
|
||||||
mount ${OSD_DEVICE}1 ${tmpmnt}
|
|
||||||
OSD_FSID=$(cat ${tmpmnt}/fsid)
|
|
||||||
umount ${tmpmnt}
|
|
||||||
simple_activate=1
|
|
||||||
fi
|
fi
|
||||||
OSD_PATH="${OSD_PATH_BASE}-${OSD_ID}"
|
OSD_PATH="${OSD_PATH_BASE}-${OSD_ID}"
|
||||||
OSD_KEYRING="${OSD_PATH}/keyring"
|
OSD_KEYRING="${OSD_PATH}/keyring"
|
||||||
|
|
||||||
mkdir -p ${OSD_PATH}
|
mkdir -p ${OSD_PATH}
|
||||||
|
|
||||||
if [[ ${simple_activate} -eq 1 ]]; then
|
ceph-volume lvm -v \
|
||||||
ceph-volume simple activate --no-systemd ${OSD_ID} ${OSD_FSID}
|
--setuser ceph \
|
||||||
else
|
--setgroup disk \
|
||||||
ceph-volume lvm -v \
|
activate ${ACTIVATE_OPTIONS} \
|
||||||
--setuser ceph \
|
--auto-detect-objectstore \
|
||||||
--setgroup disk \
|
--no-systemd ${OSD_ID} ${OSD_FSID}
|
||||||
activate ${CEPH_DISK_OPTIONS} \
|
# Cross check the db and wal symlinks if missed
|
||||||
--auto-detect-objectstore \
|
DB_DEV=$(get_osd_db_device_from_device ${OSD_DEVICE})
|
||||||
--no-systemd ${OSD_ID} ${OSD_FSID}
|
if [[ ! -z ${DB_DEV} ]]; then
|
||||||
# Cross check the db and wal symlinks if missed
|
if [[ ! -h /var/lib/ceph/osd/ceph-${OSD_ID}/block.db ]]; then
|
||||||
DB_DEV=$(get_osd_db_device_from_device ${OSD_DEVICE})
|
ln -snf ${DB_DEV} /var/lib/ceph/osd/ceph-${OSD_ID}/block.db
|
||||||
if [[ ! -z ${DB_DEV} ]]; then
|
chown -h ceph:ceph ${DB_DEV}
|
||||||
if [[ ! -h /var/lib/ceph/osd/ceph-${OSD_ID}/block.db ]]; then
|
chown -h ceph:ceph /var/lib/ceph/osd/ceph-${OSD_ID}/block.db
|
||||||
ln -snf ${DB_DEV} /var/lib/ceph/osd/ceph-${OSD_ID}/block.db
|
|
||||||
chown -h ceph:ceph ${DB_DEV}
|
|
||||||
chown -h ceph:ceph /var/lib/ceph/osd/ceph-${OSD_ID}/block.db
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
WAL_DEV=$(get_osd_wal_device_from_device ${OSD_DEVICE})
|
fi
|
||||||
if [[ ! -z ${WAL_DEV} ]]; then
|
WAL_DEV=$(get_osd_wal_device_from_device ${OSD_DEVICE})
|
||||||
if [[ ! -h /var/lib/ceph/osd/ceph-${OSD_ID}/block.wal ]]; then
|
if [[ ! -z ${WAL_DEV} ]]; then
|
||||||
ln -snf ${WAL_DEV} /var/lib/ceph/osd/ceph-${OSD_ID}/block.wal
|
if [[ ! -h /var/lib/ceph/osd/ceph-${OSD_ID}/block.wal ]]; then
|
||||||
chown -h ceph:ceph ${WAL_DEV}
|
ln -snf ${WAL_DEV} /var/lib/ceph/osd/ceph-${OSD_ID}/block.wal
|
||||||
chown -h ceph:ceph /var/lib/ceph/osd/ceph-${OSD_ID}/block.wal
|
chown -h ceph:ceph ${WAL_DEV}
|
||||||
fi
|
chown -h ceph:ceph /var/lib/ceph/osd/ceph-${OSD_ID}/block.wal
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
|
@ -310,7 +310,7 @@ function zap_extra_partitions {
|
||||||
}
|
}
|
||||||
|
|
||||||
function disk_zap {
|
function disk_zap {
|
||||||
# Run all the commands that ceph-disk zap uses to clear a disk
|
# Run all the commands to clear a disk
|
||||||
local device=${1}
|
local device=${1}
|
||||||
local dm_devices=$(get_dm_devices_from_osd_device "${device}" | xargs)
|
local dm_devices=$(get_dm_devices_from_osd_device "${device}" | xargs)
|
||||||
for dm_device in ${dm_devices}; do
|
for dm_device in ${dm_devices}; do
|
||||||
|
|
|
@ -28,169 +28,113 @@ else
|
||||||
export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION})
|
export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION})
|
||||||
fi
|
fi
|
||||||
|
|
||||||
function osd_disk_prepare {
|
# Check OSD FSID and journalling metadata
|
||||||
if [[ -z "${OSD_DEVICE}" ]]; then
|
# Returns 1 if the disk should be zapped; 0 otherwise.
|
||||||
echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
|
function check_osd_metadata {
|
||||||
exit 1
|
local ceph_fsid=$1
|
||||||
|
retcode=0
|
||||||
|
local tmpmnt=$(mktemp -d)
|
||||||
|
mount ${DM_DEV} ${tmpmnt}
|
||||||
|
|
||||||
|
if [ "x$JOURNAL_TYPE" != "xdirectory" ]; then
|
||||||
|
if [ -f "${tmpmnt}/whoami" ]; then
|
||||||
|
OSD_JOURNAL_DISK=$(readlink -f "${tmpmnt}/journal")
|
||||||
|
local osd_id=$(cat "${tmpmnt}/whoami")
|
||||||
|
if [ ! -b "${OSD_JOURNAL_DISK}" ]; then
|
||||||
|
OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL})
|
||||||
|
local jdev=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g')
|
||||||
|
if [ ${jdev} == ${OSD_JOURNAL} ]; then
|
||||||
|
echo "OSD Init: It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL}."
|
||||||
|
echo "OSD Init: Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it."
|
||||||
|
rm -rf ${tmpmnt}/ceph_fsid
|
||||||
|
else
|
||||||
|
echo "OSD Init: It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL_DISK}."
|
||||||
|
echo "OSD Init: Because OSD_FORCE_REPAIR is set and paritions are manually defined, we will"
|
||||||
|
echo "OSD Init: attempt to recreate the missing journal device partitions."
|
||||||
|
osd_journal_create ${OSD_JOURNAL}
|
||||||
|
ln -sf /dev/disk/by-partuuid/${OSD_JOURNAL_UUID} ${tmpmnt}/journal
|
||||||
|
echo ${OSD_JOURNAL_UUID} | tee ${tmpmnt}/journal_uuid
|
||||||
|
chown ceph. ${OSD_JOURNAL}
|
||||||
|
# During OSD start we will format the journal and set the fsid
|
||||||
|
touch ${tmpmnt}/run_mkjournal
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "OSD Init: It looks like ${OSD_DEVICE} has a ceph data partition but is missing it's metadata."
|
||||||
|
echo "OSD Init: The device may contain inconsistent metadata or be corrupted."
|
||||||
|
echo "OSD Init: Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it."
|
||||||
|
rm -rf ${tmpmnt}/ceph_fsid
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ ! -b "${OSD_DEVICE}" ]]; then
|
if [ -f "${tmpmnt}/ceph_fsid" ]; then
|
||||||
echo "ERROR- The device pointed by OSD_DEVICE ($OSD_DEVICE) doesn't exist !"
|
local osd_fsid=$(cat "${tmpmnt}/ceph_fsid")
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ! -e $OSD_BOOTSTRAP_KEYRING ]; then
|
if [ ${osd_fsid} != ${ceph_fsid} ]; then
|
||||||
echo "ERROR- $OSD_BOOTSTRAP_KEYRING must exist. You can extract it from your current monitor by running 'ceph auth get client.bootstrap-osd -o $OSD_BOOTSTRAP_KEYRING'"
|
echo "OSD Init: ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster."
|
||||||
exit 1
|
echo "OSD Init: The OSD FSID is ${osd_fsid} while this cluster is ${ceph_fsid}"
|
||||||
|
echo "OSD Init: Because OSD_FORCE_REPAIR was set, we will zap this device."
|
||||||
|
ZAP_EXTRA_PARTITIONS=${tmpmnt}
|
||||||
|
retcode=1
|
||||||
|
else
|
||||||
|
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster."
|
||||||
|
echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped."
|
||||||
|
echo "Moving on, trying to activate the OSD now."
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "OSD Init: ${OSD_DEVICE} has a ceph data partition but no FSID."
|
||||||
|
echo "OSD Init: Because OSD_FORCE_REPAIR was set, we will zap this device."
|
||||||
|
ZAP_EXTRA_PARTITIONS=${tmpmnt}
|
||||||
|
retcode=1
|
||||||
fi
|
fi
|
||||||
timeout 10 ceph ${CLI_OPTS} --name client.bootstrap-osd --keyring $OSD_BOOTSTRAP_KEYRING health || exit 1
|
umount ${tmpmnt}
|
||||||
|
return ${retcode}
|
||||||
|
}
|
||||||
|
|
||||||
#search for some ceph metadata on the disk based on the status of the disk/lvm in filestore
|
function determine_what_needs_zapping {
|
||||||
CEPH_DISK_USED=0
|
|
||||||
CEPH_LVM_PREPARE=1
|
|
||||||
udev_settle
|
|
||||||
OSD_ID=$(get_osd_id_from_device ${OSD_DEVICE})
|
|
||||||
OSD_FSID=$(get_cluster_fsid_from_device ${OSD_DEVICE})
|
|
||||||
CLUSTER_FSID=$(ceph-conf --lookup fsid)
|
|
||||||
DISK_ZAPPED=0
|
|
||||||
|
|
||||||
if [[ ! -z ${OSD_ID} ]]; then
|
if [[ ! -z ${OSD_ID} ]]; then
|
||||||
DM_NUM=$(dmsetup ls | grep $(lsblk -J ${OSD_DEVICE} | jq -r '.blockdevices[].children[].name') | awk '{print $2}' | cut -d':' -f2 | cut -d')' -f1)
|
local dm_num=$(dmsetup ls | grep $(lsblk -J ${OSD_DEVICE} | jq -r '.blockdevices[].children[].name') | awk '{print $2}' | cut -d':' -f2 | cut -d')' -f1)
|
||||||
DM_DEV="/dev/dm-"${DM_NUM}
|
DM_DEV="/dev/dm-"${dm_num}
|
||||||
elif [[ $(sgdisk --print ${OSD_DEVICE} | grep "F800") ]]; then
|
elif [[ $(sgdisk --print ${OSD_DEVICE} | grep "F800") ]]; then
|
||||||
DM_DEV=${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
|
# Ceph-disk was used to initialize the disk, but this is not supported
|
||||||
CEPH_DISK_USED=1
|
echo "OSD Init: ceph-disk was used to initialize the disk, but this is no longer supported"
|
||||||
|
exit 1
|
||||||
else
|
else
|
||||||
if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
|
if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
|
||||||
echo "It looks like ${OSD_DEVICE} isn't consistent, however OSD_FORCE_REPAIR is enabled so we are zapping the device anyway"
|
echo "OSD Init: It looks like ${OSD_DEVICE} isn't consistent, however OSD_FORCE_REPAIR is enabled so we are zapping the device anyway"
|
||||||
disk_zap ${OSD_DEVICE}
|
ZAP_DEVICE=1
|
||||||
DISK_ZAPPED=1
|
|
||||||
else
|
else
|
||||||
echo "Regarding parted, device ${OSD_DEVICE} is inconsistent/broken/weird."
|
echo "OSD Init: Regarding parted, device ${OSD_DEVICE} is inconsistent/broken/weird."
|
||||||
echo "It would be too dangerous to destroy it without any notification."
|
echo "OSD Init: It would be too dangerous to destroy it without any notification."
|
||||||
echo "Please set OSD_FORCE_REPAIR to '1' if you really want to zap this disk."
|
echo "OSD Init: Please set OSD_FORCE_REPAIR to '1' if you really want to zap this disk."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ${OSD_FORCE_REPAIR} -eq 1 ] && [ ! -z ${DM_DEV} ]; then
|
if [ ${OSD_FORCE_REPAIR} -eq 1 ] && [ ! -z ${DM_DEV} ]; then
|
||||||
if [ -b $DM_DEV ]; then
|
if [ -b $DM_DEV ]; then
|
||||||
local cephFSID=$(ceph-conf --lookup fsid)
|
local ceph_fsid=$(ceph-conf --lookup fsid)
|
||||||
if [ ! -z "${cephFSID}" ]; then
|
if [ ! -z "${ceph_fsid}" ]; then
|
||||||
local tmpmnt=$(mktemp -d)
|
# Check the OSD metadata and zap the disk if necessary
|
||||||
mount ${DM_DEV} ${tmpmnt}
|
if [[ $(check_osd_metadata ${ceph_fsid}) -eq 1 ]]; then
|
||||||
if [ "x$JOURNAL_TYPE" != "xdirectory" ]; then
|
echo "OSD Init: ${OSD_DEVICE} needs to be zapped..."
|
||||||
if [ -f "${tmpmnt}/whoami" ]; then
|
ZAP_DEVICE=1
|
||||||
OSD_JOURNAL_DISK=$(readlink -f "${tmpmnt}/journal")
|
|
||||||
local osd_id=$(cat "${tmpmnt}/whoami")
|
|
||||||
if [ ! -b "${OSD_JOURNAL_DISK}" ]; then
|
|
||||||
OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL})
|
|
||||||
local jdev=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g')
|
|
||||||
if [ ${jdev} == ${OSD_JOURNAL} ]; then
|
|
||||||
echo "It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL}."
|
|
||||||
echo "Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it."
|
|
||||||
rm -rf ${tmpmnt}/ceph_fsid
|
|
||||||
else
|
|
||||||
echo "It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL_DISK}."
|
|
||||||
echo "Because OSD_FORCE_REPAIR is set and paritions are manually defined, we will"
|
|
||||||
echo "attempt to recreate the missing journal device partitions."
|
|
||||||
osd_journal_create ${OSD_JOURNAL}
|
|
||||||
ln -sf /dev/disk/by-partuuid/${OSD_JOURNAL_UUID} ${tmpmnt}/journal
|
|
||||||
echo ${OSD_JOURNAL_UUID} | tee ${tmpmnt}/journal_uuid
|
|
||||||
chown ceph. ${OSD_JOURNAL}
|
|
||||||
# During OSD start we will format the journal and set the fsid
|
|
||||||
touch ${tmpmnt}/run_mkjournal
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
echo "It looks like ${OSD_DEVICE} has a ceph data partition but is missing it's metadata."
|
|
||||||
echo "The device may contain inconsistent metadata or be corrupted."
|
|
||||||
echo "Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it."
|
|
||||||
rm -rf ${tmpmnt}/ceph_fsid
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
if [ -f "${tmpmnt}/ceph_fsid" ]; then
|
|
||||||
osdFSID=$(cat "${tmpmnt}/ceph_fsid")
|
|
||||||
if [ ${osdFSID} != ${cephFSID} ]; then
|
|
||||||
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster."
|
|
||||||
echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}"
|
|
||||||
echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
|
|
||||||
zap_extra_partitions ${tmpmnt}
|
|
||||||
umount ${tmpmnt}
|
|
||||||
disk_zap ${OSD_DEVICE}
|
|
||||||
else
|
|
||||||
umount ${tmpmnt}
|
|
||||||
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster."
|
|
||||||
echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped."
|
|
||||||
echo "Moving on, trying to activate the OSD now."
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID."
|
|
||||||
echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
|
|
||||||
zap_extra_partitions ${tmpmnt}
|
|
||||||
umount ${tmpmnt}
|
|
||||||
disk_zap ${OSD_DEVICE}
|
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo "Unable to determine the FSID of the current cluster."
|
echo "Unable to determine the FSID of the current cluster."
|
||||||
echo "OSD_FORCE_REPAIR is set, but this OSD will not be zapped."
|
echo "OSD_FORCE_REPAIR is set, but this OSD will not be zapped."
|
||||||
echo "Moving on, trying to activate the OSD now."
|
echo "Moving on, trying to activate the OSD now."
|
||||||
return
|
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo "parted says ${DM_DEV} should exist, but we do not see it."
|
echo "parted says ${DM_DEV} should exist, but we do not see it."
|
||||||
echo "We will ignore OSD_FORCE_REPAIR and try to use the device as-is"
|
echo "We will ignore OSD_FORCE_REPAIR and try to use the device as-is"
|
||||||
echo "Moving on, trying to activate the OSD now."
|
echo "Moving on, trying to activate the OSD now."
|
||||||
return
|
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo "INFO- It looks like ${OSD_DEVICE} is an OSD LVM"
|
echo "INFO- It looks like ${OSD_DEVICE} is an OSD LVM"
|
||||||
echo "Moving on, trying to prepare and activate the OSD LVM now."
|
echo "Moving on, trying to prepare and activate the OSD LVM now."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
|
|
||||||
udev_settle
|
|
||||||
CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE}"
|
|
||||||
ceph-volume simple scan --force ${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
|
|
||||||
elif [[ ${CEPH_LVM_PREPARE} -eq 1 ]] || [[ ${DISK_ZAPPED} -eq 1 ]]; then
|
|
||||||
udev_settle
|
|
||||||
vg_name=$(get_vg_name_from_device ${OSD_DEVICE})
|
|
||||||
if [[ "${vg_name}" ]]; then
|
|
||||||
OSD_VG=${vg_name}
|
|
||||||
else
|
|
||||||
random_uuid=$(uuidgen)
|
|
||||||
vgcreate ceph-vg-${random_uuid} ${OSD_DEVICE}
|
|
||||||
vg_name=$(get_vg_name_from_device ${OSD_DEVICE})
|
|
||||||
vgrename ceph-vg-${random_uuid} ${vg_name}
|
|
||||||
OSD_VG=${vg_name}
|
|
||||||
fi
|
|
||||||
lv_name=$(get_lv_name_from_device ${OSD_DEVICE} lv)
|
|
||||||
if [[ ! "$(lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then
|
|
||||||
lvcreate --yes -l 100%FREE -n ${lv_name} ${OSD_VG}
|
|
||||||
fi
|
|
||||||
OSD_LV=${OSD_VG}/${lv_name}
|
|
||||||
CLI_OPTS="${CLI_OPTS} --data ${OSD_LV}"
|
|
||||||
CEPH_LVM_PREPARE=1
|
|
||||||
udev_settle
|
|
||||||
fi
|
|
||||||
if [ ${CEPH_DISK_USED} -eq 0 ] ; then
|
|
||||||
if pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then
|
|
||||||
CEPH_LVM_PREPARE=0
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
osd_journal_prepare
|
|
||||||
CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE} --journal ${OSD_JOURNAL}"
|
|
||||||
udev_settle
|
|
||||||
|
|
||||||
if [ ! -z "$DEVICE_CLASS" ]; then
|
|
||||||
CLI_OPTS="${CLI_OPTS} --crush-device-class ${DEVICE_CLASS}"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ ${CEPH_LVM_PREPARE} -eq 1 ]]; then
|
|
||||||
ceph-volume lvm -v prepare ${CLI_OPTS}
|
|
||||||
udev_settle
|
|
||||||
fi
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function osd_journal_create {
|
function osd_journal_create {
|
||||||
|
@ -205,7 +149,7 @@ function osd_journal_create {
|
||||||
OSD_JOURNAL=$(dev_part ${jdev} ${osd_journal_partition})
|
OSD_JOURNAL=$(dev_part ${jdev} ${osd_journal_partition})
|
||||||
udev_settle
|
udev_settle
|
||||||
else
|
else
|
||||||
echo "The backing device ${jdev} for ${OSD_JOURNAL} does not exist on this system."
|
echo "OSD Init: The backing device ${jdev} for ${OSD_JOURNAL} does not exist on this system."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
@ -235,3 +179,36 @@ function osd_journal_prepare {
|
||||||
fi
|
fi
|
||||||
CLI_OPTS="${CLI_OPTS} --filestore"
|
CLI_OPTS="${CLI_OPTS} --filestore"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function osd_disk_prepare {
|
||||||
|
|
||||||
|
if [[ ${CEPH_LVM_PREPARE} -eq 1 ]] || [[ ${DISK_ZAPPED} -eq 1 ]]; then
|
||||||
|
udev_settle
|
||||||
|
RESULTING_VG=""; RESULTING_LV="";
|
||||||
|
create_vg_if_needed "${OSD_DEVICE}"
|
||||||
|
create_lv_if_needed "${OSD_DEVICE}" "${RESULTING_VG}" "--yes -l 100%FREE"
|
||||||
|
|
||||||
|
CLI_OPTS="${CLI_OPTS} --data ${RESULTING_LV}"
|
||||||
|
CEPH_LVM_PREPARE=1
|
||||||
|
udev_settle
|
||||||
|
fi
|
||||||
|
if pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then
|
||||||
|
echo "OSD Init: Device is already set up. LVM prepare does not need to be called."
|
||||||
|
CEPH_LVM_PREPARE=0
|
||||||
|
fi
|
||||||
|
|
||||||
|
osd_journal_prepare
|
||||||
|
CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE} --journal ${OSD_JOURNAL}"
|
||||||
|
udev_settle
|
||||||
|
|
||||||
|
if [ ! -z "$DEVICE_CLASS" ]; then
|
||||||
|
CLI_OPTS="${CLI_OPTS} --crush-device-class ${DEVICE_CLASS}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ ${CEPH_LVM_PREPARE} -eq 1 ]]; then
|
||||||
|
echo "OSD Init: Calling ceph-volume lvm-v prepare ${CLI_OPTS}"
|
||||||
|
ceph-volume lvm -v prepare ${CLI_OPTS}
|
||||||
|
udev_settle
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,156 +18,139 @@ set -ex
|
||||||
|
|
||||||
export OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION})
|
export OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION})
|
||||||
export OSD_BLUESTORE=1
|
export OSD_BLUESTORE=1
|
||||||
|
alias prep_device='locked prep_device'
|
||||||
|
|
||||||
function osd_disk_prepare {
|
function check_block_device_for_zap {
|
||||||
if [[ -z "${OSD_DEVICE}" ]]; then
|
local block_device=$1
|
||||||
echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
|
local device_type=$2
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ ! -b "${OSD_DEVICE}" ]]; then
|
if [[ ${block_device} ]]; then
|
||||||
echo "ERROR- The device pointed by OSD_DEVICE ($OSD_DEVICE) doesn't exist !"
|
local vg_name=$(get_vg_name_from_device ${block_device})
|
||||||
exit 1
|
local lv_name=$(get_lv_name_from_device ${OSD_DEVICE} ${device_type})
|
||||||
fi
|
local vg=$(vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]')
|
||||||
|
if [[ "${vg}" ]]; then
|
||||||
if [ ! -e $OSD_BOOTSTRAP_KEYRING ]; then
|
local device_osd_id=$(get_osd_id_from_volume "/dev/${vg_name}/${lv_name}")
|
||||||
echo "ERROR- $OSD_BOOTSTRAP_KEYRING must exist. You can extract it from your current monitor by running 'ceph auth get client.bootstrap-osd -o $OSD_BOOTSTRAP_KEYRING'"
|
CEPH_LVM_PREPARE=1
|
||||||
exit 1
|
if [[ -n "${device_osd_id}" ]] && [[ -n "${OSD_ID}" ]]; then
|
||||||
fi
|
if [[ "${device_osd_id}" == "${OSD_ID}" ]]; then
|
||||||
timeout 10 ceph ${CLI_OPTS} --name client.bootstrap-osd --keyring $OSD_BOOTSTRAP_KEYRING health || exit 1
|
echo "OSD Init: OSD ID matches the OSD ID already on the data volume. LVM prepare does not need to be called."
|
||||||
|
|
||||||
#search for some ceph metadata on the disk based on the status of the disk/lvm in filestore
|
|
||||||
CEPH_DISK_USED=0
|
|
||||||
CEPH_LVM_PREPARE=1
|
|
||||||
udev_settle
|
|
||||||
OSD_ID=$(get_osd_id_from_device ${OSD_DEVICE})
|
|
||||||
OSD_FSID=$(get_cluster_fsid_from_device ${OSD_DEVICE})
|
|
||||||
CLUSTER_FSID=$(ceph-conf --lookup fsid)
|
|
||||||
DISK_ZAPPED=0
|
|
||||||
|
|
||||||
if [[ ! -z "${OSD_FSID}" ]]; then
|
|
||||||
if [[ "${OSD_FSID}" == "${CLUSTER_FSID}" ]]; then
|
|
||||||
if [[ ! -z "${OSD_ID}" ]]; then
|
|
||||||
if ceph --name client.bootstrap-osd --keyring $OSD_BOOTSTRAP_KEYRING osd ls |grep -w ${OSD_ID}; then
|
|
||||||
echo "Running bluestore mode and ${OSD_DEVICE} already bootstrapped"
|
|
||||||
CEPH_LVM_PREPARE=0
|
CEPH_LVM_PREPARE=0
|
||||||
elif [[ $OSD_FORCE_REPAIR -eq 1 ]]; then
|
else
|
||||||
|
echo "OSD Init: OSD ID does match the OSD ID on the data volume. Device needs to be zapped."
|
||||||
|
ZAP_DEVICE=1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check if this device (db or wal) has no associated data volume
|
||||||
|
local logical_volumes="$(lvs --noheadings -o lv_name ${vg} | xargs)"
|
||||||
|
for volume in ${logical_volumes}; do
|
||||||
|
local data_volume=$(echo ${volume} | sed -E -e 's/-db-|-wal-/-lv-/g')
|
||||||
|
if [[ -z $(lvs --noheadings -o lv_name -S "lv_name=${data_volume}") ]]; then
|
||||||
|
# DB or WAL volume without a corresponding data volume, remove it
|
||||||
|
lvremove -y /dev/${vg}/${volume}
|
||||||
|
echo "OSD Init: LV /dev/${vg}/${volume} was removed as it did not have a data volume."
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
else
|
||||||
|
if [[ "${vg_name}" ]]; then
|
||||||
|
local logical_devices=$(get_dm_devices_from_osd_device "${OSD_DEVICE}")
|
||||||
|
local device_filter=$(echo "${vg_name}" | sed 's/-/--/g')
|
||||||
|
local logical_devices=$(echo "${logical_devices}" | grep "${device_filter}" | xargs)
|
||||||
|
if [[ "$logical_devices" ]]; then
|
||||||
|
echo "OSD Init: No VG resources found with name ${vg_name}. Device needs to be zapped."
|
||||||
|
ZAP_DEVICE=1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
function determine_what_needs_zapping {
|
||||||
|
|
||||||
|
local osd_fsid=$(get_cluster_fsid_from_device ${OSD_DEVICE})
|
||||||
|
local cluster_fsid=$(ceph-conf --lookup fsid)
|
||||||
|
|
||||||
|
# If the OSD FSID is defined within the device, check if we're already bootstrapped.
|
||||||
|
if [[ ! -z "${osd_fsid}" ]]; then
|
||||||
|
# Check if the OSD FSID is the same as the cluster FSID. If so, then we're
|
||||||
|
# already bootstrapped; otherwise, this is an old disk and needs to
|
||||||
|
# be zapped.
|
||||||
|
if [[ "${osd_fsid}" == "${cluster_fsid}" ]]; then
|
||||||
|
if [[ ! -z "${OSD_ID}" ]]; then
|
||||||
|
# Check to see what needs to be done to prepare the disk. If the OSD
|
||||||
|
# ID is in the Ceph OSD list, then LVM prepare does not need to be done.
|
||||||
|
if ceph --name client.bootstrap-osd --keyring $OSD_BOOTSTRAP_KEYRING osd ls |grep -w ${OSD_ID}; then
|
||||||
|
echo "OSD Init: Running bluestore mode and ${OSD_DEVICE} already bootstrapped. LVM prepare does not need to be called."
|
||||||
|
CEPH_LVM_PREPARE=0
|
||||||
|
elif [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
|
||||||
echo "OSD initialized for this cluster, but OSD ID not found in the cluster, reinitializing"
|
echo "OSD initialized for this cluster, but OSD ID not found in the cluster, reinitializing"
|
||||||
else
|
else
|
||||||
echo "OSD initialized for this cluster, but OSD ID not found in the cluster"
|
echo "OSD initialized for this cluster, but OSD ID not found in the cluster"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo "OSD initialized for a different cluster, zapping it"
|
echo "OSD Init: OSD FSID ${osd_fsid} initialized for a different cluster. It needs to be zapped."
|
||||||
disk_zap ${OSD_DEVICE}
|
ZAP_DEVICE=1
|
||||||
udev_settle
|
|
||||||
fi
|
fi
|
||||||
elif [[ $(sgdisk --print ${OSD_DEVICE} | grep "F800") ]]; then
|
elif [[ $(sgdisk --print ${OSD_DEVICE} | grep "F800") ]]; then
|
||||||
DM_DEV=${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
|
# Ceph-disk was used to initialize the disk, but this is not supported
|
||||||
CEPH_DISK_USED=1
|
echo "ceph-disk was used to initialize the disk, but this is no longer supported"
|
||||||
else
|
exit 1
|
||||||
if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
|
|
||||||
if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
|
|
||||||
echo "${OSD_DEVICE} isn't clean, zapping it because OSD_FORCE_REPAIR is enabled"
|
|
||||||
disk_zap ${OSD_DEVICE}
|
|
||||||
else
|
|
||||||
echo "${OSD_DEVICE} isn't clean, but OSD_FORCE_REPAIR isn't enabled."
|
|
||||||
echo "Please set OSD_FORCE_REPAIR to '1' if you want to zap this disk."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ${OSD_FORCE_REPAIR} -eq 1 ] && [ ! -z ${DM_DEV} ]; then
|
check_block_device_for_zap "${BLOCK_DB}" db
|
||||||
if [ -b $DM_DEV ]; then
|
check_block_device_for_zap "${BLOCK_WAL}" wal
|
||||||
local cephFSID=$(ceph-conf --lookup fsid)
|
|
||||||
if [ ! -z "${cephFSID}" ]; then
|
|
||||||
local tmpmnt=$(mktemp -d)
|
|
||||||
mount ${DM_DEV} ${tmpmnt}
|
|
||||||
if [ -f "${tmpmnt}/ceph_fsid" ]; then
|
|
||||||
osdFSID=$(cat "${tmpmnt}/ceph_fsid")
|
|
||||||
if [ ${osdFSID} != ${cephFSID} ]; then
|
|
||||||
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster."
|
|
||||||
echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}"
|
|
||||||
echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
|
|
||||||
zap_extra_partitions ${tmpmnt}
|
|
||||||
umount ${tmpmnt}
|
|
||||||
disk_zap ${OSD_DEVICE}
|
|
||||||
else
|
|
||||||
umount ${tmpmnt}
|
|
||||||
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster."
|
|
||||||
echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped."
|
|
||||||
echo "Moving on, trying to activate the OSD now."
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID."
|
|
||||||
echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
|
|
||||||
zap_extra_partitions ${tmpmnt}
|
|
||||||
umount ${tmpmnt}
|
|
||||||
disk_zap ${OSD_DEVICE}
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
echo "Unable to determine the FSID of the current cluster."
|
|
||||||
echo "OSD_FORCE_REPAIR is set, but this OSD will not be zapped."
|
|
||||||
echo "Moving on, trying to activate the OSD now."
|
|
||||||
return
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
echo "parted says ${DM_DEV} should exist, but we do not see it."
|
|
||||||
echo "We will ignore OSD_FORCE_REPAIR and try to use the device as-is"
|
|
||||||
echo "Moving on, trying to activate the OSD now."
|
|
||||||
return
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
echo "INFO- It looks like ${OSD_DEVICE} is an OSD LVM"
|
|
||||||
echo "Moving on, trying to prepare and activate the OSD LVM now."
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
|
# Zapping extra partitions isn't done for bluestore
|
||||||
|
ZAP_EXTRA_PARTITIONS=0
|
||||||
|
}
|
||||||
|
|
||||||
|
function prep_device {
|
||||||
|
local block_device=$1
|
||||||
|
local block_device_size=$2
|
||||||
|
local device_type=$3
|
||||||
|
local vg_name lv_name vg device_osd_id logical_devices logical_volume
|
||||||
|
RESULTING_VG=""; RESULTING_LV="";
|
||||||
|
|
||||||
|
udev_settle
|
||||||
|
vg_name=$(get_vg_name_from_device ${block_device})
|
||||||
|
lv_name=$(get_lv_name_from_device ${OSD_DEVICE} ${device_type})
|
||||||
|
vg=$(vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]')
|
||||||
|
if [[ -z "${vg}" ]]; then
|
||||||
|
create_vg_if_needed "${block_device}"
|
||||||
|
vg=${RESULTING_VG}
|
||||||
|
fi
|
||||||
|
udev_settle
|
||||||
|
|
||||||
|
create_lv_if_needed "${block_device}" "${vg}" "-L ${block_device_size}" "${lv_name}"
|
||||||
|
if [[ "${device_type}" == "db" ]]; then
|
||||||
|
BLOCK_DB=${RESULTING_LV}
|
||||||
|
elif [[ "${device_type}" == "wal" ]]; then
|
||||||
|
BLOCK_WAL=${RESULTING_LV}
|
||||||
|
fi
|
||||||
|
udev_settle
|
||||||
|
}
|
||||||
|
|
||||||
|
function osd_disk_prepare {
|
||||||
|
|
||||||
|
if [[ ${CEPH_LVM_PREPARE} -eq 1 ]] || [[ ${DISK_ZAPPED} -eq 1 ]]; then
|
||||||
udev_settle
|
udev_settle
|
||||||
CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE}"
|
RESULTING_VG=""; RESULTING_LV="";
|
||||||
ceph-volume simple scan --force ${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
|
create_vg_if_needed "${OSD_DEVICE}"
|
||||||
elif [[ ${CEPH_LVM_PREPARE} -eq 1 ]] || [[ ${DISK_ZAPPED} -eq 1 ]]; then
|
create_lv_if_needed "${OSD_DEVICE}" "${RESULTING_VG}" "--yes -l 100%FREE"
|
||||||
udev_settle
|
|
||||||
vg_name=$(get_vg_name_from_device ${OSD_DEVICE})
|
CLI_OPTS="${CLI_OPTS} --data ${RESULTING_LV}"
|
||||||
if [[ "${vg_name}" ]]; then
|
|
||||||
OSD_VG=${vg_name}
|
|
||||||
else
|
|
||||||
random_uuid=$(uuidgen)
|
|
||||||
vgcreate ceph-vg-${random_uuid} ${OSD_DEVICE}
|
|
||||||
vg_name=$(get_vg_name_from_device ${OSD_DEVICE})
|
|
||||||
vgrename ceph-vg-${random_uuid} ${vg_name}
|
|
||||||
OSD_VG=${vg_name}
|
|
||||||
fi
|
|
||||||
lv_name=$(get_lv_name_from_device ${OSD_DEVICE} lv)
|
|
||||||
if [[ ! "$(lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then
|
|
||||||
lvcreate --yes -l 100%FREE -n ${lv_name} ${OSD_VG}
|
|
||||||
fi
|
|
||||||
OSD_LV=${OSD_VG}/${lv_name}
|
|
||||||
CLI_OPTS="${CLI_OPTS} --data ${OSD_LV}"
|
|
||||||
CEPH_LVM_PREPARE=1
|
CEPH_LVM_PREPARE=1
|
||||||
udev_settle
|
udev_settle
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ${CEPH_DISK_USED} -eq 0 ]; then
|
if [[ ${BLOCK_DB} && ${BLOCK_WAL} ]]; then
|
||||||
if [[ ${BLOCK_DB} ]]; then
|
prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
|
||||||
block_db_string=$(echo ${BLOCK_DB} | awk -F "/" '{print $2 "-" $3}')
|
prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
|
||||||
fi
|
elif [[ -z ${BLOCK_DB} && ${BLOCK_WAL} ]]; then
|
||||||
if [[ ${BLOCK_WAL} ]]; then
|
prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
|
||||||
block_wal_string=$(echo ${BLOCK_WAL} | awk -F "/" '{print $2 "-" $3}')
|
elif [[ ${BLOCK_DB} && -z ${BLOCK_WAL} ]]; then
|
||||||
fi
|
prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
|
||||||
if [[ ${BLOCK_DB} && ${BLOCK_WAL} ]]; then
|
|
||||||
prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
|
|
||||||
prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
|
|
||||||
elif [[ -z ${BLOCK_DB} && ${BLOCK_WAL} ]]; then
|
|
||||||
prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
|
|
||||||
elif [[ ${BLOCK_DB} && -z ${BLOCK_WAL} ]]; then
|
|
||||||
prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
if pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then
|
|
||||||
CEPH_LVM_PREPARE=0
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
CLI_OPTS="${CLI_OPTS} --bluestore"
|
CLI_OPTS="${CLI_OPTS} --bluestore"
|
||||||
|
@ -185,6 +168,7 @@ function osd_disk_prepare {
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ ${CEPH_LVM_PREPARE} -eq 1 ]]; then
|
if [[ ${CEPH_LVM_PREPARE} -eq 1 ]]; then
|
||||||
|
echo "OSD Init: Calling ceph-volume lvm-v prepare ${CLI_OPTS}"
|
||||||
ceph-volume lvm -v prepare ${CLI_OPTS}
|
ceph-volume lvm -v prepare ${CLI_OPTS}
|
||||||
udev_settle
|
udev_settle
|
||||||
fi
|
fi
|
||||||
|
|
|
@ -16,17 +16,17 @@ limitations under the License.
|
||||||
|
|
||||||
set -ex
|
set -ex
|
||||||
|
|
||||||
|
: "${OSD_FORCE_REPAIR:=0}"
|
||||||
|
|
||||||
source /tmp/osd-common-ceph-volume.sh
|
source /tmp/osd-common-ceph-volume.sh
|
||||||
|
|
||||||
source /tmp/init-ceph-volume-helper-${STORAGE_TYPE}.sh
|
source /tmp/init-ceph-volume-helper-${STORAGE_TYPE}.sh
|
||||||
|
|
||||||
: "${OSD_FORCE_REPAIR:=0}"
|
|
||||||
|
|
||||||
# Set up aliases for functions that require disk synchronization
|
# Set up aliases for functions that require disk synchronization
|
||||||
alias rename_vg='locked rename_vg'
|
alias rename_vg='locked rename_vg'
|
||||||
alias rename_lvs='locked rename_lvs'
|
alias rename_lvs='locked rename_lvs'
|
||||||
alias update_lv_tags='locked update_lv_tags'
|
alias update_lv_tags='locked update_lv_tags'
|
||||||
alias prep_device='locked prep_device'
|
|
||||||
|
|
||||||
# Renames a single VG if necessary
|
# Renames a single VG if necessary
|
||||||
function rename_vg {
|
function rename_vg {
|
||||||
|
@ -36,6 +36,7 @@ function rename_vg {
|
||||||
|
|
||||||
if [[ "${old_vg_name}" ]] && [[ "${vg_name}" != "${old_vg_name}" ]]; then
|
if [[ "${old_vg_name}" ]] && [[ "${vg_name}" != "${old_vg_name}" ]]; then
|
||||||
vgrename ${old_vg_name} ${vg_name}
|
vgrename ${old_vg_name} ${vg_name}
|
||||||
|
echo "OSD Init: Renamed volume group ${old_vg_name} to ${vg_name}."
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -51,6 +52,7 @@ function rename_lvs {
|
||||||
|
|
||||||
if [[ "${old_lv_name}" ]] && [[ "${lv_name}" != "${old_lv_name}" ]]; then
|
if [[ "${old_lv_name}" ]] && [[ "${lv_name}" != "${old_lv_name}" ]]; then
|
||||||
lvrename ${vg_name} ${old_lv_name} ${lv_name}
|
lvrename ${vg_name} ${old_lv_name} ${lv_name}
|
||||||
|
echo "OSD Init: Renamed logical volume ${old_lv_name} (from group ${vg_name}) to ${lv_name}."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Rename the OSD's block.db volume if necessary, referenced by UUID
|
# Rename the OSD's block.db volume if necessary, referenced by UUID
|
||||||
|
@ -66,6 +68,7 @@ function rename_lvs {
|
||||||
|
|
||||||
if [[ "${old_lv_name}" ]] && [[ "${db_name}" != "${old_lv_name}" ]]; then
|
if [[ "${old_lv_name}" ]] && [[ "${db_name}" != "${old_lv_name}" ]]; then
|
||||||
lvrename ${db_vg} ${old_lv_name} ${db_name}
|
lvrename ${db_vg} ${old_lv_name} ${db_name}
|
||||||
|
echo "OSD Init: Renamed DB logical volume ${old_lv_name} (from group ${db_vg}) to ${db_name}."
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
@ -83,6 +86,7 @@ function rename_lvs {
|
||||||
|
|
||||||
if [[ "${old_lv_name}" ]] && [[ "${wal_name}" != "${old_lv_name}" ]]; then
|
if [[ "${old_lv_name}" ]] && [[ "${wal_name}" != "${old_lv_name}" ]]; then
|
||||||
lvrename ${wal_vg} ${old_lv_name} ${wal_name}
|
lvrename ${wal_vg} ${old_lv_name} ${wal_name}
|
||||||
|
echo "OSD Init: Renamed WAL logical volume ${old_lv_name} (from group ${wal_vg}) to ${wal_name}."
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
@ -124,80 +128,84 @@ function update_lv_tags {
|
||||||
lvchange --deltag "ceph.block_device=${old_block_device}" /dev/${vg}/${lv}
|
lvchange --deltag "ceph.block_device=${old_block_device}" /dev/${vg}/${lv}
|
||||||
fi
|
fi
|
||||||
lvchange --addtag "ceph.block_device=${block_device}" /dev/${vg}/${lv}
|
lvchange --addtag "ceph.block_device=${block_device}" /dev/${vg}/${lv}
|
||||||
|
echo "OSD Init: Updated lv tags for data volume ${block_device}."
|
||||||
fi
|
fi
|
||||||
if [[ "${db_device}" ]]; then
|
if [[ "${db_device}" ]]; then
|
||||||
if [[ "${old_db_device}" ]]; then
|
if [[ "${old_db_device}" ]]; then
|
||||||
lvchange --deltag "ceph.db_device=${old_db_device}" /dev/${vg}/${lv}
|
lvchange --deltag "ceph.db_device=${old_db_device}" /dev/${vg}/${lv}
|
||||||
fi
|
fi
|
||||||
lvchange --addtag "ceph.db_device=${db_device}" /dev/${vg}/${lv}
|
lvchange --addtag "ceph.db_device=${db_device}" /dev/${vg}/${lv}
|
||||||
|
echo "OSD Init: Updated lv tags for DB volume ${db_device}."
|
||||||
fi
|
fi
|
||||||
if [[ "${wal_device}" ]]; then
|
if [[ "${wal_device}" ]]; then
|
||||||
if [[ "${old_wal_device}" ]]; then
|
if [[ "${old_wal_device}" ]]; then
|
||||||
lvchange --deltag "ceph.wal_device=${old_wal_device}" /dev/${vg}/${lv}
|
lvchange --deltag "ceph.wal_device=${old_wal_device}" /dev/${vg}/${lv}
|
||||||
fi
|
fi
|
||||||
lvchange --addtag "ceph.wal_device=${wal_device}" /dev/${vg}/${lv}
|
lvchange --addtag "ceph.wal_device=${wal_device}" /dev/${vg}/${lv}
|
||||||
|
echo "OSD Init: Updated lv tags for WAL volume ${wal_device}."
|
||||||
fi
|
fi
|
||||||
done <<< ${volumes}
|
done <<< ${volumes}
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
function prep_device {
|
function create_vg_if_needed {
|
||||||
local BLOCK_DEVICE=$1
|
local bl_device=$1
|
||||||
local BLOCK_DEVICE_SIZE=$2
|
local vg_name=$(get_vg_name_from_device ${bl_device})
|
||||||
local device_type=$3
|
if [[ -z "${vg_name}" ]]; then
|
||||||
local data_disk=$4
|
local random_uuid=$(uuidgen)
|
||||||
local vg_name lv_name VG DEVICE_OSD_ID logical_devices logical_volume
|
vgcreate ceph-vg-${random_uuid} ${bl_device}
|
||||||
udev_settle
|
vg_name=$(get_vg_name_from_device ${bl_device})
|
||||||
vg_name=$(get_vg_name_from_device ${BLOCK_DEVICE})
|
vgrename ceph-vg-${random_uuid} ${vg_name}
|
||||||
lv_name=$(get_lv_name_from_device ${data_disk} ${device_type})
|
echo "OSD Init: Created volume group ${vg_name} for device ${bl_device}."
|
||||||
VG=$(vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]')
|
|
||||||
if [[ "${VG}" ]]; then
|
|
||||||
DEVICE_OSD_ID=$(get_osd_id_from_volume "/dev/${vg_name}/${lv_name}")
|
|
||||||
CEPH_LVM_PREPARE=1
|
|
||||||
if [[ -n "${DEVICE_OSD_ID}" ]] && [[ -n "${OSD_ID}" ]]; then
|
|
||||||
if [[ "${DEVICE_OSD_ID}" == "${OSD_ID}" ]]; then
|
|
||||||
CEPH_LVM_PREPARE=0
|
|
||||||
else
|
|
||||||
disk_zap "${OSD_DEVICE}"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
logical_volumes="$(lvs --noheadings -o lv_name ${VG} | xargs)"
|
|
||||||
for volume in ${logical_volumes}; do
|
|
||||||
data_volume=$(echo ${volume} | sed -E -e 's/-db-|-wal-/-lv-/g')
|
|
||||||
if [[ -z $(lvs --noheadings -o lv_name -S "lv_name=${data_volume}") ]]; then
|
|
||||||
# DB or WAL volume without a corresponding data volume, remove it
|
|
||||||
lvremove -y /dev/${VG}/${volume}
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
else
|
|
||||||
if [[ "${vg_name}" ]]; then
|
|
||||||
logical_devices=$(get_dm_devices_from_osd_device "${data_disk}")
|
|
||||||
device_filter=$(echo "${vg_name}" | sed 's/-/--/g')
|
|
||||||
logical_devices=$(echo "${logical_devices}" | grep "${device_filter}" | xargs)
|
|
||||||
if [[ "$logical_devices" ]]; then
|
|
||||||
dmsetup remove $logical_devices
|
|
||||||
disk_zap "${OSD_DEVICE}"
|
|
||||||
CEPH_LVM_PREPARE=1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
random_uuid=$(uuidgen)
|
|
||||||
vgcreate "ceph-vg-${random_uuid}" "${BLOCK_DEVICE}"
|
|
||||||
VG=$(get_vg_name_from_device ${BLOCK_DEVICE})
|
|
||||||
vgrename "ceph-vg-${random_uuid}" "${VG}"
|
|
||||||
fi
|
fi
|
||||||
udev_settle
|
RESULTING_VG=${vg_name}
|
||||||
logical_volume=$(lvs --noheadings -o lv_name -S "lv_name=${lv_name}" | tr -d '[:space:]')
|
}
|
||||||
if [[ $logical_volume != "${lv_name}" ]]; then
|
|
||||||
lvcreate -L "${BLOCK_DEVICE_SIZE}" -n "${lv_name}" "${VG}"
|
function create_lv_if_needed {
|
||||||
|
local bl_device=$1
|
||||||
|
local vg_name=$2
|
||||||
|
local options=$3
|
||||||
|
local lv_name=${4:-$(get_lv_name_from_device ${bl_device} lv)}
|
||||||
|
|
||||||
|
if [[ ! "$(lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then
|
||||||
|
lvcreate ${options} -n ${lv_name} ${vg_name}
|
||||||
|
echo "OSD Init: Created logical volume ${lv_name} in group ${vg_name} for device ${bl_device}."
|
||||||
fi
|
fi
|
||||||
if [[ "${device_type}" == "db" ]]; then
|
RESULTING_LV=${vg_name}/${lv_name}
|
||||||
BLOCK_DB="${VG}/${lv_name}"
|
}
|
||||||
elif [[ "${device_type}" == "wal" ]]; then
|
|
||||||
BLOCK_WAL="${VG}/${lv_name}"
|
function osd_disk_prechecks {
|
||||||
|
if [[ -z "${OSD_DEVICE}" ]]; then
|
||||||
|
echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
|
||||||
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [[ ! -b "${OSD_DEVICE}" ]]; then
|
||||||
|
echo "ERROR- The device pointed by OSD_DEVICE ($OSD_DEVICE) doesn't exist !"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -e $OSD_BOOTSTRAP_KEYRING ]; then
|
||||||
|
echo "ERROR- $OSD_BOOTSTRAP_KEYRING must exist. You can extract it from your current monitor by running 'ceph auth get client.bootstrap-osd -o $OSD_BOOTSTRAP_KEYRING'"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
timeout 10 ceph ${CLI_OPTS} --name client.bootstrap-osd --keyring $OSD_BOOTSTRAP_KEYRING health || exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
function perform_zap {
|
||||||
|
if [[ ${ZAP_EXTRA_PARTITIONS} != "" ]]; then
|
||||||
|
# This used for filestore/blockstore only
|
||||||
|
echo "OSD Init: Zapping extra partitions ${ZAP_EXTRA_PARTITIONS}"
|
||||||
|
zap_extra_partitions "${ZAP_EXTRA_PARTITIONS}"
|
||||||
|
fi
|
||||||
|
echo "OSD Init: Zapping device ${OSD_DEVICE}..."
|
||||||
|
disk_zap ${OSD_DEVICE}
|
||||||
|
DISK_ZAPPED=1
|
||||||
udev_settle
|
udev_settle
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#######################################################################
|
#######################################################################
|
||||||
# Main program
|
# Main program
|
||||||
#######################################################################
|
#######################################################################
|
||||||
|
@ -213,11 +221,13 @@ if [[ "${STORAGE_TYPE}" != "directory" ]]; then
|
||||||
rename_vg ${OSD_DEVICE}
|
rename_vg ${OSD_DEVICE}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Rename block DB device VG next
|
||||||
if [[ "${BLOCK_DB}" ]]; then
|
if [[ "${BLOCK_DB}" ]]; then
|
||||||
BLOCK_DB=$(readlink -f ${BLOCK_DB})
|
BLOCK_DB=$(readlink -f ${BLOCK_DB})
|
||||||
rename_vg ${BLOCK_DB}
|
rename_vg ${BLOCK_DB}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Rename block WAL device VG next
|
||||||
if [[ "${BLOCK_WAL}" ]]; then
|
if [[ "${BLOCK_WAL}" ]]; then
|
||||||
BLOCK_WAL=$(readlink -f ${BLOCK_WAL})
|
BLOCK_WAL=$(readlink -f ${BLOCK_WAL})
|
||||||
rename_vg ${BLOCK_WAL}
|
rename_vg ${BLOCK_WAL}
|
||||||
|
@ -232,6 +242,25 @@ if [[ "${STORAGE_TYPE}" != "directory" ]]; then
|
||||||
# Settle LVM changes again after any changes have been made
|
# Settle LVM changes again after any changes have been made
|
||||||
udev_settle
|
udev_settle
|
||||||
|
|
||||||
|
# Check to make sure we have what we need to continue
|
||||||
|
osd_disk_prechecks
|
||||||
|
|
||||||
|
# Initialize some important global variables
|
||||||
|
CEPH_LVM_PREPARE=1
|
||||||
|
OSD_ID=$(get_osd_id_from_device ${OSD_DEVICE})
|
||||||
|
DISK_ZAPPED=0
|
||||||
|
ZAP_DEVICE=0
|
||||||
|
ZAP_EXTRA_PARTITIONS=""
|
||||||
|
|
||||||
|
# The disk may need to be zapped or some LVs may need to be deleted before
|
||||||
|
# moving on with the disk preparation.
|
||||||
|
determine_what_needs_zapping
|
||||||
|
|
||||||
|
if [[ ${ZAP_DEVICE} -eq 1 ]]; then
|
||||||
|
perform_zap
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Prepare the disk for use
|
||||||
osd_disk_prepare
|
osd_disk_prepare
|
||||||
|
|
||||||
# Clean up resources held by the common script
|
# Clean up resources held by the common script
|
||||||
|
|
|
@ -22,4 +22,5 @@ ceph-osd:
|
||||||
- 0.1.19 Update rbac api version
|
- 0.1.19 Update rbac api version
|
||||||
- 0.1.20 Update directory-based OSD deployment for image changes
|
- 0.1.20 Update directory-based OSD deployment for image changes
|
||||||
- 0.1.21 Refactor Ceph OSD Init Scripts - First PS
|
- 0.1.21 Refactor Ceph OSD Init Scripts - First PS
|
||||||
|
- 0.1.22 Refactor Ceph OSD Init Scripts - Second PS
|
||||||
...
|
...
|
||||||
|
|
Loading…
Reference in New Issue