Merge "Fix wipedisk to not break Ceph OSDs during B&R"

commit f5725ad694
Zuul, 2020-09-24 17:05:54 +00:00 (committed by Gerrit Code Review)
2 changed files with 19 additions and 35 deletions

Changed file 1 of 2:

@@ -148,6 +148,7 @@ else
     WIPE_HDD=""
     # Partition type OSD has a unique globally identifier
     CEPH_OSD_GUID="4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D"
+    CEPH_JOURNAL_GUID="45B0969E-9B03-4F30-B4C6-B4B80CEFF106"
     # Check if we wipe OSDs
     if [ "$(curl -sf http://pxecontroller:6385/v1/ihosts/wipe_osds 2>/dev/null)" = "true" ]; then
@@ -227,9 +228,9 @@ do
             part=${dev}p${part_number}
             ;;
     esac
+    sgdisk_part_info=$(sgdisk -i $part_number $dev)
+    part_type_guid=$(echo "$sgdisk_part_info" | grep "$part_type_guid_str" | awk '{print $4;}')
     if [ "$dev" == "$rootfs_device" ]; then
-        sgdisk_part_info=$(sgdisk -i $part_number $dev)
-        part_type_guid=$(echo "$sgdisk_part_info" | grep "$part_type_guid_str" | awk '{print $4;}')
         part_fstype=$(exec_retry 5 0.5 "blkid -s TYPE -o value $part")
         if [ "$part_type_guid" == $BACKUP_PART_GUID -a "${part_fstype}" == "ext4" ]; then
             wlog "Skipping wipe backup partition $part"
@@ -238,8 +239,17 @@ do
         fi
     fi
     wlog "Wiping partition $part"
-    dd if=/dev/zero of=$part bs=512 count=34
-    dd if=/dev/zero of=$part bs=512 count=34 seek=$((`blockdev --getsz $part` - 34))
+    if [ $WIPE_CEPH_OSDS == "true" -a "$part_type_guid" == $CEPH_JOURNAL_GUID ]; then
+        # Journal partitions require additional wiping. Based on the ceph-manage-journal.py
+        # script in the integ repo (at the ceph/ceph/files/ceph-manage-journal.py location)
+        # wiping 100MB of data at the beginning of the partition should be enough. We also
+        # wipe 100MB at the end, just to be safe.
+        dd if=/dev/zero of=$part bs=1M count=100
+        dd if=/dev/zero of=$part bs=1M count=100 seek=$(( `blockdev --getsz $part` / (1024 * 2) - 100 ))
+    else
+        dd if=/dev/zero of=$part bs=512 count=34
+        dd if=/dev/zero of=$part bs=512 count=34 seek=$((`blockdev --getsz $part` - 34))
+    fi
     exec_retry 5 0.5 "parted -s $dev rm $part_number"
     # LP 1876374: On some nvme systems udev doesn't correctly remove the
     # links to the deleted partitions from /dev/nvme* causing them to be
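
Note: in the journal branch, dd's seek is counted in units of bs (1 MiB here), while blockdev --getsz reports the size in 512-byte sectors; dividing the sector count by 1024 * 2 converts it to MiB, and subtracting 100 places the second dd at the start of the last 100 MiB. The else branch keeps the old behaviour of clearing 34 sectors at each end of the partition, which covers the GPT structures. A worked example of the arithmetic, using an assumed 10 GiB journal partition:

    # blockdev --getsz on a 10 GiB partition prints 20971520 (512-byte sectors)
    part_size_sectors=20971520
    echo $(( part_size_sectors / (1024 * 2) ))         # 10240 -> size in MiB
    echo $(( part_size_sectors / (1024 * 2) - 100 ))   # 10140 -> seek offset of the last 100 MiB
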

Changed file 2 of 2:

@@ -70,22 +70,6 @@ done
 WIPE_HDD="$pvs_to_delete $WIPE_HDD"
-# During host reinstalls ceph journals also require wiping, so we also gather information on
-# journal partitions. Even if this script is also called during upgrades, there was no issue
-# observed during that operation, so we skip wiping the journals during upgrades.
-JOURNAL_DISKS=""
-HOST_IN_UPGRADE=$(curl -sf http://controller:6385/v1/upgrade/$(hostname)/in_upgrade 2>/dev/null)
-# The "ceph-disk list" command works even if the ceph cluster is not operational (for example if
-# too many monitors are down) so we can grab journal info from the node, even in such scenarios.
-# As a safety measure, we also wrap the command in a timeout command; it should never take long
-# for the command to return, but if it does it's safer to just time it out after 15 seconds.
-CEPH_DISK_OUTPUT=$(timeout 15 ceph-disk list 2>/dev/null)
-if [[ $? == 0 && "$HOST_IN_UPGRADE" != "true" ]]; then
-    JOURNAL_DISKS=$(echo "$CEPH_DISK_OUTPUT" | grep "ceph journal" | awk '{print $1}')
-fi
-WIPE_HDD="$JOURNAL_DISKS $WIPE_HDD"
 if [ ! $FORCE ]
 then
     echo "This will result in the loss of all data on the hard drives and"
@@ -158,21 +142,11 @@ do
             echo "Wiping $dev..."
             wipefs -f -a $dev

-            echo "$JOURNAL_DISKS" | grep -qw "$dev"
-            if [[ $? == 0 ]]; then
-                # Journal partitions require additional wiping. Based on the ceph-manage-journal.py
-                # script in the integ repo (at the ceph/ceph/files/ceph-manage-journal.py location)
-                # wiping 100MB of data at the beginning of the partition should be enough. We also
-                # wipe 100MB at the end, just to be safe.
-                dd if=/dev/zero of=$dev bs=1M count=100
-                dd if=/dev/zero of=$dev bs=1M count=100 seek=$((`blockdev --getsz $dev` - 204800))
-            else
-                # Clearing previous GPT tables or LVM data
-                # Delete the first few bytes at the start and end of the partition. This is required with
-                # GPT partitions, they save partition info at the start and the end of the block.
-                dd if=/dev/zero of=$dev bs=512 count=34
-                dd if=/dev/zero of=$dev bs=512 count=34 seek=$((`blockdev --getsz $dev` - 34))
-            fi
+            # Clearing previous GPT tables or LVM data
+            # Delete the first few bytes at the start and end of the partition. This is required with
+            # GPT partitions, they save partition info at the start and the end of the block.
+            dd if=/dev/zero of=$dev bs=512 count=34
+            dd if=/dev/zero of=$dev bs=512 count=34 seek=$((`blockdev --getsz $dev` - 34))
         fi
     fi
 done
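
Note: for every other disk, wipedisk keeps the existing behaviour: wipefs clears filesystem signatures, then the two dd calls zero the first and last 34 sectors of the device. The first 34 sectors hold the protective MBR, the GPT header and the 32-sector partition-entry array; the backup header and entry array sit in the last 33 sectors, so zeroing the final 34 covers them as well. A minimal standalone sketch of the same pattern (wipe_gpt_ends is a hypothetical helper name, not part of the change):

    wipe_gpt_ends() {
        local dev="$1"
        # primary GPT structures at the start of the disk
        dd if=/dev/zero of="$dev" bs=512 count=34
        # backup GPT structures within the last 34 sectors
        dd if=/dev/zero of="$dev" bs=512 count=34 seek=$(( $(blockdev --getsz "$dev") - 34 ))
    }
    # usage: wipe_gpt_ends /dev/sdb    (destructive; example device name only)
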