Remove all volume groups by UUID

In cases where wipedisk isn't run or doesn't complete correctly,
pre-existing volume groups, physical volumes, and logical volumes will
still be present on the root disk. Depending on the size and layout of
the previous install, along with any partial or aborted cleanup
activities, this may lead to [unknown] PVs and duplicate volume group
names.
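
For context, the leftover state can be inspected with the stock LVM
tools (an illustration only; exact output depends on the previous
install):

  # Duplicate cgts-vg rows and "[unknown]" device entries indicate
  # leftover LVM metadata from a prior install that was not wiped.
  vgs --noheadings -o vg_name,vg_uuid
  pvs --noheadings -o pv_name,vg_name,vg_uuid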

Adjust the cleanup logic to:
- Discover existing volume groups by UUID so that duplicate volume
  groups (i.e. two occurrences of cgts-vg) can be handled individually
  (see the sketch below).
- Ignore [unknown] physical volumes in a volume group, as they cannot
  be removed. Cleaning up the existing physical volumes across all
  volume groups will resolve any [unknown] physical volumes.
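
A minimal sketch of the UUID-driven cleanup flow (for illustration
only; the actual hook code is in the hunks below and runs the same
commands through exec_no_fds):

  # Hedged sketch: deactivate and remove every VG by UUID, skipping
  # [unknown] PVs, which pvremove cannot act on.
  vg_uuids=( $(vgdisplay -C --noheadings -o vg_uuid 2>/dev/null) )
  for uuid in "${vg_uuids[@]}" ; do
      vgchange -an --select vg_uuid=${uuid}
      lvremove --force --select vg_uuid=${uuid}
      for pv in $(pvs --select vg_uuid=${uuid} --noheadings -o pv_name | grep -v unknown) ; do
          pvremove --force --force --yes ${pv}
      done
      vgremove --force --select vg_uuid=${uuid}
  done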

In addition, unify the if/then and for/do syntax in the %pre-part hook.
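
The unified form keeps then/do on the same line, separated by " ; "
(shown here with hypothetical values):

  if [ -e "${dev}" ] ; then
      ilog "found ${dev}"
  fi
  for dev in ${STOR_DEVS} ; do
      ilog "checking ${dev}"
  done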

Test Plan:
PASS - Create a scenario with multiple partitions along with nova-local
       and cgts-vg volume groups that results in an [unknown] physical
       volume and a duplicate cgts-vg. Without wiping the disks,
       install an ISO with the above changes. Observe proper cleanup
       and installation.
PASS - Perform consecutive installs without wipedisk and observe proper
       cleanup and installation.

Change-Id: Idf845cf00ca3c009d72dedef0805a77d94fa3d97
Partial-Bug: #1998204
Signed-off-by: Robert Church <robert.church@windriver.com>

@@ -1102,10 +1102,10 @@ EOF
if [ -e /usr/sbin/ntpd ] ; then
/usr/sbin/ntpd -g -q -n -c /etc/ntp_kickstart.conf
rc=$?
if [ ${rc} -eq 0 ]; then
if [ ${rc} -eq 0 ] ; then
if [ -e /sbin/hwclock ] ; then
/sbin/hwclock --systohc --utc
if [ $? -ne 0 ]; then
if [ $? -ne 0 ] ; then
wlog "failed hwclock command ; /sbin/hwclock --systohc --utc"
else
ilog "ntpd and hwclock set ok"
@@ -1142,7 +1142,7 @@ fi
# operations.
ilog "Detected storage devices:"
STOR_DEVS=""
for f in /dev/disk/by-path/*; do
for f in /dev/disk/by-path/* ; do
dev=$(readlink -f $f)
# dlog "found device ${f}"
exec_retry 2 0.5 "lsblk --nodeps --pairs $dev" | grep -q 'TYPE="disk"'
@@ -1168,7 +1168,7 @@ fi
# devices. Since udev events are asynchronous this could lead to a case
# where /dev/ links for existing partitions are briefly missing.
# Missing /dev links leads to command execution failures.
for dev in $STOR_DEVS; do
for dev in $STOR_DEVS ; do
exec {fd}>$dev || report_failure_with_msg "Error creating file descriptor for $dev."
flock -n "$fd" || report_failure_with_msg "Can't get a lock on fd $fd of device $dev."
STOR_DEV_FDS="$STOR_DEV_FDS $fd"
@@ -1176,7 +1176,7 @@ for dev in $STOR_DEVS; do
done
# Log info about system state at beginning of partitioning operation
for dev in $STOR_DEVS; do
for dev in $STOR_DEVS ; do
ilog "Initial partition table for $dev is:"
# log "Initial partition table for $dev is:"
parted -s $dev unit mib print
@@ -1189,7 +1189,7 @@ display_mount_info
# Consider removing since LAT already handles this failure mode
# Ensure specified device is not a USB drive
udevadm info --query=property --name=${INSTDEV} |grep -q '^ID_BUS=usb'
if [ $? -eq 0 ]; then
if [ $? -eq 0 ] ; then
report_failure_with_msg "Specified installation ($INSTDEV) device is a USB drive."
fi
@@ -1200,41 +1200,35 @@ ilog "Volume Groups : ${VOLUME_GROUPS} ; $STOR_DEV_FDS"
sed -i "s#obtain_device_list_from_udev = 1#obtain_device_list_from_udev = 0#" /etc/lvm/lvm.conf
# Deactivate existing volume groups to avoid Anaconda issues with pre-existing groups
# TODO: May not need this Anaconda specific behavior work around
vgs=$(exec_no_fds "$STOR_DEV_FDS" "vgs --noheadings -o vg_name 2>/dev/null")
if [ -z ${vgs} ] ; then
vgs=( $(exec_no_fds "$STOR_DEV_FDS" "vgdisplay -C --noheadings -o vg_uuid 2>/dev/null") )
if [ ${#vgs[@]} -eq 0 ] ; then
ilog "No volume groups found"
else
ilog "Found '${vgs}' volume groups"
for vg in $vgs; do
ilog "... disabling $vg"
exec_no_fds "$STOR_DEV_FDS" "vgchange -an $vg 2>/dev/null" 5 0.5
[ $? -ne 0 ] && report_failure_with_msg "Failed to disable $vg."
for vg in ${vgs[@]} ; do
ilog "Disable volume group ${vg}"
exec_no_fds "$STOR_DEV_FDS" "vgchange -an --select vg_uuid=${vg} 2>/dev/null" 5 0.5
[ $? -ne 0 ] && report_failure_with_msg "Failed to disable ${vg}."
done
# Remove the volume groups that have physical volumes on the root disk
for vg in $(exec_no_fds "$STOR_DEV_FDS" "vgs --noheadings -o vg_name"); do
exec_no_fds "$STOR_DEV_FDS" "pvs --select \"vg_name=$vg\" --noheadings -o pv_name" | grep -q "${INSTDEV}"
if [ $? -ne 0 ]; then
wlog "Found $vg with no PV on rootfs, ignoring."
continue
fi
ilog "Removing LVs on $vg."
exec_no_fds "$STOR_DEV_FDS" "lvremove --force $vg" 5 0.5 || wlog "WARNING: Failed to remove lvs on $vg."
pvs=$(exec_no_fds "$STOR_DEV_FDS" "pvs --select \"vg_name=$vg\" --noheadings -o pv_name")
wlog "VG $vg has PVs: $(echo $pvs), removing them."
for pv in $pvs; do
ilog "Removing PV $pv."
exec_no_fds "$STOR_DEV_FDS" "pvremove --force --force --yes $pv" 5 0.5
[ $? -ne 0 ] && report_failure_with_msg "Failed to remove PV."
done
# VG should no longer be present
vg_check=$(exec_no_fds "$STOR_DEV_FDS" "vgs --select \"vg_name=$vg\" --noheadings -o vg_name")
if [ -n "$vg_check" ]; then
wlog "WARNING: VG $vg is still present after removing PVs! Removing it by force."
exec_no_fds "$STOR_DEV_FDS" "vgremove --force $vg" 5 0.5
[ $? -ne 0 ] && report_failure_with_msg "Failed to remove VG."
for vg in ${vgs[@]} ; do
vg_name=$(exec_no_fds "$STOR_DEV_FDS" "vgdisplay -C --noheadings --select vg_uuid=${vg} -o vg_name 2>/dev/null" | xargs)
pvs=( $(exec_no_fds "$STOR_DEV_FDS" "pvs --select vg_uuid=${vg} --noheadings -o pv_name 2>/dev/null" | grep -v unknown) )
if [ ${#pvs[@]} -ne 0 ] ; then
ilog "Remove logical volumes from ${vg_name} (${vg})"
exec_no_fds "$STOR_DEV_FDS" "lvremove --force --select vg_uuid=${vg} 2>/dev/null" 5 0.5 || wlog "WARNING: Failed to remove lvs on ${vg_name} (${vg})."
for pv in ${pvs[@]} ; do
ilog "Remove physical volume ${pv} from ${vg_name} (${vg})"
exec_no_fds "$STOR_DEV_FDS" "pvremove --force --force --yes ${pv} 2>/dev/null" 5 0.5
[ $? -ne 0 ] && report_failure_with_msg "Failed to remove ${pv}."
done
fi
ilog "Force remove volume group ${vg_name} (${vg})"
exec_no_fds "$STOR_DEV_FDS" "vgremove --force --select vg_uuid=${vg} 2>/dev/null" 5 0.5
[ $? -ne 0 ] && report_failure_with_msg "Failed to remove ${vg_name} (${vg})."
done
fi
@@ -1255,7 +1249,7 @@ part_type_flags_str="Attribute flags"
hostname="hostname"
if [ "$(curl -sf http://pxecontroller:6385/v1/upgrade/${hostname}/in_upgrade 2>/dev/null)" = "true" ]; then
if [ "$(curl -sf http://pxecontroller:6385/v1/upgrade/${hostname}/in_upgrade 2>/dev/null)" = "true" ] ; then
# In an upgrade, only wipe the disk with the rootfs and boot partition
wlog "In upgrade, wiping only ${INSTDEV}"
@@ -1273,7 +1267,7 @@ else
CEPH_JOURNAL_GUID="45B0969E-9B03-4F30-B4C6-B4B80CEFF106"
# Check if we wipe OSDs
if [ "$(curl -sf http://pxecontroller:6385/v1/ihosts/wipe_osds 2>/dev/null)" = "true" ]; then
if [ "$(curl -sf http://pxecontroller:6385/v1/ihosts/wipe_osds 2>/dev/null)" = "true" ] ; then
ilog "Wipe OSD data"
WIPE_CEPH_OSDS="true"
else
@@ -1281,8 +1275,7 @@ else
WIPE_CEPH_OSDS="false"
fi
for dev in $STOR_DEVS
do
for dev in $STOR_DEVS ; do
# TODO: Allowing the install dev 'in' results in a failure mode where
# every second install fails with the following error string
# and unrecoverable mount failure.
@@ -1307,11 +1300,11 @@ else
udevadm info --query=property --name=$dev |grep -q '^ID_BUS=usb' && continue
# Avoid wiping ceph osds if sysinv tells us so
if [ ${WIPE_CEPH_OSDS} == "false" ]; then
if [ ${WIPE_CEPH_OSDS} == "false" ] ; then
wipe_dev="true"
exec_no_fds "$STOR_DEV_FDS" "pvs" | grep -q "$dev *ceph"
if [ $? -eq 0 ]; then
if [ $? -eq 0 ] ; then
wlog "skip rook provisoned disk $dev"
continue
fi
@@ -1319,33 +1312,33 @@ else
part_numbers=( `parted -s $dev print | awk '$1 == "Number" {i=1; next}; i {print $1}'` )
# Scanning the partitions looking for CEPH OSDs and
# skipping any disk found with such partitions
for part_number in "${part_numbers[@]}"; do
for part_number in "${part_numbers[@]}" ; do
sgdisk_part_info=$(sgdisk -i $part_number $dev)
part_type_guid=$(echo "$sgdisk_part_info" | grep "$part_type_guid_str" | awk '{print $4;}')
if [ "$part_type_guid" == $CEPH_OSD_GUID ]; then
if [ "$part_type_guid" == $CEPH_OSD_GUID ] ; then
wlog "OSD found on $dev, skipping wipe"
wipe_dev="false"
break
fi
exec_no_fds "$STOR_DEV_FDS" "pvs" | grep -q -e "${dev}${part_number} *ceph" -e "${dev}p${part_number} *ceph"
if [ $? -eq 0 ]; then
if [ $? -eq 0 ] ; then
wlog "Rook OSD found on $dev$part_number, skip wipe"
wipe_dev="false"
break
fi
done
if [ "$wipe_dev" == "false" ]; then
if [ "$wipe_dev" == "false" ] ; then
continue
fi
fi
# Add device to the wipe list
devname=$(basename $dev)
if [ -e $dev -a "$ISO_DEV" != "../../$devname" -a "$USB_DEV" != "../../$devname" ]; then
if [ -e $dev -a "$ISO_DEV" != "../../$devname" -a "$USB_DEV" != "../../$devname" ] ; then
ilog "Adding ${dev} to list of disks to be wiped"
if [ -n "$WIPE_HDD" ]; then
if [ -n "$WIPE_HDD" ] ; then
ilog "WIPE_HDD=$WIPE_HDD,$dev"
WIPE_HDD=$WIPE_HDD,$dev
else
@@ -1362,14 +1355,13 @@ ilog "WIPE DISKs: ${WIPE_HDD}"
ilog "==========="
by_dev=${INSTDEV}
# TODO: Avoid this loop if the INSTDEV does not have by-path in its name
for f in /dev/disk/by-path/*; do
for f in /dev/disk/by-path/* ; do
if [ "${f}" == "${INSTDEV}" ] ; then
by_dev=$(get_disk "${INSTDEV}")
break
fi
done
for dev in ${WIPE_HDD//,/ }
do
for dev in ${WIPE_HDD//,/ } ; do
ilog "Wiping $dev"
# Clear previous GPT tables or LVM data on each disk.
@@ -1383,7 +1375,7 @@ do
part_numbers=( $(parted -s $dev print | awk '$1 == "Number" {i=1; next}; i {print $1}') )
# For each '/dev/${dev}${part_number} apply wipe rules
for part_number in "${part_numbers[@]}"; do
for part_number in "${part_numbers[@]}" ; do
sgdisk_part_info=$(sgdisk -i $part_number $dev)
part_name=$(echo "$sgdisk_part_info" | grep "$part_type_name_str" | awk '{print $3;}')
@@ -1452,7 +1444,7 @@ do
# and contains a proper filesystem
if [ "${part_number}" == "${BACKUP_PART_NO}" ] ; then
part_fstype=$(exec_retry 5 0.5 "blkid -s TYPE -o value $part")
if [ "${part_fstype}" == "ext4" ]; then
if [ "${part_fstype}" == "ext4" ] ; then
ilog "Discovered persistent backup partition, ${part}, is in the expected location and is formatted correctly. Maintaining..."
BACKUP_PART_FOUND=1
continue
@@ -1485,7 +1477,7 @@ do
fi
fi
if [ $WIPE_CEPH_OSDS == "true" -a "$part_type_guid" == $CEPH_JOURNAL_GUID ]; then
if [ $WIPE_CEPH_OSDS == "true" -a "$part_type_guid" == $CEPH_JOURNAL_GUID ] ; then
# Journal partitions require additional wiping. Based on the ceph-manage-journal.py
# script in the integ repo (at the ceph/ceph/files/ceph-manage-journal.py location)
# wiping 100MB of data at the beginning of the partition should be enough. We also
@@ -1500,7 +1492,7 @@ do
fi
done
if [ ${BACKUP_PART_FOUND} -eq 0 -o "${dev}" != "${by_dev}" ]; then
if [ ${BACKUP_PART_FOUND} -eq 0 -o "${dev}" != "${by_dev}" ] ; then
ilog "Creating disk label for $dev"
parted -s $dev mktable gpt
ilog "... done"
@@ -1508,8 +1500,7 @@ do
done
ilog "Ensure any LAT installer root/boot partitions are zapped/wiped"
for oldrootlabel in otaroot otaroot_1 otaroot_b otaroot_b_1
do
for oldrootlabel in otaroot otaroot_1 otaroot_b otaroot_b_1 ; do
oldrootpart=$(blkid --label $oldrootlabel)
[ -z "$oldrootpart" ] && continue

@@ -1190,41 +1190,35 @@ ilog "Volume Groups : ${VOLUME_GROUPS} ; $STOR_DEV_FDS"
sed -i "s#obtain_device_list_from_udev = 1#obtain_device_list_from_udev = 0#" /etc/lvm/lvm.conf
# Deactivate existing volume groups to avoid Anaconda issues with pre-existing groups
# TODO: May not need this Anaconda specific behavior work around
vgs=$(exec_no_fds "$STOR_DEV_FDS" "vgs --noheadings -o vg_name 2>/dev/null")
if [ -z ${vgs} ] ; then
vgs=( $(exec_no_fds "$STOR_DEV_FDS" "vgdisplay -C --noheadings -o vg_uuid 2>/dev/null") )
if [ ${#vgs[@]} -eq 0 ] ; then
ilog "No volume groups found"
else
ilog "Found '${vgs}' volume groups"
for vg in $vgs; do
ilog "... disabling $vg"
exec_no_fds "$STOR_DEV_FDS" "vgchange -an $vg 2>/dev/null" 5 0.5
[ $? -ne 0 ] && report_failure_with_msg "Failed to disable $vg."
for vg in ${vgs[@]}; do
ilog "Disable volume group ${vg}"
exec_no_fds "$STOR_DEV_FDS" "vgchange -an --select vg_uuid=${vg} 2>/dev/null" 5 0.5
[ $? -ne 0 ] && report_failure_with_msg "Failed to disable ${vg}."
done
# Remove the volume groups that have physical volumes on the root disk
for vg in $(exec_no_fds "$STOR_DEV_FDS" "vgs --noheadings -o vg_name"); do
exec_no_fds "$STOR_DEV_FDS" "pvs --select \"vg_name=$vg\" --noheadings -o pv_name" | grep -q "${INSTDEV}"
if [ $? -ne 0 ]; then
wlog "Found $vg with no PV on rootfs, ignoring."
continue
fi
ilog "Removing LVs on $vg."
exec_no_fds "$STOR_DEV_FDS" "lvremove --force $vg" 5 0.5 || wlog "WARNING: Failed to remove lvs on $vg."
pvs=$(exec_no_fds "$STOR_DEV_FDS" "pvs --select \"vg_name=$vg\" --noheadings -o pv_name")
wlog "VG $vg has PVs: $(echo $pvs), removing them."
for pv in $pvs; do
ilog "Removing PV $pv."
exec_no_fds "$STOR_DEV_FDS" "pvremove --force --force --yes $pv" 5 0.5
[ $? -ne 0 ] && report_failure_with_msg "Failed to remove PV."
done
# VG should no longer be present
vg_check=$(exec_no_fds "$STOR_DEV_FDS" "vgs --select \"vg_name=$vg\" --noheadings -o vg_name")
if [ -n "$vg_check" ]; then
wlog "WARNING: VG $vg is still present after removing PVs! Removing it by force."
exec_no_fds "$STOR_DEV_FDS" "vgremove --force $vg" 5 0.5
[ $? -ne 0 ] && report_failure_with_msg "Failed to remove VG."
for vg in ${vgs[@]}; do
vg_name=$(exec_no_fds "$STOR_DEV_FDS" "vgdisplay -C --noheadings --select vg_uuid=${vg} -o vg_name 2>/dev/null" | xargs)
pvs=( $(exec_no_fds "$STOR_DEV_FDS" "pvs --select vg_uuid=${vg} --noheadings -o pv_name 2>/dev/null" | grep -v unknown) )
if [ ${#pvs[@]} -ne 0 ] ; then
ilog "Remove logical volumes from ${vg_name} (${vg})"
exec_no_fds "$STOR_DEV_FDS" "lvremove --force --select vg_uuid=${vg} 2>/dev/null" 5 0.5 || wlog "WARNING: Failed to remove lvs on ${vg_name} (${vg})."
for pv in ${pvs[@]}; do
ilog "Remove physical volume ${pv} from ${vg_name} (${vg})"
exec_no_fds "$STOR_DEV_FDS" "pvremove --force --force --yes ${pv} 2>/dev/null" 5 0.5
[ $? -ne 0 ] && report_failure_with_msg "Failed to remove ${pv}."
done
fi
ilog "Force remove volume group ${vg_name} (${vg})"
exec_no_fds "$STOR_DEV_FDS" "vgremove --force --select vg_uuid=${vg} 2>/dev/null" 5 0.5
[ $? -ne 0 ] && report_failure_with_msg "Failed to remove ${vg_name} (${vg})."
done
fi
@@ -1271,8 +1265,7 @@ else
WIPE_CEPH_OSDS="false"
fi
for dev in $STOR_DEVS
do
for dev in $STOR_DEVS ; do
# TODO: Allowing the install dev 'in' results in a failure mode where
# every second install fails with the following error string
# and unrecoverable mount failure.
@@ -1358,8 +1351,7 @@ for f in /dev/disk/by-path/*; do
break
fi
done
for dev in ${WIPE_HDD//,/ }
do
for dev in ${WIPE_HDD//,/ } ; do
ilog "Wiping $dev"
# Clear previous GPT tables or LVM data on each disk.
@@ -1502,8 +1494,7 @@ do
done
ilog "Ensure any LAT installer root/boot partitions are zapped/wiped"
for oldrootlabel in otaroot otaroot_1 otaroot_b otaroot_b_1
do
for oldrootlabel in otaroot otaroot_1 otaroot_b otaroot_b_1 ; do
oldrootpart=$(blkid --label $oldrootlabel)
[ -z "$oldrootpart" ] && continue