metal/bsp-files/kickstarts/pre_disk_setup_common.cfg
Ovidiu Poncea 7a0a2dac1a Fix issues with controller node Anaconda hang
On some deployments the install fails because we keep one FD
open during install. This leads to hangs when Anaconda's
'post' stage returns.

On other deployments the install fails because udev sometimes
creates multiple links to the same device in /dev/disk/by-path.
We iterate through this list and, because the entries are not
unique, we try to flock the same device multiple times.
Locking a device multiple times doesn't work: the second
flock waits for the first lock to be released.
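
As a rough illustration (hypothetical snippet; $dev stands for the
same disk node reached through two different by-path links):

    exec {fd1}>"$dev" && flock "$fd1"   # first exclusive lock is granted
    exec {fd2}>"$dev" && flock "$fd2"   # second lock blocks until fd1 is released

De-duplicating the device list avoids ever taking the second lock.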

This commit:
 o removes 'exec {stdout}>&1' from ks-functions.sh so that it no
   longer opens FDs in the 'post' stage. For the pre stage we open
   the FD only when needed;
 o makes sure that the list of storage devices is unique;
 o increases the timeout of udevadm settle from its default of 180s
   to 300s, the value used throughout Anaconda (see the example
   below). This helps with slower hardware.
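
For example, the settle call now passes an explicit timeout
(illustrative invocation only, not copied from the change):

    udevadm settle --timeout=300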

Closes-Bug: 1889427
Change-Id: I348f10d96a78ea2c1c25fe6cf48462b0bc31fb84
Signed-off-by: Ovidiu Poncea <ovidiu.poncea@windriver.com>
2020-07-30 14:20:33 +00:00

%pre --erroronfail
# Get the FD used by subshells to log output
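# The {stdout}>&1 form makes bash allocate a free file descriptor,
# duplicate stdout onto it and store the descriptor number in $stdout.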
exec {stdout}>&1
# Source common functions
. /tmp/ks-functions.sh
wlog "ISO_DEV='$ISO_DEV'."
wlog "USB_DEV='$USB_DEV'."
# This is a really fancy way of finding the first usable disk for the
# install and not stomping on the USB device if it comes up first
# First, parse /proc/cmdline to find the boot args
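# Each key=value argument is eval'ed into a shell variable of the same name
# (e.g. boot_device=..., rootfs_device=...)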
set -- `cat /proc/cmdline`
for I in $*; do case "$I" in *=*) eval $I 2>/dev/null;; esac; done
if [ -z "$boot_device" ]; then
boot_device=$(get_disk_dev)
fi
if [ -z "$rootfs_device" ]; then
rootfs_device=$(get_disk_dev)
fi
# Get root and boot devices
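# get_by_path and get_disk (helpers from ks-functions.sh) map the device given
# on the boot command line to its persistent /dev/disk/by-path link and then
# back to the underlying /dev node.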
orig_rootfs_device=$rootfs_device
by_path_rootfs_device=$(get_by_path $rootfs_device)
rootfs_device=$(get_disk $by_path_rootfs_device)
wlog "Found rootfs $orig_rootfs_device on: $by_path_rootfs_device->$rootfs_device."
orig_boot_device=$boot_device
by_path_boot_device=$(get_by_path $boot_device)
boot_device=$(get_disk $by_path_boot_device)
wlog "Found boot $orig_boot_device on: $by_path_boot_device->$boot_device."
# Check if boot and rootfs devices are valid
if [ ! -e "$rootfs_device" -o ! -e "$boot_device" ] ; then
    # Touch this file to prevent Anaconda from dying an ungraceful death
    touch /tmp/part-include
    report_pre_failure_with_msg "ERROR: Specified installation ($orig_rootfs_device) or boot ($orig_boot_device) device is invalid."
fi
# Get all block devices of type disk in the system. This includes solid
# state devices.
# Note: /dev/* nodes are managed by the kernel devtmpfs while links in /dev/disk/by-path/
# are managed by udev which updates them asynchronously so we should avoid using
# them while performing partition operations.
STOR_DEVS=""
wlog "Detected storage devices:"
for f in /dev/disk/by-path/*; do
    dev=$(readlink -f $f)
    exec_retry 2 0.5 "lsblk --nodeps --pairs $dev" | grep -q 'TYPE="disk"'
    if [ $? -eq 0 ]
    then
        STOR_DEVS="$STOR_DEVS $dev"
        wlog " ${f}->${dev}"
    fi
done
# Filter STOR_DEVS variable for any duplicates as on some systems udev
# creates multiple links to the same device. This causes issues due to
# attempting to acquire a flock on the same device multiple times.
STOR_DEVS=$(echo "$STOR_DEVS" | xargs -n 1 | sort -u | xargs)
wlog "Unique storage devices: $STOR_DEVS."
if [ -z "$STOR_DEVS" ]
then
report_pre_failure_with_msg "ERROR: No storage devices available."
fi
# Lock all devices so that udev doesn't trigger a kernel partition table
# rescan that removes and recreates all /dev nodes for partitions on those
# devices. Since udev events are asynchronous this could lead to a case
# where /dev/ links for existing partitions are briefly missing.
# Missing /dev links lead to command execution failures.
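# STOR_DEV_FDS collects the logging FD plus the per-device lock FDs opened
# below; it is handed to exec_no_fds (ks-functions.sh) so that commands run
# through that helper do not inherit these descriptors.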
STOR_DEV_FDS="$stdout"
for dev in $STOR_DEVS; do
    exec {fd}>$dev || report_pre_failure_with_msg "ERROR: Error creating file descriptor for $dev."
    flock -n "$fd" || report_pre_failure_with_msg "ERROR: Can't get a lock on fd $fd of device $dev."
    STOR_DEV_FDS="$STOR_DEV_FDS $fd"
done
# Log info about system state at beginning of partitioning operation
for dev in $STOR_DEVS; do
    wlog "Initial partition table for $dev is:"
    parted -s $dev unit mib print
done
# Ensure that neither the specified installation device nor the boot device is a USB drive
udevadm info --query=property --name=$rootfs_device |grep -q '^ID_BUS=usb' || \
    udevadm info --query=property --name=$boot_device |grep -q '^ID_BUS=usb'
if [ $? -eq 0 ]; then
    # Touch this file to prevent Anaconda from dying an ungraceful death
    touch /tmp/part-include
    report_pre_failure_with_msg "ERROR: Specified installation ($orig_rootfs_device) or boot ($orig_boot_device) device is a USB drive."
fi
# Deactivate existing volume groups to avoid Anaconda issues with pre-existing groups
vgs=$(exec_no_fds "$STOR_DEV_FDS" "vgs --noheadings -o vg_name")
for vg in $vgs; do
    wlog "Disabling $vg."
    exec_no_fds "$STOR_DEV_FDS" "vgchange -an $vg" 5 0.5
    [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Failed to disable $vg."
done
# Remove the volume groups that have physical volumes on the root disk
for vg in $(exec_no_fds "$STOR_DEV_FDS" "vgs --noheadings -o vg_name"); do
    exec_no_fds "$STOR_DEV_FDS" "pvs --select \"vg_name=$vg\" --noheadings -o pv_name" | grep -q "$rootfs_device"
    if [ $? -ne 0 ]; then
        wlog "Found $vg with no PV on rootfs, ignoring."
        continue
    fi
    wlog "Removing LVs on $vg."
    exec_no_fds "$STOR_DEV_FDS" "lvremove --force $vg" 5 0.5 || wlog "WARNING: Failed to remove lvs on $vg."
    pvs=$(exec_no_fds "$STOR_DEV_FDS" "pvs --select \"vg_name=$vg\" --noheadings -o pv_name")
    wlog "VG $vg has PVs: $(echo $pvs), removing them."
    for pv in $pvs; do
        wlog "Removing PV $pv."
        exec_no_fds "$STOR_DEV_FDS" "pvremove --force --force --yes $pv" 5 0.5
        [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Failed to remove PV."
    done
    # VG should no longer be present
    vg_check=$(exec_no_fds "$STOR_DEV_FDS" "vgs --select \"vg_name=$vg\" --noheadings -o vg_name")
    if [ -n "$vg_check" ]; then
        wlog "WARNING: VG $vg is still present after removing PVs! Removing it by force."
        exec_no_fds "$STOR_DEV_FDS" "vgremove --force $vg" 5 0.5
        [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Failed to remove VG."
    fi
done
ONLYUSE_HDD=""
part_type_guid_str="Partition GUID code"
if [ "$(curl -sf http://pxecontroller:6385/v1/upgrade/$(hostname)/in_upgrade 2>/dev/null)" = "true" ]; then
# In an upgrade, only wipe the disk with the rootfs and boot partition
wlog "In upgrade, wiping only $rootfs_device"
WIPE_HDD=$rootfs_device
ONLYUSE_HDD="$(basename $rootfs_device)"
if [ "$rootfs_device" != "$boot_device" ]; then
WIPE_HDD="$WIPE_HDD,$boot_device"
ONLYUSE_HDD="$ONLYUSE_HDD,$(basename $boot_device)"
fi
else
# Make a list of all the hard drives that are to be wiped
WIPE_HDD=""
# Partition type OSD has a unique globally identifier
CEPH_OSD_GUID="4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D"
# Check if we wipe OSDs
if [ "$(curl -sf http://pxecontroller:6385/v1/ihosts/wipe_osds 2>/dev/null)" = "true" ]; then
wlog "Wipe OSD data."
WIPE_CEPH_OSDS="true"
else
wlog "Skip Ceph OSD data wipe."
WIPE_CEPH_OSDS="false"
fi
for dev in $STOR_DEVS
do
# Avoid wiping USB drives
udevadm info --query=property --name=$dev |grep -q '^ID_BUS=usb' && continue
# Avoid wiping ceph osds if sysinv tells us so
if [ ${WIPE_CEPH_OSDS} == "false" ]; then
wipe_dev="true"
part_numbers=( `parted -s $dev print | awk '$1 == "Number" {i=1; next}; i {print $1}'` )
# Scanning the partitions looking for CEPH OSDs and
# skipping any disk found with such partitions
for part_number in "${part_numbers[@]}"; do
sgdisk_part_info=$(sgdisk -i $part_number $dev)
part_type_guid=$(echo "$sgdisk_part_info" | grep "$part_type_guid_str" | awk '{print $4;}')
if [ "$part_type_guid" == $CEPH_OSD_GUID ]; then
wlog "OSD found on $dev, skipping wipe"
wipe_dev="false"
break
fi
done
if [ "$wipe_dev" == "false" ]; then
continue
fi
fi
# Add device to the wipe list
devname=$(basename $dev)
if [ -e $dev -a "$ISO_DEV" != "../../$devname" -a "$USB_DEV" != "../../$devname" ]; then
if [ -n "$WIPE_HDD" ]; then
WIPE_HDD=$WIPE_HDD,$dev
else
WIPE_HDD=$dev
fi
fi
done
wlog "Not in upgrade, wiping disks: $WIPE_HDD"
fi
ROOTFS_PART_PREFIX=$rootfs_device
# Check if the disk is an NVMe device; partition names on NVMe disks include a 'p' before the partition number
case $rootfs_device in
    *"nvme"*)
        ROOTFS_PART_PREFIX=${ROOTFS_PART_PREFIX}p
        ;;
esac
BACKUP_CREATED=0
# Note that the BA5EBA11-0000-1111-2222- is the prefix used by STX and it's defined in sysinv constants.py.
# Since the 000000000001 suffix is used by custom stx LVM partitions,
# the next suffix is used for the persistent backup partition (000000000002)
BACKUP_PART_LABEL="Platform Backup"
BACKUP_PART_GUID="BA5EBA11-0000-1111-2222-000000000002"
for dev in ${WIPE_HDD//,/ }
do
    # Clear previous GPT tables or LVM data.
    # Delete the first few bytes at the start and end of the partition. This is required with
    # GPT partitions, as they save partition info at the start and the end of the block.
    # Do this for each partition on the disk, as well.
    part_numbers=( $(parted -s $dev print | awk '$1 == "Number" {i=1; next}; i {print $1}') )
    for part_number in "${part_numbers[@]}"; do
        part=$dev$part_number
        case $part in
            *"nvme"*)
                part=${dev}p${part_number}
                ;;
        esac
        if [ "$dev" == "$rootfs_device" ]; then
            sgdisk_part_info=$(sgdisk -i $part_number $dev)
            part_type_guid=$(echo "$sgdisk_part_info" | grep "$part_type_guid_str" | awk '{print $4;}')
            part_fstype=$(exec_retry 5 0.5 "blkid -s TYPE -o value $part")
            if [ "$part_type_guid" == $BACKUP_PART_GUID -a "${part_fstype}" == "ext4" ]; then
                wlog "Skipping wipe backup partition $part"
                BACKUP_CREATED=1
                continue
            fi
        fi
        wlog "Wiping partition $part"
        dd if=/dev/zero of=$part bs=512 count=34
        dd if=/dev/zero of=$part bs=512 count=34 seek=$((`blockdev --getsz $part` - 34))
        exec_retry 5 0.5 "parted -s $dev rm $part_number"
        # LP 1876374: On some nvme systems udev doesn't correctly remove the
        # links to the deleted partitions from /dev/nvme* causing them to be
        # seen as non block devices.
        exec_retry 5 0.3 "rm -f $part" # Delete remaining /dev node leftover
    done
    if [ $BACKUP_CREATED -eq 0 -o "$dev" != "$rootfs_device" ]; then
        wlog "Creating disk label for $dev"
        parted -s $dev mktable gpt
    fi
done
# Check for remaining cgts-vg PVs, which could potentially happen
# in an upgrade where we're not wiping all disks.
# If we ever create other volume groups from kickstart in the future,
# include them in this search as well.
partitions=$(exec_no_fds "$STOR_DEV_FDS" "pvs --select 'vg_name=cgts-vg' -o pv_name --noheading" | grep -v '\[unknown\]')
for p in $partitions
do
    wlog "Pre-wiping $p from kickstart (cgts-vg present)"
    dd if=/dev/zero of=$p bs=512 count=34
    dd if=/dev/zero of=$p bs=512 count=34 seek=$((`blockdev --getsz $p` - 34))
done
let -i gb=1024*1024*1024
if [ -n "$ONLYUSE_HDD" ]; then
cat<<EOF>>/tmp/part-include
ignoredisk --only-use=$ONLYUSE_HDD
EOF
fi