From 2d47d4157c69e9de84fa3dec9f58bfd82bca6007 Mon Sep 17 00:00:00 2001 From: Ian Wienand Date: Tue, 23 Nov 2021 16:30:50 +1100 Subject: [PATCH] Fix BLS based bootloader installation This reverts I2701260d54cf6bc79f1ac765b512d99d799e8c43, Idf2a471453c5490d927979fb97aa916418172153 and part of Iecf7f7e4c992bb23437b6461cdd04cdca96aafa6 which added special flags to update kernels via grubby. These changes actually ended up reverting the behaviour on Fedora 35, which is what led me to investigate what was going on more fully. All distros still support setting GRUB_DEVICE in /etc/default/grub, even the BLS-based ones (i.e. everything !centos7). The implementation *is* confusing -- in earlier distros each BLS entry would refer to the variable $kernelopts, which grub2-mkconfig would write into /boot/grub2/grubenv. After commit [1] this was reverted, and the kernel options are directly written into the BLS entry. But the real problem is this bit from [2]: get_sorted_bls() { if ! [ -d "${blsdir}" ] || ! [ -e /etc/machine-id ]; then return fi ... files=($(for bls in ${blsdir}/${machine_id}-*.conf; do ... } i.e., to avoid overwriting BLS entries for other OS-boots (?), grub2-mkconfig will only update those BLS entries that match the current machine-id. The problem for DIB is that we are clearing the machine-id early in finalise.d/01-clear-machine-id, but then running the bootloader update later in finalise.d/50-bootloader. The result is that the bootloader entry generated when we installed the kernel (which guessed at the root= device, etc.) is *not* updated. Even more annoyingly, the gate doesn't pick this up -- because the gate tests run on a DIB image that was booted with "root=LABEL=cloudimg-rootfs", the kernel initially installed with "install-kernel" (that we never updated) is actually correct. But this fails when built on a production host. Thus we don't need any of the explicit grubby updates; these are reverted here. 
This moves the machine-id clearing to after the bootloader setup, which allows grub2-mkconfig to set up the BLS entries correctly. [1] https://src.fedoraproject.org/rpms/grub2/c/4a742183a39f344a7685bccdc76d5e64dea3766a?branch=master [2] https://src.fedoraproject.org/rpms/grub2/blob/rawhide/f/0062-Add-BLS-support-to-grub-mkconfig.patch Depends-On: https://review.opendev.org/c/zuul/nodepool/+/818705 Change-Id: Ia0e49980eb50eae29a5377d24ef0b31e4d78d346 --- .../bootloader/finalise.d/50-bootloader | 33 ++++--------------- ...1-clear-machine-id => 99-clear-machine-id} | 0 2 files changed, 7 insertions(+), 26 deletions(-) rename diskimage_builder/elements/sysprep/finalise.d/{01-clear-machine-id => 99-clear-machine-id} (100%) diff --git a/diskimage_builder/elements/bootloader/finalise.d/50-bootloader b/diskimage_builder/elements/bootloader/finalise.d/50-bootloader index 433a2c677..1a7512e44 100755 --- a/diskimage_builder/elements/bootloader/finalise.d/50-bootloader +++ b/diskimage_builder/elements/bootloader/finalise.d/50-bootloader @@ -157,27 +157,7 @@ else fi fi -# Fedora 30 and RHEL-8.2 onwards support the Bootloader Spec and use grubby -# to manage kernel menu entries and kernel arguments. -# https://fedoraproject.org/wiki/Changes/BootLoaderSpecByDefault -USE_GRUBBY= -if grep -qe "^\s*GRUB_ENABLE_BLSCFG=true" /etc/default/grub; then - USE_GRUBBY=true -fi - -# When building CentOS9 with centos-minimal /etc/default/grub does not exist -# after grub2-tools installation. However we need CS9 to use grubby. -if [[ "$DISTRO_NAME" == "centos" ]] && [[ $DIB_RELEASE =~ 9 ]]; then - USE_GRUBBY=true -fi - -# Override the root device to the default label, and disable uuid -# lookup. 
-if [ -n "$USE_GRUBBY" ]; then - grubby --update-kernel=ALL --args="root=LABEL=${DIB_ROOT_LABEL}" -else - echo "GRUB_DEVICE=LABEL=${DIB_ROOT_LABEL}" >> /etc/default/grub -fi +echo "GRUB_DEVICE=LABEL=${DIB_ROOT_LABEL}" >> /etc/default/grub echo 'GRUB_DISABLE_LINUX_UUID=true' >> /etc/default/grub echo "GRUB_TIMEOUT=${DIB_GRUB_TIMEOUT:-5}" >>/etc/default/grub echo 'GRUB_TERMINAL="serial console"' >>/etc/default/grub @@ -195,11 +175,7 @@ else fi GRUB_CMDLINE_LINUX_DEFAULT="console=tty0 console=${SERIAL_CONSOLE} no_timer_check" -if [ -n "$USE_GRUBBY" ]; then - grubby --update-kernel=ALL --args="${GRUB_CMDLINE_LINUX_DEFAULT} ${DIB_BOOTLOADER_DEFAULT_CMDLINE}" -else - echo "GRUB_CMDLINE_LINUX_DEFAULT=\"${GRUB_CMDLINE_LINUX_DEFAULT} ${DIB_BOOTLOADER_DEFAULT_CMDLINE}\"" >>/etc/default/grub -fi +echo "GRUB_CMDLINE_LINUX_DEFAULT=\"${GRUB_CMDLINE_LINUX_DEFAULT} ${DIB_BOOTLOADER_DEFAULT_CMDLINE}\"" >>/etc/default/grub echo 'GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=1"' >>/etc/default/grub # os-prober leaks /dev/sda into config file in dual-boot host @@ -221,6 +197,11 @@ fi # support uefi specific functionality like secure boot. $GRUB_MKCONFIG -o $GRUB_CFG +# If we are using BLS, for debugging purposes dump out the kernel +if [[ -e /boot/loader/entries ]]; then + grubby --info=ALL +fi + # Remove the fix to disable os_prober if [ -n "$PROBER_DISABLED" ]; then sed -i '$d' /etc/default/grub diff --git a/diskimage_builder/elements/sysprep/finalise.d/01-clear-machine-id b/diskimage_builder/elements/sysprep/finalise.d/99-clear-machine-id similarity index 100% rename from diskimage_builder/elements/sysprep/finalise.d/01-clear-machine-id rename to diskimage_builder/elements/sysprep/finalise.d/99-clear-machine-id