From 031b54b10aa6558b8cad3e5785970aa55053de17 Mon Sep 17 00:00:00 2001 From: Arne Wiebalck Date: Wed, 16 Aug 2023 15:51:59 +0200 Subject: [PATCH] Conditional creation of RAIDed ESP for UEFI Software RAID Rebuilding an instance on a RAIDed ESPs will fail due to sgdisk running against an non-clean disk and bailing out. Check if there is a RAIDed ESP already and skip creation if it exists. Change-Id: I13617ae77515a9d34bc4bb3caf9fae73d5e4e578 (cherry picked from commit 286d66709a1b2f2068360db641079c67197b21b6) --- ironic_python_agent/raid_utils.py | 100 +++++++++++------- .../tests/unit/samples/hardware_samples.py | 30 ++++++ .../tests/unit/test_raid_utils.py | 26 ++++- .../rebuild_on_esp_raid-33f359bdf5ccaa09.yaml | 5 + 4 files changed, 120 insertions(+), 41 deletions(-) create mode 100644 releasenotes/notes/rebuild_on_esp_raid-33f359bdf5ccaa09.yaml diff --git a/ironic_python_agent/raid_utils.py b/ironic_python_agent/raid_utils.py index 84c6941fd..27b0bb5b2 100644 --- a/ironic_python_agent/raid_utils.py +++ b/ironic_python_agent/raid_utils.py @@ -12,6 +12,7 @@ import copy import re +import shlex from ironic_lib import disk_utils from ironic_lib import utils as il_utils @@ -342,50 +343,58 @@ def prepare_boot_partitions_for_softraid(device, holders, efi_part, if efi_part: efi_part = '{}p{}'.format(device, efi_part['number']) - LOG.info("Creating EFI partitions on software RAID holder disks") - # We know that we kept this space when configuring raid,see - # hardware.GenericHardwareManager.create_configuration. - # We could also directly get the EFI partition size. - partsize_mib = ESP_SIZE_MIB - partlabel_prefix = 'uefi-holder-' - efi_partitions = [] - for number, holder in enumerate(holders): - # NOTE: see utils.get_partition_table_type_from_specs - # for uefi we know that we have setup a gpt partition table, - # sgdisk can be used to edit table, more user friendly - # for alignment and relative offsets - partlabel = '{}{}'.format(partlabel_prefix, number) - out, _u = utils.execute('sgdisk', '-F', holder) - start_sector = '{}s'.format(out.splitlines()[-1].strip()) - out, _u = utils.execute( - 'sgdisk', '-n', '0:{}:+{}MiB'.format(start_sector, - partsize_mib), - '-t', '0:ef00', '-c', '0:{}'.format(partlabel), holder) + # check if we have a RAIDed ESP already + md_device = find_esp_raid() + if md_device: + LOG.info("Found RAIDed ESP %s, skip creation", md_device) + else: + LOG.info("Creating EFI partitions on software RAID holder disks") + # We know that we kept this space when configuring raid,see + # hardware.GenericHardwareManager.create_configuration. + # We could also directly get the EFI partition size. + partsize_mib = ESP_SIZE_MIB + partlabel_prefix = 'uefi-holder-' + efi_partitions = [] + for number, holder in enumerate(holders): + # NOTE: see utils.get_partition_table_type_from_specs + # for uefi we know that we have setup a gpt partition table, + # sgdisk can be used to edit table, more user friendly + # for alignment and relative offsets + partlabel = '{}{}'.format(partlabel_prefix, number) + out, _u = utils.execute('sgdisk', '-F', holder) + start_sector = '{}s'.format(out.splitlines()[-1].strip()) + out, _u = utils.execute( + 'sgdisk', '-n', '0:{}:+{}MiB'.format(start_sector, + partsize_mib), + '-t', '0:ef00', '-c', '0:{}'.format(partlabel), holder) - # Refresh part table - utils.execute("partprobe") - utils.execute("blkid") + # Refresh part table + utils.execute("partprobe") + utils.execute("blkid") - target_part, _u = utils.execute( - "blkid", "-l", "-t", "PARTLABEL={}".format(partlabel), holder) + target_part, _u = utils.execute( + "blkid", "-l", "-t", "PARTLABEL={}".format(partlabel), + holder) - target_part = target_part.splitlines()[-1].split(':', 1)[0] - efi_partitions.append(target_part) + target_part = target_part.splitlines()[-1].split(':', 1)[0] + efi_partitions.append(target_part) - LOG.debug("EFI partition %s created on holder disk %s", - target_part, holder) + LOG.debug("EFI partition %s created on holder disk %s", + target_part, holder) - # RAID the ESPs, metadata=1.0 is mandatory to be able to boot - md_device = get_next_free_raid_device() - LOG.debug("Creating md device %(md_device)s for the ESPs " - "on %(efi_partitions)s", - {'md_device': md_device, 'efi_partitions': efi_partitions}) - utils.execute('mdadm', '--create', md_device, '--force', - '--run', '--metadata=1.0', '--level', '1', - '--name', 'esp', '--raid-devices', len(efi_partitions), - *efi_partitions) + # RAID the ESPs, metadata=1.0 is mandatory to be able to boot + md_device = get_next_free_raid_device() + LOG.debug("Creating md device %(md_device)s for the ESPs " + "on %(efi_partitions)s", + {'md_device': md_device, + 'efi_partitions': efi_partitions}) + utils.execute('mdadm', '--create', md_device, '--force', + '--run', '--metadata=1.0', '--level', '1', + '--name', 'esp', '--raid-devices', + len(efi_partitions), + *efi_partitions) - disk_utils.trigger_device_rescan(md_device) + disk_utils.trigger_device_rescan(md_device) if efi_part: # Blockdev copy the source ESP and erase it @@ -420,3 +429,18 @@ def prepare_boot_partitions_for_softraid(device, holders, efi_part, # disk, as in virtual disk, where to load the data from. # Since there is a structural difference, this means it will # fail. + + +def find_esp_raid(): + """Find the ESP md device in case of a rebuild.""" + + # find devices of type 'RAID1' and fstype 'VFAT' + lsblk = utils.execute('lsblk', '-PbioNAME,TYPE,FSTYPE') + report = lsblk[0] + for line in report.split('\n'): + dev = {} + vals = shlex.split(line) + for key, val in (v.split('=', 1) for v in vals): + dev[key] = val.strip() + if dev.get('TYPE') == 'raid1' and dev.get('FSTYPE') == 'vfat': + return '/dev/' + dev.get('NAME') diff --git a/ironic_python_agent/tests/unit/samples/hardware_samples.py b/ironic_python_agent/tests/unit/samples/hardware_samples.py index b800fb9e2..5e588eb8e 100644 --- a/ironic_python_agent/tests/unit/samples/hardware_samples.py +++ b/ironic_python_agent/tests/unit/samples/hardware_samples.py @@ -1769,3 +1769,33 @@ MULTIPATH_LINKS_DM = ( ' `-+- policy=\'service-time 0\' prio=1 status=active\n' ' `- 0:0:0:0 device s 8:0 active ready running\n' ) + +LSBLK_OUPUT = (""" +NAME="sda" TYPE="disk" FSTYPE="" +NAME="sdb" TYPE="disk" FSTYPE="" +""") + +LSBLK_OUPUT_ESP_RAID = (""" +NAME="sda" TYPE="disk" FSTYPE="" +NAME="sda1" TYPE="part" FSTYPE="linux_raid_member" +NAME="md127" TYPE="raid1" FSTYPE="" +NAME="md127p1" TYPE="md" FSTYPE="xfs" +NAME="md127p2" TYPE="md" FSTYPE="iso9660" +NAME="md127p14" TYPE="md" FSTYPE="" +NAME="md127p15" TYPE="md" FSTYPE="" +NAME="sda2" TYPE="part" FSTYPE="linux_raid_member" +NAME="md126" TYPE="raid0" FSTYPE="" +NAME="sda3" TYPE="part" FSTYPE="linux_raid_member" +NAME="md125" TYPE="raid1" FSTYPE="vfat" +NAME="sdb" TYPE="disk" FSTYPE="" +NAME="sdb1" TYPE="part" FSTYPE="linux_raid_member" +NAME="md127" TYPE="raid1" FSTYPE="" +NAME="md127p1" TYPE="md" FSTYPE="xfs" +NAME="md127p2" TYPE="md" FSTYPE="iso9660" +NAME="md127p14" TYPE="md" FSTYPE="" +NAME="md127p15" TYPE="md" FSTYPE="" +NAME="sdb2" TYPE="part" FSTYPE="linux_raid_member" +NAME="md126" TYPE="raid0" FSTYPE="" +NAME="sdb3" TYPE="part" FSTYPE="linux_raid_member" +NAME="md125" TYPE="raid1" FSTYPE="vfat" +""") diff --git a/ironic_python_agent/tests/unit/test_raid_utils.py b/ironic_python_agent/tests/unit/test_raid_utils.py index a82027c8a..f68796056 100644 --- a/ironic_python_agent/tests/unit/test_raid_utils.py +++ b/ironic_python_agent/tests/unit/test_raid_utils.py @@ -153,6 +153,7 @@ class TestRaidUtils(base.IronicAgentTest): volume_name = raid_utils.get_volume_name_of_raid_device('/dev/md0') self.assertIsNone(volume_name) + @mock.patch.object(raid_utils, 'find_esp_raid', autospec=True) @mock.patch.object(disk_utils, 'trigger_device_rescan', autospec=True) @mock.patch.object(raid_utils, 'get_next_free_raid_device', autospec=True, return_value='/dev/md42') @@ -161,7 +162,7 @@ class TestRaidUtils(base.IronicAgentTest): @mock.patch.object(disk_utils, 'find_efi_partition', autospec=True) def test_prepare_boot_partitions_for_softraid_uefi_gpt( self, mock_efi_part, mock_execute, mock_dispatch, - mock_free_raid_device, mock_rescan): + mock_free_raid_device, mock_rescan, mock_find_esp): mock_efi_part.return_value = {'number': '12'} mock_execute.side_effect = [ ('451', None), # sgdisk -F @@ -178,6 +179,7 @@ class TestRaidUtils(base.IronicAgentTest): (None, None), # cp (None, None), # wipefs ] + mock_find_esp.return_value = None efi_part = raid_utils.prepare_boot_partitions_for_softraid( '/dev/md0', ['/dev/sda', '/dev/sdb'], None, @@ -209,6 +211,7 @@ class TestRaidUtils(base.IronicAgentTest): self.assertEqual(efi_part, '/dev/md42') mock_rescan.assert_called_once_with('/dev/md42') + @mock.patch.object(raid_utils, 'find_esp_raid', autospec=True) @mock.patch.object(disk_utils, 'trigger_device_rescan', autospec=True) @mock.patch.object(raid_utils, 'get_next_free_raid_device', autospec=True, return_value='/dev/md42') @@ -218,7 +221,7 @@ class TestRaidUtils(base.IronicAgentTest): @mock.patch.object(ilib_utils, 'mkfs', autospec=True) def test_prepare_boot_partitions_for_softraid_uefi_gpt_esp_not_found( self, mock_mkfs, mock_efi_part, mock_execute, mock_dispatch, - mock_free_raid_device, mock_rescan): + mock_free_raid_device, mock_rescan, mock_find_esp): mock_efi_part.return_value = None mock_execute.side_effect = [ ('451', None), # sgdisk -F @@ -233,6 +236,7 @@ class TestRaidUtils(base.IronicAgentTest): ('/dev/sdb14: whatever', None), # blkid (None, None), # mdadm ] + mock_find_esp.return_value = None efi_part = raid_utils.prepare_boot_partitions_for_softraid( '/dev/md0', ['/dev/sda', '/dev/sdb'], None, @@ -262,6 +266,7 @@ class TestRaidUtils(base.IronicAgentTest): self.assertEqual(efi_part, '/dev/md42') mock_rescan.assert_called_once_with('/dev/md42') + @mock.patch.object(raid_utils, 'find_esp_raid', autospec=True) @mock.patch.object(disk_utils, 'trigger_device_rescan', autospec=True) @mock.patch.object(raid_utils, 'get_next_free_raid_device', autospec=True, return_value='/dev/md42') @@ -269,7 +274,7 @@ class TestRaidUtils(base.IronicAgentTest): @mock.patch.object(ilib_utils, 'execute', autospec=True) def test_prepare_boot_partitions_for_softraid_uefi_gpt_efi_provided( self, mock_execute, mock_dispatch, mock_free_raid_device, - mock_rescan): + mock_rescan, mock_find_esp): mock_execute.side_effect = [ ('451', None), # sgdisk -F (None, None), # sgdisk create part @@ -285,6 +290,7 @@ class TestRaidUtils(base.IronicAgentTest): (None, None), # cp (None, None), # wipefs ] + mock_find_esp.return_value = None efi_part = raid_utils.prepare_boot_partitions_for_softraid( '/dev/md0', ['/dev/sda', '/dev/sdb'], '/dev/md0p15', @@ -389,3 +395,17 @@ class TestGetNextFreeRaidDevice(base.IronicAgentTest): ] self.assertRaises(errors.SoftwareRAIDError, raid_utils.get_next_free_raid_device) + + +@mock.patch.object(utils, 'execute', autospec=True) +class TestFindESPRAID(base.IronicAgentTest): + + def test_no_esp_raid(self, mock_execute): + mock_execute.side_effect = [(hws.LSBLK_OUPUT, '')] + result = raid_utils.find_esp_raid() + self.assertIsNone(result) + + def test_esp_raid(self, mock_execute): + mock_execute.side_effect = [(hws.LSBLK_OUPUT_ESP_RAID, '')] + result = raid_utils.find_esp_raid() + self.assertEqual('/dev/md125', result) diff --git a/releasenotes/notes/rebuild_on_esp_raid-33f359bdf5ccaa09.yaml b/releasenotes/notes/rebuild_on_esp_raid-33f359bdf5ccaa09.yaml new file mode 100644 index 000000000..09e8bf4d1 --- /dev/null +++ b/releasenotes/notes/rebuild_on_esp_raid-33f359bdf5ccaa09.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes an issue with rebuilding instances on Software RAID with + RAIDed ESP partitions.