Fix bootloader install issue with MDRAID

When no root_device hint is set, an MDRAID partition can be incorrectly
selected as the root device which causes installation of the bootloader
to the physical disks behind the MDRAID volume to fail. See the notes
in the referenced Story for more detail.

This change adds a little more specificity to the listing of block
devices.

Change-Id: I66db457e71a0586723ee753bef961aec5bf58827
Story: 2007905
Task: 40303
This commit is contained in:
Doug Szumski 2020-07-09 14:11:28 +01:00 committed by Julia Kreger
parent c66bec0f43
commit 5e95b1321d
3 changed files with 52 additions and 6 deletions

View File

@ -380,12 +380,24 @@ def list_all_block_devices(block_type='disk',
# Other possible type values, which we skip recording:
# lvm, part, rom, loop
if devtype != block_type:
if (devtype is not None
and any(x in devtype for x in ['raid', 'md'])
and not ignore_raid):
if (devtype is None or ignore_raid):
LOG.debug("Skipping: {!r}".format(line))
continue
elif ('raid' in devtype
and block_type in ['raid', 'disk']):
LOG.debug(
"TYPE detected to contain 'raid' or 'md', signifying a "
"TYPE detected to contain 'raid', signifying a "
"RAID volume. Found: {!r}".format(line))
elif (devtype == 'md'
and (block_type == 'part'
or block_type == 'md')):
# NOTE(dszumski): Partitions on software RAID devices have type
# 'md'. This may also contain RAID devices in a broken state in
# rare occasions. See https://review.opendev.org/#/c/670807 for
# more detail.
LOG.debug(
"TYPE detected to contain 'md', signifying a "
"RAID partition. Found: {!r}".format(line))
else:
LOG.debug(
"TYPE did not match. Wanted: {!r} but found: {!r}".format(
@ -1790,6 +1802,16 @@ class GenericHardwareManager(HardwareManager):
raid_devices = list_all_block_devices(block_type='raid',
ignore_raid=False,
ignore_empty=False)
# NOTE(dszumski): Fetch all devices of type 'md'. This
# will generally contain partitions on a software RAID
# device, but crucially may also contain devices in a
# broken state. See https://review.opendev.org/#/c/670807/
# for more detail.
raid_devices.extend(
list_all_block_devices(block_type='md',
ignore_raid=False,
ignore_empty=False)
)
return raid_devices
raid_devices = _scan_raids()

View File

@ -159,13 +159,24 @@ BLK_DEVICE_TEMPLATE_SMALL_DEVICES = [
# NOTE(TheJulia): This list intentionally contains duplicates
# as the code filters them out by kernel device name.
# NOTE(dszumski): We include some partitions here to verify that
# they are filtered out when not requested. It is assumed that
# ROTA has been set to 0 on some software RAID devices for testing
# purposes. In practice is appears to inherit from the underyling
# devices, so in this example it would normally be 1.
RAID_BLK_DEVICE_TEMPLATE = (
'KNAME="sda" MODEL="DRIVE 0" SIZE="1765517033472" '
'ROTA="1" TYPE="disk"\n'
'KNAME="sda1" MODEL="DRIVE 0" SIZE="107373133824" '
'ROTA="1" TYPE="part"\n'
'KNAME="sdb" MODEL="DRIVE 1" SIZE="1765517033472" '
'ROTA="1" TYPE="disk"\n'
'KNAME="sdb" MODEL="DRIVE 1" SIZE="1765517033472" '
'ROTA="1" TYPE="disk"\n'
'KNAME="sdb1" MODEL="DRIVE 1" SIZE="107373133824" '
'ROTA="1" TYPE="part"\n'
'KNAME="md0p1" MODEL="RAID" SIZE="107236818944" '
'ROTA="0" TYPE="md"\n'
'KNAME="md0" MODEL="RAID" SIZE="1765517033470" '
'ROTA="0" TYPE="raid1"\n'
'KNAME="md0" MODEL="RAID" SIZE="1765517033470" '
@ -3843,9 +3854,11 @@ class TestGenericHardwareManager(base.IronicAgentTest):
hardware.list_all_block_devices.side_effect = [
[raid_device1, raid_device2], # list_all_block_devices raid
[], # list_all_block_devices raid (md)
[sda, sdb, sdc], # list_all_block_devices disks
[], # list_all_block_devices parts
[], # list_all_block_devices raid
[], # list_all_block_devices raid (md)
]
mocked_get_component.side_effect = [
["/dev/sda1", "/dev/sdb1"],
@ -3925,13 +3938,14 @@ class TestGenericHardwareManager(base.IronicAgentTest):
raid_device1_part1 = hardware.BlockDevice('/dev/md0p1', 'RAID-1',
1073741824, True)
hardware.list_all_block_devices.side_effect = [
[raid_device1_part1], # list_all_block_devices raid
[], # list_all_block_devices raid
[raid_device1_part1], # list_all_block_devices raid (md)
[], # list_all_block_devices disks
[], # list_all_block_devices parts
[], # list_all_block_devices raid
[], # list_all_block_devices raid (md)
]
mocked_get_component.return_value = []
self.assertIsNone(self.hardware.delete_configuration(self.node, []))
mocked_execute.assert_has_calls([
mock.call('mdadm', '--assemble', '--scan', check_exit_code=False),
@ -3952,12 +3966,15 @@ class TestGenericHardwareManager(base.IronicAgentTest):
hardware.list_all_block_devices.side_effect = [
[raid_device1], # list_all_block_devices raid
[], # list_all_block_devices raid (type md)
[], # list_all_block_devices disks
[], # list_all_block_devices parts
[raid_device1], # list_all_block_devices raid
[], # list_all_block_devices raid (type md)
[], # list_all_block_devices disks
[], # list_all_block_devices parts
[raid_device1], # list_all_block_devices raid
[], # list_all_block_devices raid (type md)
]
mocked_get_component.return_value = []

View File

@ -0,0 +1,7 @@
---
fixes:
- |
Fixes an issue where the bootloader installation can fail on a software
RAID volume when no root_device hint is set.
See `Story 2007905
<https://storyboard.openstack.org/#!/story/2007905>`_