From 521811cbccfb1a665522f50500d75115917649b5 Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Mon, 23 Jun 2025 16:55:08 +0200 Subject: [PATCH] Fix software RAID creation on different physical devices When creating multiple software RAID logical disks that use different sets of physical devices, the partition indices were incorrectly shared across all devices. This caused the second RAID array creation to fail because it tried to use partition indices that didn't exist on those specific devices. This change fixes the issue by tracking partition indices separately for each physical device, ensuring that each device's partitions are numbered correctly starting from their first available index. Closes-Bug: #2115211 Change-Id: I440db4654f3d1d54274d1eee8c4b21c2b0a18d22 Signed-off-by: Mohammed Naser --- ironic_python_agent/hardware.py | 8 +- ironic_python_agent/raid_utils.py | 11 +- .../tests/unit/test_hardware.py | 111 ++++++++++++++++++ .../tests/unit/test_raid_utils.py | 29 ++++- ...id-different-devices-3298f735fdbd3a05.yaml | 7 ++ 5 files changed, 159 insertions(+), 7 deletions(-) create mode 100644 releasenotes/notes/fix-software-raid-different-devices-3298f735fdbd3a05.yaml diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py index 965c0fabf..2c0e64285 100644 --- a/ironic_python_agent/hardware.py +++ b/ironic_python_agent/hardware.py @@ -3057,9 +3057,11 @@ class GenericHardwareManager(HardwareManager): parted_start_dict[device] = end - # Create the RAID devices. + # Create the RAID devices. The indices mapping tracks the last used + # partition index for each physical device. + indices = {} for index, logical_disk in enumerate(logical_disks): - raid_utils.create_raid_device(index, logical_disk) + raid_utils.create_raid_device(index, logical_disk, indices) LOG.info("Successfully created Software RAID") @@ -3354,6 +3356,8 @@ class GenericHardwareManager(HardwareManager): size2 = logical_disks[1]['size_gb'] # Only one logical disk is allowed to span the whole device. + # FIXME(dtantsur): this logic is not correct when logical disks use + # different physical devices. if size1 == 'MAX' and size2 == 'MAX': msg = ("Software RAID can have only one RAID device with " "size 'MAX'") diff --git a/ironic_python_agent/raid_utils.py b/ironic_python_agent/raid_utils.py index db75c5701..48867407a 100644 --- a/ironic_python_agent/raid_utils.py +++ b/ironic_python_agent/raid_utils.py @@ -202,24 +202,31 @@ def _get_actual_component_devices(raid_device): return component_devices -def create_raid_device(index, logical_disk): +def create_raid_device(index, logical_disk, indices=None): """Create a raid device. :param index: the index of the resulting md device. :param logical_disk: the logical disk containing the devices used to crete the raid. + :param indices: Mapping to track the last used partition index for each + physical device across calls to create_raid_device. :raise: errors.SoftwareRAIDError if not able to create the raid device or fails to re-add a device to a raid. """ md_device = '/dev/md%d' % index component_devices = [] + if indices is None: + # Backward compatibility with custom hardware managers + indices = {dev: index for dev in logical_disk['block_devices']} for device in logical_disk['block_devices']: # The partition delimiter for all common harddrives (sd[a-z]+) part_delimiter = '' if 'nvme' in device: part_delimiter = 'p' + index_on_device = indices.get(device, 0) component_devices.append( - device + part_delimiter + str(index + RAID_PARTITION)) + device + part_delimiter + str(index_on_device + RAID_PARTITION)) + indices[device] = index_on_device + 1 raid_level = logical_disk['raid_level'] # The schema check allows '1+0', but mdadm knows it as '10'. if raid_level == '1+0': diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py index 6cb38bf6a..851e1f4b9 100644 --- a/ironic_python_agent/tests/unit/test_hardware.py +++ b/ironic_python_agent/tests/unit/test_hardware.py @@ -4308,6 +4308,117 @@ class TestGenericHardwareManager(base.IronicAgentTest): mock.call(x) for x in ['/dev/sda', '/dev/sdb'] ]) + @mock.patch.object(raid_utils, '_get_actual_component_devices', + autospec=True) + @mock.patch.object(utils, 'get_node_boot_mode', lambda node: 'bios') + @mock.patch.object(disk_utils, 'list_partitions', autospec=True, + return_value=[]) + @mock.patch.object(utils, 'execute', autospec=True) + def test_create_configuration_with_different_disks(self, mocked_execute, + mock_list_parts, + mocked_actual_comp): + node = self.node + raid_config = { + "logical_disks": [ + { + "size_gb": "10", + "raid_level": "1", + "controller": "software", + "physical_disks": [{'name': '/dev/sda'}, + {'name': '/dev/sdb'}], + }, + { + "size_gb": "MAX", + "raid_level": "0", + "controller": "software", + "physical_disks": [{'name': '/dev/sdc'}, + {'name': '/dev/sdd'}], + }, + ] + } + node['target_raid_config'] = raid_config + device1 = hardware.BlockDevice('/dev/sda', 'sda', 107374182400, True) + device2 = hardware.BlockDevice('/dev/sdb', 'sdb', 107374182400, True) + device3 = hardware.BlockDevice('/dev/sdc', 'sdc', 107374182400, True) + device4 = hardware.BlockDevice('/dev/sdd', 'sdd', 107374182400, True) + self.hardware.list_block_devices = mock.Mock() + self.hardware.list_block_devices.return_value = [ + device1, + device2, + device3, + device4, + ] + + mocked_execute.side_effect = [ + None, # mklabel sda + ('42', None), # sgdisk -F sda + None, # mklabel sda + ('42', None), # sgdisk -F sdb + None, # mklabel sdc + ('42', None), # sgdisk -F sdc + None, # mklabel sdd + ('42', None), # sgdisk -F sdd + None, None, None, # parted + partx + udevadm_settle sda + None, None, None, # parted + partx + udevadm_settle sdb + None, None, None, # parted + partx + udevadm_settle sdc + None, None, None, # parted + partx + udevadm_settle sdd + None, None, None, # parted + partx + udevadm_settle sda + None, None, None, # parted + partx + udevadm_settle sdb + None, None, None, # parted + partx + udevadm_settle sdc + None, None, None, # parted + partx + udevadm_settle sdd + None, None # mdadms + ] + + mocked_actual_comp.side_effect = [ + ('/dev/sda1', '/dev/sdb1'), + ('/dev/sdc1', '/dev/sdd1'), + ] + + result = self.hardware.create_configuration(node, []) + + mocked_execute.assert_has_calls([ + mock.call('parted', '/dev/sda', '-s', '--', 'mklabel', 'msdos'), + mock.call('sgdisk', '-F', '/dev/sda'), + mock.call('parted', '/dev/sdb', '-s', '--', 'mklabel', 'msdos'), + mock.call('sgdisk', '-F', '/dev/sdb'), + mock.call('parted', '/dev/sdc', '-s', '--', 'mklabel', 'msdos'), + mock.call('sgdisk', '-F', '/dev/sdc'), + mock.call('parted', '/dev/sdd', '-s', '--', 'mklabel', 'msdos'), + mock.call('sgdisk', '-F', '/dev/sdd'), + mock.call('parted', '/dev/sda', '-s', '-a', 'optimal', '--', + 'mkpart', 'primary', '42s', '10GiB'), + mock.call('partx', '-av', '/dev/sda', attempts=3, + delay_on_retry=True), + mock.call('udevadm', 'settle'), + mock.call('parted', '/dev/sdb', '-s', '-a', 'optimal', '--', + 'mkpart', 'primary', '42s', '10GiB'), + mock.call('partx', '-av', '/dev/sdb', attempts=3, + delay_on_retry=True), + mock.call('udevadm', 'settle'), + mock.call('parted', '/dev/sdc', '-s', '-a', 'optimal', '--', + 'mkpart', 'primary', '42s', '-1'), + mock.call('partx', '-av', '/dev/sdc', attempts=3, + delay_on_retry=True), + mock.call('udevadm', 'settle'), + mock.call('parted', '/dev/sdd', '-s', '-a', 'optimal', '--', + 'mkpart', 'primary', '42s', '-1'), + mock.call('partx', '-av', '/dev/sdd', attempts=3, + delay_on_retry=True), + mock.call('udevadm', 'settle'), + mock.call('mdadm', '--create', '/dev/md0', '--force', '--run', + '--metadata=1', '--level', '1', '--name', 'md0', + '--raid-devices', 2, '/dev/sda1', '/dev/sdb1'), + mock.call('mdadm', '--create', '/dev/md1', '--force', '--run', + '--metadata=1', '--level', '0', '--name', 'md1', + '--raid-devices', 2, '/dev/sdc1', '/dev/sdd1')]) + self.assertEqual(raid_config, result) + + self.assertEqual(4, mock_list_parts.call_count) + mock_list_parts.assert_has_calls([ + mock.call(x) for x in ['/dev/sda', '/dev/sdb', + '/dev/sdc', '/dev/sdd'] + ]) + @mock.patch.object(utils, 'execute', autospec=True) @mock.patch.object(os.path, 'isdir', autospec=True, return_value=False) def test_create_configuration_invalid_raid_config(self, diff --git a/ironic_python_agent/tests/unit/test_raid_utils.py b/ironic_python_agent/tests/unit/test_raid_utils.py index c9763dba9..c1341960f 100644 --- a/ironic_python_agent/tests/unit/test_raid_utils.py +++ b/ironic_python_agent/tests/unit/test_raid_utils.py @@ -52,7 +52,7 @@ class TestRaidUtils(base.IronicAgentTest): '/dev/sdb1', '/dev/sdc1'] - raid_utils.create_raid_device(0, logical_disk) + raid_utils.create_raid_device(0, logical_disk, {}) mock_execute.assert_called_once_with( 'mdadm', '--create', '/dev/md0', '--force', '--run', @@ -73,7 +73,7 @@ class TestRaidUtils(base.IronicAgentTest): '/dev/sdb1', '/dev/sdc1'] - raid_utils.create_raid_device(0, logical_disk) + raid_utils.create_raid_device(0, logical_disk, {}) mock_execute.assert_called_once_with( 'mdadm', '--create', '/dev/md0', '--force', '--run', @@ -92,7 +92,7 @@ class TestRaidUtils(base.IronicAgentTest): mocked_components.return_value = ['/dev/sda1', '/dev/sdc1'] - raid_utils.create_raid_device(0, logical_disk) + raid_utils.create_raid_device(0, logical_disk, {}) expected_calls = [ mock.call('mdadm', '--create', '/dev/md0', '--force', '--run', @@ -105,6 +105,29 @@ class TestRaidUtils(base.IronicAgentTest): self.assertEqual(mock_execute.call_count, 2) mock_execute.assert_has_calls(expected_calls) + @mock.patch.object(raid_utils, '_get_actual_component_devices', + autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def test_create_raid_device_with_indices(self, mock_execute, + mocked_components): + indices = {'/dev/sda': 1, '/dev/sdb': 2} + logical_disk = { + "block_devices": ['/dev/sda', '/dev/sdb', '/dev/sdc'], + "raid_level": "1", + } + mocked_components.return_value = ['/dev/sda2', + '/dev/sdb3', + '/dev/sdc1'] + + raid_utils.create_raid_device(0, logical_disk, indices) + + mock_execute.assert_called_once_with( + 'mdadm', '--create', '/dev/md0', '--force', '--run', + '--metadata=1', '--level', '1', '--name', 'md0', + '--raid-devices', 3, '/dev/sda2', '/dev/sdb3', '/dev/sdc1') + self.assertEqual( + {'/dev/sda': 2, '/dev/sdb': 3, '/dev/sdc': 1}, indices) + @mock.patch.object(utils, 'execute', autospec=True) def test_create_raid_device_fail_create_device(self, mock_execute): logical_disk = { diff --git a/releasenotes/notes/fix-software-raid-different-devices-3298f735fdbd3a05.yaml b/releasenotes/notes/fix-software-raid-different-devices-3298f735fdbd3a05.yaml new file mode 100644 index 000000000..7bced29b2 --- /dev/null +++ b/releasenotes/notes/fix-software-raid-different-devices-3298f735fdbd3a05.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + Software RAID creation now correctly handles configurations where + logical disks use different sets of physical devices. Previously, + partition indices were incorrectly shared across all devices, causing + failures when creating multiple RAID arrays on different disks. \ No newline at end of file