From a99bf274e4baec8e585bc9979e492bb8d85d17b5 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek
Date: Wed, 24 Aug 2022 09:25:16 +0000
Subject: [PATCH] SoftwareRAID: Enable skipping RAIDs

Extend the ability to skip disks to RAID devices.
This allows users to specify the volume name of a logical device in the
skip list, which is then neither cleaned nor created again during the
create/apply configuration phase.
The volume name can be specified in the target raid config provided the
change https://review.opendev.org/c/openstack/ironic-python-agent/+/853182/
passes.

Story: 2010233
Change-Id: Ib9290a97519bc48e585e1bafb0b60cc14e621e0f
---
 doc/source/admin/hardware_managers.rst        |   9 +
 ironic_python_agent/hardware.py               | 248 +++++++---
 ironic_python_agent/raid_utils.py             |  36 +-
 .../tests/unit/samples/hardware_samples.py    |  55 +++
 .../tests/unit/test_hardware.py               | 429 +++++++++++++++++-
 .../tests/unit/test_raid_utils.py             |  14 +
 ironic_python_agent/utils.py                  |  16 +
 ...nable-skipping-raids-40263cc3a19cfd27.yaml |   6 +
 8 files changed, 752 insertions(+), 61 deletions(-)
 create mode 100644 releasenotes/notes/enable-skipping-raids-40263cc3a19cfd27.yaml

diff --git a/doc/source/admin/hardware_managers.rst b/doc/source/admin/hardware_managers.rst
index 6c72ba1aa..90e6c2985 100644
--- a/doc/source/admin/hardware_managers.rst
+++ b/doc/source/admin/hardware_managers.rst
@@ -121,6 +121,15 @@ containing hints to identify the drives. For example::

     'skip_block_devices': [{'name': '/dev/vda', 'vendor': '0x1af4'}]

+To prevent software RAID devices from being deleted, add their volume name
+(as defined in the ``target_raid_config``) to the list.
+
+Note: use one dictionary with a single ``volume_name`` entry for each of
+the logical disks. For example::
+
+    'skip_block_devices': [{'volume_name': 'large'}, {'volume_name': 'temp'}]
+
+
 Shared Disk Cluster Filesystems
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py
index eda78f4a0..fe74aba18 100644
--- a/ironic_python_agent/hardware.py
+++ b/ironic_python_agent/hardware.py
@@ -863,6 +863,17 @@ class HardwareManager(object, metaclass=abc.ABCMeta):
         """
         raise errors.IncompatibleHardwareMethodError

+    def get_skip_list_from_node(self, node,
+                                block_devices=None, just_raids=False):
+        """Get the list of block devices to skip from the node
+
+        :param block_devices: a list of BlockDevices
+        :param just_raids: if True, return only the volume names of
+            software RAID devices on the skip list
+        :return: A set of names of devices on the skip list
+        """
+        raise errors.IncompatibleHardwareMethodError
+
     def list_block_devices_check_skip_list(self, node,
                                            include_partitions=False):
         """List physical block devices without the ones listed in
@@ -1391,17 +1402,22 @@ class GenericHardwareManager(HardwareManager):
         )
         return block_devices

-    def list_block_devices_check_skip_list(self, node,
-                                           include_partitions=False):
-        block_devices = self.list_block_devices(
-            include_partitions=include_partitions)
+    def get_skip_list_from_node(self, node,
+                                block_devices=None, just_raids=False):
         properties = node.get('properties', {})
         skip_list_hints = properties.get("skip_block_devices", [])
         if not skip_list_hints:
-            return block_devices
+            return None
+        if just_raids:
+            return {d['volume_name'] for d in skip_list_hints
+                    if 'volume_name' in d}
+        if not block_devices:
+            return None
         skip_list = set()
         serialized_devs = [dev.serialize() for dev in block_devices]
         for hint in skip_list_hints:
+            if 'volume_name' in hint:
+                continue
             found_devs = il_utils.find_devices_by_hints(serialized_devs, hint)
             excluded_devs = {dev['name'] for
                             dev in found_devs}
             skipped_devices = excluded_devs.difference(skip_list)
@@ -1409,8 +1425,17 @@
         if skipped_devices:
             LOG.warning("Using hint %(hint)s skipping devices: %(devs)s",
                         {'hint': hint, 'devs': ','.join(skipped_devices)})
-        block_devices = [d for d in block_devices
-                         if d.name not in skip_list]
+        return skip_list
+
+    def list_block_devices_check_skip_list(self, node,
+                                           include_partitions=False):
+        block_devices = self.list_block_devices(
+            include_partitions=include_partitions)
+        skip_list = self.get_skip_list_from_node(
+            node, block_devices)
+        if skip_list is not None:
+            block_devices = [d for d in block_devices
+                             if d.name not in skip_list]
         return block_devices

     def get_os_install_device(self, permit_refresh=False):
@@ -2341,15 +2366,41 @@ class GenericHardwareManager(HardwareManager):
         return self._do_create_configuration(node, ports, raid_config)

     def _do_create_configuration(self, node, ports, raid_config):
+        def _get_volume_names_of_existing_raids():
+            list_of_raids = []
+            raid_devices = list_all_block_devices(block_type='raid',
+                                                  ignore_raid=False,
+                                                  ignore_empty=False)
+            raid_devices.extend(
+                list_all_block_devices(block_type='md',
+                                       ignore_raid=False,
+                                       ignore_empty=False)
+            )
+            for raid_device in raid_devices:
+                device = raid_device.name
+                try:
+                    il_utils.execute('mdadm', '--examine',
+                                     device, use_standard_locale=True)
+                except processutils.ProcessExecutionError as e:
+                    if "No md superblock detected" in str(e):
+                        continue
+                volume_name = raid_utils.get_volume_name_of_raid_device(device)
+                if volume_name:
+                    list_of_raids.append(volume_name)
+                else:
+                    list_of_raids.append("unnamed_raid")
+            return list_of_raids
+
         # No 'software' controller: do nothing. If 'controller' is
         # set to 'software' on only one of the drives, the validation
         # code will catch it.
         software_raid = False
         logical_disks = raid_config.get('logical_disks')
+        software_raid_disks = []
         for logical_disk in logical_disks:
             if logical_disk.get('controller') == 'software':
                 software_raid = True
-                break
+                software_raid_disks.append(logical_disk)
         if not software_raid:
             LOG.debug("No Software RAID config found")
             return {}
@@ -2359,24 +2410,51 @@
         # Check if the config is compliant with current limitations.
         self.validate_configuration(raid_config, node)

+        # Drop any logical disk whose volume name is on the skip list
+        # and which already exists, so that it is not re-created
+        skip_list = self.get_skip_list_from_node(
+            node, just_raids=True)
+        rm_from_list = []
+        if skip_list:
+            present_raids = _get_volume_names_of_existing_raids()
+            if present_raids:
+                for ld in logical_disks:
+                    volume_name = ld.get('volume_name', None)
+                    if volume_name in skip_list \
+                            and volume_name in present_raids:
+                        rm_from_list.append(ld)
+                        LOG.debug("Software RAID device with volume name %s "
+                                  "exists and is, therefore, not going to be "
+                                  "created", volume_name)
+                        present_raids.remove(volume_name)
+            # NOTE(kubajj): Raise an error if there is an existing software
+            # RAID device that either does not have a volume name or does not
+            # match one on the skip list
+            if present_raids:
+                msg = ("Existing Software RAID device detected that should"
+                       " not be there")
+                raise errors.SoftwareRAIDError(msg)
+        logical_disks = [d for d in logical_disks if d not in rm_from_list]
+
         # Log the validated target_raid_configuration.
LOG.debug("Target Software RAID configuration: %s", raid_config) block_devices, logical_disks = raid_utils.get_block_devices_for_raid( self.list_block_devices(), logical_disks) - # Make sure there are no partitions yet (or left behind). - with_parts = [] - for dev_name in block_devices: - try: - if disk_utils.list_partitions(dev_name): - with_parts.append(dev_name) - except processutils.ProcessExecutionError: - # Presumably no partitions (or no partition table) - continue - if with_parts: - msg = ("Partitions detected on devices %s during RAID config" % - ', '.join(with_parts)) - raise errors.SoftwareRAIDError(msg) + if not rm_from_list: + # Make sure there are no partitions yet (or left behind). + with_parts = [] + for dev_name in block_devices: + try: + if disk_utils.list_partitions(dev_name): + with_parts.append(dev_name) + except processutils.ProcessExecutionError: + # Presumably no partitions (or no partition table) + continue + if with_parts: + msg = ("Partitions detected on devices %s during RAID config" % + ', '.join(with_parts)) + raise errors.SoftwareRAIDError(msg) partition_table_type = utils.get_partition_table_type_from_specs(node) target_boot_mode = utils.get_node_boot_mode(node) @@ -2484,10 +2562,12 @@ class GenericHardwareManager(HardwareManager): return raid_devices raid_devices = _scan_raids() + skip_list = self.get_skip_list_from_node( + node, just_raids=True) attempts = 0 while attempts < 2: attempts += 1 - self._delete_config_pass(raid_devices) + self._delete_config_pass(raid_devices, skip_list) raid_devices = _scan_raids() if not raid_devices: break @@ -2497,9 +2577,22 @@ class GenericHardwareManager(HardwareManager): LOG.error(msg) raise errors.SoftwareRAIDError(msg) - def _delete_config_pass(self, raid_devices): + def _delete_config_pass(self, raid_devices, skip_list): all_holder_disks = [] + do_not_delete_devices = set() + delete_partitions = {} for raid_device in raid_devices: + do_not_delete = False + volume_name = raid_utils.get_volume_name_of_raid_device( + raid_device.name) + if volume_name: + LOG.info("Software RAID device %(dev)s has volume name" + "%(name)s", {'dev': raid_device.name, + 'name': volume_name}) + if skip_list and volume_name in skip_list: + LOG.warning("RAID device %s will not be deleted", + raid_device.name) + do_not_delete = True component_devices = get_component_devices(raid_device.name) if not component_devices: # A "Software RAID device" without components is usually @@ -2511,52 +2604,73 @@ class GenericHardwareManager(HardwareManager): continue holder_disks = get_holder_disks(raid_device.name) - LOG.info("Deleting Software RAID device %s", raid_device.name) + if do_not_delete: + LOG.warning("Software RAID device %(dev)s is not going to be " + "deleted as its volume name - %(vn)s - is on the " + "skip list", {'dev': raid_device.name, + 'vn': volume_name}) + else: + LOG.info("Deleting Software RAID device %s", raid_device.name) LOG.debug('Found component devices %s', component_devices) LOG.debug('Found holder disks %s', holder_disks) - # Remove md devices. - try: - il_utils.execute('wipefs', '-af', raid_device.name) - except processutils.ProcessExecutionError as e: - LOG.warning('Failed to wipefs %(device)s: %(err)s', - {'device': raid_device.name, 'err': e}) - try: - il_utils.execute('mdadm', '--stop', raid_device.name) - except processutils.ProcessExecutionError as e: - LOG.warning('Failed to stop %(device)s: %(err)s', - {'device': raid_device.name, 'err': e}) - - # Remove md metadata from component devices. 
- for component_device in component_devices: + if not do_not_delete: + # Remove md devices. try: - il_utils.execute('mdadm', '--examine', component_device, - use_standard_locale=True) + il_utils.execute('wipefs', '-af', raid_device.name) except processutils.ProcessExecutionError as e: - if "No md superblock detected" in str(e): - # actually not a component device - continue - else: - msg = "Failed to examine device {}: {}".format( - component_device, e) - raise errors.SoftwareRAIDError(msg) - - LOG.debug('Deleting md superblock on %s', component_device) - try: - il_utils.execute('mdadm', '--zero-superblock', - component_device) - except processutils.ProcessExecutionError as e: - LOG.warning('Failed to remove superblock from' - '%(device)s: %(err)s', + LOG.warning('Failed to wipefs %(device)s: %(err)s', {'device': raid_device.name, 'err': e}) + try: + il_utils.execute('mdadm', '--stop', raid_device.name) + except processutils.ProcessExecutionError as e: + LOG.warning('Failed to stop %(device)s: %(err)s', + {'device': raid_device.name, 'err': e}) + + # Remove md metadata from component devices. + for component_device in component_devices: + try: + il_utils.execute('mdadm', '--examine', + component_device, + use_standard_locale=True) + except processutils.ProcessExecutionError as e: + if "No md superblock detected" in str(e): + # actually not a component device + continue + else: + msg = "Failed to examine device {}: {}".format( + component_device, e) + raise errors.SoftwareRAIDError(msg) + + LOG.debug('Deleting md superblock on %s', component_device) + try: + il_utils.execute('mdadm', '--zero-superblock', + component_device) + except processutils.ProcessExecutionError as e: + LOG.warning('Failed to remove superblock from' + '%(device)s: %(err)s', + {'device': raid_device.name, 'err': e}) + if skip_list: + dev, part = utils.split_device_and_partition_number( + component_device) + if dev in delete_partitions: + delete_partitions[dev].append(part) + else: + delete_partitions[dev] = [part] + else: + for component_device in component_devices: + do_not_delete_devices.add(component_device) # NOTE(arne_wiebalck): We cannot delete the partitions right # away since there may be other partitions on the same disks # which are members of other RAID devices. So we remember them # for later. 
all_holder_disks.extend(holder_disks) - - LOG.info('Deleted Software RAID device %s', raid_device.name) + if do_not_delete: + LOG.warning("Software RAID device %s was not deleted", + raid_device.name) + else: + LOG.info('Deleted Software RAID device %s', raid_device.name) # Remove all remaining raid traces from any drives, in case some # drives or partitions have been member of some raid once @@ -2581,7 +2695,13 @@ class GenericHardwareManager(HardwareManager): # mdadm: Couldn't open /dev/block for write - not zeroing # mdadm -E /dev/block1: still shows superblocks all_blks = reversed(self.list_block_devices(include_partitions=True)) + do_not_delete_disks = set() for blk in all_blks: + if blk.name in do_not_delete_devices: + do_not_delete_disks.add(utils.extract_device(blk.name)) + continue + if blk.name in do_not_delete_disks: + continue try: il_utils.execute('mdadm', '--examine', blk.name, use_standard_locale=True) @@ -2604,6 +2724,20 @@ class GenericHardwareManager(HardwareManager): all_holder_disks_uniq = list( collections.OrderedDict.fromkeys(all_holder_disks)) for holder_disk in all_holder_disks_uniq: + if holder_disk in do_not_delete_disks: + # Remove just partitions not listed in keep_partitions + del_list = delete_partitions[holder_disk] + if del_list: + LOG.warning('Holder disk %(dev)s contains logical disk ' + 'on the skip list. Deleting just partitions: ' + '%(parts)s', {'dev': holder_disk, + 'parts': del_list}) + for part in del_list: + il_utils.execute('parted', holder_disk, 'rm', part) + else: + LOG.warning('Holder disk %(dev)s contains only logical ' + 'disk(s) on the skip list', holder_disk) + continue LOG.info('Removing partitions on holder disk %s', holder_disk) try: il_utils.execute('wipefs', '-af', holder_disk) diff --git a/ironic_python_agent/raid_utils.py b/ironic_python_agent/raid_utils.py index 3d5f260fe..84c6941fd 100644 --- a/ironic_python_agent/raid_utils.py +++ b/ironic_python_agent/raid_utils.py @@ -267,10 +267,44 @@ def get_next_free_raid_device(): name = f'/dev/md{idx}' if name not in names: return name - raise errors.SoftwareRAIDError("No free md (RAID) devices are left") +def get_volume_name_of_raid_device(raid_device): + """Get the volume name of a RAID device + + :param raid_device: A Software RAID block device name. 
+ :returns: volume name of the device, or None + """ + if not raid_device: + return None + try: + out, _ = utils.execute('mdadm', '--detail', raid_device, + use_standard_locale=True) + except processutils.ProcessExecutionError as e: + LOG.warning('Could not retrieve the volume name of %(dev)s: %(err)s', + {'dev': raid_device, 'err': e}) + return None + lines = out.splitlines() + for line in lines: + if re.search(r'Name', line) is not None: + split_array = line.split(':') + # expecting format: + # Name : :name (optional comment) + if len(split_array) == 3: + candidate = split_array[2] + else: + return None + # if name is followed by some other text + # such as (local to host ) remove + # everything after " " + if " " in candidate: + candidate = candidate.split(" ")[0] + volume_name = candidate + return volume_name + return None + + # TODO(rg): handle PreP boot parts relocation as well def prepare_boot_partitions_for_softraid(device, holders, efi_part, target_boot_mode): diff --git a/ironic_python_agent/tests/unit/samples/hardware_samples.py b/ironic_python_agent/tests/unit/samples/hardware_samples.py index f9635e432..c9e597a94 100644 --- a/ironic_python_agent/tests/unit/samples/hardware_samples.py +++ b/ironic_python_agent/tests/unit/samples/hardware_samples.py @@ -1031,6 +1031,61 @@ Working Devices : 2 1 259 3 1 active sync /dev/nvme1n1p1 """) +MDADM_DETAIL_OUTPUT_VOLUME_NAME = ("""/dev/md0: + Version : 1.0 + Creation Time : Fri Feb 15 12:37:44 2019 + Raid Level : raid1 + Array Size : 1048512 (1023.94 MiB 1073.68 MB) + Used Dev Size : 1048512 (1023.94 MiB 1073.68 MB) + Raid Devices : 2 + Total Devices : 2 + Persistence : Superblock is persistent + + Update Time : Fri Feb 15 12:38:02 2019 + State : clean + Active Devices : 2 + Working Devices : 2 + Failed Devices : 0 + Spare Devices : 0 + +Consistency Policy : resync + + Name : abc.xyz.com:this_name (local to host abc.xyz.com) + UUID : 83143055:2781ddf5:2c8f44c7:9b45d92e + Events : 17 + + Number Major Minor RaidDevice State + 0 253 64 0 active sync /dev/vde1 + 1 253 80 1 active sync /dev/vdf1 +""") + +MDADM_DETAIL_OUTPUT_VOLUME_NAME_INVALID = ("""/dev/md0: + Version : 1.0 + Creation Time : Fri Feb 15 12:37:44 2019 + Raid Level : raid1 + Array Size : 1048512 (1023.94 MiB 1073.68 MB) + Used Dev Size : 1048512 (1023.94 MiB 1073.68 MB) + Raid Devices : 2 + Total Devices : 2 + Persistence : Superblock is persistent + + Update Time : Fri Feb 15 12:38:02 2019 + State : clean + Active Devices : 2 + Working Devices : 2 + Failed Devices : 0 + Spare Devices : 0 + +Consistency Policy : resync + + UUID : 83143055:2781ddf5:2c8f44c7:9b45d92e + Events : 17 + + Number Major Minor RaidDevice State + 0 253 64 0 active sync /dev/vde1 + 1 253 80 1 active sync /dev/vdf1 +""") + MDADM_DETAIL_OUTPUT_BROKEN_RAID0 = ("""/dev/md126: Version : 1.2 Raid Level : raid0 diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py index 0d6d28f79..cc39d8339 100644 --- a/ironic_python_agent/tests/unit/test_hardware.py +++ b/ironic_python_agent/tests/unit/test_hardware.py @@ -1542,6 +1542,54 @@ class TestGenericHardwareManager(base.IronicAgentTest): ignore_raid=True)], list_mock.call_args_list) + def test_get_skip_list_from_node_block_devices_with_skip_list(self): + block_devices = [ + hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True), + hardware.BlockDevice('/dev/hdaa', 'small', 65535, False), + ] + expected_skip_list = {'/dev/sdj'} + node = self.node + + node['properties'] = { + 'skip_block_devices': [{ + 'name': 
'/dev/sdj' + }] + } + + skip_list = self.hardware.get_skip_list_from_node(node, + block_devices) + + self.assertEqual(expected_skip_list, skip_list) + + def test_get_skip_list_from_node_block_devices_just_raids(self): + expected_skip_list = {'large'} + node = self.node + + node['properties'] = { + 'skip_block_devices': [{ + 'name': '/dev/sdj' + }, { + 'volume_name': 'large' + }] + } + + skip_list = self.hardware.get_skip_list_from_node(node, + just_raids=True) + + self.assertEqual(expected_skip_list, skip_list) + + def test_get_skip_list_from_node_block_devices_no_skip_list(self): + block_devices = [ + hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True), + hardware.BlockDevice('/dev/hdaa', 'small', 65535, False), + ] + node = self.node + + skip_list = self.hardware.get_skip_list_from_node(node, + block_devices) + + self.assertIsNone(skip_list) + @mock.patch.object(hardware.GenericHardwareManager, 'list_block_devices', autospec=True) def test_list_block_devices_check_skip_list_with_skip_list(self, @@ -4369,6 +4417,294 @@ class TestGenericHardwareManager(base.IronicAgentTest): self.hardware.create_configuration, self.node, []) + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(raid_utils, '_get_actual_component_devices', + autospec=True) + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + @mock.patch.object(disk_utils, 'list_partitions', autospec=True) + @mock.patch.object(il_utils, 'execute', autospec=True) + def test_create_configuration_with_skip_list( + self, mocked_execute, mock_list_parts, mocked_list_all_devices, + mocked_actual_comp, mocked_get_volume_name): + node = self.node + + raid_config = { + "logical_disks": [ + { + "size_gb": "10", + "raid_level": "1", + "controller": "software", + "volume_name": "small" + }, + { + "size_gb": "MAX", + "raid_level": "0", + "controller": "software", + "volume_name": "large" + }, + ] + } + node['target_raid_config'] = raid_config + node['properties'] = {'skip_block_devices': [{'volume_name': 'large'}]} + device1 = hardware.BlockDevice('/dev/sda', 'sda', 107374182400, True) + device2 = hardware.BlockDevice('/dev/sdb', 'sdb', 107374182400, True) + raid_device1 = hardware.BlockDevice('/dev/md0', 'RAID-1', + 107374182400, True) + self.hardware.list_block_devices = mock.Mock() + self.hardware.list_block_devices.return_value = [device1, device2] + hardware.list_all_block_devices.side_effect = [ + [raid_device1], # block_type raid + [] # block type md + ] + mocked_get_volume_name.return_value = "large" + + mocked_execute.side_effect = [ + None, # examine md0 + None, # mklabel sda + ('42', None), # sgdisk -F sda + None, # mklabel sda + ('42', None), # sgdisk -F sdb + None, None, None, # parted + partx + udevadm_settle sda + None, None, None, # parted + partx + udevadm_settle sdb + None, None, None, # parted + partx + udevadm_settle sda + None, None, None, # parted + partx + udevadm_settle sdb + None, None # mdadms + ] + + mocked_actual_comp.side_effect = [ + ('/dev/sda1', '/dev/sdb1'), + ('/dev/sda2', '/dev/sdb2'), + ] + + result = self.hardware.create_configuration(node, []) + mocked_execute.assert_has_calls([ + mock.call('mdadm', '--examine', '/dev/md0', + use_standard_locale=True), + mock.call('parted', '/dev/sda', '-s', '--', 'mklabel', 'msdos'), + mock.call('sgdisk', '-F', '/dev/sda'), + mock.call('parted', '/dev/sdb', '-s', '--', 'mklabel', 'msdos'), + mock.call('sgdisk', '-F', '/dev/sdb'), + mock.call('parted', '/dev/sda', '-s', '-a', 'optimal', '--', + 'mkpart', 
'primary', '42s', '10GiB'), + mock.call('partx', '-av', '/dev/sda', attempts=3, + delay_on_retry=True), + mock.call('udevadm', 'settle'), + mock.call('parted', '/dev/sdb', '-s', '-a', 'optimal', '--', + 'mkpart', 'primary', '42s', '10GiB'), + mock.call('partx', '-av', '/dev/sdb', attempts=3, + delay_on_retry=True), + mock.call('udevadm', 'settle'), + mock.call('mdadm', '--create', '/dev/md0', '--force', '--run', + '--metadata=1', '--level', '1', '--name', 'small', + '--raid-devices', 2, '/dev/sda1', '/dev/sdb1')]) + self.assertEqual(raid_config, result) + + self.assertEqual(0, mock_list_parts.call_count) + + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + @mock.patch.object(il_utils, 'execute', autospec=True) + def test_create_configuration_skip_list_existing_device_does_not_match( + self, mocked_execute, mocked_list_all_devices, + mocked_get_volume_name): + node = self.node + + raid_config = { + "logical_disks": [ + { + "size_gb": "10", + "raid_level": "1", + "controller": "software", + "volume_name": "small" + }, + { + "size_gb": "MAX", + "raid_level": "0", + "controller": "software", + "volume_name": "large" + }, + ] + } + node['target_raid_config'] = raid_config + node['properties'] = {'skip_block_devices': [{'volume_name': 'large'}]} + device1 = hardware.BlockDevice('/dev/sda', 'sda', 107374182400, True) + device2 = hardware.BlockDevice('/dev/sdb', 'sdb', 107374182400, True) + raid_device1 = hardware.BlockDevice('/dev/md0', 'RAID-1', + 107374182400, True) + self.hardware.list_block_devices = mock.Mock() + self.hardware.list_block_devices.return_value = [device1, device2] + hardware.list_all_block_devices.side_effect = [ + [raid_device1], # block_type raid + [] # block type md + ] + mocked_get_volume_name.return_value = "small" + + error_regex = "Existing Software RAID device detected that should not" + mocked_execute.side_effect = [ + processutils.ProcessExecutionError] + self.assertRaisesRegex(errors.SoftwareRAIDError, error_regex, + self.hardware.create_configuration, + self.node, []) + + mocked_execute.assert_called_once_with( + 'mdadm', '--examine', '/dev/md0', use_standard_locale=True) + + @mock.patch.object(raid_utils, '_get_actual_component_devices', + autospec=True) + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + @mock.patch.object(disk_utils, 'list_partitions', autospec=True) + @mock.patch.object(il_utils, 'execute', autospec=True) + def test_create_configuration_with_skip_list_no_existing_device( + self, mocked_execute, mock_list_parts, + mocked_list_all_devices, mocked_actual_comp): + node = self.node + + raid_config = { + "logical_disks": [ + { + "size_gb": "10", + "raid_level": "1", + "controller": "software", + "volume_name": "small" + }, + { + "size_gb": "MAX", + "raid_level": "0", + "controller": "software", + "volume_name": "large" + }, + ] + } + node['target_raid_config'] = raid_config + node['properties'] = {'skip_block_devices': [{'volume_name': 'large'}]} + device1 = hardware.BlockDevice('/dev/sda', 'sda', 107374182400, True) + device2 = hardware.BlockDevice('/dev/sdb', 'sdb', 107374182400, True) + self.hardware.list_block_devices = mock.Mock() + self.hardware.list_block_devices.return_value = [device1, device2] + mock_list_parts.side_effect = [ + [], + processutils.ProcessExecutionError + ] + hardware.list_all_block_devices.side_effect = [ + [], # block_type raid + [] # block type md + ] + + mocked_execute.side_effect = [ + None, # 
mklabel sda + ('42', None), # sgdisk -F sda + None, # mklabel sda + ('42', None), # sgdisk -F sdb + None, None, None, # parted + partx + udevadm_settle sda + None, None, None, # parted + partx + udevadm_settle sdb + None, None, None, # parted + partx + udevadm_settle sda + None, None, None, # parted + partx + udevadm_settle sdb + None, None # mdadms + ] + + mocked_actual_comp.side_effect = [ + ('/dev/sda1', '/dev/sdb1'), + ('/dev/sda2', '/dev/sdb2'), + ] + + result = self.hardware.create_configuration(node, []) + mocked_execute.assert_has_calls([ + mock.call('parted', '/dev/sda', '-s', '--', 'mklabel', 'msdos'), + mock.call('sgdisk', '-F', '/dev/sda'), + mock.call('parted', '/dev/sdb', '-s', '--', 'mklabel', 'msdos'), + mock.call('sgdisk', '-F', '/dev/sdb'), + mock.call('parted', '/dev/sda', '-s', '-a', 'optimal', '--', + 'mkpart', 'primary', '42s', '10GiB'), + mock.call('partx', '-av', '/dev/sda', attempts=3, + delay_on_retry=True), + mock.call('udevadm', 'settle'), + mock.call('parted', '/dev/sdb', '-s', '-a', 'optimal', '--', + 'mkpart', 'primary', '42s', '10GiB'), + mock.call('partx', '-av', '/dev/sdb', attempts=3, + delay_on_retry=True), + mock.call('udevadm', 'settle'), + mock.call('parted', '/dev/sda', '-s', '-a', 'optimal', '--', + 'mkpart', 'primary', '10GiB', '-1'), + mock.call('partx', '-av', '/dev/sda', attempts=3, + delay_on_retry=True), + mock.call('udevadm', 'settle'), + mock.call('parted', '/dev/sdb', '-s', '-a', 'optimal', '--', + 'mkpart', 'primary', '10GiB', '-1'), + mock.call('partx', '-av', '/dev/sdb', attempts=3, + delay_on_retry=True), + mock.call('udevadm', 'settle'), + mock.call('mdadm', '--create', '/dev/md0', '--force', '--run', + '--metadata=1', '--level', '1', '--name', 'small', + '--raid-devices', 2, '/dev/sda1', '/dev/sdb1'), + mock.call('mdadm', '--create', '/dev/md1', '--force', '--run', + '--metadata=1', '--level', '0', '--name', 'large', + '--raid-devices', 2, '/dev/sda2', '/dev/sdb2')]) + + self.assertEqual(raid_config, result) + + self.assertEqual(2, mock_list_parts.call_count) + mock_list_parts.assert_has_calls([ + mock.call(x) for x in ['/dev/sda', '/dev/sdb'] + ]) + + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(raid_utils, '_get_actual_component_devices', + autospec=True) + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + @mock.patch.object(il_utils, 'execute', autospec=True) + def test_create_configuration_with_complete_skip_list( + self, mocked_execute, mocked_ls_all_devs, + mocked_actual_comp, mocked_get_volume_name): + node = self.node + + raid_config = { + "logical_disks": [ + { + "size_gb": "10", + "raid_level": "1", + "controller": "software", + "volume_name": "small" + }, + { + "size_gb": "MAX", + "raid_level": "0", + "controller": "software", + "volume_name": "large" + }, + ] + } + node['target_raid_config'] = raid_config + node['properties'] = {'skip_block_devices': [{'volume_name': 'small'}, + {'volume_name': 'large'}]} + raid_device0 = hardware.BlockDevice('/dev/md0', 'RAID-1', + 2147483648, True) + raid_device1 = hardware.BlockDevice('/dev/md1', 'RAID-0', + 107374182400, True) + device1 = hardware.BlockDevice('/dev/sda', 'sda', 107374182400, True) + device2 = hardware.BlockDevice('/dev/sdb', 'sdb', 107374182400, True) + hardware.list_all_block_devices.side_effect = [ + [raid_device0, raid_device1], # block_type raid + [] # block type md + ] + self.hardware.list_block_devices = mock.Mock() + self.hardware.list_block_devices.return_value = [device1, device2] 
+ mocked_get_volume_name.side_effect = [ + "small", + "large", + ] + + self.hardware.create_configuration(node, []) + mocked_execute.assert_has_calls([ + mock.call('mdadm', '--examine', '/dev/md0', + use_standard_locale=True), + mock.call('mdadm', '--examine', '/dev/md1', + use_standard_locale=True), + ]) + self.assertEqual(2, mocked_execute.call_count) + @mock.patch.object(il_utils, 'execute', autospec=True) def test__get_md_uuid(self, mocked_execute): mocked_execute.side_effect = [(hws.MDADM_DETAIL_OUTPUT, '')] @@ -4462,12 +4798,15 @@ class TestGenericHardwareManager(base.IronicAgentTest): holder_disks = hardware.get_holder_disks('/dev/md0') self.assertEqual(['/dev/vda', '/dev/vdb'], holder_disks) + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) @mock.patch.object(hardware, 'get_holder_disks', autospec=True) @mock.patch.object(hardware, 'get_component_devices', autospec=True) @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) @mock.patch.object(il_utils, 'execute', autospec=True) def test_delete_configuration(self, mocked_execute, mocked_list, - mocked_get_component, mocked_get_holder): + mocked_get_component, mocked_get_holder, + mocked_get_volume_name): raid_device1 = hardware.BlockDevice('/dev/md0', 'RAID-1', 107374182400, True) raid_device2 = hardware.BlockDevice('/dev/md1', 'RAID-0', @@ -4490,6 +4829,9 @@ class TestGenericHardwareManager(base.IronicAgentTest): mocked_get_holder.side_effect = [ ["/dev/sda", "/dev/sdb"], ["/dev/sda", "/dev/sdb"]] + mocked_get_volume_name.side_effect = [ + "/dev/md0", "/dev/md1" + ] mocked_execute.side_effect = [ None, # mdadm --assemble --scan None, # wipefs md0 @@ -4551,11 +4893,14 @@ class TestGenericHardwareManager(base.IronicAgentTest): mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), ]) + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) @mock.patch.object(hardware, 'get_component_devices', autospec=True) @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) @mock.patch.object(il_utils, 'execute', autospec=True) def test_delete_configuration_partition(self, mocked_execute, mocked_list, - mocked_get_component): + mocked_get_component, + mocked_get_volume_name): # This test checks that if no components are returned for a given # raid device, then it must be a nested partition and so it gets # skipped @@ -4569,6 +4914,7 @@ class TestGenericHardwareManager(base.IronicAgentTest): [], # list_all_block_devices raid [], # list_all_block_devices raid (md) ] + mocked_get_volume_name.return_value = None mocked_get_component.return_value = [] self.assertIsNone(self.hardware.delete_configuration(self.node, [])) mocked_execute.assert_has_calls([ @@ -4576,11 +4922,14 @@ class TestGenericHardwareManager(base.IronicAgentTest): mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), ]) + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) @mock.patch.object(hardware, 'get_component_devices', autospec=True) @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) @mock.patch.object(il_utils, 'execute', autospec=True) def test_delete_configuration_failure_blocks_remaining( - self, mocked_execute, mocked_list, mocked_get_component): + self, mocked_execute, mocked_list, mocked_get_component, + mocked_get_volume_name): # This test checks that, if after two raid clean passes there still # remain softraid hints on drives, then the delete_configuration call @@ -4601,6 +4950,7 @@ class 
TestGenericHardwareManager(base.IronicAgentTest): [], # list_all_block_devices raid (type md) ] mocked_get_component.return_value = [] + mocked_get_volume_name.return_value = "/dev/md0" self.assertRaisesRegex( errors.SoftwareRAIDError, @@ -4614,6 +4964,79 @@ class TestGenericHardwareManager(base.IronicAgentTest): mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), ]) + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(hardware.GenericHardwareManager, + 'get_skip_list_from_node', autospec=True) + @mock.patch.object(hardware, 'get_holder_disks', autospec=True) + @mock.patch.object(hardware, 'get_component_devices', autospec=True) + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + @mock.patch.object(il_utils, 'execute', autospec=True) + def test_delete_configuration_skip_list(self, mocked_execute, mocked_list, + mocked_get_component, + mocked_get_holder, + mocked_get_skip_list, + mocked_get_volume_name): + raid_device1 = hardware.BlockDevice('/dev/md0', 'RAID-1', + 107374182400, True) + raid_device2 = hardware.BlockDevice('/dev/md1', 'RAID-0', + 2147483648, True) + sda = hardware.BlockDevice('/dev/sda', 'model12', 21, True) + sdb = hardware.BlockDevice('/dev/sdb', 'model12', 21, True) + sdc = hardware.BlockDevice('/dev/sdc', 'model12', 21, True) + + partitions = [ + hardware.BlockDevice('/dev/sdb1', 'raid-member', 32767, False), + hardware.BlockDevice('/dev/sdb2', 'raid-member', 32767, False), + hardware.BlockDevice('/dev/sda1', 'raid_member', 32767, False), + hardware.BlockDevice('/dev/sda2', 'raid-member', 32767, False), + ] + + hardware.list_all_block_devices.side_effect = [ + [raid_device1, raid_device2], # list_all_block_devices raid + [], # list_all_block_devices raid (md) + [sda, sdb, sdc], # list_all_block_devices disks + partitions, # list_all_block_devices parts + [], # list_all_block_devices raid + [], # list_all_block_devices raid (md) + ] + mocked_get_component.side_effect = [ + ["/dev/sda1", "/dev/sdb1"], + ["/dev/sda2", "/dev/sdb2"]] + mocked_get_holder.side_effect = [ + ["/dev/sda", "/dev/sdb"], + ["/dev/sda", "/dev/sdb"]] + mocked_get_volume_name.side_effect = [ + "/dev/md0", "small" + ] + mocked_get_skip_list.return_value = ["small"] + + self.hardware.delete_configuration(self.node, []) + + mocked_execute.assert_has_calls([ + mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), + mock.call('wipefs', '-af', '/dev/md0'), + mock.call('mdadm', '--stop', '/dev/md0'), + mock.call('mdadm', '--examine', '/dev/sda1', + use_standard_locale=True), + mock.call('mdadm', '--zero-superblock', '/dev/sda1'), + mock.call('mdadm', '--examine', '/dev/sdb1', + use_standard_locale=True), + mock.call('mdadm', '--zero-superblock', '/dev/sdb1'), + mock.call('mdadm', '--examine', '/dev/sda1', + use_standard_locale=True), + mock.call('mdadm', '--zero-superblock', '/dev/sda1'), + mock.call('mdadm', '--examine', '/dev/sdb1', + use_standard_locale=True), + mock.call('mdadm', '--zero-superblock', '/dev/sdb1'), + mock.call('mdadm', '--examine', '/dev/sdc', + use_standard_locale=True), + mock.call('mdadm', '--zero-superblock', '/dev/sdc'), + mock.call('parted', '/dev/sda', 'rm', '1'), + mock.call('parted', '/dev/sdb', 'rm', '1'), + mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), + ]) + @mock.patch.object(il_utils, 'execute', autospec=True) def test_validate_configuration_valid_raid1(self, mocked_execute): raid_config = { diff --git a/ironic_python_agent/tests/unit/test_raid_utils.py 
b/ironic_python_agent/tests/unit/test_raid_utils.py
index 6624304bf..a82027c8a 100644
--- a/ironic_python_agent/tests/unit/test_raid_utils.py
+++ b/ironic_python_agent/tests/unit/test_raid_utils.py
@@ -139,6 +139,20 @@ class TestRaidUtils(base.IronicAgentTest):
                           raid_utils.create_raid_device, 0,
                           logical_disk)

+    @mock.patch.object(utils, 'execute', autospec=True)
+    def test_get_volume_name_of_raid_device(self, mock_execute):
+        mock_execute.side_effect = [(hws.MDADM_DETAIL_OUTPUT_VOLUME_NAME, '')]
+        volume_name = raid_utils.get_volume_name_of_raid_device('/dev/md0')
+        self.assertEqual("this_name", volume_name)
+
+    @mock.patch.object(utils, 'execute', autospec=True)
+    def test_get_volume_name_of_raid_device_invalid(self, mock_execute):
+        mock_execute.side_effect = [(
+            hws.MDADM_DETAIL_OUTPUT_VOLUME_NAME_INVALID, ''
+        )]
+        volume_name = raid_utils.get_volume_name_of_raid_device('/dev/md0')
+        self.assertIsNone(volume_name)
+
     @mock.patch.object(disk_utils, 'trigger_device_rescan', autospec=True)
     @mock.patch.object(raid_utils, 'get_next_free_raid_device', autospec=True,
                        return_value='/dev/md42')
diff --git a/ironic_python_agent/utils.py b/ironic_python_agent/utils.py
index 09d726cf1..0da737a81 100644
--- a/ironic_python_agent/utils.py
+++ b/ironic_python_agent/utils.py
@@ -651,6 +651,22 @@ def extract_device(part):
     return (m.group(1) or m.group(2))


+def split_device_and_partition_number(part):
+    """Split a partition name or path into device and partition number.
+
+    :param part: the partition name or path
+    :return: a (device, partition number) tuple on success, None otherwise
+    """
+
+    device = extract_device(part)
+    if not device:
+        return None
+    partition_number = part.replace(device, '')
+    if 'nvme' in device and partition_number[0] == 'p':
+        partition_number = partition_number[1:]
+    return (device, partition_number)
+
+
 # See ironic.drivers.utils.get_node_capability
 def _parse_capabilities_str(cap_str):
     """Extract capabilities from string.
diff --git a/releasenotes/notes/enable-skipping-raids-40263cc3a19cfd27.yaml b/releasenotes/notes/enable-skipping-raids-40263cc3a19cfd27.yaml
new file mode 100644
index 000000000..999437cbd
--- /dev/null
+++ b/releasenotes/notes/enable-skipping-raids-40263cc3a19cfd27.yaml
@@ -0,0 +1,6 @@
+---
+features:
+  - The node property ``skip_block_devices`` now supports
+    specifying the volume names of software RAID devices.
+    These devices are not cleaned during cleaning, and
+    they are not re-created if they already exist.
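
The examples below are reviewer sketches, not part of the patch. First, how
the two hint flavours in ``skip_block_devices`` flow through the new
``get_skip_list_from_node``; the node dict is hypothetical and only mirrors
the shape of the unit-test fixtures::

    from ironic_python_agent import hardware

    # Hypothetical node, shaped like the fixtures in test_hardware.py.
    node = {
        'properties': {
            'skip_block_devices': [
                {'name': '/dev/vda', 'vendor': '0x1af4'},  # device hint
                {'volume_name': 'large'},                  # software RAID hint
            ]
        }
    }

    manager = hardware.GenericHardwareManager()

    # just_raids=True returns only the volume names: {'large'}
    manager.get_skip_list_from_node(node, just_raids=True)

    # With block devices, volume_name hints are ignored and only device
    # hints contribute; returns a set of /dev names, or None when the
    # node carries no hints at all.
    devices = manager.list_block_devices()
    manager.get_skip_list_from_node(node, block_devices=devices)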
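
At create time, a logical disk is dropped only when its volume name is both
on the skip list and already assembled. A condensed sketch of that filter
under those assumptions (the real logic lives in ``_do_create_configuration``,
runs only when the skip list is non-empty, and raises ``SoftwareRAIDError``
rather than ``RuntimeError``)::

    def filter_logical_disks(logical_disks, skip_list, present_raids):
        """Keep only the logical disks that still need to be created."""
        keep = []
        for ld in logical_disks:
            name = ld.get('volume_name')
            if name in skip_list and name in present_raids:
                # Already assembled and the operator asked to keep it.
                present_raids.remove(name)
                continue
            keep.append(ld)
        if present_raids:
            # Leftovers are unnamed arrays or arrays not on the skip
            # list: refuse to proceed.
            raise RuntimeError('Existing Software RAID device detected '
                               'that should not be there')
        return keep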
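
The ``Name :`` line parsed by ``get_volume_name_of_raid_device`` follows the
``<homehost>:<volume name> (optional comment)`` layout seen in
``MDADM_DETAIL_OUTPUT_VOLUME_NAME``; a standalone sketch of the same split,
handy for checking the parser against canned output::

    line = "    Name : abc.xyz.com:this_name (local to host abc.xyz.com)"

    # Splitting on ':' yields exactly three fields when a name is set:
    # ['    Name ', ' abc.xyz.com', 'this_name (local to host ...)'].
    fields = line.split(':')
    volume_name = None
    if len(fields) == 3:
        candidate = fields[2]
        # Drop the "(local to host ...)" style comment, if any.
        if " " in candidate:
            candidate = candidate.split(" ")[0]
        volume_name = candidate
    print(volume_name)  # -> this_name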
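
Finally, a few illustrative calls to the new
``utils.split_device_and_partition_number`` helper; the expected values are
inferred from the implementation and ``extract_device``, not taken from the
test suite, so treat them as assumptions::

    from ironic_python_agent import utils

    utils.split_device_and_partition_number('/dev/sda1')
    # -> ('/dev/sda', '1')

    utils.split_device_and_partition_number('/dev/nvme0n1p2')
    # -> ('/dev/nvme0n1', '2'); the NVMe 'p' separator is stripped

    utils.split_device_and_partition_number('/dev/sda')
    # -> None; no trailing partition number, so no device is extracted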