From 37385dd9dd3f7219aa3f36b620d4984e840c3625 Mon Sep 17 00:00:00 2001 From: Raphael Glon Date: Thu, 12 Sep 2019 19:01:32 +0200 Subject: [PATCH] Delete_configuration, consider removed raid members as well Change-Id: Ie4f62d8855d3f30a55f7032918dfed1f1d8b5b31 Story: #2006535 Task: #36591 --- ironic_python_agent/hardware.py | 66 ++++++++++- .../tests/unit/test_hardware.py | 111 +++++++++++++++--- .../softraid-zap-superblocks-anywhere.yaml | 6 + 3 files changed, 165 insertions(+), 18 deletions(-) create mode 100644 releasenotes/notes/softraid-zap-superblocks-anywhere.yaml diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py index fbba74e60..077adf45c 100644 --- a/ironic_python_agent/hardware.py +++ b/ironic_python_agent/hardware.py @@ -1629,8 +1629,28 @@ class GenericHardwareManager(HardwareManager): of ports for the node """ - raid_devices = list_all_block_devices(block_type='raid', - ignore_raid=False) + def _scan_raids(): + utils.execute('mdadm', '--assemble', '--scan', + check_exit_code=False) + raid_devices = list_all_block_devices(block_type='raid', + ignore_raid=False) + return raid_devices + + raid_devices = _scan_raids() + attempts = 0 + while attempts < 2: + attempts += 1 + self._delete_config_pass(raid_devices) + raid_devices = _scan_raids() + if not raid_devices: + break + else: + msg = "Unable to clean all softraid correctly. Remaining {}".\ + format([dev.name for dev in raid_devices]) + LOG.error(msg) + raise errors.SoftwareRAIDError(msg) + + def _delete_config_pass(self, raid_devices): for raid_device in raid_devices: component_devices = _get_component_devices(raid_device.name) if not component_devices: @@ -1694,6 +1714,48 @@ class GenericHardwareManager(HardwareManager): LOG.info('Deleted Software RAID device %s', raid_device.name) + # Remove all remaining raid traces from any drives, in case some + # drives or partitions have been member of some raid once + + # TBD: should we consider all block devices by default, but still + # provide some 'control' through the node information + # (for example target_raid_config at the time of calling this). This + # may make sense if you do not want the delete_config to touch some + # drives, like cinder volumes locally attached, for example, or any + # kind of 'non-ephemeral' drive that you do not want to consider during + # deployment (= specify which drives to consider just like create + # configuration might consider the physical_disks parameter in a near + # future) + + # Consider partitions first, before underlying disks, never hurts and + # can even avoid some failures. Example to reproduce: + # mdadm --stop /dev/md0 + # mdadm --zero-superblock /dev/block + # mdadm: Unrecognised md component device - /dev/block + # (mdadm -E /dev/block still returns 0 so won't be skipped for zeroing) + # mdadm --zero-superblock /dev/block1 + # mdadm: Couldn't open /dev/block for write - not zeroing + # mdadm -E /dev/block1: still shows superblocks + all_blks = reversed(self.list_block_devices(include_partitions=True)) + for blk in all_blks: + try: + utils.execute('mdadm', '--examine', blk.name, + use_standard_locale=True) + except processutils.ProcessExecutionError as e: + if "No md superblock detected" in str(e): + # actually not a component device + continue + else: + msg = "Failed to examine device {}: {}".format( + blk.name, e) + LOG.warning(msg) + continue + try: + utils.execute('mdadm', '--zero-superblock', blk.name) + except processutils.ProcessExecutionError as e: + LOG.warning('Failed to remove superblock from %s: %s', + raid_device.name, e) + LOG.debug("Finished deleting Software RAID(s)") def validate_configuration(self, raid_config, node): diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py index dcb890dcd..49cbf5e33 100644 --- a/ironic_python_agent/tests/unit/test_hardware.py +++ b/ironic_python_agent/tests/unit/test_hardware.py @@ -3251,62 +3251,141 @@ class TestGenericHardwareManager(base.IronicAgentTest): 107374182400, True) raid_device2 = hardware.BlockDevice('/dev/md1', 'RAID-0', 2147483648, True) + sda = hardware.BlockDevice('/dev/sda', 'model12', 21, True) + sdb = hardware.BlockDevice('/dev/sdb', 'model12', 21, True) + sdc = hardware.BlockDevice('/dev/sdc', 'model12', 21, True) + hardware.list_all_block_devices.side_effect = [ - [raid_device1, raid_device2]] + [raid_device1, raid_device2], # list_all_block_devices raid + [sda, sdb, sdc], # list_all_block_devices disks + [], # list_all_block_devices parts + [], # list_all_block_devices raid + ] mocked_get_component.side_effect = [ - ["/dev/sda1", "/dev/sda2"], - ["/dev/sdb1", "/dev/sdb2"]] + ["/dev/sda1", "/dev/sdb1"], + ["/dev/sda2", "/dev/sdb2"]] mocked_get_holder.side_effect = [ ["/dev/sda", "/dev/sdb"], ["/dev/sda", "/dev/sdb"]] mocked_execute.side_effect = [ - None, None, None, + None, # mdadm --assemble --scan + None, # wipefs md0 + None, # mdadm --stop md0 ['_', 'mdadm --examine output for sda1'], - None, + None, # mdadm zero-superblock sda1 ['_', 'mdadm --examine output for sdb1'], - None, None, None, - None, None, None, + None, # mdadm zero-superblock sdb1 + None, # wipefs sda + None, # wipefs sdb + None, # wipfs md1 + None, # mdadm --stop md1 ['_', 'mdadm --examine output for sda2'], - None, + None, # mdadm zero-superblock sda2 ['_', 'mdadm --examine output for sdb2'], - None, None, None] + None, # mdadm zero-superblock sdb2 + None, # wipefs sda + None, # wipefs sda + ['_', 'mdadm --examine output for sdc'], + None, # mdadm zero-superblock sdc + # examine sdb + processutils.ProcessExecutionError('No md superblock detected'), + # examine sda + processutils.ProcessExecutionError('No md superblock detected'), + None, # mdadm --assemble --scan + ] self.hardware.delete_configuration(self.node, []) mocked_execute.assert_has_calls([ + mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), mock.call('wipefs', '-af', '/dev/md0'), mock.call('mdadm', '--stop', '/dev/md0'), mock.call('mdadm', '--examine', '/dev/sda1', use_standard_locale=True), mock.call('mdadm', '--zero-superblock', '/dev/sda1'), - mock.call('mdadm', '--examine', '/dev/sda2', + mock.call('mdadm', '--examine', '/dev/sdb1', use_standard_locale=True), - mock.call('mdadm', '--zero-superblock', '/dev/sda2'), + mock.call('mdadm', '--zero-superblock', '/dev/sdb1'), mock.call('wipefs', '-af', '/dev/sda'), mock.call('wipefs', '-af', '/dev/sdb'), mock.call('wipefs', '-af', '/dev/md1'), mock.call('mdadm', '--stop', '/dev/md1'), - mock.call('mdadm', '--examine', '/dev/sdb1', + mock.call('mdadm', '--examine', '/dev/sda2', use_standard_locale=True), - mock.call('mdadm', '--zero-superblock', '/dev/sdb1'), + mock.call('mdadm', '--zero-superblock', '/dev/sda2'), mock.call('mdadm', '--examine', '/dev/sdb2', use_standard_locale=True), mock.call('mdadm', '--zero-superblock', '/dev/sdb2'), mock.call('wipefs', '-af', '/dev/sda'), - mock.call('wipefs', '-af', '/dev/sdb')]) + mock.call('wipefs', '-af', '/dev/sdb'), + mock.call('mdadm', '--examine', '/dev/sdc', + use_standard_locale=True), + mock.call('mdadm', '--zero-superblock', '/dev/sdc'), + mock.call('mdadm', '--examine', '/dev/sdb', + use_standard_locale=True), + mock.call('mdadm', '--examine', '/dev/sda', + use_standard_locale=True), + mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), + ]) @mock.patch.object(hardware, '_get_component_devices', autospec=True) @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) @mock.patch.object(utils, 'execute', autospec=True) def test_delete_configuration_partition(self, mocked_execute, mocked_list, mocked_get_component): + # This test checks that if no components are returned for a given + # raid device, then it must be a nested partition and so it gets + # skipped raid_device1_part1 = hardware.BlockDevice('/dev/md0p1', 'RAID-1', 1073741824, True) - hardware.list_all_block_devices.return_value = [raid_device1_part1] + hardware.list_all_block_devices.side_effect = [ + [raid_device1_part1], # list_all_block_devices raid + [], # list_all_block_devices disks + [], # list_all_block_devices parts + [], # list_all_block_devices raid + ] mocked_get_component.return_value = [] self.assertIsNone(self.hardware.delete_configuration(self.node, [])) - mocked_execute.assert_has_calls([]) + mocked_execute.assert_has_calls([ + mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), + mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), + ]) + + @mock.patch.object(hardware, '_get_component_devices', autospec=True) + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def test_delete_configuration_failure_blocks_remaining( + self, mocked_execute, mocked_list, mocked_get_component): + + # This test checks that, if after two raid clean passes there still + # remain softraid hints on drives, then the delete_configuration call + # raises an error + raid_device1 = hardware.BlockDevice('/dev/md0', 'RAID-1', + 107374182400, True) + + hardware.list_all_block_devices.side_effect = [ + [raid_device1], # list_all_block_devices raid + [], # list_all_block_devices disks + [], # list_all_block_devices parts + [raid_device1], # list_all_block_devices raid + [], # list_all_block_devices disks + [], # list_all_block_devices parts + [raid_device1], # list_all_block_devices raid + ] + mocked_get_component.return_value = [] + + self.assertRaisesRegex( + errors.SoftwareRAIDError, + r"^Software RAID caused unknown error: Unable to clean all " + r"softraid correctly. Remaining \['/dev/md0'\]$", + self.hardware.delete_configuration, self.node, []) + + mocked_execute.assert_has_calls([ + mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), + mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), + mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), + ]) @mock.patch.object(utils, 'execute', autospec=True) def test_validate_configuration_valid_raid1(self, mocked_execute): diff --git a/releasenotes/notes/softraid-zap-superblocks-anywhere.yaml b/releasenotes/notes/softraid-zap-superblocks-anywhere.yaml new file mode 100644 index 000000000..0dc1947be --- /dev/null +++ b/releasenotes/notes/softraid-zap-superblocks-anywhere.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Zap superblocks from all block devices, as an attempt to erase any + softraid hint from devices when calling delete_configuration, including + from drives that are no more members of any raid.