delete_configuration: also consider removed RAID members

Change-Id: Ie4f62d8855d3f30a55f7032918dfed1f1d8b5b31
Story: #2006535
Task: #36591
This commit is contained in:
Raphael Glon 2019-09-12 19:01:32 +02:00
parent b9a2ac8914
commit 37385dd9dd
3 changed files with 165 additions and 18 deletions

View File

@ -1629,8 +1629,28 @@ class GenericHardwareManager(HardwareManager):
of ports for the node
"""
raid_devices = list_all_block_devices(block_type='raid',
ignore_raid=False)
def _scan_raids():
    """Re-assemble any detectable md arrays and list RAID block devices.

    :returns: the block devices of type 'raid' visible after the scan.
    """
    # Best effort: a non-zero exit just means nothing (more) could
    # be assembled, which is fine here.
    utils.execute('mdadm', '--assemble', '--scan',
                  check_exit_code=False)
    return list_all_block_devices(block_type='raid',
                                  ignore_raid=False)
# Run up to two delete passes: removing one array can expose
# nested/underlying arrays that only show up after a fresh
# assemble + scan.
raid_devices = _scan_raids()
attempts = 0
while attempts < 2:
    attempts += 1
    self._delete_config_pass(raid_devices)
    # Re-scan: any array still assembling means leftover members.
    raid_devices = _scan_raids()
    if not raid_devices:
        break
else:
    # while/else: the loop ran out without hitting `break`, i.e.
    # RAID devices were still present after both passes.
    msg = "Unable to clean all softraid correctly. Remaining {}".\
        format([dev.name for dev in raid_devices])
    LOG.error(msg)
    raise errors.SoftwareRAIDError(msg)
def _delete_config_pass(self, raid_devices):
for raid_device in raid_devices:
component_devices = _get_component_devices(raid_device.name)
if not component_devices:
@ -1694,6 +1714,48 @@ class GenericHardwareManager(HardwareManager):
LOG.info('Deleted Software RAID device %s', raid_device.name)
# Remove all remaining raid traces from any drives, in case some
# drives or partitions have been member of some raid once
# TBD: should we consider all block devices by default, but still
# provide some 'control' through the node information
# (for example target_raid_config at the time of calling this). This
# may make sense if you do not want the delete_config to touch some
# drives, like cinder volumes locally attached, for example, or any
# kind of 'non-ephemeral' drive that you do not want to consider during
# deployment (= specify which drives to consider just like create
# configuration might consider the physical_disks parameter in a near
# future)
# Consider partitions first, before underlying disks, never hurts and
# can even avoid some failures. Example to reproduce:
# mdadm --stop /dev/md0
# mdadm --zero-superblock /dev/block
# mdadm: Unrecognised md component device - /dev/block
# (mdadm -E /dev/block still returns 0 so won't be skipped for zeroing)
# mdadm --zero-superblock /dev/block1
# mdadm: Couldn't open /dev/block for write - not zeroing
# mdadm -E /dev/block1: still shows superblocks
# Iterate partitions before their underlying disks (the device list
# is reversed for that): zeroing a partition's superblock can fail
# once the parent disk's superblock is already gone (see the mdadm
# reproduction notes above).
all_blks = reversed(self.list_block_devices(include_partitions=True))
for blk in all_blks:
    try:
        # mdadm --examine exits non-zero when there is no md
        # superblock, so a clean exit means this device was once
        # a RAID member and needs zeroing.
        utils.execute('mdadm', '--examine', blk.name,
                      use_standard_locale=True)
    except processutils.ProcessExecutionError as e:
        if "No md superblock detected" in str(e):
            # actually not a component device
            continue
        else:
            msg = "Failed to examine device {}: {}".format(
                blk.name, e)
            LOG.warning(msg)
            continue
    try:
        utils.execute('mdadm', '--zero-superblock', blk.name)
    except processutils.ProcessExecutionError as e:
        # Bug fix: log the device we actually failed on (blk.name),
        # not `raid_device`, a stale loop variable from the earlier
        # raid loop that may even be undefined when no arrays were
        # listed.
        LOG.warning('Failed to remove superblock from %s: %s',
                    blk.name, e)
LOG.debug("Finished deleting Software RAID(s)")
def validate_configuration(self, raid_config, node):

View File

@ -3251,62 +3251,141 @@ class TestGenericHardwareManager(base.IronicAgentTest):
107374182400, True)
raid_device2 = hardware.BlockDevice('/dev/md1', 'RAID-0',
2147483648, True)
sda = hardware.BlockDevice('/dev/sda', 'model12', 21, True)
sdb = hardware.BlockDevice('/dev/sdb', 'model12', 21, True)
sdc = hardware.BlockDevice('/dev/sdc', 'model12', 21, True)
hardware.list_all_block_devices.side_effect = [
[raid_device1, raid_device2]]
[raid_device1, raid_device2], # list_all_block_devices raid
[sda, sdb, sdc], # list_all_block_devices disks
[], # list_all_block_devices parts
[], # list_all_block_devices raid
]
mocked_get_component.side_effect = [
["/dev/sda1", "/dev/sda2"],
["/dev/sdb1", "/dev/sdb2"]]
["/dev/sda1", "/dev/sdb1"],
["/dev/sda2", "/dev/sdb2"]]
mocked_get_holder.side_effect = [
["/dev/sda", "/dev/sdb"],
["/dev/sda", "/dev/sdb"]]
mocked_execute.side_effect = [
None, None, None,
None, # mdadm --assemble --scan
None, # wipefs md0
None, # mdadm --stop md0
['_', 'mdadm --examine output for sda1'],
None,
None, # mdadm zero-superblock sda1
['_', 'mdadm --examine output for sdb1'],
None, None, None,
None, None, None,
None, # mdadm zero-superblock sdb1
None, # wipefs sda
None, # wipefs sdb
None, # wipfs md1
None, # mdadm --stop md1
['_', 'mdadm --examine output for sda2'],
None,
None, # mdadm zero-superblock sda2
['_', 'mdadm --examine output for sdb2'],
None, None, None]
None, # mdadm zero-superblock sdb2
None, # wipefs sda
None, # wipefs sda
['_', 'mdadm --examine output for sdc'],
None, # mdadm zero-superblock sdc
# examine sdb
processutils.ProcessExecutionError('No md superblock detected'),
# examine sda
processutils.ProcessExecutionError('No md superblock detected'),
None, # mdadm --assemble --scan
]
self.hardware.delete_configuration(self.node, [])
mocked_execute.assert_has_calls([
mock.call('mdadm', '--assemble', '--scan', check_exit_code=False),
mock.call('wipefs', '-af', '/dev/md0'),
mock.call('mdadm', '--stop', '/dev/md0'),
mock.call('mdadm', '--examine', '/dev/sda1',
use_standard_locale=True),
mock.call('mdadm', '--zero-superblock', '/dev/sda1'),
mock.call('mdadm', '--examine', '/dev/sda2',
mock.call('mdadm', '--examine', '/dev/sdb1',
use_standard_locale=True),
mock.call('mdadm', '--zero-superblock', '/dev/sda2'),
mock.call('mdadm', '--zero-superblock', '/dev/sdb1'),
mock.call('wipefs', '-af', '/dev/sda'),
mock.call('wipefs', '-af', '/dev/sdb'),
mock.call('wipefs', '-af', '/dev/md1'),
mock.call('mdadm', '--stop', '/dev/md1'),
mock.call('mdadm', '--examine', '/dev/sdb1',
mock.call('mdadm', '--examine', '/dev/sda2',
use_standard_locale=True),
mock.call('mdadm', '--zero-superblock', '/dev/sdb1'),
mock.call('mdadm', '--zero-superblock', '/dev/sda2'),
mock.call('mdadm', '--examine', '/dev/sdb2',
use_standard_locale=True),
mock.call('mdadm', '--zero-superblock', '/dev/sdb2'),
mock.call('wipefs', '-af', '/dev/sda'),
mock.call('wipefs', '-af', '/dev/sdb')])
mock.call('wipefs', '-af', '/dev/sdb'),
mock.call('mdadm', '--examine', '/dev/sdc',
use_standard_locale=True),
mock.call('mdadm', '--zero-superblock', '/dev/sdc'),
mock.call('mdadm', '--examine', '/dev/sdb',
use_standard_locale=True),
mock.call('mdadm', '--examine', '/dev/sda',
use_standard_locale=True),
mock.call('mdadm', '--assemble', '--scan', check_exit_code=False),
])
@mock.patch.object(hardware, '_get_component_devices', autospec=True)
@mock.patch.object(hardware, 'list_all_block_devices', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)
def test_delete_configuration_partition(self, mocked_execute, mocked_list,
                                        mocked_get_component):
    # This test checks that if no components are returned for a given
    # raid device, then it must be a nested partition and so it gets
    # skipped.
    # Note: the diff residue kept both the pre-change `return_value`
    # assignment and the superseding `side_effect` list; only the
    # post-change version is retained here.
    raid_device1_part1 = hardware.BlockDevice('/dev/md0p1', 'RAID-1',
                                              1073741824, True)
    hardware.list_all_block_devices.side_effect = [
        [raid_device1_part1],  # list_all_block_devices raid
        [],  # list_all_block_devices disks
        [],  # list_all_block_devices parts
        [],  # list_all_block_devices raid
    ]
    mocked_get_component.return_value = []
    self.assertIsNone(self.hardware.delete_configuration(self.node, []))
    # Only the pre- and post-pass assemble scans are expected; the
    # nested partition itself must not be stopped or wiped.
    mocked_execute.assert_has_calls([
        mock.call('mdadm', '--assemble', '--scan', check_exit_code=False),
        mock.call('mdadm', '--assemble', '--scan', check_exit_code=False),
    ])
@mock.patch.object(hardware, '_get_component_devices', autospec=True)
@mock.patch.object(hardware, 'list_all_block_devices', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)
def test_delete_configuration_failure_blocks_remaining(
        self, mocked_execute, mocked_list, mocked_get_component):
    # This test checks that, if after two raid clean passes there still
    # remain softraid hints on drives, then the delete_configuration call
    # raises an error
    raid_device1 = hardware.BlockDevice('/dev/md0', 'RAID-1',
                                        107374182400, True)
    # Each pass triggers a raid/disks/parts listing triplet; the raid
    # listing keeps returning /dev/md0 so both passes come up short.
    hardware.list_all_block_devices.side_effect = [
        [raid_device1],  # list_all_block_devices raid
        [],  # list_all_block_devices disks
        [],  # list_all_block_devices parts
        [raid_device1],  # list_all_block_devices raid
        [],  # list_all_block_devices disks
        [],  # list_all_block_devices parts
        [raid_device1],  # list_all_block_devices raid
    ]
    mocked_get_component.return_value = []
    self.assertRaisesRegex(
        errors.SoftwareRAIDError,
        r"^Software RAID caused unknown error: Unable to clean all "
        r"softraid correctly. Remaining \['/dev/md0'\]$",
        self.hardware.delete_configuration, self.node, [])
    # One mdadm assemble scan before the first pass plus one after
    # each of the two delete passes.
    mocked_execute.assert_has_calls([
        mock.call('mdadm', '--assemble', '--scan', check_exit_code=False),
        mock.call('mdadm', '--assemble', '--scan', check_exit_code=False),
        mock.call('mdadm', '--assemble', '--scan', check_exit_code=False),
    ])
@mock.patch.object(utils, 'execute', autospec=True)
def test_validate_configuration_valid_raid1(self, mocked_execute):

View File

@ -0,0 +1,6 @@
---
fixes:
- |
Zap superblocks from all block devices, as an attempt to erase any
software RAID hint from devices when calling delete_configuration,
including drives that are no longer members of any RAID.