Software RAID: Re-add missing devices
Upon md device creation, component devices are sometimes removed immediately again due to a "disk failure". The disks seem healthy, though. This patch re-adds component devices in such cases to prevent the md device from remaining in a degraded state (which would cause issues later, e.g. during ESP creation). Story: #2008164 Task: #40914 Change-Id: I2ac7cb4a546de84686d5c3435e850c14b3f6c1d7
This commit is contained in:
parent
3ddca46131
commit
253b4887d5
@ -203,6 +203,36 @@ def _get_component_devices(raid_device):
|
||||
return component_devices
|
||||
|
||||
|
||||
def _get_actual_component_devices(raid_device):
|
||||
"""Get the component devices of a Software RAID device.
|
||||
|
||||
Examine an md device and return its constituent devices.
|
||||
|
||||
:param raid_device: A Software RAID block device name.
|
||||
:returns: A list of the component devices.
|
||||
"""
|
||||
if not raid_device:
|
||||
return []
|
||||
|
||||
try:
|
||||
out, _ = utils.execute('mdadm', '--detail', raid_device,
|
||||
use_standard_locale=True)
|
||||
except processutils.ProcessExecutionError as e:
|
||||
msg = ('Could not get component devices of %(dev)s: %(err)s' %
|
||||
{'dev': raid_device, 'err': e})
|
||||
LOG.warning(msg)
|
||||
return []
|
||||
|
||||
component_devices = []
|
||||
lines = out.splitlines()
|
||||
# the first line contains the md device itself
|
||||
for line in lines[1:]:
|
||||
device = re.findall(r'/dev/\w+', line)
|
||||
component_devices += device
|
||||
|
||||
return component_devices
|
||||
|
||||
|
||||
def _calc_memory(sys_dict):
|
||||
physical = 0
|
||||
for sys_child in sys_dict['children']:
|
||||
@ -1866,6 +1896,20 @@ class GenericHardwareManager(HardwareManager):
|
||||
md_device, ' '.join(component_devices), e)
|
||||
raise errors.SoftwareRAIDError(msg)
|
||||
|
||||
# check for missing devices and re-add them
|
||||
actual_components = _get_actual_component_devices(md_device)
|
||||
missing = list(set(component_devices) - set(actual_components))
|
||||
for dev in missing:
|
||||
try:
|
||||
LOG.warning('Found %s to be missing from %s '
|
||||
'... re-adding!', dev, md_device)
|
||||
utils.execute('mdadm', '--add', md_device, dev,
|
||||
attempts=3, delay_on_retry=True)
|
||||
except processutils.ProcessExecutionError as e:
|
||||
msg = "Failed re-add {} to {}: {}".format(
|
||||
dev, md_device, e)
|
||||
raise errors.SoftwareRAIDError(msg)
|
||||
|
||||
LOG.info("Successfully created Software RAID")
|
||||
|
||||
return raid_config
|
||||
|
@ -2960,11 +2960,13 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
mocked_create.assert_called_once_with(self.hardware, self.node, [],
|
||||
raid_config)
|
||||
|
||||
@mock.patch.object(hardware, '_get_actual_component_devices',
|
||||
autospec=True)
|
||||
@mock.patch.object(disk_utils, 'list_partitions', autospec=True)
|
||||
@mock.patch.object(utils, 'execute', autospec=True)
|
||||
@mock.patch.object(os.path, 'isdir', autospec=True, return_value=False)
|
||||
def test_create_configuration(self, mocked_os_path_isdir, mocked_execute,
|
||||
mock_list_parts):
|
||||
mock_list_parts, mocked_actual_comp):
|
||||
node = self.node
|
||||
|
||||
raid_config = {
|
||||
@ -3003,6 +3005,11 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
None, None # mdadms
|
||||
]
|
||||
|
||||
mocked_actual_comp.side_effect = [
|
||||
('/dev/sda1', '/dev/sdb1'),
|
||||
('/dev/sda2', '/dev/sdb2'),
|
||||
]
|
||||
|
||||
result = self.hardware.create_configuration(node, [])
|
||||
mocked_os_path_isdir.assert_has_calls([
|
||||
mock.call('/sys/firmware/efi')
|
||||
@ -3037,12 +3044,14 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
mock.call(x) for x in ['/dev/sda', '/dev/sdb']
|
||||
])
|
||||
|
||||
@mock.patch.object(hardware, '_get_actual_component_devices',
|
||||
autospec=True)
|
||||
@mock.patch.object(utils, 'get_node_boot_mode', lambda node: 'bios')
|
||||
@mock.patch.object(disk_utils, 'list_partitions', autospec=True,
|
||||
return_value=[])
|
||||
@mock.patch.object(utils, 'execute', autospec=True)
|
||||
def test_create_configuration_raid_5(self, mocked_execute,
|
||||
mock_list_parts):
|
||||
mock_list_parts, mocked_actual_comp):
|
||||
node = self.node
|
||||
raid_config = {
|
||||
"logical_disks": [
|
||||
@ -3082,6 +3091,11 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
None, None # mdadms
|
||||
]
|
||||
|
||||
mocked_actual_comp.side_effect = [
|
||||
('/dev/sda1', '/dev/sdb1', '/dev/sdc1'),
|
||||
('/dev/sda2', '/dev/sdb2', '/dev/sdc2'),
|
||||
]
|
||||
|
||||
result = self.hardware.create_configuration(node, [])
|
||||
|
||||
mocked_execute.assert_has_calls([
|
||||
@ -3120,12 +3134,14 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
'/dev/sda2', '/dev/sdb2', '/dev/sdc2')])
|
||||
self.assertEqual(raid_config, result)
|
||||
|
||||
@mock.patch.object(hardware, '_get_actual_component_devices',
|
||||
autospec=True)
|
||||
@mock.patch.object(utils, 'get_node_boot_mode', lambda node: 'bios')
|
||||
@mock.patch.object(disk_utils, 'list_partitions', autospec=True,
|
||||
return_value=[])
|
||||
@mock.patch.object(utils, 'execute', autospec=True)
|
||||
def test_create_configuration_raid_6(self, mocked_execute,
|
||||
mock_list_parts):
|
||||
mock_list_parts, mocked_actual_comp):
|
||||
node = self.node
|
||||
raid_config = {
|
||||
"logical_disks": [
|
||||
@ -3170,6 +3186,11 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
None, None # mdadms
|
||||
]
|
||||
|
||||
mocked_actual_comp.side_effect = [
|
||||
('/dev/sda1', '/dev/sdb1', '/dev/sdc1', '/dev/sdd1'),
|
||||
('/dev/sda2', '/dev/sdb2', '/dev/sdc2', '/dev/sdd2'),
|
||||
]
|
||||
|
||||
result = self.hardware.create_configuration(node, [])
|
||||
|
||||
mocked_execute.assert_has_calls([
|
||||
@ -3217,12 +3238,15 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
'/dev/sda2', '/dev/sdb2', '/dev/sdc2', '/dev/sdd2')])
|
||||
self.assertEqual(raid_config, result)
|
||||
|
||||
@mock.patch.object(hardware, '_get_actual_component_devices',
|
||||
autospec=True)
|
||||
@mock.patch.object(disk_utils, 'list_partitions', autospec=True,
|
||||
return_value=[])
|
||||
@mock.patch.object(utils, 'execute', autospec=True)
|
||||
@mock.patch.object(os.path, 'isdir', autospec=True, return_value=True)
|
||||
def test_create_configuration_efi(self, mocked_os_path_isdir,
|
||||
mocked_execute, mock_list_parts):
|
||||
mocked_execute, mock_list_parts,
|
||||
mocked_actual_comp):
|
||||
node = self.node
|
||||
|
||||
raid_config = {
|
||||
@ -3255,6 +3279,11 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
None, None # mdadms
|
||||
]
|
||||
|
||||
mocked_actual_comp.side_effect = [
|
||||
('/dev/sda1', '/dev/sdb1'),
|
||||
('/dev/sda2', '/dev/sdb2'),
|
||||
]
|
||||
|
||||
result = self.hardware.create_configuration(node, [])
|
||||
mocked_os_path_isdir.assert_has_calls([
|
||||
mock.call('/sys/firmware/efi')
|
||||
@ -3282,12 +3311,15 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
'/dev/sda2', '/dev/sdb2')])
|
||||
self.assertEqual(raid_config, result)
|
||||
|
||||
@mock.patch.object(hardware, '_get_actual_component_devices',
|
||||
autospec=True)
|
||||
@mock.patch.object(disk_utils, 'list_partitions', autospec=True,
|
||||
return_value=[])
|
||||
@mock.patch.object(utils, 'execute', autospec=True)
|
||||
@mock.patch.object(os.path, 'isdir', autospec=True, return_value=False)
|
||||
def test_create_configuration_force_gpt_with_disk_label(
|
||||
self, mocked_os_path_isdir, mocked_execute, mock_list_part):
|
||||
self, mocked_os_path_isdir, mocked_execute, mock_list_part,
|
||||
mocked_actual_comp):
|
||||
node = self.node
|
||||
|
||||
raid_config = {
|
||||
@ -3326,6 +3358,11 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
None, None # mdadms
|
||||
]
|
||||
|
||||
mocked_actual_comp.side_effect = [
|
||||
('/dev/sda1', '/dev/sdb1'),
|
||||
('/dev/sda2', '/dev/sdb2'),
|
||||
]
|
||||
|
||||
result = self.hardware.create_configuration(node, [])
|
||||
mocked_os_path_isdir.assert_has_calls([
|
||||
mock.call('/sys/firmware/efi')
|
||||
@ -3353,12 +3390,14 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
'/dev/sda2', '/dev/sdb2')])
|
||||
self.assertEqual(raid_config, result)
|
||||
|
||||
@mock.patch.object(hardware, '_get_actual_component_devices',
|
||||
autospec=True)
|
||||
@mock.patch.object(disk_utils, 'list_partitions', autospec=True,
|
||||
return_value=[])
|
||||
@mock.patch.object(utils, 'execute', autospec=True)
|
||||
@mock.patch.object(os.path, 'isdir', autospec=True, return_value=False)
|
||||
def test_create_configuration_no_max(self, _mocked_isdir, mocked_execute,
|
||||
mock_list_parts):
|
||||
mock_list_parts, mocked_actual_comp):
|
||||
node = self.node
|
||||
raid_config = {
|
||||
"logical_disks": [
|
||||
@ -3381,6 +3420,11 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
self.hardware.list_block_devices = mock.Mock()
|
||||
self.hardware.list_block_devices.return_value = [device1, device2]
|
||||
|
||||
mocked_actual_comp.side_effect = [
|
||||
('/dev/sda1', '/dev/sdb1'),
|
||||
('/dev/sda2', '/dev/sdb2'),
|
||||
]
|
||||
|
||||
mocked_execute.side_effect = [
|
||||
None, # mklabel sda
|
||||
('42', None), # sgdisk -F sda
|
||||
@ -3390,7 +3434,7 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
None, None, # parted + partx sdb
|
||||
None, None, # parted + partx sda
|
||||
None, None, # parted + partx sdb
|
||||
None, None # mdadms
|
||||
None, None, # mdadms
|
||||
]
|
||||
|
||||
result = self.hardware.create_configuration(node, [])
|
||||
@ -3420,13 +3464,16 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
'/dev/sda2', '/dev/sdb2')])
|
||||
self.assertEqual(raid_config, result)
|
||||
|
||||
@mock.patch.object(hardware, '_get_actual_component_devices',
|
||||
autospec=True)
|
||||
@mock.patch.object(disk_utils, 'list_partitions', autospec=True,
|
||||
return_value=[])
|
||||
@mock.patch.object(utils, 'execute', autospec=True)
|
||||
@mock.patch.object(os.path, 'isdir', autospec=True, return_value=False)
|
||||
def test_create_configuration_max_is_first_logical(self, _mocked_isdir,
|
||||
mocked_execute,
|
||||
mock_list_parts):
|
||||
mock_list_parts,
|
||||
mocked_actual_comp):
|
||||
node = self.node
|
||||
raid_config = {
|
||||
"logical_disks": [
|
||||
@ -3461,6 +3508,11 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
None, None # mdadms
|
||||
]
|
||||
|
||||
mocked_actual_comp.side_effect = [
|
||||
('/dev/sda1', '/dev/sdb1'),
|
||||
('/dev/sda2', '/dev/sdb2'),
|
||||
]
|
||||
|
||||
result = self.hardware.create_configuration(node, [])
|
||||
|
||||
mocked_execute.assert_has_calls([
|
||||
@ -3488,12 +3540,15 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
'/dev/sda2', '/dev/sdb2')])
|
||||
self.assertEqual(raid_config, result)
|
||||
|
||||
@mock.patch.object(hardware, '_get_actual_component_devices',
|
||||
autospec=True)
|
||||
@mock.patch.object(utils, 'get_node_boot_mode', lambda node: 'bios')
|
||||
@mock.patch.object(disk_utils, 'list_partitions', autospec=True,
|
||||
return_value=[])
|
||||
@mock.patch.object(utils, 'execute', autospec=True)
|
||||
def test_create_configuration_with_hints(self, mocked_execute,
|
||||
mock_list_parts):
|
||||
mock_list_parts,
|
||||
mocked_actual_comp):
|
||||
node = self.node
|
||||
raid_config = {
|
||||
"logical_disks": [
|
||||
@ -3538,6 +3593,11 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
None, None # mdadms
|
||||
]
|
||||
|
||||
mocked_actual_comp.side_effect = [
|
||||
('/dev/sda1', '/dev/sdb1'),
|
||||
('/dev/sda2', '/dev/sdb2'),
|
||||
]
|
||||
|
||||
result = self.hardware.create_configuration(node, [])
|
||||
|
||||
mocked_execute.assert_has_calls([
|
||||
@ -3818,9 +3878,11 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
self.hardware.list_block_devices.side_effect = [
|
||||
[device1, device2, device3],
|
||||
[device1, device2, device3]]
|
||||
|
||||
# pre-creation validation fails as insufficient number of devices found
|
||||
error_regex = ("Software RAID configuration is not possible for "
|
||||
"RAID level 6 with only 3 block devices found.")
|
||||
|
||||
# Execute is actually called for listing_block_devices
|
||||
self.assertFalse(mocked_execute.called)
|
||||
self.assertRaisesRegex(errors.SoftwareRAIDError, error_regex,
|
||||
@ -3832,12 +3894,15 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
result = self.hardware.create_configuration(self.node, [])
|
||||
self.assertEqual(result, {})
|
||||
|
||||
@mock.patch.object(hardware, '_get_actual_component_devices',
|
||||
autospec=True)
|
||||
@mock.patch.object(disk_utils, 'list_partitions', autospec=True,
|
||||
return_value=[])
|
||||
@mock.patch.object(utils, 'execute', autospec=True)
|
||||
@mock.patch.object(os.path, 'isdir', autospec=True, return_value=True)
|
||||
def test_create_configuration_with_nvme(self, mocked_os_path_isdir,
|
||||
mocked_execute, mock_list_parts):
|
||||
mocked_execute, mock_list_parts,
|
||||
mocked_actual_comp):
|
||||
raid_config = {
|
||||
"logical_disks": [
|
||||
{
|
||||
@ -3870,6 +3935,11 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
None, None # mdadms
|
||||
]
|
||||
|
||||
mocked_actual_comp.side_effect = [
|
||||
('/dev/nvme0n1p1', '/dev/nvme1n1p1'),
|
||||
('/dev/nvme0n1p2', '/dev/nvme1n1p2'),
|
||||
]
|
||||
|
||||
result = self.hardware.create_configuration(self.node, [])
|
||||
|
||||
mocked_execute.assert_has_calls([
|
||||
@ -3965,6 +4035,20 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
self.hardware.create_configuration,
|
||||
self.node, [])
|
||||
|
||||
@mock.patch.object(utils, 'execute', autospec=True)
def test__get_actual_component_devices(self, mocked_execute):
    # mdadm --detail is mocked to return the canned output exactly once;
    # both member partitions listed there must be reported.
    mocked_execute.side_effect = [(MDADM_DETAIL_OUTPUT, '')]
    expected = ['/dev/vde1', '/dev/vdf1']
    self.assertEqual(
        expected, hardware._get_actual_component_devices('/dev/md0'))
|
||||
|
||||
@mock.patch.object(utils, 'execute', autospec=True)
def test__get_actual_component_devices_broken_raid0(self, mocked_execute):
    # With the canned output of a broken RAID0, only the single device
    # path still present in that output is expected back.
    mocked_execute.side_effect = [(MDADM_DETAIL_OUTPUT_BROKEN_RAID0, '')]
    expected = ['/dev/sda2']
    self.assertEqual(
        expected, hardware._get_actual_component_devices('/dev/md126'))
|
||||
|
||||
@mock.patch.object(utils, 'execute', autospec=True)
|
||||
def test__get_md_uuid(self, mocked_execute):
|
||||
mocked_execute.side_effect = [(MDADM_DETAIL_OUTPUT, '')]
|
||||
|
@ -0,0 +1,8 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Upon the creation of Software RAID devices, component devices are
|
||||
sometimes kicked out immediately (for no apparent reason). This
|
||||
fix re-adds devices in such cases in order to prevent the component
|
||||
from being missing the next time the device is assembled, which, for
|
||||
instance, may prevent the UEFI ESPs from being installed properly.
|
Loading…
Reference in New Issue
Block a user