From f7888d00efc4da23b955ba6a6a494438752f8cb3 Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Fri, 7 Jan 2022 08:14:53 -0800 Subject: [PATCH] Abort root device detection if an MD device is found Ironic knows how to choose root devices, and when it comes to software raid, it shouldn't realistically be required. This is because multi-device device numbering is unstable across reboots, espescially when someone creates a raid set, re-inspects to force the root_device hint to be set. This is because the introspection ramdisk re-detects the device as opposed to the software raid device management managing the device directly when a deployment is occuring. In reality, ironic's own algorithm of smallest device detection is really ideal for software raid as well. So instead of trying to be smart and helpful, we should let the software do it's own thing, and provide a warning to help the user along. Also adds a few notes around root device selection for the purpose of wider context. Resolves: rhbz#2032243 Change-Id: I4fd1de54d076ceb1100a6a7b2bda1578b039c042 --- ...vice-for-root-device-8ad0c1e85292ca0a.yaml | 13 ++++++++++ .../tests/workflows/test_baremetal.py | 14 +++++++++++ tripleoclient/workflows/baremetal.py | 25 +++++++++++++++++++ 3 files changed, 52 insertions(+) create mode 100644 releasenotes/notes/refuse-to-label-an-md-device-for-root-device-8ad0c1e85292ca0a.yaml diff --git a/releasenotes/notes/refuse-to-label-an-md-device-for-root-device-8ad0c1e85292ca0a.yaml b/releasenotes/notes/refuse-to-label-an-md-device-for-root-device-8ad0c1e85292ca0a.yaml new file mode 100644 index 000000000..ce90eedc6 --- /dev/null +++ b/releasenotes/notes/refuse-to-label-an-md-device-for-root-device-8ad0c1e85292ca0a.yaml @@ -0,0 +1,13 @@ +--- +fixes: + - | + Fixes incorrect handling of root device hints when Software RAID is in + use with Ironic. Users may re-introspect and an automatic root device hint + would be added, which is incorrect and can lead to a failed deployment + due to Software RAID (MD) device names being inconsistent across reboot + from being configured to utilized. Ironic ultimately understands these + devices and should choose the correct device by default if present. + We now log an Warning and do not insert a potentially incorrect root + device hint. Operators using a complex set of disks may still need to + explicitly set a root device hint should their operational state require + it. diff --git a/tripleoclient/tests/workflows/test_baremetal.py b/tripleoclient/tests/workflows/test_baremetal.py index e18fe994a..75a25da38 100644 --- a/tripleoclient/tests/workflows/test_baremetal.py +++ b/tripleoclient/tests/workflows/test_baremetal.py @@ -309,6 +309,20 @@ class TestBaremetalWorkflows(fakes.FakePlaybookExecution): strategy='smallest') self.assertEqual(self.baremetal.node.update.call_count, 0) + def test_md_device_found(self): + self.inspector.get_data.return_value = { + 'inventory': { + 'disks': [{'name': '/dev/md0', 'size': 99 * units.Gi}, + {'name': '/dev/sda', 'size': 100 * units.Gi}] + } + } + + baremetal._apply_root_device_strategy( + self.app.client_manager, + node_uuid='MOCK_UUID', + strategy=None) + self.assertEqual(self.baremetal.node.update.call_count, 0) + def test_no_data(self): self.inspector.get_data.side_effect = ( ironic_inspector_client.ClientError(mock.Mock())) diff --git a/tripleoclient/workflows/baremetal.py b/tripleoclient/workflows/baremetal.py index feaf9544f..09e19862b 100644 --- a/tripleoclient/workflows/baremetal.py +++ b/tripleoclient/workflows/baremetal.py @@ -319,7 +319,29 @@ def _apply_root_device_strategy(clients, node_uuid, strategy, raise exceptions.RootDeviceDetectionError( 'No suitable disks found for node %s' % node.uuid) + for disk in disks: + # NOTE(TheJulia): An md device should not explicitly forced, + # If software raid, Ironic knows exactly what it is doing. + if 'md' in disk['name']: + LOG.warning('A "md" device %(md)s, signifying software RAID, ' + 'has been detected. If software raid is in ' + 'use, this should not necessarilly need to ' + 'be set or used if software raid is being ' + 'managed by Ironic, unless the operator knows' + 'better due to site configuration. ' + 'Unfortunately, we cannot guess a ' + 'root deivce hint when Ironic managing a ' + 'software raid device. If this is in error ' + 'please set an explicit root device hint using ' + '$ openstack baremetal node set --property ' + 'root_device=/dev/', + {'md': disk['name']}) + return + if strategy == 'smallest': + # NOTE(TheJulia): This is redundant, Ironic does this by default, + # and maintains a list of invalid devices which would show up in a + # the introspetion data which cannot be used. Such as flash cards. disks.sort(key=lambda d: d['size']) root_device = disks[0] elif strategy == 'largest': @@ -360,6 +382,9 @@ def _apply_root_device_strategy(clients, node_uuid, strategy, # This -1 is what we always do to account for partitioning new_size -= 1 + # NOTE(TheJulia): local_gb is only used for partition images, + # and is ignored with Whole Disk Images. With movement to Whole + # Disk images, this is tech debt and should be removed at some point. baremetal_client.node.update( node.uuid, [{'op': 'add', 'path': '/properties/root_device', 'value': hint},