From c9de185ea1ac1e8d4435c5863b2ad7cefdb28c76 Mon Sep 17 00:00:00 2001
From: John Garbutt
Date: Wed, 16 Nov 2022 17:12:40 +0000
Subject: [PATCH] Ironic nodes with instance reserved in placement

Currently, when you delete an ironic instance, we trigger an undeploy
in ironic and we release our allocation in placement. We do this well
before the ironic node is actually available.

We have attempted to fix this by marking unavailable nodes as reserved
in placement. This works great until you try to re-image lots of
nodes. It turns out that ironic nodes which are waiting for their
automatic clean to finish are returned as valid allocation candidates
for quite some time. Eventually we mark them as reserved.

This patch takes a strange approach: if we mark all nodes as reserved
as soon as an instance lands on them, we close the race. That is, when
the allocation is removed, the node stays unavailable until the next
update of placement notices that the node has become available, which
may or may not be after automatic cleaning. The trade-off is that when
you don't have automatic cleaning, we wait a bit longer to notice the
node is available again.

Note that this is also useful when a broken Ironic node is marked as
in maintenance while it is in use by a nova instance. In a similar
way, we mark the node as reserved immediately, rather than first
waiting for the instance to be deleted before reserving the resources
in Placement.

Closes-Bug: #1974070
Change-Id: Iab92124b5776a799c7f90d07281d28fcf191c8fe
(cherry picked from commit 3c022e968375c1b2eadf3c2dd7190b9434c6d4c1)
---
 nova/conf/workarounds.py                      | 15 ++++++
 nova/tests/unit/virt/ironic/test_driver.py    | 48 +++++++++++++++++--
 nova/virt/ironic/driver.py                    | 26 ++++++++----
 ...ronic-scheduler-race-08cf8aba0365f512.yaml | 11 +++++
 4 files changed, 89 insertions(+), 11 deletions(-)
 create mode 100644 releasenotes/notes/fix-ironic-scheduler-race-08cf8aba0365f512.yaml

diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py
index 2ec53282cdbe..1116664d36dc 100644
--- a/nova/conf/workarounds.py
+++ b/nova/conf/workarounds.py
@@ -416,6 +416,21 @@ compatibility on the destination host during live migration.
         help="""
 When this is enabled, it will skip version-checking of hypervisors
 during live migration.
+"""),
+    cfg.BoolOpt(
+        'skip_reserve_in_use_ironic_nodes',
+        default=False,
+        help="""
+This may be useful if you use the Ironic driver, but don't have
+automatic cleaning enabled in Ironic. Nova, by default, will mark
+Ironic nodes as reserved as soon as they are in use. When you free
+the Ironic node (by deleting the Nova instance) it takes a while
+for Nova to un-reserve that Ironic node in placement. Usually this
+is a good idea, because it avoids placement offering an Ironic
+node as a valid candidate while it is still being cleaned.
+However, if you don't use automatic cleaning, it can cause an
+extra delay before an Ironic node is available for building a
+new Nova instance.
"""), ] diff --git a/nova/tests/unit/virt/ironic/test_driver.py b/nova/tests/unit/virt/ironic/test_driver.py index 6ac7ca464e56..958623f31a24 100644 --- a/nova/tests/unit/virt/ironic/test_driver.py +++ b/nova/tests/unit/virt/ironic/test_driver.py @@ -932,6 +932,48 @@ class IronicDriverTestCase(test.NoDBTestCase): self.driver.update_provider_tree(self.ptree, mock.sentinel.nodename) + expected = { + 'CUSTOM_IRON_NFV': { + 'total': 1, + 'reserved': 1, + 'min_unit': 1, + 'max_unit': 1, + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + } + mock_nfc.assert_called_once_with(mock.sentinel.nodename) + mock_nr.assert_called_once_with(mock_nfc.return_value) + mock_res_used.assert_called_once_with(mock_nfc.return_value) + mock_res_unavail.assert_called_once_with(mock_nfc.return_value) + result = self.ptree.data(mock.sentinel.nodename).inventory + self.assertEqual(expected, result) + + @mock.patch.object(ironic_driver.IronicDriver, + '_node_resources_used', return_value=True) + @mock.patch.object(ironic_driver.IronicDriver, + '_node_resources_unavailable', return_value=False) + @mock.patch.object(ironic_driver.IronicDriver, '_node_resource') + @mock.patch.object(ironic_driver.IronicDriver, '_node_from_cache') + def test_update_provider_tree_with_rc_occupied_workaround(self, + mock_nfc, mock_nr, mock_res_unavail, mock_res_used): + """Ensure that when a node is used, we report the inventory matching + the consumed resources. + """ + self.flags(skip_reserve_in_use_ironic_nodes=True, + group="workarounds") + mock_nr.return_value = { + 'vcpus': 24, + 'vcpus_used': 24, + 'memory_mb': 1024, + 'memory_mb_used': 1024, + 'local_gb': 100, + 'local_gb_used': 100, + 'resource_class': 'iron-nfv', + } + + self.driver.update_provider_tree(self.ptree, mock.sentinel.nodename) + expected = { 'CUSTOM_IRON_NFV': { 'total': 1, @@ -945,7 +987,7 @@ class IronicDriverTestCase(test.NoDBTestCase): mock_nfc.assert_called_once_with(mock.sentinel.nodename) mock_nr.assert_called_once_with(mock_nfc.return_value) mock_res_used.assert_called_once_with(mock_nfc.return_value) - self.assertFalse(mock_res_unavail.called) + mock_res_unavail.assert_called_once_with(mock_nfc.return_value) result = self.ptree.data(mock.sentinel.nodename).inventory self.assertEqual(expected, result) @@ -1016,7 +1058,7 @@ class IronicDriverTestCase(test.NoDBTestCase): mock_nfc.assert_called_once_with(mock.sentinel.nodename) mock_nr.assert_called_once_with(mock_nfc.return_value) mock_res_used.assert_called_once_with(mock_nfc.return_value) - self.assertFalse(mock_res_unavail.called) + mock_res_unavail.assert_called_once_with(mock_nfc.return_value) result = self.ptree.data(mock.sentinel.nodename).traits self.assertEqual(set(), result) @@ -1048,7 +1090,7 @@ class IronicDriverTestCase(test.NoDBTestCase): mock_nfc.assert_called_once_with(mock.sentinel.nodename) mock_nr.assert_called_once_with(mock_nfc.return_value) mock_res_used.assert_called_once_with(mock_nfc.return_value) - self.assertFalse(mock_res_unavail.called) + mock_res_unavail.assert_called_once_with(mock_nfc.return_value) result = self.ptree.data(mock.sentinel.nodename).traits self.assertEqual(set(traits), result) diff --git a/nova/virt/ironic/driver.py b/nova/virt/ironic/driver.py index 7496db5a7cd0..5583ac52360e 100644 --- a/nova/virt/ironic/driver.py +++ b/nova/virt/ironic/driver.py @@ -874,15 +874,25 @@ class IronicDriver(virt_driver.ComputeDriver): """ # nodename is the ironic node's UUID. 
         node = self._node_from_cache(nodename)
+        reserved = False
-        if (not self._node_resources_used(node) and
-                self._node_resources_unavailable(node)):
-            LOG.debug('Node %(node)s is not ready for a deployment, '
-                      'reporting resources as reserved for it. Node\'s '
-                      'provision state is %(prov)s, power state is '
-                      '%(power)s and maintenance is %(maint)s.',
-                      {'node': node.uuid, 'prov': node.provision_state,
-                       'power': node.power_state, 'maint': node.maintenance})
+        if self._node_resources_unavailable(node):
+            # Operators might mark a node as in maintenance,
+            # even when an instance is on the node;
+            # either way, mark this node as reserved.
+            reserved = True
+
+        if (self._node_resources_used(node) and
+                not CONF.workarounds.skip_reserve_in_use_ironic_nodes):
+            # Mark the resources as reserved once we have
+            # an instance here.
+            # When the allocation is deleted, most likely
+            # automatic cleaning will start, so we keep the node
+            # reserved until it becomes available again.
+            # In the case without automatic cleaning, once
+            # the allocation is removed in placement it
+            # also stays reserved until we notice on
+            # the next periodic that it is actually available.
             reserved = True
 
         info = self._node_resource(node)
 
diff --git a/releasenotes/notes/fix-ironic-scheduler-race-08cf8aba0365f512.yaml b/releasenotes/notes/fix-ironic-scheduler-race-08cf8aba0365f512.yaml
new file mode 100644
index 000000000000..4fd2cc1ca9f7
--- /dev/null
+++ b/releasenotes/notes/fix-ironic-scheduler-race-08cf8aba0365f512.yaml
@@ -0,0 +1,11 @@
+---
+fixes:
+  - |
+    Fixed a race where placement could return Ironic nodes that have just
+    started automatic cleaning as valid allocation candidates. This is done
+    by marking all Ironic nodes with an instance on them as reserved, so that
+    Nova only makes them available once it has double-checked that Ironic
+    reports the node as available. If you don't have automatic cleaning on,
+    this might mean it takes longer than normal for Ironic nodes to become
+    available for new instances. If you want the old behaviour, use the
+    following workaround config: `[workarounds]skip_reserve_in_use_ironic_nodes=true`
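
For illustration only (not part of the patch): below is a minimal, standalone Python sketch of the reservation decision that the patched update_provider_tree() makes. The Node dataclass, the _UNAVAILABLE_STATES set and the two predicate helpers are simplified assumptions standing in for Nova's real _node_resources_used() and _node_resources_unavailable() helpers; only the ordering of the two if blocks mirrors the patched driver code.

    from dataclasses import dataclass
    from typing import Optional


    @dataclass
    class Node:
        """Simplified stand-in for the Ironic node object Nova caches."""
        instance_uuid: Optional[str]
        provision_state: str
        maintenance: bool


    # Assumption: a rough list of provision states in which a node cannot take
    # a new deployment; the authoritative logic lives in nova/virt/ironic/driver.py.
    _UNAVAILABLE_STATES = {'cleaning', 'clean wait', 'deleting', 'error'}


    def _resources_used(node):
        # Stand-in for _node_resources_used(): a node hosting an instance is in use.
        return node.instance_uuid is not None


    def _resources_unavailable(node):
        # Stand-in for _node_resources_unavailable(): broken or cleaning nodes
        # cannot be scheduled to.
        return node.maintenance or node.provision_state in _UNAVAILABLE_STATES


    def should_reserve(node, skip_reserve_in_use_ironic_nodes=False):
        """Return True if the node's inventory should be reported as reserved."""
        if _resources_unavailable(node):
            # Covers in-maintenance nodes, even ones that still host an instance.
            return True
        if _resources_used(node) and not skip_reserve_in_use_ironic_nodes:
            # Reserve as soon as an instance lands; the node then stays reserved
            # through automatic cleaning until a later periodic sees it free.
            return True
        return False


    if __name__ == '__main__':
        busy = Node('6a0c2b', 'active', False)
        cleaning = Node(None, 'clean wait', False)
        free = Node(None, 'available', False)
        assert should_reserve(busy)
        assert should_reserve(cleaning)
        assert not should_reserve(free)
        # With the workaround enabled, in-use nodes are not proactively reserved.
        assert not should_reserve(busy, skip_reserve_in_use_ironic_nodes=True)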