diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py index 2ec53282cdbe..1116664d36dc 100644 --- a/nova/conf/workarounds.py +++ b/nova/conf/workarounds.py @@ -416,6 +416,21 @@ compatibility on the destination host during live migration. help=""" When this is enabled, it will skip version-checking of hypervisors during live migration. +"""), + cfg.BoolOpt( + 'skip_reserve_in_use_ironic_nodes', + default=False, + help=""" +This may be useful if you use the Ironic driver, but don't have +automatic cleaning enabled in Ironic. Nova, by default, will mark +Ironic nodes as reserved as soon as they are in use. When you free +the Ironic node (by deleting the nova instance) it takes a while +for Nova to un-reserve that Ironic node in placement. Usually this +is a good idea, because it avoids placement providing an Ironic +as a valid candidate when it is still being cleaned. +Howerver, if you don't use automatic cleaning, it can cause an +extra delay before and Ironic node is available for building a +new Nova instance. """), ] diff --git a/nova/tests/unit/virt/ironic/test_driver.py b/nova/tests/unit/virt/ironic/test_driver.py index 6ac7ca464e56..958623f31a24 100644 --- a/nova/tests/unit/virt/ironic/test_driver.py +++ b/nova/tests/unit/virt/ironic/test_driver.py @@ -932,6 +932,48 @@ class IronicDriverTestCase(test.NoDBTestCase): self.driver.update_provider_tree(self.ptree, mock.sentinel.nodename) + expected = { + 'CUSTOM_IRON_NFV': { + 'total': 1, + 'reserved': 1, + 'min_unit': 1, + 'max_unit': 1, + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + } + mock_nfc.assert_called_once_with(mock.sentinel.nodename) + mock_nr.assert_called_once_with(mock_nfc.return_value) + mock_res_used.assert_called_once_with(mock_nfc.return_value) + mock_res_unavail.assert_called_once_with(mock_nfc.return_value) + result = self.ptree.data(mock.sentinel.nodename).inventory + self.assertEqual(expected, result) + + @mock.patch.object(ironic_driver.IronicDriver, + '_node_resources_used', return_value=True) + @mock.patch.object(ironic_driver.IronicDriver, + '_node_resources_unavailable', return_value=False) + @mock.patch.object(ironic_driver.IronicDriver, '_node_resource') + @mock.patch.object(ironic_driver.IronicDriver, '_node_from_cache') + def test_update_provider_tree_with_rc_occupied_workaround(self, + mock_nfc, mock_nr, mock_res_unavail, mock_res_used): + """Ensure that when a node is used, we report the inventory matching + the consumed resources. + """ + self.flags(skip_reserve_in_use_ironic_nodes=True, + group="workarounds") + mock_nr.return_value = { + 'vcpus': 24, + 'vcpus_used': 24, + 'memory_mb': 1024, + 'memory_mb_used': 1024, + 'local_gb': 100, + 'local_gb_used': 100, + 'resource_class': 'iron-nfv', + } + + self.driver.update_provider_tree(self.ptree, mock.sentinel.nodename) + expected = { 'CUSTOM_IRON_NFV': { 'total': 1, @@ -945,7 +987,7 @@ class IronicDriverTestCase(test.NoDBTestCase): mock_nfc.assert_called_once_with(mock.sentinel.nodename) mock_nr.assert_called_once_with(mock_nfc.return_value) mock_res_used.assert_called_once_with(mock_nfc.return_value) - self.assertFalse(mock_res_unavail.called) + mock_res_unavail.assert_called_once_with(mock_nfc.return_value) result = self.ptree.data(mock.sentinel.nodename).inventory self.assertEqual(expected, result) @@ -1016,7 +1058,7 @@ class IronicDriverTestCase(test.NoDBTestCase): mock_nfc.assert_called_once_with(mock.sentinel.nodename) mock_nr.assert_called_once_with(mock_nfc.return_value) mock_res_used.assert_called_once_with(mock_nfc.return_value) - self.assertFalse(mock_res_unavail.called) + mock_res_unavail.assert_called_once_with(mock_nfc.return_value) result = self.ptree.data(mock.sentinel.nodename).traits self.assertEqual(set(), result) @@ -1048,7 +1090,7 @@ class IronicDriverTestCase(test.NoDBTestCase): mock_nfc.assert_called_once_with(mock.sentinel.nodename) mock_nr.assert_called_once_with(mock_nfc.return_value) mock_res_used.assert_called_once_with(mock_nfc.return_value) - self.assertFalse(mock_res_unavail.called) + mock_res_unavail.assert_called_once_with(mock_nfc.return_value) result = self.ptree.data(mock.sentinel.nodename).traits self.assertEqual(set(traits), result) diff --git a/nova/virt/ironic/driver.py b/nova/virt/ironic/driver.py index 7496db5a7cd0..5583ac52360e 100644 --- a/nova/virt/ironic/driver.py +++ b/nova/virt/ironic/driver.py @@ -874,15 +874,25 @@ class IronicDriver(virt_driver.ComputeDriver): """ # nodename is the ironic node's UUID. node = self._node_from_cache(nodename) + reserved = False - if (not self._node_resources_used(node) and - self._node_resources_unavailable(node)): - LOG.debug('Node %(node)s is not ready for a deployment, ' - 'reporting resources as reserved for it. Node\'s ' - 'provision state is %(prov)s, power state is ' - '%(power)s and maintenance is %(maint)s.', - {'node': node.uuid, 'prov': node.provision_state, - 'power': node.power_state, 'maint': node.maintenance}) + if self._node_resources_unavailable(node): + # Operators might mark a node as in maintainance, + # even when an instance is on the node, + # either way lets mark this as reserved + reserved = True + + if (self._node_resources_used(node) and + not CONF.workarounds.skip_reserve_in_use_ironic_nodes): + # Make resources as reserved once we have + # and instance here. + # When the allocation is deleted, most likely + # automatic clean will start, so we keep the node + # reserved until it becomes available again. + # In the case without automatic clean, once + # the allocation is removed in placement it + # also stays as reserved until we notice on + # the next periodic its actually available. reserved = True info = self._node_resource(node) diff --git a/releasenotes/notes/fix-ironic-scheduler-race-08cf8aba0365f512.yaml b/releasenotes/notes/fix-ironic-scheduler-race-08cf8aba0365f512.yaml new file mode 100644 index 000000000000..4fd2cc1ca9f7 --- /dev/null +++ b/releasenotes/notes/fix-ironic-scheduler-race-08cf8aba0365f512.yaml @@ -0,0 +1,11 @@ +--- +fixes: + - | + Fixed when placement returns ironic nodes that have just started automatic + cleaning as possible valid candidates. This is done by marking all ironic + nodes with an instance on them as reserved, such that nova only makes them + available once we have double checked Ironic reports the node as available. + If you don't have automatic cleaning on, this might mean it takes longer + than normal for Ironic nodes to become available for new instances. + If you want the old behaviour use the following workaround config: + `[workarounds]skip_reserve_in_use_ironic_nodes=true`