Merge "Ironic nodes with instance reserved in placement"

This commit is contained in:
Zuul 2022-12-14 14:11:40 +00:00 committed by Gerrit Code Review
commit d7de0c121a
4 changed files with 89 additions and 11 deletions

View File

@ -416,6 +416,21 @@ compatibility on the destination host during live migration.
help="""
When this is enabled, it will skip version-checking of hypervisors
during live migration.
"""),
cfg.BoolOpt(
'skip_reserve_in_use_ironic_nodes',
default=False,
help="""
This may be useful if you use the Ironic driver, but don't have
automatic cleaning enabled in Ironic. Nova, by default, will mark
Ironic nodes as reserved as soon as they are in use. When you free
the Ironic node (by deleting the nova instance) it takes a while
for Nova to un-reserve that Ironic node in placement. Usually this
is a good idea, because it avoids placement providing an Ironic
as a valid candidate when it is still being cleaned.
Howerver, if you don't use automatic cleaning, it can cause an
extra delay before and Ironic node is available for building a
new Nova instance.
"""),
]

View File

@ -932,6 +932,48 @@ class IronicDriverTestCase(test.NoDBTestCase):
self.driver.update_provider_tree(self.ptree, mock.sentinel.nodename)
expected = {
'CUSTOM_IRON_NFV': {
'total': 1,
'reserved': 1,
'min_unit': 1,
'max_unit': 1,
'step_size': 1,
'allocation_ratio': 1.0,
},
}
mock_nfc.assert_called_once_with(mock.sentinel.nodename)
mock_nr.assert_called_once_with(mock_nfc.return_value)
mock_res_used.assert_called_once_with(mock_nfc.return_value)
mock_res_unavail.assert_called_once_with(mock_nfc.return_value)
result = self.ptree.data(mock.sentinel.nodename).inventory
self.assertEqual(expected, result)
@mock.patch.object(ironic_driver.IronicDriver,
'_node_resources_used', return_value=True)
@mock.patch.object(ironic_driver.IronicDriver,
'_node_resources_unavailable', return_value=False)
@mock.patch.object(ironic_driver.IronicDriver, '_node_resource')
@mock.patch.object(ironic_driver.IronicDriver, '_node_from_cache')
def test_update_provider_tree_with_rc_occupied_workaround(self,
mock_nfc, mock_nr, mock_res_unavail, mock_res_used):
"""Ensure that when a node is used, we report the inventory matching
the consumed resources.
"""
self.flags(skip_reserve_in_use_ironic_nodes=True,
group="workarounds")
mock_nr.return_value = {
'vcpus': 24,
'vcpus_used': 24,
'memory_mb': 1024,
'memory_mb_used': 1024,
'local_gb': 100,
'local_gb_used': 100,
'resource_class': 'iron-nfv',
}
self.driver.update_provider_tree(self.ptree, mock.sentinel.nodename)
expected = {
'CUSTOM_IRON_NFV': {
'total': 1,
@ -945,7 +987,7 @@ class IronicDriverTestCase(test.NoDBTestCase):
mock_nfc.assert_called_once_with(mock.sentinel.nodename)
mock_nr.assert_called_once_with(mock_nfc.return_value)
mock_res_used.assert_called_once_with(mock_nfc.return_value)
self.assertFalse(mock_res_unavail.called)
mock_res_unavail.assert_called_once_with(mock_nfc.return_value)
result = self.ptree.data(mock.sentinel.nodename).inventory
self.assertEqual(expected, result)
@ -1016,7 +1058,7 @@ class IronicDriverTestCase(test.NoDBTestCase):
mock_nfc.assert_called_once_with(mock.sentinel.nodename)
mock_nr.assert_called_once_with(mock_nfc.return_value)
mock_res_used.assert_called_once_with(mock_nfc.return_value)
self.assertFalse(mock_res_unavail.called)
mock_res_unavail.assert_called_once_with(mock_nfc.return_value)
result = self.ptree.data(mock.sentinel.nodename).traits
self.assertEqual(set(), result)
@ -1048,7 +1090,7 @@ class IronicDriverTestCase(test.NoDBTestCase):
mock_nfc.assert_called_once_with(mock.sentinel.nodename)
mock_nr.assert_called_once_with(mock_nfc.return_value)
mock_res_used.assert_called_once_with(mock_nfc.return_value)
self.assertFalse(mock_res_unavail.called)
mock_res_unavail.assert_called_once_with(mock_nfc.return_value)
result = self.ptree.data(mock.sentinel.nodename).traits
self.assertEqual(set(traits), result)

View File

@ -874,15 +874,25 @@ class IronicDriver(virt_driver.ComputeDriver):
"""
# nodename is the ironic node's UUID.
node = self._node_from_cache(nodename)
reserved = False
if (not self._node_resources_used(node) and
self._node_resources_unavailable(node)):
LOG.debug('Node %(node)s is not ready for a deployment, '
'reporting resources as reserved for it. Node\'s '
'provision state is %(prov)s, power state is '
'%(power)s and maintenance is %(maint)s.',
{'node': node.uuid, 'prov': node.provision_state,
'power': node.power_state, 'maint': node.maintenance})
if self._node_resources_unavailable(node):
# Operators might mark a node as in maintainance,
# even when an instance is on the node,
# either way lets mark this as reserved
reserved = True
if (self._node_resources_used(node) and
not CONF.workarounds.skip_reserve_in_use_ironic_nodes):
# Make resources as reserved once we have
# and instance here.
# When the allocation is deleted, most likely
# automatic clean will start, so we keep the node
# reserved until it becomes available again.
# In the case without automatic clean, once
# the allocation is removed in placement it
# also stays as reserved until we notice on
# the next periodic its actually available.
reserved = True
info = self._node_resource(node)

View File

@ -0,0 +1,11 @@
---
fixes:
- |
Fixed when placement returns ironic nodes that have just started automatic
cleaning as possible valid candidates. This is done by marking all ironic
nodes with an instance on them as reserved, such that nova only makes them
available once we have double checked Ironic reports the node as available.
If you don't have automatic cleaning on, this might mean it takes longer
than normal for Ironic nodes to become available for new instances.
If you want the old behaviour use the following workaround config:
`[workarounds]skip_reserve_in_use_ironic_nodes=true`