Merge "[ironic] Ensure unprovision happens for new states"

This commit is contained in:
Zuul
2025-11-24 10:43:39 +00:00
committed by Gerrit Code Review
4 changed files with 157 additions and 8 deletions

View File

@@ -1746,6 +1746,91 @@ class IronicDriverTestCase(test.NoDBTestCase):
for state in ironic_states.PROVISION_STATE_LIST:
self._test_destroy(state)
@mock.patch.object(ironic_driver.IronicDriver,
'_remove_instance_info_from_node')
@mock.patch.object(ironic_driver.IronicDriver, '_cleanup_deploy')
def test_destroy_servicing_states_unprovision(self, mock_cleanup_deploy,
mock_remove_instance_info):
"""Test that servicing and deployhold states trigger unprovisioning.
This is a regression test for bug 2131960 where nodes in servicing
states (SERVICING, SERVICEWAIT, SERVICEHOLD, SERVICEFAIL) and
DEPLOYHOLD were not being properly unprovisioned when destroyed.
"""
# Test all servicing-related states and deployhold
servicing_states = [
ironic_states.DEPLOYHOLD,
ironic_states.SERVICING,
ironic_states.SERVICEWAIT,
ironic_states.SERVICEFAIL,
ironic_states.SERVICEHOLD,
]
for state in servicing_states:
mock_cleanup_deploy.reset_mock()
mock_remove_instance_info.reset_mock()
self.mock_conn.set_node_provision_state.reset_mock()
node_id = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
network_info = 'foo'
node = _get_cached_node(
driver='fake', id=node_id, provision_state=state)
instance = fake_instance.fake_instance_obj(self.ctx, node=node_id)
def fake_set_node_provision_state(*_):
node.provision_state = None
self.mock_conn.nodes.return_value = iter([node])
self.mock_conn.set_node_provision_state.side_effect = \
fake_set_node_provision_state
self.driver.destroy(self.ctx, instance, network_info, None)
# All these states should trigger unprovisioning, not cleanup
self.mock_conn.set_node_provision_state.assert_called_once_with(
node_id, 'deleted',
)
self.assertFalse(mock_remove_instance_info.called)
self.assertFalse(mock_cleanup_deploy.called)
@mock.patch.object(ironic_driver.IronicDriver,
'_remove_instance_info_from_node')
@mock.patch.object(ironic_driver.IronicDriver, '_cleanup_deploy')
def test_destroy_unknown_state_unprovision(self, mock_cleanup_deploy,
mock_remove_instance_info):
"""Test that unknown provision states trigger unprovisioning.
This is a regression test for bug 2131960. As a safety mechanism,
nodes in provision states that are not recognized (not in
PROVISION_STATE_LIST) should trigger unprovisioning rather than
cleanup to ensure proper node teardown even when encountering
unexpected or future Ironic states.
"""
# Test with a completely unknown state that doesn't exist in
# PROVISION_STATE_LIST
node_id = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
network_info = 'foo'
unknown_state = 'unknown-future-state'
node = _get_cached_node(
driver='fake', id=node_id, provision_state=unknown_state)
instance = fake_instance.fake_instance_obj(self.ctx, node=node_id)
def fake_set_node_provision_state(*_):
node.provision_state = None
self.mock_conn.nodes.return_value = iter([node])
self.mock_conn.set_node_provision_state.side_effect = \
fake_set_node_provision_state
self.driver.destroy(self.ctx, instance, network_info, None)
# Unknown states should trigger unprovisioning for safety
self.mock_conn.set_node_provision_state.assert_called_once_with(
node_id, 'deleted',
)
self.assertFalse(mock_remove_instance_info.called)
self.assertFalse(mock_cleanup_deploy.called)
@mock.patch.object(ironic_driver.IronicDriver,
'_validate_instance_and_node')
@mock.patch.object(ironic_driver.IronicDriver,

View File

@@ -71,10 +71,12 @@ _POWER_STATE_MAP = {
_UNPROVISION_STATES = (ironic_states.ACTIVE, ironic_states.DEPLOYFAIL,
ironic_states.ERROR, ironic_states.DEPLOYWAIT,
ironic_states.DEPLOYING, ironic_states.RESCUE,
ironic_states.RESCUING, ironic_states.RESCUEWAIT,
ironic_states.RESCUEFAIL, ironic_states.UNRESCUING,
ironic_states.UNRESCUEFAIL)
ironic_states.DEPLOYING, ironic_states.DEPLOYHOLD,
ironic_states.RESCUE, ironic_states.RESCUING,
ironic_states.RESCUEWAIT, ironic_states.RESCUEFAIL,
ironic_states.UNRESCUING, ironic_states.UNRESCUEFAIL,
ironic_states.SERVICING, ironic_states.SERVICEWAIT,
ironic_states.SERVICEFAIL, ironic_states.SERVICEHOLD)
_NODE_FIELDS = ('uuid', 'power_state', 'target_power_state', 'provision_state',
'target_provision_state', 'last_error', 'maintenance',
@@ -1370,13 +1372,18 @@ class IronicDriver(virt_driver.ComputeDriver):
# without raising any exceptions.
return
if node.provision_state in _UNPROVISION_STATES:
if (node.provision_state in _UNPROVISION_STATES or
node.provision_state not in ironic_states.PROVISION_STATE_LIST):
# NOTE(mgoddard): Ironic's node tear-down procedure includes all of
# the things we do in _cleanup_deploy, so let's not repeat them
# here. Doing so would also race with the node cleaning process,
# which may acquire the node lock and prevent us from making
# changes to the node. See
# https://bugs.launchpad.net/nova/+bug/2019977
# NOTE(JayF): For maximum safety, we call unprovision also in cases
# where we see an unrecognized state. See
# https://bugs.launchpad.net/nova/+bug/2131960 for more
# information.
self._unprovision(instance, node)
else:
self._cleanup_deploy(node, instance, network_info)

View File

@@ -110,6 +110,9 @@ the driver to finish a cleaning step.
CLEANFAIL = 'clean failed'
""" Node failed cleaning. This requires operator intervention to resolve. """
CLEANHOLD = 'clean hold'
""" Node is being held by a cleaning step. """
ERROR = 'error'
""" An error occurred during node processing.
@@ -131,6 +134,8 @@ inspected node shall transition to MANAGEABLE status.
INSPECTFAIL = 'inspect failed'
""" Node inspection failed. """
INSPECTWAIT = 'inspect wait'
""" Node is waiting for inspection callback. """
RESCUE = 'rescue'
""" Node is in rescue mode.
@@ -158,6 +163,30 @@ UNRESCUEFAIL = 'unrescue failed'
UNRESCUING = "unrescuing"
""" Node is unrescuing. """
DEPLOYHOLD = 'deploy hold'
""" Node is being held by a deploy step. """
SERVICING = 'servicing'
""" Node is actively being changed by a service step. """
SERVICEWAIT = 'service wait'
""" Node is waiting for an operation to complete. """
SERVICEFAIL = 'service failed'
""" Node has failed in a service step execution. """
SERVICEHOLD = 'service hold'
""" Node is being held for direct intervention from a service step. """
ENROLL = 'enroll'
""" Node being entrolled into Ironic.
Nova should never see a node in this state."""
VERIFYING = 'verifying'
""" Node driver attributes being verified.
Nova should never see a node in this state."""
##############
# Power states
##############
@@ -176,7 +205,10 @@ REBOOT = 'rebooting'
##################
PROVISION_STATE_LIST = (NOSTATE, MANAGEABLE, AVAILABLE, ACTIVE, DEPLOYWAIT,
DEPLOYING, DEPLOYFAIL, DEPLOYDONE, DELETING, DELETED,
CLEANING, CLEANWAIT, CLEANFAIL, ERROR, REBUILD,
INSPECTING, INSPECTFAIL)
DEPLOYING, DEPLOYFAIL, DEPLOYDONE, DEPLOYHOLD,
DELETING, DELETED, CLEANING, CLEANWAIT, CLEANFAIL,
CLEANHOLD, ERROR, REBUILD, INSPECTING, INSPECTFAIL,
INSPECTWAIT, RESCUE, RESCUEFAIL, RESCUEWAIT, RESCUING,
SERVICING, SERVICEWAIT, SERVICEFAIL, SERVICEHOLD,
UNRESCUEFAIL, UNRESCUING, ENROLL, VERIFYING)
""" A list of all provision states. """

View File

@@ -0,0 +1,25 @@
---
security:
- |
[`bug 2131960 <https://bugs.launchpad.net/nova/+bug/2131960>`_]
Fixed improper teardown of Ironic nodes in servicing states.
Previously, when Nova's virt driver called ``destroy()`` on Ironic
nodes in certain states (``deploy hold``, ``servicing``, ``service wait``,
``service hold``, ``service failed``), these nodes would fail to undergo
proper teardown, resulting in orphaned instances where instance metadata
was removed but nodes remained unprovisioned in Ironic. In flat networking
configurations, this could allow nodes traversing servicing states to
briefly access tenant networks. In Neutron networking configurations,
while VIFs would detach correctly, nodes would remain in Ironic with a
shutdown IPA, and future deprovisioning attempts would fail due to missing
networking metadata. These states are now properly handled to ensure
complete node deprovisioning.
fixes:
- |
[`bug 2131960 <https://bugs.launchpad.net/nova/+bug/2131960>`_]
Fixed a bug where Ironic nodes in servicing-related provision states
(``deploy hold``, ``servicing``, ``service wait``, ``service hold``,
``service failed``) were not properly unprovisioned when instances were
destroyed. The Nova Ironic virt driver now correctly triggers
unprovisioning for nodes in these states, preventing orphaned node
configurations in Ironic.