diff --git a/nova/tests/unit/virt/ironic/test_driver.py b/nova/tests/unit/virt/ironic/test_driver.py index fc2ae12917b3..988f61a62d43 100644 --- a/nova/tests/unit/virt/ironic/test_driver.py +++ b/nova/tests/unit/virt/ironic/test_driver.py @@ -1746,6 +1746,91 @@ class IronicDriverTestCase(test.NoDBTestCase): for state in ironic_states.PROVISION_STATE_LIST: self._test_destroy(state) + @mock.patch.object(ironic_driver.IronicDriver, + '_remove_instance_info_from_node') + @mock.patch.object(ironic_driver.IronicDriver, '_cleanup_deploy') + def test_destroy_servicing_states_unprovision(self, mock_cleanup_deploy, + mock_remove_instance_info): + """Test that servicing and deployhold states trigger unprovisioning. + + This is a regression test for bug 2131960 where nodes in servicing + states (SERVICING, SERVICEWAIT, SERVICEHOLD, SERVICEFAIL) and + DEPLOYHOLD were not being properly unprovisioned when destroyed. + """ + # Test all servicing-related states and deployhold + servicing_states = [ + ironic_states.DEPLOYHOLD, + ironic_states.SERVICING, + ironic_states.SERVICEWAIT, + ironic_states.SERVICEFAIL, + ironic_states.SERVICEHOLD, + ] + + for state in servicing_states: + mock_cleanup_deploy.reset_mock() + mock_remove_instance_info.reset_mock() + self.mock_conn.set_node_provision_state.reset_mock() + + node_id = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' + network_info = 'foo' + + node = _get_cached_node( + driver='fake', id=node_id, provision_state=state) + instance = fake_instance.fake_instance_obj(self.ctx, node=node_id) + + def fake_set_node_provision_state(*_): + node.provision_state = None + + self.mock_conn.nodes.return_value = iter([node]) + self.mock_conn.set_node_provision_state.side_effect = \ + fake_set_node_provision_state + self.driver.destroy(self.ctx, instance, network_info, None) + + # All these states should trigger unprovisioning, not cleanup + self.mock_conn.set_node_provision_state.assert_called_once_with( + node_id, 'deleted', + ) + self.assertFalse(mock_remove_instance_info.called) + self.assertFalse(mock_cleanup_deploy.called) + + @mock.patch.object(ironic_driver.IronicDriver, + '_remove_instance_info_from_node') + @mock.patch.object(ironic_driver.IronicDriver, '_cleanup_deploy') + def test_destroy_unknown_state_unprovision(self, mock_cleanup_deploy, + mock_remove_instance_info): + """Test that unknown provision states trigger unprovisioning. + + This is a regression test for bug 2131960. As a safety mechanism, + nodes in provision states that are not recognized (not in + PROVISION_STATE_LIST) should trigger unprovisioning rather than + cleanup to ensure proper node teardown even when encountering + unexpected or future Ironic states. + """ + # Test with a completely unknown state that doesn't exist in + # PROVISION_STATE_LIST + node_id = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' + network_info = 'foo' + unknown_state = 'unknown-future-state' + + node = _get_cached_node( + driver='fake', id=node_id, provision_state=unknown_state) + instance = fake_instance.fake_instance_obj(self.ctx, node=node_id) + + def fake_set_node_provision_state(*_): + node.provision_state = None + + self.mock_conn.nodes.return_value = iter([node]) + self.mock_conn.set_node_provision_state.side_effect = \ + fake_set_node_provision_state + self.driver.destroy(self.ctx, instance, network_info, None) + + # Unknown states should trigger unprovisioning for safety + self.mock_conn.set_node_provision_state.assert_called_once_with( + node_id, 'deleted', + ) + self.assertFalse(mock_remove_instance_info.called) + self.assertFalse(mock_cleanup_deploy.called) + @mock.patch.object(ironic_driver.IronicDriver, '_validate_instance_and_node') @mock.patch.object(ironic_driver.IronicDriver, diff --git a/nova/virt/ironic/driver.py b/nova/virt/ironic/driver.py index 6da6c457ee5b..647fca237032 100644 --- a/nova/virt/ironic/driver.py +++ b/nova/virt/ironic/driver.py @@ -71,10 +71,12 @@ _POWER_STATE_MAP = { _UNPROVISION_STATES = (ironic_states.ACTIVE, ironic_states.DEPLOYFAIL, ironic_states.ERROR, ironic_states.DEPLOYWAIT, - ironic_states.DEPLOYING, ironic_states.RESCUE, - ironic_states.RESCUING, ironic_states.RESCUEWAIT, - ironic_states.RESCUEFAIL, ironic_states.UNRESCUING, - ironic_states.UNRESCUEFAIL) + ironic_states.DEPLOYING, ironic_states.DEPLOYHOLD, + ironic_states.RESCUE, ironic_states.RESCUING, + ironic_states.RESCUEWAIT, ironic_states.RESCUEFAIL, + ironic_states.UNRESCUING, ironic_states.UNRESCUEFAIL, + ironic_states.SERVICING, ironic_states.SERVICEWAIT, + ironic_states.SERVICEFAIL, ironic_states.SERVICEHOLD) _NODE_FIELDS = ('uuid', 'power_state', 'target_power_state', 'provision_state', 'target_provision_state', 'last_error', 'maintenance', @@ -1370,13 +1372,18 @@ class IronicDriver(virt_driver.ComputeDriver): # without raising any exceptions. return - if node.provision_state in _UNPROVISION_STATES: + if (node.provision_state in _UNPROVISION_STATES or + node.provision_state not in ironic_states.PROVISION_STATE_LIST): # NOTE(mgoddard): Ironic's node tear-down procedure includes all of # the things we do in _cleanup_deploy, so let's not repeat them # here. Doing so would also race with the node cleaning process, # which may acquire the node lock and prevent us from making # changes to the node. See # https://bugs.launchpad.net/nova/+bug/2019977 + # NOTE(JayF): For maximum safety, we call unprovision also in cases + # where we see an unrecognized state. See + # https://bugs.launchpad.net/nova/+bug/2131960 for more + # information. self._unprovision(instance, node) else: self._cleanup_deploy(node, instance, network_info) diff --git a/nova/virt/ironic/ironic_states.py b/nova/virt/ironic/ironic_states.py index aafdd47e64a1..f2e93a902261 100644 --- a/nova/virt/ironic/ironic_states.py +++ b/nova/virt/ironic/ironic_states.py @@ -110,6 +110,9 @@ the driver to finish a cleaning step. CLEANFAIL = 'clean failed' """ Node failed cleaning. This requires operator intervention to resolve. """ +CLEANHOLD = 'clean hold' +""" Node is being held by a cleaning step. """ + ERROR = 'error' """ An error occurred during node processing. @@ -131,6 +134,8 @@ inspected node shall transition to MANAGEABLE status. INSPECTFAIL = 'inspect failed' """ Node inspection failed. """ +INSPECTWAIT = 'inspect wait' +""" Node is waiting for inspection callback. """ RESCUE = 'rescue' """ Node is in rescue mode. @@ -158,6 +163,30 @@ UNRESCUEFAIL = 'unrescue failed' UNRESCUING = "unrescuing" """ Node is unrescuing. """ +DEPLOYHOLD = 'deploy hold' +""" Node is being held by a deploy step. """ + +SERVICING = 'servicing' +""" Node is actively being changed by a service step. """ + +SERVICEWAIT = 'service wait' +""" Node is waiting for an operation to complete. """ + +SERVICEFAIL = 'service failed' +""" Node has failed in a service step execution. """ + +SERVICEHOLD = 'service hold' +""" Node is being held for direct intervention from a service step. """ + +ENROLL = 'enroll' +""" Node being entrolled into Ironic. +Nova should never see a node in this state.""" + +VERIFYING = 'verifying' +""" Node driver attributes being verified. +Nova should never see a node in this state.""" + + ############## # Power states ############## @@ -176,7 +205,10 @@ REBOOT = 'rebooting' ################## PROVISION_STATE_LIST = (NOSTATE, MANAGEABLE, AVAILABLE, ACTIVE, DEPLOYWAIT, - DEPLOYING, DEPLOYFAIL, DEPLOYDONE, DELETING, DELETED, - CLEANING, CLEANWAIT, CLEANFAIL, ERROR, REBUILD, - INSPECTING, INSPECTFAIL) + DEPLOYING, DEPLOYFAIL, DEPLOYDONE, DEPLOYHOLD, + DELETING, DELETED, CLEANING, CLEANWAIT, CLEANFAIL, + CLEANHOLD, ERROR, REBUILD, INSPECTING, INSPECTFAIL, + INSPECTWAIT, RESCUE, RESCUEFAIL, RESCUEWAIT, RESCUING, + SERVICING, SERVICEWAIT, SERVICEFAIL, SERVICEHOLD, + UNRESCUEFAIL, UNRESCUING, ENROLL, VERIFYING) """ A list of all provision states. """ diff --git a/releasenotes/notes/bug-2131960-servicing-states-446d0e58bb9aa5f2.yaml b/releasenotes/notes/bug-2131960-servicing-states-446d0e58bb9aa5f2.yaml new file mode 100644 index 000000000000..326b8a7c50b4 --- /dev/null +++ b/releasenotes/notes/bug-2131960-servicing-states-446d0e58bb9aa5f2.yaml @@ -0,0 +1,25 @@ +--- +security: + - | + [`bug 2131960 `_] + Fixed improper teardown of Ironic nodes in servicing states. + Previously, when Nova's virt driver called ``destroy()`` on Ironic + nodes in certain states (``deploy hold``, ``servicing``, ``service wait``, + ``service hold``, ``service failed``), these nodes would fail to undergo + proper teardown, resulting in orphaned instances where instance metadata + was removed but nodes remained unprovisioned in Ironic. In flat networking + configurations, this could allow nodes traversing servicing states to + briefly access tenant networks. In Neutron networking configurations, + while VIFs would detach correctly, nodes would remain in Ironic with a + shutdown IPA, and future deprovisioning attempts would fail due to missing + networking metadata. These states are now properly handled to ensure + complete node deprovisioning. +fixes: + - | + [`bug 2131960 `_] + Fixed a bug where Ironic nodes in servicing-related provision states + (``deploy hold``, ``servicing``, ``service wait``, ``service hold``, + ``service failed``) were not properly unprovisioned when instances were + destroyed. The Nova Ironic virt driver now correctly triggers + unprovisioning for nodes in these states, preventing orphaned node + configurations in Ironic.