From 4baa4fee12f80f264076cfdee61bd0a36aed0350 Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Fri, 3 Jul 2020 11:41:29 +0200 Subject: [PATCH] Clean up nodes in DELETING on conductor restart Change-Id: Iad6cb02ee4f17b7c5987719b3a0603df210c840d (cherry picked from commit 4f6374a209dcf96ecf7d55da4999b045ae4061ef) --- ironic/common/states.py | 4 ++-- ironic/conductor/manager.py | 2 +- ironic/tests/unit/conductor/test_base_manager.py | 1 + ironic/tests/unit/conductor/test_manager.py | 2 +- releasenotes/notes/deleting-dcdb9cf0d2a6a1a6.yaml | 4 ++++ 5 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 releasenotes/notes/deleting-dcdb9cf0d2a6a1a6.yaml diff --git a/ironic/common/states.py b/ironic/common/states.py index cefdbd8382..567ce0eea3 100644 --- a/ironic/common/states.py +++ b/ironic/common/states.py @@ -235,7 +235,7 @@ UNSTABLE_STATES = (DEPLOYING, DEPLOYWAIT, CLEANING, CLEANWAIT, VERIFYING, """States that can be changed without external request.""" STUCK_STATES_TREATED_AS_FAIL = (DEPLOYING, CLEANING, VERIFYING, INSPECTING, - ADOPTING, RESCUING, UNRESCUING) + ADOPTING, RESCUING, UNRESCUING, DELETING) """States that cannot be resumed once a conductor dies. 
If a node gets stuck with one of these states for some reason @@ -384,7 +384,7 @@ machine.add_transition(DEPLOYWAIT, DELETING, 'delete') machine.add_transition(DEPLOYFAIL, DELETING, 'delete') # This state can also transition to error -machine.add_transition(DELETING, ERROR, 'error') +machine.add_transition(DELETING, ERROR, 'fail') # When finished deleting, a node will begin cleaning machine.add_transition(DELETING, CLEANING, 'clean') diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 4c218b3504..dcd973b368 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -1011,7 +1011,7 @@ class ConductorManager(base_manager.BaseConductorManager): LOG.exception('Error in tear_down of node %(node)s: %(err)s', {'node': node.uuid, 'err': e}) node.last_error = _("Failed to tear down. Error: %s") % e - task.process_event('error') + task.process_event('fail') else: # NOTE(tenbrae): When tear_down finishes, the deletion is done, # cleaning will start next diff --git a/ironic/tests/unit/conductor/test_base_manager.py b/ironic/tests/unit/conductor/test_base_manager.py index 49cacbff5b..2fd305dd1d 100644 --- a/ironic/tests/unit/conductor/test_base_manager.py +++ b/ironic/tests/unit/conductor/test_base_manager.py @@ -230,6 +230,7 @@ class StartStopTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): (states.ADOPTING, states.ADOPTFAIL), (states.RESCUING, states.RESCUEFAIL), (states.UNRESCUING, states.UNRESCUEFAIL), + (states.DELETING, states.ERROR), ] nodes = [obj_utils.create_test_node(self.context, uuid=uuid.uuid4(), driver='fake-hardware', diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index 6e88255839..2846ff9ff5 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -2507,7 +2507,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): def test_continue_node_clean_wrong_state(self, mock_spawn): # 
Test the appropriate exception is raised if node isn't already # in CLEANWAIT state - prv_state = states.DELETING + prv_state = states.ACTIVE tgt_prv_state = states.AVAILABLE node = obj_utils.create_test_node(self.context, driver='fake-hardware', provision_state=prv_state, diff --git a/releasenotes/notes/deleting-dcdb9cf0d2a6a1a6.yaml b/releasenotes/notes/deleting-dcdb9cf0d2a6a1a6.yaml new file mode 100644 index 0000000000..3b53b2e9a8 --- /dev/null +++ b/releasenotes/notes/deleting-dcdb9cf0d2a6a1a6.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + Moves nodes stuck in the ``deleting`` state to ``error`` on conductor restart.