Clean up nodes in DELETING on conductor restart

Change-Id: Iad6cb02ee4f17b7c5987719b3a0603df210c840d
(cherry picked from commit 4f6374a209)
This commit is contained in:
Dmitry Tantsur 2020-07-03 11:41:29 +02:00
parent f1b2cb7454
commit 549af3cec2
5 changed files with 9 additions and 4 deletions

View File

@@ -235,7 +235,7 @@ UNSTABLE_STATES = (DEPLOYING, DEPLOYWAIT, CLEANING, CLEANWAIT, VERIFYING,
"""States that can be changed without external request."""
STUCK_STATES_TREATED_AS_FAIL = (DEPLOYING, CLEANING, VERIFYING, INSPECTING,
-ADOPTING, RESCUING, UNRESCUING)
+ADOPTING, RESCUING, UNRESCUING, DELETING)
"""States that cannot be resumed once a conductor dies.
If a node gets stuck with one of these states for some reason
@@ -383,7 +383,7 @@ machine.add_transition(DEPLOYWAIT, DELETING, 'delete')
machine.add_transition(DEPLOYFAIL, DELETING, 'delete')
# This state can also transition to error
-machine.add_transition(DELETING, ERROR, 'error')
+machine.add_transition(DELETING, ERROR, 'fail')
# When finished deleting, a node will begin cleaning
machine.add_transition(DELETING, CLEANING, 'clean')

View File

@@ -1033,7 +1033,7 @@ class ConductorManager(base_manager.BaseConductorManager):
LOG.exception('Error in tear_down of node %(node)s: %(err)s',
{'node': node.uuid, 'err': e})
node.last_error = _("Failed to tear down. Error: %s") % e
-task.process_event('error')
+task.process_event('fail')
else:
# NOTE(deva): When tear_down finishes, the deletion is done,
# cleaning will start next

View File

@@ -217,6 +217,7 @@ class StartStopTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
(states.ADOPTING, states.ADOPTFAIL),
(states.RESCUING, states.RESCUEFAIL),
(states.UNRESCUING, states.UNRESCUEFAIL),
+(states.DELETING, states.ERROR),
]
nodes = [obj_utils.create_test_node(self.context, uuid=uuid.uuid4(),
driver='fake-hardware',

View File

@@ -3434,7 +3434,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
def test_continue_node_clean_wrong_state(self, mock_spawn):
# Test the appropriate exception is raised if node isn't already
# in CLEANWAIT state
-prv_state = states.DELETING
+prv_state = states.ACTIVE
tgt_prv_state = states.AVAILABLE
node = obj_utils.create_test_node(self.context, driver='fake-hardware',
provision_state=prv_state,

View File

@@ -0,0 +1,4 @@
+---
+fixes:
+  - |
+    Cleans up nodes stuck in the ``deleting`` state on conductor restart.