Clean up nodes stuck in the DELETING state on conductor restart

Change-Id: Iad6cb02ee4f17b7c5987719b3a0603df210c840d
This commit is contained in:
Dmitry Tantsur 2020-07-03 11:41:29 +02:00
parent 5026854e31
commit 4f6374a209
5 changed files with 9 additions and 4 deletions

View File

@ -235,7 +235,7 @@ UNSTABLE_STATES = (DEPLOYING, DEPLOYWAIT, CLEANING, CLEANWAIT, VERIFYING,
"""States that can be changed without external request.""" """States that can be changed without external request."""
STUCK_STATES_TREATED_AS_FAIL = (DEPLOYING, CLEANING, VERIFYING, INSPECTING, STUCK_STATES_TREATED_AS_FAIL = (DEPLOYING, CLEANING, VERIFYING, INSPECTING,
ADOPTING, RESCUING, UNRESCUING) ADOPTING, RESCUING, UNRESCUING, DELETING)
"""States that cannot be resumed once a conductor dies. """States that cannot be resumed once a conductor dies.
If a node gets stuck with one of these states for some reason If a node gets stuck with one of these states for some reason
@ -384,7 +384,7 @@ machine.add_transition(DEPLOYWAIT, DELETING, 'delete')
machine.add_transition(DEPLOYFAIL, DELETING, 'delete') machine.add_transition(DEPLOYFAIL, DELETING, 'delete')
# This state can also transition to error # This state can also transition to error
machine.add_transition(DELETING, ERROR, 'error') machine.add_transition(DELETING, ERROR, 'fail')
# When finished deleting, a node will begin cleaning # When finished deleting, a node will begin cleaning
machine.add_transition(DELETING, CLEANING, 'clean') machine.add_transition(DELETING, CLEANING, 'clean')

View File

@ -1011,7 +1011,7 @@ class ConductorManager(base_manager.BaseConductorManager):
LOG.exception('Error in tear_down of node %(node)s: %(err)s', LOG.exception('Error in tear_down of node %(node)s: %(err)s',
{'node': node.uuid, 'err': e}) {'node': node.uuid, 'err': e})
node.last_error = _("Failed to tear down. Error: %s") % e node.last_error = _("Failed to tear down. Error: %s") % e
task.process_event('error') task.process_event('fail')
else: else:
# NOTE(tenbrae): When tear_down finishes, the deletion is done, # NOTE(tenbrae): When tear_down finishes, the deletion is done,
# cleaning will start next # cleaning will start next

View File

@ -235,6 +235,7 @@ class StartStopTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
(states.ADOPTING, states.ADOPTFAIL), (states.ADOPTING, states.ADOPTFAIL),
(states.RESCUING, states.RESCUEFAIL), (states.RESCUING, states.RESCUEFAIL),
(states.UNRESCUING, states.UNRESCUEFAIL), (states.UNRESCUING, states.UNRESCUEFAIL),
(states.DELETING, states.ERROR),
] ]
nodes = [obj_utils.create_test_node(self.context, uuid=uuid.uuid4(), nodes = [obj_utils.create_test_node(self.context, uuid=uuid.uuid4(),
driver='fake-hardware', driver='fake-hardware',

View File

@ -2527,7 +2527,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
def test_continue_node_clean_wrong_state(self, mock_spawn): def test_continue_node_clean_wrong_state(self, mock_spawn):
# Test the appropriate exception is raised if node isn't already # Test the appropriate exception is raised if node isn't already
# in CLEANWAIT state # in CLEANWAIT state
prv_state = states.DELETING prv_state = states.ACTIVE
tgt_prv_state = states.AVAILABLE tgt_prv_state = states.AVAILABLE
node = obj_utils.create_test_node(self.context, driver='fake-hardware', node = obj_utils.create_test_node(self.context, driver='fake-hardware',
provision_state=prv_state, provision_state=prv_state,

View File

@ -0,0 +1,4 @@
---
fixes:
- |
Cleans up nodes stuck in the ``deleting`` state on conductor restart by moving them to the ``error`` state.