Merge "Do not move nodes to CLEAN FAILED with empty last_error"
This commit is contained in:
commit
a3f854392a
@ -269,6 +269,9 @@ _FASTTRACK_LOOKUP_ALLOWED_STATES = (ENROLL, MANAGEABLE, AVAILABLE,
|
|||||||
FASTTRACK_LOOKUP_ALLOWED_STATES = frozenset(_FASTTRACK_LOOKUP_ALLOWED_STATES)
|
FASTTRACK_LOOKUP_ALLOWED_STATES = frozenset(_FASTTRACK_LOOKUP_ALLOWED_STATES)
|
||||||
"""States where API lookups are permitted with fast track enabled."""
|
"""States where API lookups are permitted with fast track enabled."""
|
||||||
|
|
||||||
|
FAILURE_STATES = frozenset((DEPLOYFAIL, CLEANFAIL, INSPECTFAIL,
|
||||||
|
RESCUEFAIL, UNRESCUEFAIL, ADOPTFAIL))
|
||||||
|
|
||||||
|
|
||||||
##############
|
##############
|
||||||
# Power states
|
# Power states
|
||||||
|
@ -248,12 +248,21 @@ def do_next_clean_step(task, step_index, disable_ramdisk=None):
|
|||||||
task.process_event(event)
|
task.process_event(event)
|
||||||
|
|
||||||
|
|
||||||
|
def get_last_error(node):
|
||||||
|
last_error = _('By request, the clean operation was aborted')
|
||||||
|
if node.clean_step:
|
||||||
|
last_error += (
|
||||||
|
_(' during or after the completion of step "%s"')
|
||||||
|
% conductor_steps.step_id(node.clean_step)
|
||||||
|
)
|
||||||
|
return last_error
|
||||||
|
|
||||||
|
|
||||||
@task_manager.require_exclusive_lock
|
@task_manager.require_exclusive_lock
|
||||||
def do_node_clean_abort(task, step_name=None):
|
def do_node_clean_abort(task):
|
||||||
"""Internal method to abort an ongoing operation.
|
"""Internal method to abort an ongoing operation.
|
||||||
|
|
||||||
:param task: a TaskManager instance with an exclusive lock
|
:param task: a TaskManager instance with an exclusive lock
|
||||||
:param step_name: The name of the clean step.
|
|
||||||
"""
|
"""
|
||||||
node = task.node
|
node = task.node
|
||||||
try:
|
try:
|
||||||
@ -271,12 +280,13 @@ def do_node_clean_abort(task, step_name=None):
|
|||||||
set_fail_state=False)
|
set_fail_state=False)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
last_error = get_last_error(node)
|
||||||
info_message = _('Clean operation aborted for node %s') % node.uuid
|
info_message = _('Clean operation aborted for node %s') % node.uuid
|
||||||
last_error = _('By request, the clean operation was aborted')
|
if node.clean_step:
|
||||||
if step_name:
|
info_message += (
|
||||||
msg = _(' after the completion of step "%s"') % step_name
|
_(' during or after the completion of step "%s"')
|
||||||
last_error += msg
|
% node.clean_step
|
||||||
info_message += msg
|
)
|
||||||
|
|
||||||
node.last_error = last_error
|
node.last_error = last_error
|
||||||
node.clean_step = None
|
node.clean_step = None
|
||||||
@ -318,7 +328,7 @@ def continue_node_clean(task):
|
|||||||
target_state = None
|
target_state = None
|
||||||
|
|
||||||
task.process_event('fail', target_state=target_state)
|
task.process_event('fail', target_state=target_state)
|
||||||
do_node_clean_abort(task, step_name)
|
do_node_clean_abort(task)
|
||||||
return
|
return
|
||||||
|
|
||||||
LOG.debug('The cleaning operation for node %(node)s was '
|
LOG.debug('The cleaning operation for node %(node)s was '
|
||||||
|
@ -1351,7 +1351,8 @@ class ConductorManager(base_manager.BaseConductorManager):
|
|||||||
callback=self._spawn_worker,
|
callback=self._spawn_worker,
|
||||||
call_args=(cleaning.do_node_clean_abort, task),
|
call_args=(cleaning.do_node_clean_abort, task),
|
||||||
err_handler=utils.provisioning_error_handler,
|
err_handler=utils.provisioning_error_handler,
|
||||||
target_state=target_state)
|
target_state=target_state,
|
||||||
|
last_error=cleaning.get_last_error(node))
|
||||||
return
|
return
|
||||||
|
|
||||||
if node.provision_state == states.RESCUEWAIT:
|
if node.provision_state == states.RESCUEWAIT:
|
||||||
|
@ -527,7 +527,8 @@ class TaskManager(object):
|
|||||||
self.release_resources()
|
self.release_resources()
|
||||||
|
|
||||||
def process_event(self, event, callback=None, call_args=None,
|
def process_event(self, event, callback=None, call_args=None,
|
||||||
call_kwargs=None, err_handler=None, target_state=None):
|
call_kwargs=None, err_handler=None, target_state=None,
|
||||||
|
last_error=None):
|
||||||
"""Process the given event for the task's current state.
|
"""Process the given event for the task's current state.
|
||||||
|
|
||||||
:param event: the name of the event to process
|
:param event: the name of the event to process
|
||||||
@ -540,6 +541,8 @@ class TaskManager(object):
|
|||||||
prev_target_state)
|
prev_target_state)
|
||||||
:param target_state: if specified, the target provision state for the
|
:param target_state: if specified, the target provision state for the
|
||||||
node. Otherwise, use the target state from the fsm
|
node. Otherwise, use the target state from the fsm
|
||||||
|
:param last_error: last error to set on the node together with
|
||||||
|
the state transition.
|
||||||
:raises: InvalidState if the event is not allowed by the associated
|
:raises: InvalidState if the event is not allowed by the associated
|
||||||
state machine
|
state machine
|
||||||
"""
|
"""
|
||||||
@ -572,13 +575,15 @@ class TaskManager(object):
|
|||||||
|
|
||||||
# set up the async worker
|
# set up the async worker
|
||||||
if callback:
|
if callback:
|
||||||
# clear the error if we're going to start work in a callback
|
# update the error if we're going to start work in a callback
|
||||||
self.node.last_error = None
|
self.node.last_error = last_error
|
||||||
if call_args is None:
|
if call_args is None:
|
||||||
call_args = ()
|
call_args = ()
|
||||||
if call_kwargs is None:
|
if call_kwargs is None:
|
||||||
call_kwargs = {}
|
call_kwargs = {}
|
||||||
self.spawn_after(callback, *call_args, **call_kwargs)
|
self.spawn_after(callback, *call_args, **call_kwargs)
|
||||||
|
elif last_error is not None:
|
||||||
|
self.node.last_error = last_error
|
||||||
|
|
||||||
# publish the state transition by saving the Node
|
# publish the state transition by saving the Node
|
||||||
self.node.save()
|
self.node.save()
|
||||||
|
@ -302,9 +302,11 @@ def node_power_action(task, new_state, timeout=None):
|
|||||||
|
|
||||||
# Set the target_power_state and clear any last_error, if we're
|
# Set the target_power_state and clear any last_error, if we're
|
||||||
# starting a new operation. This will expose to other processes
|
# starting a new operation. This will expose to other processes
|
||||||
# and clients that work is in progress.
|
# and clients that work is in progress. Keep the last_error intact
|
||||||
node['target_power_state'] = target_state
|
# if the power action happens as a result of a failure.
|
||||||
node['last_error'] = None
|
node.target_power_state = target_state
|
||||||
|
if node.provision_state not in states.FAILURE_STATES:
|
||||||
|
node.last_error = None
|
||||||
node.timestamp_driver_internal_info('last_power_state_change')
|
node.timestamp_driver_internal_info('last_power_state_change')
|
||||||
# NOTE(dtantsur): wipe token on shutting down, otherwise a reboot in
|
# NOTE(dtantsur): wipe token on shutting down, otherwise a reboot in
|
||||||
# fast-track (or an accidentally booted agent) will cause subsequent
|
# fast-track (or an accidentally booted agent) will cause subsequent
|
||||||
|
@ -1138,12 +1138,12 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
|
|||||||
|
|
||||||
class DoNodeCleanAbortTestCase(db_base.DbTestCase):
|
class DoNodeCleanAbortTestCase(db_base.DbTestCase):
|
||||||
@mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True)
|
@mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True)
|
||||||
def _test__do_node_clean_abort(self, step_name, tear_mock):
|
def _test_do_node_clean_abort(self, clean_step, tear_mock):
|
||||||
node = obj_utils.create_test_node(
|
node = obj_utils.create_test_node(
|
||||||
self.context, driver='fake-hardware',
|
self.context, driver='fake-hardware',
|
||||||
provision_state=states.CLEANFAIL,
|
provision_state=states.CLEANWAIT,
|
||||||
target_provision_state=states.AVAILABLE,
|
target_provision_state=states.AVAILABLE,
|
||||||
clean_step={'step': 'foo', 'abortable': True},
|
clean_step=clean_step,
|
||||||
driver_internal_info={
|
driver_internal_info={
|
||||||
'agent_url': 'some url',
|
'agent_url': 'some url',
|
||||||
'agent_secret_token': 'token',
|
'agent_secret_token': 'token',
|
||||||
@ -1153,11 +1153,11 @@ class DoNodeCleanAbortTestCase(db_base.DbTestCase):
|
|||||||
'skip_current_clean_step': True})
|
'skip_current_clean_step': True})
|
||||||
|
|
||||||
with task_manager.acquire(self.context, node.uuid) as task:
|
with task_manager.acquire(self.context, node.uuid) as task:
|
||||||
cleaning.do_node_clean_abort(task, step_name=step_name)
|
cleaning.do_node_clean_abort(task)
|
||||||
self.assertIsNotNone(task.node.last_error)
|
self.assertIsNotNone(task.node.last_error)
|
||||||
tear_mock.assert_called_once_with(task.driver.deploy, task)
|
tear_mock.assert_called_once_with(task.driver.deploy, task)
|
||||||
if step_name:
|
if clean_step:
|
||||||
self.assertIn(step_name, task.node.last_error)
|
self.assertIn(clean_step['step'], task.node.last_error)
|
||||||
# assert node's clean_step and metadata was cleaned up
|
# assert node's clean_step and metadata was cleaned up
|
||||||
self.assertEqual({}, task.node.clean_step)
|
self.assertEqual({}, task.node.clean_step)
|
||||||
self.assertNotIn('clean_step_index',
|
self.assertNotIn('clean_step_index',
|
||||||
@ -1173,11 +1173,12 @@ class DoNodeCleanAbortTestCase(db_base.DbTestCase):
|
|||||||
self.assertNotIn('agent_secret_token',
|
self.assertNotIn('agent_secret_token',
|
||||||
task.node.driver_internal_info)
|
task.node.driver_internal_info)
|
||||||
|
|
||||||
def test__do_node_clean_abort(self):
|
def test_do_node_clean_abort_early(self):
|
||||||
self._test__do_node_clean_abort(None)
|
self._test_do_node_clean_abort(None)
|
||||||
|
|
||||||
def test__do_node_clean_abort_with_step_name(self):
|
def test_do_node_clean_abort_with_step(self):
|
||||||
self._test__do_node_clean_abort('foo')
|
self._test_do_node_clean_abort({'step': 'foo', 'interface': 'deploy',
|
||||||
|
'abortable': True})
|
||||||
|
|
||||||
@mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True)
|
@mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True)
|
||||||
def test__do_node_clean_abort_tear_down_fail(self, tear_mock):
|
def test__do_node_clean_abort_tear_down_fail(self, tear_mock):
|
||||||
|
@ -2735,7 +2735,8 @@ class DoProvisioningActionTestCase(mgr_utils.ServiceSetUpMixin,
|
|||||||
# Node will be moved to tgt_prov_state after cleaning, not tested here
|
# Node will be moved to tgt_prov_state after cleaning, not tested here
|
||||||
self.assertEqual(states.CLEANFAIL, node.provision_state)
|
self.assertEqual(states.CLEANFAIL, node.provision_state)
|
||||||
self.assertEqual(tgt_prov_state, node.target_provision_state)
|
self.assertEqual(tgt_prov_state, node.target_provision_state)
|
||||||
self.assertIsNone(node.last_error)
|
self.assertEqual('By request, the clean operation was aborted',
|
||||||
|
node.last_error)
|
||||||
mock_spawn.assert_called_with(
|
mock_spawn.assert_called_with(
|
||||||
self.service, cleaning.do_node_clean_abort, mock.ANY)
|
self.service, cleaning.do_node_clean_abort, mock.ANY)
|
||||||
|
|
||||||
|
@ -196,7 +196,8 @@ class NodePowerActionTestCase(db_base.DbTestCase):
|
|||||||
node = obj_utils.create_test_node(self.context,
|
node = obj_utils.create_test_node(self.context,
|
||||||
uuid=uuidutils.generate_uuid(),
|
uuid=uuidutils.generate_uuid(),
|
||||||
driver='fake-hardware',
|
driver='fake-hardware',
|
||||||
power_state=states.POWER_OFF)
|
power_state=states.POWER_OFF,
|
||||||
|
last_error='failed before')
|
||||||
task = task_manager.TaskManager(self.context, node.uuid)
|
task = task_manager.TaskManager(self.context, node.uuid)
|
||||||
|
|
||||||
get_power_mock.return_value = states.POWER_OFF
|
get_power_mock.return_value = states.POWER_OFF
|
||||||
@ -209,6 +210,27 @@ class NodePowerActionTestCase(db_base.DbTestCase):
|
|||||||
self.assertIsNone(node['target_power_state'])
|
self.assertIsNone(node['target_power_state'])
|
||||||
self.assertIsNone(node['last_error'])
|
self.assertIsNone(node['last_error'])
|
||||||
|
|
||||||
|
@mock.patch.object(fake.FakePower, 'get_power_state', autospec=True)
|
||||||
|
def test_node_power_action_keep_last_error(self, get_power_mock):
|
||||||
|
"""Test node_power_action to keep last_error for failed states."""
|
||||||
|
node = obj_utils.create_test_node(self.context,
|
||||||
|
uuid=uuidutils.generate_uuid(),
|
||||||
|
driver='fake-hardware',
|
||||||
|
power_state=states.POWER_OFF,
|
||||||
|
provision_state=states.CLEANFAIL,
|
||||||
|
last_error='failed before')
|
||||||
|
task = task_manager.TaskManager(self.context, node.uuid)
|
||||||
|
|
||||||
|
get_power_mock.return_value = states.POWER_OFF
|
||||||
|
|
||||||
|
conductor_utils.node_power_action(task, states.POWER_ON)
|
||||||
|
|
||||||
|
node.refresh()
|
||||||
|
get_power_mock.assert_called_once_with(mock.ANY, mock.ANY)
|
||||||
|
self.assertEqual(states.POWER_ON, node['power_state'])
|
||||||
|
self.assertIsNone(node['target_power_state'])
|
||||||
|
self.assertEqual('failed before', node['last_error'])
|
||||||
|
|
||||||
@mock.patch('ironic.objects.node.NodeSetPowerStateNotification',
|
@mock.patch('ironic.objects.node.NodeSetPowerStateNotification',
|
||||||
autospec=True)
|
autospec=True)
|
||||||
@mock.patch.object(fake.FakePower, 'get_power_state', autospec=True)
|
@mock.patch.object(fake.FakePower, 'get_power_state', autospec=True)
|
||||||
|
8
releasenotes/notes/cleaning-error-5c13c33c58404b97.yaml
Normal file
8
releasenotes/notes/cleaning-error-5c13c33c58404b97.yaml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
When aborting cleaning, the ``last_error`` field is no longer initially
|
||||||
|
empty. It is now populated on the state transition to ``clean failed``.
|
||||||
|
- |
|
||||||
|
When cleaning or deployment fails, the ``last_error`` field is no longer
|
||||||
|
temporary set to ``None`` while the power off action is running.
|
Loading…
Reference in New Issue
Block a user