Fix spawn error hook in "continue_node_clean" RPC method

1) utils.cleaning_error_handler() as hook in current code does not
have an exception parameter that task context manager adds.
2) It's not needed to set fail state on "NoFreeConductorWorker"
exception, this is driver specific. IPA driver can obtain a free
worker on the next heartbeat(s).

This patch also adds missing tests for error handlers functions.

Closes-Bug: #1539118
Change-Id: I9081d5b3437e9be6b68fe9c635b38f84637e93df
This commit is contained in:
Yuriy Zveryanskyy 2016-01-28 16:39:21 +02:00
parent fd41d6c196
commit 8c8704e23a
6 changed files with 164 additions and 11 deletions

View File

@ -816,8 +816,8 @@ class ConductorManager(base_manager.BaseConductorManager):
if node.provision_state == states.CLEANWAIT: if node.provision_state == states.CLEANWAIT:
task.process_event('resume', target_state=target_state) task.process_event('resume', target_state=target_state)
task.set_spawn_error_hook(utils.cleaning_error_handler, task.node, task.set_spawn_error_hook(utils.spawn_cleaning_error_handler,
_('Failed to run next clean step')) task.node)
task.spawn_after( task.spawn_after(
self._spawn_worker, self._spawn_worker,
self._do_next_clean_step, self._do_next_clean_step,

View File

@ -227,6 +227,15 @@ def cleaning_error_handler(task, msg, tear_down_cleaning=True,
task.process_event('fail', target_state=target_state) task.process_event('fail', target_state=target_state)
def spawn_cleaning_error_handler(e, node):
"""Handle spawning error for node cleaning."""
if isinstance(e, exception.NoFreeConductorWorker):
node.last_error = (_("No free conductor workers available"))
node.save()
LOG.warning(_LW("No free conductor workers available to perform "
"cleaning on node %(node)s"), {'node': node.uuid})
def power_state_error_handler(e, node, power_state): def power_state_error_handler(e, node, power_state):
"""Set the node's power states if error occurs. """Set the node's power states if error occurs.

View File

@ -350,15 +350,20 @@ class BaseAgentVendor(base.VendorInterface):
# release starts. # release starts.
elif node.provision_state in (states.CLEANWAIT, states.CLEANING): elif node.provision_state in (states.CLEANWAIT, states.CLEANING):
node.touch_provisioning() node.touch_provisioning()
if not node.clean_step: try:
LOG.debug('Node %s just booted to start cleaning.', if not node.clean_step:
node.uuid) LOG.debug('Node %s just booted to start cleaning.',
msg = _('Node failed to start the next cleaning step.') node.uuid)
manager_utils.set_node_cleaning_steps(task) msg = _('Node failed to start the next cleaning step.')
self.notify_conductor_resume_clean(task) manager_utils.set_node_cleaning_steps(task)
else: self.notify_conductor_resume_clean(task)
msg = _('Node failed to check cleaning progress.') else:
self.continue_cleaning(task, **kwargs) msg = _('Node failed to check cleaning progress.')
self.continue_cleaning(task, **kwargs)
except exception.NoFreeConductorWorker:
# waiting for the next heartbeat, node.last_error and
# logging message is filled already via conductor's hook
pass
except Exception as e: except Exception as e:
err_info = {'node': node.uuid, 'msg': msg, 'e': e} err_info = {'node': node.uuid, 'msg': msg, 'e': e}

View File

@ -538,3 +538,109 @@ class NodeCleaningStepsTestCase(base.DbTestCase):
conductor_utils._validate_user_clean_steps, conductor_utils._validate_user_clean_steps,
task, user_steps) task, user_steps)
mock_steps.assert_called_once_with(task, enabled=False, sort=False) mock_steps.assert_called_once_with(task, enabled=False, sort=False)
class ErrorHandlersTestCase(tests_base.TestCase):
def setUp(self):
super(ErrorHandlersTestCase, self).setUp()
self.task = mock.Mock(spec=task_manager.TaskManager)
self.task.driver = mock.Mock(spec_set=['deploy'])
self.task.node = mock.Mock(spec_set=objects.Node)
self.node = self.task.node
@mock.patch.object(conductor_utils, 'LOG')
def test_provision_error_handler_no_worker(self, log_mock):
exc = exception.NoFreeConductorWorker()
conductor_utils.provisioning_error_handler(exc, self.node, 'state-one',
'state-two')
self.node.save.assert_called_once_with()
self.assertEqual('state-one', self.node.provision_state)
self.assertEqual('state-two', self.node.target_provision_state)
self.assertIn('No free conductor workers', self.node.last_error)
self.assertTrue(log_mock.warning.called)
@mock.patch.object(conductor_utils, 'LOG')
def test_provision_error_handler_other_error(self, log_mock):
exc = Exception('foo')
conductor_utils.provisioning_error_handler(exc, self.node, 'state-one',
'state-two')
self.assertFalse(self.node.save.called)
self.assertFalse(log_mock.warning.called)
def test_cleaning_error_handler(self):
self.node.provision_state = states.CLEANING
target = 'baz'
self.node.target_provision_state = target
msg = 'error bar'
conductor_utils.cleaning_error_handler(self.task, msg)
self.node.save.assert_called_once_with()
self.assertEqual({}, self.node.clean_step)
self.assertEqual(msg, self.node.last_error)
self.assertTrue(self.node.maintenance)
self.assertEqual(msg, self.node.maintenance_reason)
driver = self.task.driver.deploy
driver.tear_down_cleaning.assert_called_once_with(self.task)
self.task.process_event.assert_called_once_with('fail',
target_state=None)
def test_cleaning_error_handler_manual(self):
target = states.MANAGEABLE
self.node.target_provision_state = target
conductor_utils.cleaning_error_handler(self.task, 'foo')
self.task.process_event.assert_called_once_with('fail',
target_state=target)
def test_cleaning_error_handler_no_teardown(self):
target = states.MANAGEABLE
self.node.target_provision_state = target
conductor_utils.cleaning_error_handler(self.task, 'foo',
tear_down_cleaning=False)
self.assertFalse(self.task.driver.deploy.tear_down_cleaning.called)
self.task.process_event.assert_called_once_with('fail',
target_state=target)
def test_cleaning_error_handler_no_fail(self):
conductor_utils.cleaning_error_handler(self.task, 'foo',
set_fail_state=False)
driver = self.task.driver.deploy
driver.tear_down_cleaning.assert_called_once_with(self.task)
self.assertFalse(self.task.process_event.called)
@mock.patch.object(conductor_utils, 'LOG')
def test_cleaning_error_handler_tear_down_error(self, log_mock):
driver = self.task.driver.deploy
driver.tear_down_cleaning.side_effect = Exception('bar')
conductor_utils.cleaning_error_handler(self.task, 'foo')
self.assertTrue(log_mock.exception.called)
@mock.patch.object(conductor_utils, 'LOG')
def test_spawn_cleaning_error_handler_no_worker(self, log_mock):
exc = exception.NoFreeConductorWorker()
conductor_utils.spawn_cleaning_error_handler(exc, self.node)
self.node.save.assert_called_once_with()
self.assertIn('No free conductor workers', self.node.last_error)
self.assertTrue(log_mock.warning.called)
@mock.patch.object(conductor_utils, 'LOG')
def test_spawn_cleaning_error_handler_other_error(self, log_mock):
exc = Exception('foo')
conductor_utils.spawn_cleaning_error_handler(exc, self.node)
self.assertFalse(self.node.save.called)
self.assertFalse(log_mock.warning.called)
@mock.patch.object(conductor_utils, 'LOG')
def test_power_state_error_handler_no_worker(self, log_mock):
exc = exception.NoFreeConductorWorker()
conductor_utils.power_state_error_handler(exc, self.node, 'newstate')
self.node.save.assert_called_once_with()
self.assertEqual('newstate', self.node.power_state)
self.assertEqual(states.NOSTATE, self.node.target_power_state)
self.assertIn('No free conductor workers', self.node.last_error)
self.assertTrue(log_mock.warning.called)
@mock.patch.object(conductor_utils, 'LOG')
def test_power_state_error_handler_other_error(self, log_mock):
exc = Exception('foo')
conductor_utils.power_state_error_handler(exc, self.node, 'foo')
self.assertFalse(self.node.save.called)
self.assertFalse(log_mock.warning.called)

View File

@ -436,6 +436,35 @@ class TestBaseAgentVendor(db_base.DbTestCase):
mock_handler.reset_mock() mock_handler.reset_mock()
mock_continue.reset_mock() mock_continue.reset_mock()
@mock.patch.object(manager_utils, 'cleaning_error_handler')
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
'continue_cleaning', autospec=True)
def test_heartbeat_continue_cleaning_no_worker(self, mock_continue,
mock_handler):
kwargs = {
'agent_url': 'http://127.0.0.1:9999/bar'
}
self.node.clean_step = {
'priority': 10,
'interface': 'deploy',
'step': 'foo',
'reboot_requested': False
}
mock_continue.side_effect = exception.NoFreeConductorWorker()
for state in (states.CLEANWAIT, states.CLEANING):
self.node.provision_state = state
self.node.save()
with task_manager.acquire(
self.context, self.node.uuid, shared=True) as task:
self.passthru.heartbeat(task, **kwargs)
mock_continue.assert_called_once_with(mock.ANY, task, **kwargs)
self.assertFalse(mock_handler.called)
mock_handler.reset_mock()
mock_continue.reset_mock()
@mock.patch.object(agent_base_vendor.BaseAgentVendor, 'continue_deploy', @mock.patch.object(agent_base_vendor.BaseAgentVendor, 'continue_deploy',
autospec=True) autospec=True)
@mock.patch.object(agent_base_vendor.BaseAgentVendor, 'reboot_to_instance', @mock.patch.object(agent_base_vendor.BaseAgentVendor, 'reboot_to_instance',

View File

@ -0,0 +1,4 @@
---
fixes:
- Fix issues with error handling when spawning a new thread to continue
cleaning. See https://bugs.launchpad.net/ironic/+bug/1539118.