Merge "Fix spawn error hook in "continue_node_clean" RPC method"

This commit is contained in:
Jenkins 2016-02-03 19:56:30 +00:00 committed by Gerrit Code Review
commit eef666695e
6 changed files with 164 additions and 11 deletions

View File

@ -816,8 +816,8 @@ class ConductorManager(base_manager.BaseConductorManager):
if node.provision_state == states.CLEANWAIT:
task.process_event('resume', target_state=target_state)
task.set_spawn_error_hook(utils.cleaning_error_handler, task.node,
_('Failed to run next clean step'))
task.set_spawn_error_hook(utils.spawn_cleaning_error_handler,
task.node)
task.spawn_after(
self._spawn_worker,
self._do_next_clean_step,

View File

@ -227,6 +227,15 @@ def cleaning_error_handler(task, msg, tear_down_cleaning=True,
task.process_event('fail', target_state=target_state)
def spawn_cleaning_error_handler(e, node):
"""Handle spawning error for node cleaning."""
if isinstance(e, exception.NoFreeConductorWorker):
node.last_error = (_("No free conductor workers available"))
node.save()
LOG.warning(_LW("No free conductor workers available to perform "
"cleaning on node %(node)s"), {'node': node.uuid})
def power_state_error_handler(e, node, power_state):
"""Set the node's power states if error occurs.

View File

@ -350,15 +350,20 @@ class BaseAgentVendor(base.VendorInterface):
# release starts.
elif node.provision_state in (states.CLEANWAIT, states.CLEANING):
node.touch_provisioning()
if not node.clean_step:
LOG.debug('Node %s just booted to start cleaning.',
node.uuid)
msg = _('Node failed to start the next cleaning step.')
manager_utils.set_node_cleaning_steps(task)
self.notify_conductor_resume_clean(task)
else:
msg = _('Node failed to check cleaning progress.')
self.continue_cleaning(task, **kwargs)
try:
if not node.clean_step:
LOG.debug('Node %s just booted to start cleaning.',
node.uuid)
msg = _('Node failed to start the next cleaning step.')
manager_utils.set_node_cleaning_steps(task)
self.notify_conductor_resume_clean(task)
else:
msg = _('Node failed to check cleaning progress.')
self.continue_cleaning(task, **kwargs)
except exception.NoFreeConductorWorker:
# waiting for the next heartbeat, node.last_error and
# logging message is filled already via conductor's hook
pass
except Exception as e:
err_info = {'node': node.uuid, 'msg': msg, 'e': e}

View File

@ -538,3 +538,109 @@ class NodeCleaningStepsTestCase(base.DbTestCase):
conductor_utils._validate_user_clean_steps,
task, user_steps)
mock_steps.assert_called_once_with(task, enabled=False, sort=False)
class ErrorHandlersTestCase(tests_base.TestCase):
def setUp(self):
super(ErrorHandlersTestCase, self).setUp()
self.task = mock.Mock(spec=task_manager.TaskManager)
self.task.driver = mock.Mock(spec_set=['deploy'])
self.task.node = mock.Mock(spec_set=objects.Node)
self.node = self.task.node
@mock.patch.object(conductor_utils, 'LOG')
def test_provision_error_handler_no_worker(self, log_mock):
exc = exception.NoFreeConductorWorker()
conductor_utils.provisioning_error_handler(exc, self.node, 'state-one',
'state-two')
self.node.save.assert_called_once_with()
self.assertEqual('state-one', self.node.provision_state)
self.assertEqual('state-two', self.node.target_provision_state)
self.assertIn('No free conductor workers', self.node.last_error)
self.assertTrue(log_mock.warning.called)
@mock.patch.object(conductor_utils, 'LOG')
def test_provision_error_handler_other_error(self, log_mock):
exc = Exception('foo')
conductor_utils.provisioning_error_handler(exc, self.node, 'state-one',
'state-two')
self.assertFalse(self.node.save.called)
self.assertFalse(log_mock.warning.called)
def test_cleaning_error_handler(self):
self.node.provision_state = states.CLEANING
target = 'baz'
self.node.target_provision_state = target
msg = 'error bar'
conductor_utils.cleaning_error_handler(self.task, msg)
self.node.save.assert_called_once_with()
self.assertEqual({}, self.node.clean_step)
self.assertEqual(msg, self.node.last_error)
self.assertTrue(self.node.maintenance)
self.assertEqual(msg, self.node.maintenance_reason)
driver = self.task.driver.deploy
driver.tear_down_cleaning.assert_called_once_with(self.task)
self.task.process_event.assert_called_once_with('fail',
target_state=None)
def test_cleaning_error_handler_manual(self):
target = states.MANAGEABLE
self.node.target_provision_state = target
conductor_utils.cleaning_error_handler(self.task, 'foo')
self.task.process_event.assert_called_once_with('fail',
target_state=target)
def test_cleaning_error_handler_no_teardown(self):
target = states.MANAGEABLE
self.node.target_provision_state = target
conductor_utils.cleaning_error_handler(self.task, 'foo',
tear_down_cleaning=False)
self.assertFalse(self.task.driver.deploy.tear_down_cleaning.called)
self.task.process_event.assert_called_once_with('fail',
target_state=target)
def test_cleaning_error_handler_no_fail(self):
conductor_utils.cleaning_error_handler(self.task, 'foo',
set_fail_state=False)
driver = self.task.driver.deploy
driver.tear_down_cleaning.assert_called_once_with(self.task)
self.assertFalse(self.task.process_event.called)
@mock.patch.object(conductor_utils, 'LOG')
def test_cleaning_error_handler_tear_down_error(self, log_mock):
driver = self.task.driver.deploy
driver.tear_down_cleaning.side_effect = Exception('bar')
conductor_utils.cleaning_error_handler(self.task, 'foo')
self.assertTrue(log_mock.exception.called)
@mock.patch.object(conductor_utils, 'LOG')
def test_spawn_cleaning_error_handler_no_worker(self, log_mock):
exc = exception.NoFreeConductorWorker()
conductor_utils.spawn_cleaning_error_handler(exc, self.node)
self.node.save.assert_called_once_with()
self.assertIn('No free conductor workers', self.node.last_error)
self.assertTrue(log_mock.warning.called)
@mock.patch.object(conductor_utils, 'LOG')
def test_spawn_cleaning_error_handler_other_error(self, log_mock):
exc = Exception('foo')
conductor_utils.spawn_cleaning_error_handler(exc, self.node)
self.assertFalse(self.node.save.called)
self.assertFalse(log_mock.warning.called)
@mock.patch.object(conductor_utils, 'LOG')
def test_power_state_error_handler_no_worker(self, log_mock):
exc = exception.NoFreeConductorWorker()
conductor_utils.power_state_error_handler(exc, self.node, 'newstate')
self.node.save.assert_called_once_with()
self.assertEqual('newstate', self.node.power_state)
self.assertEqual(states.NOSTATE, self.node.target_power_state)
self.assertIn('No free conductor workers', self.node.last_error)
self.assertTrue(log_mock.warning.called)
@mock.patch.object(conductor_utils, 'LOG')
def test_power_state_error_handler_other_error(self, log_mock):
exc = Exception('foo')
conductor_utils.power_state_error_handler(exc, self.node, 'foo')
self.assertFalse(self.node.save.called)
self.assertFalse(log_mock.warning.called)

View File

@ -436,6 +436,35 @@ class TestBaseAgentVendor(db_base.DbTestCase):
mock_handler.reset_mock()
mock_continue.reset_mock()
@mock.patch.object(manager_utils, 'cleaning_error_handler')
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
'continue_cleaning', autospec=True)
def test_heartbeat_continue_cleaning_no_worker(self, mock_continue,
mock_handler):
kwargs = {
'agent_url': 'http://127.0.0.1:9999/bar'
}
self.node.clean_step = {
'priority': 10,
'interface': 'deploy',
'step': 'foo',
'reboot_requested': False
}
mock_continue.side_effect = exception.NoFreeConductorWorker()
for state in (states.CLEANWAIT, states.CLEANING):
self.node.provision_state = state
self.node.save()
with task_manager.acquire(
self.context, self.node.uuid, shared=True) as task:
self.passthru.heartbeat(task, **kwargs)
mock_continue.assert_called_once_with(mock.ANY, task, **kwargs)
self.assertFalse(mock_handler.called)
mock_handler.reset_mock()
mock_continue.reset_mock()
@mock.patch.object(agent_base_vendor.BaseAgentVendor, 'continue_deploy',
autospec=True)
@mock.patch.object(agent_base_vendor.BaseAgentVendor, 'reboot_to_instance',

View File

@ -0,0 +1,4 @@
---
fixes:
- Fix issues with error handling when spawning a new thread to continue
cleaning. See https://bugs.launchpad.net/ironic/+bug/1539118.