Merge "Don't resume deployment or cleaning on heartbeat when polling"
This commit is contained in:
commit
add03f5fe7
@ -918,12 +918,17 @@ class ConductorManager(base_manager.BaseConductorManager):
|
||||
'state': node.provision_state,
|
||||
'deploy_state': ', '.join(expected_states)})
|
||||
|
||||
save_required = False
|
||||
info = node.driver_internal_info
|
||||
try:
|
||||
skip_current_step = info.pop('skip_current_deploy_step')
|
||||
except KeyError:
|
||||
skip_current_step = True
|
||||
else:
|
||||
save_required = True
|
||||
if info.pop('deployment_polling', None) is not None:
|
||||
save_required = True
|
||||
if save_required:
|
||||
node.driver_internal_info = info
|
||||
node.save()
|
||||
|
||||
@ -1229,12 +1234,17 @@ class ConductorManager(base_manager.BaseConductorManager):
|
||||
'state': node.provision_state,
|
||||
'clean_state': states.CLEANWAIT})
|
||||
|
||||
save_required = False
|
||||
info = node.driver_internal_info
|
||||
try:
|
||||
skip_current_step = info.pop('skip_current_clean_step')
|
||||
except KeyError:
|
||||
skip_current_step = True
|
||||
else:
|
||||
save_required = True
|
||||
if info.pop('cleaning_polling', None) is not None:
|
||||
save_required = True
|
||||
if save_required:
|
||||
node.driver_internal_info = info
|
||||
node.save()
|
||||
|
||||
@ -1462,6 +1472,7 @@ class ConductorManager(base_manager.BaseConductorManager):
|
||||
driver_internal_info['clean_steps'] = None
|
||||
driver_internal_info.pop('clean_step_index', None)
|
||||
driver_internal_info.pop('cleaning_reboot', None)
|
||||
driver_internal_info.pop('cleaning_polling', None)
|
||||
node.driver_internal_info = driver_internal_info
|
||||
node.save()
|
||||
try:
|
||||
@ -1545,6 +1556,13 @@ class ConductorManager(base_manager.BaseConductorManager):
|
||||
|
||||
node.last_error = last_error
|
||||
node.clean_step = None
|
||||
info = node.driver_internal_info
|
||||
# Clear any leftover metadata about cleaning
|
||||
info.pop('clean_step_index', None)
|
||||
info.pop('cleaning_reboot', None)
|
||||
info.pop('cleaning_polling', None)
|
||||
info.pop('skip_current_clean_step', None)
|
||||
node.driver_internal_info = info
|
||||
node.save()
|
||||
LOG.info(info_message)
|
||||
|
||||
@ -3960,6 +3978,7 @@ def _do_next_deploy_step(task, step_index, conductor_id):
|
||||
driver_internal_info['deploy_steps'] = None
|
||||
driver_internal_info.pop('deploy_step_index', None)
|
||||
driver_internal_info.pop('deployment_reboot', None)
|
||||
driver_internal_info.pop('deployment_polling', None)
|
||||
node.driver_internal_info = driver_internal_info
|
||||
node.save()
|
||||
|
||||
|
@ -408,9 +408,11 @@ def cleaning_error_handler(task, msg, tear_down_cleaning=True,
|
||||
# Clear clean step, msg should already include current step
|
||||
node.clean_step = {}
|
||||
info = node.driver_internal_info
|
||||
# Clear any leftover metadata about cleaning
|
||||
info.pop('clean_step_index', None)
|
||||
# Clear any leftover metadata about cleaning reboots
|
||||
info.pop('cleaning_reboot', None)
|
||||
info.pop('cleaning_polling', None)
|
||||
info.pop('skip_current_clean_step', None)
|
||||
node.driver_internal_info = info
|
||||
# For manual cleaning, the target provision state is MANAGEABLE, whereas
|
||||
# for automated cleaning, it is AVAILABLE.
|
||||
@ -466,9 +468,11 @@ def deploying_error_handler(task, logmsg, errmsg, traceback=False,
|
||||
# in node.driver_internal_info for debugging purposes.
|
||||
node.deploy_step = {}
|
||||
info = node.driver_internal_info
|
||||
# Clear any leftover metadata about deployment.
|
||||
info.pop('deploy_step_index', None)
|
||||
# Clear any leftover metadata about deployment reboots
|
||||
info.pop('deployment_reboot', None)
|
||||
info.pop('deployment_polling', None)
|
||||
info.pop('skip_current_deploy_step', None)
|
||||
node.driver_internal_info = info
|
||||
|
||||
if cleanup_err:
|
||||
|
@ -380,7 +380,7 @@ class HeartbeatMixin(object):
|
||||
# NOTE(mgoddard): Only handle heartbeats during DEPLOYWAIT if we
|
||||
# are currently in the core deploy.deploy step. Other deploy steps
|
||||
# may cause the agent to boot, but we should not trigger deployment
|
||||
# at that point.
|
||||
# at that point if the driver is polling for completion of a step.
|
||||
if node.provision_state == states.DEPLOYWAIT:
|
||||
if self.in_core_deploy_step(task):
|
||||
if not self.deploy_has_started(task):
|
||||
@ -394,7 +394,12 @@ class HeartbeatMixin(object):
|
||||
else:
|
||||
# The exceptions from RPC are not possible as we using cast
|
||||
# here
|
||||
manager_utils.notify_conductor_resume_deploy(task)
|
||||
# Check if the driver is polling for completion of a step,
|
||||
# via the 'deployment_polling' flag.
|
||||
polling = node.driver_internal_info.get(
|
||||
'deployment_polling', False)
|
||||
if not polling:
|
||||
manager_utils.notify_conductor_resume_deploy(task)
|
||||
node.touch_provisioning()
|
||||
elif node.provision_state == states.CLEANWAIT:
|
||||
node.touch_provisioning()
|
||||
@ -411,7 +416,12 @@ class HeartbeatMixin(object):
|
||||
manager_utils.notify_conductor_resume_clean(task)
|
||||
else:
|
||||
msg = _('Node failed to check cleaning progress.')
|
||||
self.continue_cleaning(task)
|
||||
# Check if the driver is polling for completion of a step,
|
||||
# via the 'cleaning_polling' flag.
|
||||
polling = node.driver_internal_info.get(
|
||||
'cleaning_polling', False)
|
||||
if not polling:
|
||||
self.continue_cleaning(task)
|
||||
elif (node.provision_state == states.RESCUEWAIT):
|
||||
msg = _('Node failed to perform rescue operation.')
|
||||
self._finalize_rescue(task)
|
||||
|
@ -1475,7 +1475,8 @@ def get_async_step_return_state(node):
|
||||
return states.CLEANWAIT if node.clean_step else states.DEPLOYWAIT
|
||||
|
||||
|
||||
def set_async_step_flags(node, reboot=None, skip_current_step=None):
|
||||
def set_async_step_flags(node, reboot=None, skip_current_step=None,
|
||||
polling=None):
|
||||
"""Sets appropriate reboot flags in driver_internal_info based on operation
|
||||
|
||||
:param node: an ironic node object.
|
||||
@ -1488,16 +1489,24 @@ def set_async_step_flags(node, reboot=None, skip_current_step=None):
|
||||
skip_current_deploy_step based on cleaning or deployment operation
|
||||
in progress. If it is None, corresponding skip step flag is not set
|
||||
in node's driver_internal_info.
|
||||
:param polling: Boolean value to set for node's driver_internal_info flag
|
||||
deployment_polling or cleaning_polling. If it is None, the
|
||||
corresponding polling flag is not set in the node's
|
||||
driver_internal_info.
|
||||
"""
|
||||
info = node.driver_internal_info
|
||||
cleaning = {'reboot': 'cleaning_reboot',
|
||||
'skip': 'skip_current_clean_step'}
|
||||
'skip': 'skip_current_clean_step',
|
||||
'polling': 'cleaning_polling'}
|
||||
deployment = {'reboot': 'deployment_reboot',
|
||||
'skip': 'skip_current_deploy_step'}
|
||||
'skip': 'skip_current_deploy_step',
|
||||
'polling': 'deployment_polling'}
|
||||
fields = cleaning if node.clean_step else deployment
|
||||
if reboot is not None:
|
||||
info[fields['reboot']] = reboot
|
||||
if skip_current_step is not None:
|
||||
info[fields['skip']] = skip_current_step
|
||||
if polling is not None:
|
||||
info[fields['polling']] = polling
|
||||
node.driver_internal_info = info
|
||||
node.save()
|
||||
|
@ -1890,6 +1890,26 @@ class ContinueNodeDeployTestCase(mgr_utils.ServiceSetUpMixin,
|
||||
def test_continue_node_deploy_no_skip_step(self):
|
||||
self._continue_node_deploy_skip_step(skip=False)
|
||||
|
||||
@mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker',
|
||||
autospec=True)
|
||||
def test_continue_node_deploy_polling(self, mock_spawn):
|
||||
# test that deployment_polling flag is cleared
|
||||
driver_info = {'deploy_steps': self.deploy_steps,
|
||||
'deploy_step_index': 0,
|
||||
'deployment_polling': True}
|
||||
node = obj_utils.create_test_node(
|
||||
self.context, driver='fake-hardware',
|
||||
provision_state=states.DEPLOYWAIT,
|
||||
target_provision_state=states.MANAGEABLE,
|
||||
driver_internal_info=driver_info, deploy_step=self.deploy_steps[0])
|
||||
self._start_service()
|
||||
self.service.continue_node_deploy(self.context, node.uuid)
|
||||
self._stop_service()
|
||||
node.refresh()
|
||||
self.assertNotIn('deployment_polling', node.driver_internal_info)
|
||||
mock_spawn.assert_called_with(mock.ANY, manager._do_next_deploy_step,
|
||||
mock.ANY, 1, mock.ANY)
|
||||
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step',
|
||||
autospec=True)
|
||||
def test_do_next_deploy_step_oob_reboot(self, mock_execute):
|
||||
@ -3269,7 +3289,12 @@ class DoNodeCleanAbortTestCase(mgr_utils.ServiceSetUpMixin,
|
||||
self.context, driver='fake-hardware',
|
||||
provision_state=states.CLEANFAIL,
|
||||
target_provision_state=states.AVAILABLE,
|
||||
clean_step={'step': 'foo', 'abortable': True})
|
||||
clean_step={'step': 'foo', 'abortable': True},
|
||||
driver_internal_info={
|
||||
'clean_step_index': 2,
|
||||
'cleaning_reboot': True,
|
||||
'cleaning_polling': True,
|
||||
'skip_current_clean_step': True})
|
||||
|
||||
with task_manager.acquire(self.context, node.uuid) as task:
|
||||
self.service._do_node_clean_abort(task, step_name=step_name)
|
||||
@ -3277,8 +3302,16 @@ class DoNodeCleanAbortTestCase(mgr_utils.ServiceSetUpMixin,
|
||||
tear_mock.assert_called_once_with(task.driver.deploy, task)
|
||||
if step_name:
|
||||
self.assertIn(step_name, task.node.last_error)
|
||||
# assert node's clean_step was cleaned up
|
||||
# assert node's clean_step and metadata was cleaned up
|
||||
self.assertEqual({}, task.node.clean_step)
|
||||
self.assertNotIn('clean_step_index',
|
||||
task.node.driver_internal_info)
|
||||
self.assertNotIn('cleaning_reboot',
|
||||
task.node.driver_internal_info)
|
||||
self.assertNotIn('cleaning_polling',
|
||||
task.node.driver_internal_info)
|
||||
self.assertNotIn('skip_current_clean_step',
|
||||
task.node.driver_internal_info)
|
||||
|
||||
def test__do_node_clean_abort(self):
|
||||
self._test__do_node_clean_abort(None)
|
||||
@ -3555,6 +3588,27 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
|
||||
def test_continue_node_clean_no_skip_step(self):
|
||||
self._continue_node_clean_skip_step(skip=False)
|
||||
|
||||
@mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker',
|
||||
autospec=True)
|
||||
def test_continue_node_clean_polling(self, mock_spawn):
|
||||
# test that cleaning_polling flag is cleared
|
||||
driver_info = {'clean_steps': self.clean_steps,
|
||||
'clean_step_index': 0,
|
||||
'cleaning_polling': True}
|
||||
node = obj_utils.create_test_node(
|
||||
self.context, driver='fake-hardware',
|
||||
provision_state=states.CLEANWAIT,
|
||||
target_provision_state=states.MANAGEABLE,
|
||||
driver_internal_info=driver_info, clean_step=self.clean_steps[0])
|
||||
self._start_service()
|
||||
self.service.continue_node_clean(self.context, node.uuid)
|
||||
self._stop_service()
|
||||
node.refresh()
|
||||
self.assertNotIn('cleaning_polling', node.driver_internal_info)
|
||||
mock_spawn.assert_called_with(self.service,
|
||||
self.service._do_next_clean_step,
|
||||
mock.ANY, 1)
|
||||
|
||||
def _continue_node_clean_abort(self, manual=False):
|
||||
last_clean_step = self.clean_steps[0]
|
||||
last_clean_step['abortable'] = False
|
||||
|
@ -915,6 +915,11 @@ class DeployingErrorHandlerTestCase(tests_base.TestCase):
|
||||
self.task)
|
||||
|
||||
def test_deploying_error_handler(self):
|
||||
info = self.node.driver_internal_info
|
||||
info['deploy_step_index'] = 2
|
||||
info['deployment_reboot'] = True
|
||||
info['deployment_polling'] = True
|
||||
info['skip_current_deploy_step'] = True
|
||||
conductor_utils.deploying_error_handler(self.task, self.logmsg,
|
||||
self.errmsg)
|
||||
|
||||
@ -924,6 +929,9 @@ class DeployingErrorHandlerTestCase(tests_base.TestCase):
|
||||
self.assertEqual({}, self.node.deploy_step)
|
||||
self.assertNotIn('deploy_step_index', self.node.driver_internal_info)
|
||||
self.assertNotIn('deployment_reboot', self.node.driver_internal_info)
|
||||
self.assertNotIn('deployment_polling', self.node.driver_internal_info)
|
||||
self.assertNotIn('skip_current_deploy_step',
|
||||
self.node.driver_internal_info)
|
||||
self.task.process_event.assert_called_once_with('fail')
|
||||
|
||||
def _test_deploying_error_handler_cleanup(self, exc, expected_str):
|
||||
@ -1049,12 +1057,18 @@ class ErrorHandlersTestCase(tests_base.TestCase):
|
||||
self.node.clean_step = {'key': 'val'}
|
||||
self.node.driver_internal_info = {
|
||||
'cleaning_reboot': True,
|
||||
'cleaning_polling': True,
|
||||
'skip_current_clean_step': True,
|
||||
'clean_step_index': 0}
|
||||
msg = 'error bar'
|
||||
conductor_utils.cleaning_error_handler(self.task, msg)
|
||||
self.node.save.assert_called_once_with()
|
||||
self.assertEqual({}, self.node.clean_step)
|
||||
self.assertNotIn('clean_step_index', self.node.driver_internal_info)
|
||||
self.assertNotIn('cleaning_reboot', self.node.driver_internal_info)
|
||||
self.assertNotIn('cleaning_polling', self.node.driver_internal_info)
|
||||
self.assertNotIn('skip_current_clean_step',
|
||||
self.node.driver_internal_info)
|
||||
self.assertEqual(msg, self.node.last_error)
|
||||
self.assertTrue(self.node.maintenance)
|
||||
self.assertEqual(msg, self.node.maintenance_reason)
|
||||
|
@ -175,6 +175,46 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
|
||||
self.assertFalse(rti_mock.called)
|
||||
self.assertTrue(in_resume_deploy_mock.called)
|
||||
|
||||
@mock.patch.object(manager_utils,
|
||||
'notify_conductor_resume_deploy', autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
|
||||
'in_core_deploy_step', autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
|
||||
'deploy_has_started', autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
|
||||
'deploy_is_done', autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_deploy',
|
||||
autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
|
||||
'reboot_to_instance', autospec=True)
|
||||
def test_heartbeat_not_in_core_deploy_step_polling(self, rti_mock, cd_mock,
|
||||
deploy_is_done_mock,
|
||||
deploy_started_mock,
|
||||
in_deploy_mock,
|
||||
in_resume_deploy_mock):
|
||||
# Check that heartbeats do not trigger deployment actions when not in
|
||||
# the deploy.deploy step.
|
||||
in_deploy_mock.return_value = False
|
||||
self.node.provision_state = states.DEPLOYWAIT
|
||||
info = self.node.driver_internal_info
|
||||
info['deployment_polling'] = True
|
||||
self.node.driver_internal_info = info
|
||||
self.node.save()
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=True) as task:
|
||||
self.deploy.heartbeat(task, 'url', '3.2.0')
|
||||
self.assertFalse(task.shared)
|
||||
self.assertEqual(
|
||||
'url', task.node.driver_internal_info['agent_url'])
|
||||
self.assertEqual(
|
||||
'3.2.0',
|
||||
task.node.driver_internal_info['agent_version'])
|
||||
self.assertFalse(deploy_started_mock.called)
|
||||
self.assertFalse(deploy_is_done_mock.called)
|
||||
self.assertFalse(cd_mock.called)
|
||||
self.assertFalse(rti_mock.called)
|
||||
self.assertFalse(in_resume_deploy_mock.called)
|
||||
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_deploy',
|
||||
autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
|
||||
@ -458,6 +498,29 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
|
||||
mock_touch.assert_called_once_with(mock.ANY)
|
||||
mock_continue.assert_called_once_with(mock.ANY, task)
|
||||
|
||||
@mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True)
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
|
||||
'continue_cleaning', autospec=True)
|
||||
def test_heartbeat_continue_cleaning_polling(self, mock_continue,
|
||||
mock_touch):
|
||||
info = self.node.driver_internal_info
|
||||
info['cleaning_polling'] = True
|
||||
self.node.driver_internal_info = info
|
||||
self.node.clean_step = {
|
||||
'priority': 10,
|
||||
'interface': 'deploy',
|
||||
'step': 'foo',
|
||||
'reboot_requested': False
|
||||
}
|
||||
self.node.provision_state = states.CLEANWAIT
|
||||
self.node.save()
|
||||
with task_manager.acquire(
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
self.deploy.heartbeat(task, 'http://127.0.0.1:8080', '1.0.0')
|
||||
|
||||
mock_touch.assert_called_once_with(mock.ANY)
|
||||
self.assertFalse(mock_continue.called)
|
||||
|
||||
@mock.patch.object(manager_utils, 'cleaning_error_handler')
|
||||
@mock.patch.object(agent_base_vendor.HeartbeatMixin,
|
||||
'continue_cleaning', autospec=True)
|
||||
|
@ -2889,15 +2889,17 @@ class AsyncStepTestCase(db_base.DbTestCase):
|
||||
self.node.save()
|
||||
self._test_get_async_step_return_state()
|
||||
|
||||
def test_set_async_step_flags_cleaning_set_both(self):
|
||||
def test_set_async_step_flags_cleaning_set_all(self):
|
||||
self.node.clean_step = {'step': 'create_configuration',
|
||||
'interface': 'raid'}
|
||||
self.node.driver_internal_info = {}
|
||||
expected = {'cleaning_reboot': True,
|
||||
'cleaning_polling': True,
|
||||
'skip_current_clean_step': True}
|
||||
self.node.save()
|
||||
utils.set_async_step_flags(self.node, reboot=True,
|
||||
skip_current_step=True)
|
||||
skip_current_step=True,
|
||||
polling=True)
|
||||
self.assertEqual(expected, self.node.driver_internal_info)
|
||||
|
||||
def test_set_async_step_flags_cleaning_set_one(self):
|
||||
@ -2909,15 +2911,17 @@ class AsyncStepTestCase(db_base.DbTestCase):
|
||||
self.assertEqual({'cleaning_reboot': True},
|
||||
self.node.driver_internal_info)
|
||||
|
||||
def test_set_async_step_flags_deploying_set_both(self):
|
||||
def test_set_async_step_flags_deploying_set_all(self):
|
||||
self.node.deploy_step = {'step': 'create_configuration',
|
||||
'interface': 'raid'}
|
||||
self.node.driver_internal_info = {}
|
||||
expected = {'deployment_reboot': True,
|
||||
'deployment_polling': True,
|
||||
'skip_current_deploy_step': True}
|
||||
self.node.save()
|
||||
utils.set_async_step_flags(self.node, reboot=True,
|
||||
skip_current_step=True)
|
||||
skip_current_step=True,
|
||||
polling=True)
|
||||
self.assertEqual(expected, self.node.driver_internal_info)
|
||||
|
||||
def test_set_async_step_flags_deploying_set_one(self):
|
||||
|
@ -0,0 +1,8 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Fixes an issue with asynchronous deploy steps that poll for completion
|
||||
where the step could fail to execute. The ``deployment_polling`` and
|
||||
``cleaning_polling`` flags may be used by driver implementations to signal
|
||||
that the driver is polling for completion. See `story 2003817
|
||||
<https://storyboard.openstack.org/#!/story/2003817>`__ for details.
|
Loading…
Reference in New Issue
Block a user