From 18d016f7961e8dcac851ae099d63c93a227ac992 Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Tue, 6 Oct 2020 17:44:07 +0200 Subject: [PATCH] Avoid RPC notify_conductor_resume_{deploy,clean} in agent_base Currently we use an RPC call to the conductor itself to proceed to the next clean or deploy step. This is unnecessary and requires temporary lifting the lock, potentially causing race conditions. This change makes the agent code use continue_node_{deploy,clean} directly. The drivers still need updating, it will be done later. Story: #2008167 Task: #40922 Change-Id: If4763d542029b9021432425532f24a0228f04c25 --- ironic/conductor/manager.py | 6 +- ironic/conductor/task_manager.py | 8 + ironic/drivers/modules/agent_base.py | 41 +- .../unit/drivers/modules/test_agent_base.py | 681 +++++++++--------- .../notes/no-deploy-rpc-dec8ee1d0326d1ad.yaml | 5 + 5 files changed, 386 insertions(+), 355 deletions(-) create mode 100644 releasenotes/notes/no-deploy-rpc-dec8ee1d0326d1ad.yaml diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 7b91996171..58b8969ad7 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -1206,10 +1206,6 @@ class ConductorManager(base_manager.BaseConductorManager): with task_manager.acquire(context, node_id, shared=False, patient=True, purpose='continue node cleaning') as task: node = task.node - if node.target_provision_state == states.MANAGEABLE: - target_state = states.MANAGEABLE - else: - target_state = None if node.provision_state != states.CLEANWAIT: raise exception.InvalidStateRequested(_( @@ -1219,7 +1215,7 @@ class ConductorManager(base_manager.BaseConductorManager): 'state': node.provision_state, 'clean_state': states.CLEANWAIT}) - task.process_event('resume', target_state=target_state) + task.resume_cleaning() task.set_spawn_error_hook(utils.spawn_cleaning_error_handler, task.node) diff --git a/ironic/conductor/task_manager.py b/ironic/conductor/task_manager.py index 5063cb2113..509c9ce92e 100644 --- a/ironic/conductor/task_manager.py +++ b/ironic/conductor/task_manager.py @@ -604,6 +604,14 @@ class TaskManager(object): # emitted at __exit__(). self._saved_node = self.node + def resume_cleaning(self): + """A helper to resume cleaning with the right target state.""" + if self.node.target_provision_state == states.MANAGEABLE: + target_state = states.MANAGEABLE + else: + target_state = None + self.process_event('resume', target_state=target_state) + def __enter__(self): return self diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py index 51d96ec4d4..33df542929 100644 --- a/ironic/drivers/modules/agent_base.py +++ b/ironic/drivers/modules/agent_base.py @@ -31,6 +31,8 @@ from ironic.common.i18n import _ from ironic.common import image_service from ironic.common import states from ironic.common import utils +from ironic.conductor import cleaning +from ironic.conductor import deployments from ironic.conductor import steps as conductor_steps from ironic.conductor import task_manager from ironic.conductor import utils as manager_utils @@ -400,6 +402,15 @@ def _step_failure_handler(task, msg, step_type, traceback=False): manager_utils.deploying_error_handler(task, msg, traceback=traceback) +def _continue_steps(task, step_type): + if step_type == 'clean': + task.resume_cleaning() + cleaning.continue_node_clean(task) + else: + task.process_event('resume') + deployments.continue_node_deploy(task) + + class HeartbeatMixin(object): """Mixin class implementing heartbeat processing.""" @@ -510,13 +521,12 @@ class HeartbeatMixin(object): LOG.debug('Node %s just booted to start cleaning.', node.uuid) msg = _('Node failed to start the first cleaning step') + task.resume_cleaning() # First, cache the clean steps self.refresh_clean_steps(task) # Then set/verify node clean steps and start cleaning conductor_steps.set_node_cleaning_steps(task) - # The exceptions from RPC are not possible as we using cast - # here - manager_utils.notify_conductor_resume_clean(task) + cleaning.continue_node_clean(task) else: msg = _('Node failed to check cleaning progress') # Check if the driver is polling for completion of a step, @@ -910,7 +920,7 @@ class AgentBaseMixin(object): return manager_utils.deploying_error_handler(task, msg, traceback=True) - manager_utils.notify_conductor_resume_operation(task, step_type) + _continue_steps(task, step_type) @METRICS.timer('AgentBaseMixin.process_next_step') def process_next_step(self, task, step_type, **kwargs): @@ -940,8 +950,7 @@ class AgentBaseMixin(object): else 'deployment_reboot') utils.pop_node_nested_field(node, 'driver_internal_info', field) node.save() - manager_utils.notify_conductor_resume_operation(task, step_type) - return + return _continue_steps(task, step_type) current_step = (node.clean_step if step_type == 'clean' else node.deploy_step) @@ -1000,7 +1009,7 @@ class AgentBaseMixin(object): LOG.info('Agent on node %(node)s returned %(type)s command ' 'success, moving to next step', {'node': node.uuid, 'type': step_type}) - manager_utils.notify_conductor_resume_operation(task, step_type) + _continue_steps(task, step_type) else: msg = (_('Agent returned unknown status for %(type)s step %(step)s' ' on node %(node)s : %(err)s.') % @@ -1202,24 +1211,6 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin): 'error': e}) log_and_raise_deployment_error(task, msg, exc=e) - # TODO(dtantsur): remove in W - @METRICS.timer('AgentDeployMixin.reboot_and_finish_deploy') - def reboot_and_finish_deploy(self, task): - """Helper method to trigger reboot on the node and finish deploy. - - This method initiates a reboot on the node. On success, it - marks the deploy as complete. On failure, it logs the error - and marks deploy as failure. - - :param task: a TaskManager object containing the node - :raises: InstanceDeployFailure, if node reboot failed. - """ - # NOTE(dtantsur): do nothing here, the new deploy steps tear_down_agent - # and boot_instance will be picked up and finish the deploy (even for - # legacy deploy interfaces without decomposed steps). - task.process_event('wait') - manager_utils.notify_conductor_resume_deploy(task) - @METRICS.timer('AgentDeployMixin.prepare_instance_to_boot') def prepare_instance_to_boot(self, task, root_uuid, efi_sys_uuid, prep_boot_part_uuid=None): diff --git a/ironic/tests/unit/drivers/modules/test_agent_base.py b/ironic/tests/unit/drivers/modules/test_agent_base.py index 21c0b4fff2..d3c826e152 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base.py @@ -24,6 +24,7 @@ from ironic.common import boot_devices from ironic.common import exception from ironic.common import image_service from ironic.common import states +from ironic.conductor import cleaning from ironic.conductor import steps as conductor_steps from ironic.conductor import task_manager from ironic.conductor import utils as manager_utils @@ -315,9 +316,8 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): 'refresh_steps', autospec=True) @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - def test_heartbeat_resume_clean(self, mock_notify, mock_set_steps, + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + def test_heartbeat_resume_clean(self, mock_clean, mock_set_steps, mock_refresh, mock_touch): self.node.clean_step = {} self.node.provision_state = states.CLEANWAIT @@ -328,7 +328,7 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): mock_touch.assert_called_once_with(mock.ANY) mock_refresh.assert_called_once_with(mock.ANY, task, 'clean') - mock_notify.assert_called_once_with(task, 'clean') + mock_clean.assert_called_once_with(task) mock_set_steps.assert_called_once_with(task) @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) @@ -337,16 +337,16 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): 'refresh_steps', autospec=True) @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - def test_heartbeat_resume_clean_fails(self, mock_notify, mock_set_steps, + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + def test_heartbeat_resume_clean_fails(self, mock_clean, mock_set_steps, mock_refresh, mock_touch, mock_handler): - mocks = [mock_refresh, mock_set_steps, mock_notify] - self.node.clean_step = {} - self.node.provision_state = states.CLEANWAIT - self.node.save() + mocks = [mock_refresh, mock_set_steps, mock_clean] for i in range(len(mocks)): + self.node.clean_step = {} + self.node.provision_state = states.CLEANWAIT + self.node.save() + before_failed_mocks = mocks[:i] failed_mock = mocks[i] after_failed_mocks = mocks[i + 1:] @@ -1528,31 +1528,6 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.assertFalse(prepare_mock.called) self.assertFalse(failed_state_mock.called) - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning(self, status_mock, notify_mock): - # Test a successful execute clean step on the agent - self.node.clean_step = { - 'priority': 10, - 'interface': 'deploy', - 'step': 'erase_devices', - 'reboot_requested': False - } - self.node.save() - status_mock.return_value = [{ - 'command_status': 'SUCCEEDED', - 'command_name': 'execute_clean_step', - 'command_result': { - 'clean_step': self.node.clean_step - } - }] - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: - self.deploy.continue_cleaning(task) - notify_mock.assert_called_once_with(task, 'clean') - @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', spec_set=True, autospec=True) @@ -1647,295 +1622,6 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.assertNotIn('deployment_reboot', task.node.driver_internal_info) - @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) - @mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', spec_set=True, - autospec=True) - @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_reboot( - self, status_mock, reboot_mock, mock_prepare, mock_build_opt): - # Test a successful execute clean step on the agent, with reboot - self.node.clean_step = { - 'priority': 42, - 'interface': 'deploy', - 'step': 'reboot_me_afterwards', - 'reboot_requested': True - } - self.node.save() - status_mock.return_value = [{ - 'command_status': 'SUCCEEDED', - 'command_name': 'execute_clean_step', - 'command_result': { - 'clean_step': self.node.clean_step - } - }] - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: - self.deploy.continue_cleaning(task) - reboot_mock.assert_called_once_with(task, states.REBOOT) - - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_after_reboot(self, status_mock, notify_mock): - # Test a successful execute clean step on the agent, with reboot - self.node.clean_step = { - 'priority': 42, - 'interface': 'deploy', - 'step': 'reboot_me_afterwards', - 'reboot_requested': True - } - driver_internal_info = self.node.driver_internal_info - driver_internal_info['cleaning_reboot'] = True - self.node.driver_internal_info = driver_internal_info - self.node.save() - # Represents a freshly booted agent with no commands - status_mock.return_value = [] - - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: - self.deploy.continue_cleaning(task) - notify_mock.assert_called_once_with(task, 'clean') - self.assertNotIn('cleaning_reboot', - task.node.driver_internal_info) - - @mock.patch.object(agent_base, - '_get_post_step_hook', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_with_hook( - self, status_mock, notify_mock, get_hook_mock): - self.node.clean_step = { - 'priority': 10, - 'interface': 'raid', - 'step': 'create_configuration', - } - self.node.save() - command_status = { - 'command_status': 'SUCCEEDED', - 'command_name': 'execute_clean_step', - 'command_result': {'clean_step': self.node.clean_step}} - status_mock.return_value = [command_status] - hook_mock = mock.MagicMock(spec=types.FunctionType, __name__='foo') - get_hook_mock.return_value = hook_mock - with task_manager.acquire(self.context, self.node.uuid, - shared=False) as task: - self.deploy.continue_cleaning(task) - - get_hook_mock.assert_called_once_with(task.node, 'clean') - hook_mock.assert_called_once_with(task, command_status) - notify_mock.assert_called_once_with(task, 'clean') - - @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_base, - '_get_post_step_hook', autospec=True) - @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_with_hook_fails( - self, status_mock, error_handler_mock, get_hook_mock, - notify_mock, collect_logs_mock): - self.node.clean_step = { - 'priority': 10, - 'interface': 'raid', - 'step': 'create_configuration', - } - self.node.save() - command_status = { - 'command_status': 'SUCCEEDED', - 'command_name': 'execute_clean_step', - 'command_result': {'clean_step': self.node.clean_step}} - status_mock.return_value = [command_status] - hook_mock = mock.MagicMock(spec=types.FunctionType, __name__='foo') - hook_mock.side_effect = RuntimeError('error') - get_hook_mock.return_value = hook_mock - with task_manager.acquire(self.context, self.node.uuid, - shared=False) as task: - self.deploy.continue_cleaning(task) - - get_hook_mock.assert_called_once_with(task.node, 'clean') - hook_mock.assert_called_once_with(task, command_status) - error_handler_mock.assert_called_once_with(task, mock.ANY, - traceback=True) - self.assertFalse(notify_mock.called) - collect_logs_mock.assert_called_once_with(task.node, - label='cleaning') - - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_old_command(self, status_mock, notify_mock): - # Test when a second execute_clean_step happens to the agent, but - # the new step hasn't started yet. - self.node.clean_step = { - 'priority': 10, - 'interface': 'deploy', - 'step': 'erase_devices', - 'reboot_requested': False - } - self.node.save() - status_mock.return_value = [{ - 'command_status': 'SUCCEEDED', - 'command_name': 'execute_clean_step', - 'command_result': { - 'priority': 20, - 'interface': 'deploy', - 'step': 'update_firmware', - 'reboot_requested': False - } - }] - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: - self.deploy.continue_cleaning(task) - self.assertFalse(notify_mock.called) - - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_running(self, status_mock, notify_mock): - # Test that no action is taken while a clean step is executing - status_mock.return_value = [{ - 'command_status': 'RUNNING', - 'command_name': 'execute_clean_step', - 'command_result': None - }] - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: - self.deploy.continue_cleaning(task) - self.assertFalse(notify_mock.called) - - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_no_step_running(self, status_mock, notify_mock): - status_mock.return_value = [{ - 'command_status': 'SUCCEEDED', - 'command_name': 'get_clean_steps', - 'command_result': [] - }] - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: - self.deploy.continue_cleaning(task) - notify_mock.assert_called_once_with(task, 'clean') - - @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) - @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_fail(self, status_mock, error_mock, - collect_logs_mock): - # Test that a failure puts the node in CLEANFAIL - status_mock.return_value = [{ - 'command_status': 'FAILED', - 'command_name': 'execute_clean_step', - 'command_result': {} - }] - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: - task.node.clean_step = { - 'step': 'erase_devices', - 'interface': 'deploy', - } - self.deploy.continue_cleaning(task) - error_mock.assert_called_once_with(task, mock.ANY, traceback=False) - collect_logs_mock.assert_called_once_with(task.node, - label='cleaning') - - @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', - autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_base.AgentBaseMixin, 'refresh_steps', - autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def _test_continue_cleaning_clean_version_mismatch( - self, status_mock, refresh_steps_mock, notify_mock, steps_mock, - manual=False): - status_mock.return_value = [{ - 'command_status': 'CLEAN_VERSION_MISMATCH', - 'command_name': 'execute_clean_step', - }] - tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE - self.node.provision_state = states.CLEANWAIT - self.node.target_provision_state = tgt_prov_state - self.node.save() - with task_manager.acquire(self.context, self.node.uuid, - shared=False) as task: - self.deploy.continue_cleaning(task) - notify_mock.assert_called_once_with(task, 'clean') - refresh_steps_mock.assert_called_once_with(mock.ANY, task, 'clean') - if manual: - self.assertFalse( - task.node.driver_internal_info['skip_current_clean_step']) - self.assertFalse(steps_mock.called) - else: - steps_mock.assert_called_once_with(task) - self.assertNotIn('skip_current_clean_step', - task.node.driver_internal_info) - - def test_continue_cleaning_automated_clean_version_mismatch(self): - self._test_continue_cleaning_clean_version_mismatch() - - def test_continue_cleaning_manual_clean_version_mismatch(self): - self._test_continue_cleaning_clean_version_mismatch(manual=True) - - @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) - @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', - autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_base.AgentBaseMixin, 'refresh_steps', - autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_clean_version_mismatch_fail( - self, status_mock, refresh_steps_mock, notify_mock, steps_mock, - error_mock, manual=False): - status_mock.return_value = [{ - 'command_status': 'CLEAN_VERSION_MISMATCH', - 'command_name': 'execute_clean_step', - 'command_result': {'hardware_manager_version': {'Generic': '1'}} - }] - refresh_steps_mock.side_effect = exception.NodeCleaningFailure("boo") - tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE - self.node.provision_state = states.CLEANWAIT - self.node.target_provision_state = tgt_prov_state - self.node.save() - with task_manager.acquire(self.context, self.node.uuid, - shared=False) as task: - self.deploy.continue_cleaning(task) - - status_mock.assert_called_once_with(mock.ANY, task.node) - refresh_steps_mock.assert_called_once_with(mock.ANY, task, 'clean') - error_mock.assert_called_once_with(task, mock.ANY, traceback=True) - self.assertFalse(notify_mock.called) - self.assertFalse(steps_mock.called) - - @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_unknown(self, status_mock, error_mock): - # Test that unknown commands are treated as failures - status_mock.return_value = [{ - 'command_status': 'UNKNOWN', - 'command_name': 'execute_clean_step', - 'command_result': {} - }] - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: - self.deploy.continue_cleaning(task) - error_mock.assert_called_once_with(task, mock.ANY, traceback=False) - def _test_clean_step_hook(self): """Helper method for unit tests related to clean step hooks.""" some_function_mock = mock.MagicMock() @@ -1985,6 +1671,351 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.assertIsNone(hook_returned) +class ContinueCleaningTest(AgentDeployMixinBaseTest): + + def setUp(self): + super().setUp() + self.node.provision_state = states.CLEANWAIT + self.node.target_provision_state = states.AVAILABLE + self.node.save() + + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning(self, status_mock, clean_mock): + # Test a successful execute clean step on the agent + self.node.clean_step = { + 'priority': 10, + 'interface': 'deploy', + 'step': 'erase_devices', + 'reboot_requested': False + } + self.node.save() + status_mock.return_value = [{ + 'command_status': 'SUCCEEDED', + 'command_name': 'execute_clean_step', + 'command_result': { + 'clean_step': self.node.clean_step + } + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.deploy.continue_cleaning(task) + clean_mock.assert_called_once_with(task) + self.assertEqual(states.CLEANING, task.node.provision_state) + self.assertEqual(states.AVAILABLE, + task.node.target_provision_state) + + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_manual_cleaning(self, status_mock, clean_mock): + self.node.target_provision_state = states.MANAGEABLE + self.node.clean_step = { + 'priority': 10, + 'interface': 'deploy', + 'step': 'erase_devices', + 'reboot_requested': False + } + self.node.save() + status_mock.return_value = [{ + 'command_status': 'SUCCEEDED', + 'command_name': 'execute_clean_step', + 'command_result': { + 'clean_step': self.node.clean_step + } + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.deploy.continue_cleaning(task) + clean_mock.assert_called_once_with(task) + self.assertEqual(states.CLEANING, task.node.provision_state) + self.assertEqual(states.MANAGEABLE, + task.node.target_provision_state) + + @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) + @mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', spec_set=True, + autospec=True) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_reboot( + self, status_mock, reboot_mock, mock_prepare, mock_build_opt): + # Test a successful execute clean step on the agent, with reboot + self.node.clean_step = { + 'priority': 42, + 'interface': 'deploy', + 'step': 'reboot_me_afterwards', + 'reboot_requested': True + } + self.node.save() + status_mock.return_value = [{ + 'command_status': 'SUCCEEDED', + 'command_name': 'execute_clean_step', + 'command_result': { + 'clean_step': self.node.clean_step + } + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.deploy.continue_cleaning(task) + reboot_mock.assert_called_once_with(task, states.REBOOT) + + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_after_reboot(self, status_mock, clean_mock): + # Test a successful execute clean step on the agent, with reboot + self.node.clean_step = { + 'priority': 42, + 'interface': 'deploy', + 'step': 'reboot_me_afterwards', + 'reboot_requested': True + } + driver_internal_info = self.node.driver_internal_info + driver_internal_info['cleaning_reboot'] = True + self.node.driver_internal_info = driver_internal_info + self.node.save() + # Represents a freshly booted agent with no commands + status_mock.return_value = [] + + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.deploy.continue_cleaning(task) + clean_mock.assert_called_once_with(task) + self.assertEqual(states.CLEANING, task.node.provision_state) + self.assertNotIn('cleaning_reboot', + task.node.driver_internal_info) + + @mock.patch.object(agent_base, + '_get_post_step_hook', autospec=True) + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_with_hook( + self, status_mock, clean_mock, get_hook_mock): + self.node.clean_step = { + 'priority': 10, + 'interface': 'raid', + 'step': 'create_configuration', + } + self.node.save() + command_status = { + 'command_status': 'SUCCEEDED', + 'command_name': 'execute_clean_step', + 'command_result': {'clean_step': self.node.clean_step}} + status_mock.return_value = [command_status] + hook_mock = mock.MagicMock(spec=types.FunctionType, __name__='foo') + get_hook_mock.return_value = hook_mock + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + self.deploy.continue_cleaning(task) + + get_hook_mock.assert_called_once_with(task.node, 'clean') + hook_mock.assert_called_once_with(task, command_status) + clean_mock.assert_called_once_with(task) + + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_base, + '_get_post_step_hook', autospec=True) + @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_with_hook_fails( + self, status_mock, error_handler_mock, get_hook_mock, + clean_mock, collect_logs_mock): + self.node.clean_step = { + 'priority': 10, + 'interface': 'raid', + 'step': 'create_configuration', + } + self.node.save() + command_status = { + 'command_status': 'SUCCEEDED', + 'command_name': 'execute_clean_step', + 'command_result': {'clean_step': self.node.clean_step}} + status_mock.return_value = [command_status] + hook_mock = mock.MagicMock(spec=types.FunctionType, __name__='foo') + hook_mock.side_effect = RuntimeError('error') + get_hook_mock.return_value = hook_mock + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + self.deploy.continue_cleaning(task) + + get_hook_mock.assert_called_once_with(task.node, 'clean') + hook_mock.assert_called_once_with(task, command_status) + error_handler_mock.assert_called_once_with(task, mock.ANY, + traceback=True) + self.assertFalse(clean_mock.called) + collect_logs_mock.assert_called_once_with(task.node, + label='cleaning') + + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_old_command(self, status_mock, clean_mock): + # Test when a second execute_clean_step happens to the agent, but + # the new step hasn't started yet. + self.node.clean_step = { + 'priority': 10, + 'interface': 'deploy', + 'step': 'erase_devices', + 'reboot_requested': False + } + self.node.save() + status_mock.return_value = [{ + 'command_status': 'SUCCEEDED', + 'command_name': 'execute_clean_step', + 'command_result': { + 'priority': 20, + 'interface': 'deploy', + 'step': 'update_firmware', + 'reboot_requested': False + } + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.deploy.continue_cleaning(task) + self.assertFalse(clean_mock.called) + + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_running(self, status_mock, clean_mock): + # Test that no action is taken while a clean step is executing + status_mock.return_value = [{ + 'command_status': 'RUNNING', + 'command_name': 'execute_clean_step', + 'command_result': None + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.deploy.continue_cleaning(task) + self.assertFalse(clean_mock.called) + + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_no_step_running(self, status_mock, clean_mock): + status_mock.return_value = [{ + 'command_status': 'SUCCEEDED', + 'command_name': 'get_clean_steps', + 'command_result': [] + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.deploy.continue_cleaning(task) + clean_mock.assert_called_once_with(task) + + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) + @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_fail(self, status_mock, error_mock, + collect_logs_mock): + # Test that a failure puts the node in CLEANFAIL + status_mock.return_value = [{ + 'command_status': 'FAILED', + 'command_name': 'execute_clean_step', + 'command_result': {} + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + task.node.clean_step = { + 'step': 'erase_devices', + 'interface': 'deploy', + } + self.deploy.continue_cleaning(task) + error_mock.assert_called_once_with(task, mock.ANY, traceback=False) + collect_logs_mock.assert_called_once_with(task.node, + label='cleaning') + + @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', + autospec=True) + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_base.AgentBaseMixin, 'refresh_steps', + autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def _test_continue_cleaning_clean_version_mismatch( + self, status_mock, refresh_steps_mock, clean_mock, steps_mock, + manual=False): + status_mock.return_value = [{ + 'command_status': 'CLEAN_VERSION_MISMATCH', + 'command_name': 'execute_clean_step', + }] + tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE + self.node.provision_state = states.CLEANWAIT + self.node.target_provision_state = tgt_prov_state + self.node.save() + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + self.deploy.continue_cleaning(task) + clean_mock.assert_called_once_with(task) + refresh_steps_mock.assert_called_once_with(mock.ANY, task, 'clean') + if manual: + self.assertFalse( + task.node.driver_internal_info['skip_current_clean_step']) + self.assertFalse(steps_mock.called) + else: + steps_mock.assert_called_once_with(task) + self.assertNotIn('skip_current_clean_step', + task.node.driver_internal_info) + + def test_continue_cleaning_automated_clean_version_mismatch(self): + self._test_continue_cleaning_clean_version_mismatch() + + def test_continue_cleaning_manual_clean_version_mismatch(self): + self._test_continue_cleaning_clean_version_mismatch(manual=True) + + @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) + @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', + autospec=True) + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_base.AgentBaseMixin, 'refresh_steps', + autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_clean_version_mismatch_fail( + self, status_mock, refresh_steps_mock, clean_mock, steps_mock, + error_mock, manual=False): + status_mock.return_value = [{ + 'command_status': 'CLEAN_VERSION_MISMATCH', + 'command_name': 'execute_clean_step', + 'command_result': {'hardware_manager_version': {'Generic': '1'}} + }] + refresh_steps_mock.side_effect = exception.NodeCleaningFailure("boo") + tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE + self.node.provision_state = states.CLEANWAIT + self.node.target_provision_state = tgt_prov_state + self.node.save() + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + self.deploy.continue_cleaning(task) + + status_mock.assert_called_once_with(mock.ANY, task.node) + refresh_steps_mock.assert_called_once_with(mock.ANY, task, 'clean') + error_mock.assert_called_once_with(task, mock.ANY, traceback=True) + self.assertFalse(clean_mock.called) + self.assertFalse(steps_mock.called) + + @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_unknown(self, status_mock, error_mock): + # Test that unknown commands are treated as failures + status_mock.return_value = [{ + 'command_status': 'UNKNOWN', + 'command_name': 'execute_clean_step', + 'command_result': {} + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.deploy.continue_cleaning(task) + error_mock.assert_called_once_with(task, mock.ANY, traceback=False) + + class TestRefreshCleanSteps(AgentDeployMixinBaseTest): def setUp(self): diff --git a/releasenotes/notes/no-deploy-rpc-dec8ee1d0326d1ad.yaml b/releasenotes/notes/no-deploy-rpc-dec8ee1d0326d1ad.yaml new file mode 100644 index 0000000000..74f333b484 --- /dev/null +++ b/releasenotes/notes/no-deploy-rpc-dec8ee1d0326d1ad.yaml @@ -0,0 +1,5 @@ +--- +other: + - | + The agent deploy and cleaning code no longer uses an RPC call to the same + conductor when proceeding to the next deploy or clean step.