diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 7b91996171..58b8969ad7 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -1206,10 +1206,6 @@ class ConductorManager(base_manager.BaseConductorManager): with task_manager.acquire(context, node_id, shared=False, patient=True, purpose='continue node cleaning') as task: node = task.node - if node.target_provision_state == states.MANAGEABLE: - target_state = states.MANAGEABLE - else: - target_state = None if node.provision_state != states.CLEANWAIT: raise exception.InvalidStateRequested(_( @@ -1219,7 +1215,7 @@ class ConductorManager(base_manager.BaseConductorManager): 'state': node.provision_state, 'clean_state': states.CLEANWAIT}) - task.process_event('resume', target_state=target_state) + task.resume_cleaning() task.set_spawn_error_hook(utils.spawn_cleaning_error_handler, task.node) diff --git a/ironic/conductor/task_manager.py b/ironic/conductor/task_manager.py index 5063cb2113..509c9ce92e 100644 --- a/ironic/conductor/task_manager.py +++ b/ironic/conductor/task_manager.py @@ -604,6 +604,14 @@ class TaskManager(object): # emitted at __exit__(). self._saved_node = self.node + def resume_cleaning(self): + """A helper to resume cleaning with the right target state.""" + if self.node.target_provision_state == states.MANAGEABLE: + target_state = states.MANAGEABLE + else: + target_state = None + self.process_event('resume', target_state=target_state) + def __enter__(self): return self diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py index 51d96ec4d4..33df542929 100644 --- a/ironic/drivers/modules/agent_base.py +++ b/ironic/drivers/modules/agent_base.py @@ -31,6 +31,8 @@ from ironic.common.i18n import _ from ironic.common import image_service from ironic.common import states from ironic.common import utils +from ironic.conductor import cleaning +from ironic.conductor import deployments from ironic.conductor import steps as conductor_steps from ironic.conductor import task_manager from ironic.conductor import utils as manager_utils @@ -400,6 +402,15 @@ def _step_failure_handler(task, msg, step_type, traceback=False): manager_utils.deploying_error_handler(task, msg, traceback=traceback) +def _continue_steps(task, step_type): + if step_type == 'clean': + task.resume_cleaning() + cleaning.continue_node_clean(task) + else: + task.process_event('resume') + deployments.continue_node_deploy(task) + + class HeartbeatMixin(object): """Mixin class implementing heartbeat processing.""" @@ -510,13 +521,12 @@ class HeartbeatMixin(object): LOG.debug('Node %s just booted to start cleaning.', node.uuid) msg = _('Node failed to start the first cleaning step') + task.resume_cleaning() # First, cache the clean steps self.refresh_clean_steps(task) # Then set/verify node clean steps and start cleaning conductor_steps.set_node_cleaning_steps(task) - # The exceptions from RPC are not possible as we using cast - # here - manager_utils.notify_conductor_resume_clean(task) + cleaning.continue_node_clean(task) else: msg = _('Node failed to check cleaning progress') # Check if the driver is polling for completion of a step, @@ -910,7 +920,7 @@ class AgentBaseMixin(object): return manager_utils.deploying_error_handler(task, msg, traceback=True) - manager_utils.notify_conductor_resume_operation(task, step_type) + _continue_steps(task, step_type) @METRICS.timer('AgentBaseMixin.process_next_step') def process_next_step(self, task, step_type, **kwargs): @@ -940,8 +950,7 @@ class AgentBaseMixin(object): else 'deployment_reboot') utils.pop_node_nested_field(node, 'driver_internal_info', field) node.save() - manager_utils.notify_conductor_resume_operation(task, step_type) - return + return _continue_steps(task, step_type) current_step = (node.clean_step if step_type == 'clean' else node.deploy_step) @@ -1000,7 +1009,7 @@ class AgentBaseMixin(object): LOG.info('Agent on node %(node)s returned %(type)s command ' 'success, moving to next step', {'node': node.uuid, 'type': step_type}) - manager_utils.notify_conductor_resume_operation(task, step_type) + _continue_steps(task, step_type) else: msg = (_('Agent returned unknown status for %(type)s step %(step)s' ' on node %(node)s : %(err)s.') % @@ -1202,24 +1211,6 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin): 'error': e}) log_and_raise_deployment_error(task, msg, exc=e) - # TODO(dtantsur): remove in W - @METRICS.timer('AgentDeployMixin.reboot_and_finish_deploy') - def reboot_and_finish_deploy(self, task): - """Helper method to trigger reboot on the node and finish deploy. - - This method initiates a reboot on the node. On success, it - marks the deploy as complete. On failure, it logs the error - and marks deploy as failure. - - :param task: a TaskManager object containing the node - :raises: InstanceDeployFailure, if node reboot failed. - """ - # NOTE(dtantsur): do nothing here, the new deploy steps tear_down_agent - # and boot_instance will be picked up and finish the deploy (even for - # legacy deploy interfaces without decomposed steps). - task.process_event('wait') - manager_utils.notify_conductor_resume_deploy(task) - @METRICS.timer('AgentDeployMixin.prepare_instance_to_boot') def prepare_instance_to_boot(self, task, root_uuid, efi_sys_uuid, prep_boot_part_uuid=None): diff --git a/ironic/tests/unit/drivers/modules/test_agent_base.py b/ironic/tests/unit/drivers/modules/test_agent_base.py index 21c0b4fff2..d3c826e152 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base.py @@ -24,6 +24,7 @@ from ironic.common import boot_devices from ironic.common import exception from ironic.common import image_service from ironic.common import states +from ironic.conductor import cleaning from ironic.conductor import steps as conductor_steps from ironic.conductor import task_manager from ironic.conductor import utils as manager_utils @@ -315,9 +316,8 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): 'refresh_steps', autospec=True) @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - def test_heartbeat_resume_clean(self, mock_notify, mock_set_steps, + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + def test_heartbeat_resume_clean(self, mock_clean, mock_set_steps, mock_refresh, mock_touch): self.node.clean_step = {} self.node.provision_state = states.CLEANWAIT @@ -328,7 +328,7 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): mock_touch.assert_called_once_with(mock.ANY) mock_refresh.assert_called_once_with(mock.ANY, task, 'clean') - mock_notify.assert_called_once_with(task, 'clean') + mock_clean.assert_called_once_with(task) mock_set_steps.assert_called_once_with(task) @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) @@ -337,16 +337,16 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): 'refresh_steps', autospec=True) @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - def test_heartbeat_resume_clean_fails(self, mock_notify, mock_set_steps, + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + def test_heartbeat_resume_clean_fails(self, mock_clean, mock_set_steps, mock_refresh, mock_touch, mock_handler): - mocks = [mock_refresh, mock_set_steps, mock_notify] - self.node.clean_step = {} - self.node.provision_state = states.CLEANWAIT - self.node.save() + mocks = [mock_refresh, mock_set_steps, mock_clean] for i in range(len(mocks)): + self.node.clean_step = {} + self.node.provision_state = states.CLEANWAIT + self.node.save() + before_failed_mocks = mocks[:i] failed_mock = mocks[i] after_failed_mocks = mocks[i + 1:] @@ -1528,31 +1528,6 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.assertFalse(prepare_mock.called) self.assertFalse(failed_state_mock.called) - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning(self, status_mock, notify_mock): - # Test a successful execute clean step on the agent - self.node.clean_step = { - 'priority': 10, - 'interface': 'deploy', - 'step': 'erase_devices', - 'reboot_requested': False - } - self.node.save() - status_mock.return_value = [{ - 'command_status': 'SUCCEEDED', - 'command_name': 'execute_clean_step', - 'command_result': { - 'clean_step': self.node.clean_step - } - }] - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: - self.deploy.continue_cleaning(task) - notify_mock.assert_called_once_with(task, 'clean') - @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', spec_set=True, autospec=True) @@ -1647,295 +1622,6 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.assertNotIn('deployment_reboot', task.node.driver_internal_info) - @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) - @mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', spec_set=True, - autospec=True) - @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_reboot( - self, status_mock, reboot_mock, mock_prepare, mock_build_opt): - # Test a successful execute clean step on the agent, with reboot - self.node.clean_step = { - 'priority': 42, - 'interface': 'deploy', - 'step': 'reboot_me_afterwards', - 'reboot_requested': True - } - self.node.save() - status_mock.return_value = [{ - 'command_status': 'SUCCEEDED', - 'command_name': 'execute_clean_step', - 'command_result': { - 'clean_step': self.node.clean_step - } - }] - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: - self.deploy.continue_cleaning(task) - reboot_mock.assert_called_once_with(task, states.REBOOT) - - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_after_reboot(self, status_mock, notify_mock): - # Test a successful execute clean step on the agent, with reboot - self.node.clean_step = { - 'priority': 42, - 'interface': 'deploy', - 'step': 'reboot_me_afterwards', - 'reboot_requested': True - } - driver_internal_info = self.node.driver_internal_info - driver_internal_info['cleaning_reboot'] = True - self.node.driver_internal_info = driver_internal_info - self.node.save() - # Represents a freshly booted agent with no commands - status_mock.return_value = [] - - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: - self.deploy.continue_cleaning(task) - notify_mock.assert_called_once_with(task, 'clean') - self.assertNotIn('cleaning_reboot', - task.node.driver_internal_info) - - @mock.patch.object(agent_base, - '_get_post_step_hook', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_with_hook( - self, status_mock, notify_mock, get_hook_mock): - self.node.clean_step = { - 'priority': 10, - 'interface': 'raid', - 'step': 'create_configuration', - } - self.node.save() - command_status = { - 'command_status': 'SUCCEEDED', - 'command_name': 'execute_clean_step', - 'command_result': {'clean_step': self.node.clean_step}} - status_mock.return_value = [command_status] - hook_mock = mock.MagicMock(spec=types.FunctionType, __name__='foo') - get_hook_mock.return_value = hook_mock - with task_manager.acquire(self.context, self.node.uuid, - shared=False) as task: - self.deploy.continue_cleaning(task) - - get_hook_mock.assert_called_once_with(task.node, 'clean') - hook_mock.assert_called_once_with(task, command_status) - notify_mock.assert_called_once_with(task, 'clean') - - @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_base, - '_get_post_step_hook', autospec=True) - @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_with_hook_fails( - self, status_mock, error_handler_mock, get_hook_mock, - notify_mock, collect_logs_mock): - self.node.clean_step = { - 'priority': 10, - 'interface': 'raid', - 'step': 'create_configuration', - } - self.node.save() - command_status = { - 'command_status': 'SUCCEEDED', - 'command_name': 'execute_clean_step', - 'command_result': {'clean_step': self.node.clean_step}} - status_mock.return_value = [command_status] - hook_mock = mock.MagicMock(spec=types.FunctionType, __name__='foo') - hook_mock.side_effect = RuntimeError('error') - get_hook_mock.return_value = hook_mock - with task_manager.acquire(self.context, self.node.uuid, - shared=False) as task: - self.deploy.continue_cleaning(task) - - get_hook_mock.assert_called_once_with(task.node, 'clean') - hook_mock.assert_called_once_with(task, command_status) - error_handler_mock.assert_called_once_with(task, mock.ANY, - traceback=True) - self.assertFalse(notify_mock.called) - collect_logs_mock.assert_called_once_with(task.node, - label='cleaning') - - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_old_command(self, status_mock, notify_mock): - # Test when a second execute_clean_step happens to the agent, but - # the new step hasn't started yet. - self.node.clean_step = { - 'priority': 10, - 'interface': 'deploy', - 'step': 'erase_devices', - 'reboot_requested': False - } - self.node.save() - status_mock.return_value = [{ - 'command_status': 'SUCCEEDED', - 'command_name': 'execute_clean_step', - 'command_result': { - 'priority': 20, - 'interface': 'deploy', - 'step': 'update_firmware', - 'reboot_requested': False - } - }] - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: - self.deploy.continue_cleaning(task) - self.assertFalse(notify_mock.called) - - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_running(self, status_mock, notify_mock): - # Test that no action is taken while a clean step is executing - status_mock.return_value = [{ - 'command_status': 'RUNNING', - 'command_name': 'execute_clean_step', - 'command_result': None - }] - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: - self.deploy.continue_cleaning(task) - self.assertFalse(notify_mock.called) - - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_no_step_running(self, status_mock, notify_mock): - status_mock.return_value = [{ - 'command_status': 'SUCCEEDED', - 'command_name': 'get_clean_steps', - 'command_result': [] - }] - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: - self.deploy.continue_cleaning(task) - notify_mock.assert_called_once_with(task, 'clean') - - @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) - @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_fail(self, status_mock, error_mock, - collect_logs_mock): - # Test that a failure puts the node in CLEANFAIL - status_mock.return_value = [{ - 'command_status': 'FAILED', - 'command_name': 'execute_clean_step', - 'command_result': {} - }] - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: - task.node.clean_step = { - 'step': 'erase_devices', - 'interface': 'deploy', - } - self.deploy.continue_cleaning(task) - error_mock.assert_called_once_with(task, mock.ANY, traceback=False) - collect_logs_mock.assert_called_once_with(task.node, - label='cleaning') - - @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', - autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_base.AgentBaseMixin, 'refresh_steps', - autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def _test_continue_cleaning_clean_version_mismatch( - self, status_mock, refresh_steps_mock, notify_mock, steps_mock, - manual=False): - status_mock.return_value = [{ - 'command_status': 'CLEAN_VERSION_MISMATCH', - 'command_name': 'execute_clean_step', - }] - tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE - self.node.provision_state = states.CLEANWAIT - self.node.target_provision_state = tgt_prov_state - self.node.save() - with task_manager.acquire(self.context, self.node.uuid, - shared=False) as task: - self.deploy.continue_cleaning(task) - notify_mock.assert_called_once_with(task, 'clean') - refresh_steps_mock.assert_called_once_with(mock.ANY, task, 'clean') - if manual: - self.assertFalse( - task.node.driver_internal_info['skip_current_clean_step']) - self.assertFalse(steps_mock.called) - else: - steps_mock.assert_called_once_with(task) - self.assertNotIn('skip_current_clean_step', - task.node.driver_internal_info) - - def test_continue_cleaning_automated_clean_version_mismatch(self): - self._test_continue_cleaning_clean_version_mismatch() - - def test_continue_cleaning_manual_clean_version_mismatch(self): - self._test_continue_cleaning_clean_version_mismatch(manual=True) - - @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) - @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', - autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', - autospec=True) - @mock.patch.object(agent_base.AgentBaseMixin, 'refresh_steps', - autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_clean_version_mismatch_fail( - self, status_mock, refresh_steps_mock, notify_mock, steps_mock, - error_mock, manual=False): - status_mock.return_value = [{ - 'command_status': 'CLEAN_VERSION_MISMATCH', - 'command_name': 'execute_clean_step', - 'command_result': {'hardware_manager_version': {'Generic': '1'}} - }] - refresh_steps_mock.side_effect = exception.NodeCleaningFailure("boo") - tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE - self.node.provision_state = states.CLEANWAIT - self.node.target_provision_state = tgt_prov_state - self.node.save() - with task_manager.acquire(self.context, self.node.uuid, - shared=False) as task: - self.deploy.continue_cleaning(task) - - status_mock.assert_called_once_with(mock.ANY, task.node) - refresh_steps_mock.assert_called_once_with(mock.ANY, task, 'clean') - error_mock.assert_called_once_with(task, mock.ANY, traceback=True) - self.assertFalse(notify_mock.called) - self.assertFalse(steps_mock.called) - - @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) - @mock.patch.object(agent_client.AgentClient, 'get_commands_status', - autospec=True) - def test_continue_cleaning_unknown(self, status_mock, error_mock): - # Test that unknown commands are treated as failures - status_mock.return_value = [{ - 'command_status': 'UNKNOWN', - 'command_name': 'execute_clean_step', - 'command_result': {} - }] - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: - self.deploy.continue_cleaning(task) - error_mock.assert_called_once_with(task, mock.ANY, traceback=False) - def _test_clean_step_hook(self): """Helper method for unit tests related to clean step hooks.""" some_function_mock = mock.MagicMock() @@ -1985,6 +1671,351 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.assertIsNone(hook_returned) +class ContinueCleaningTest(AgentDeployMixinBaseTest): + + def setUp(self): + super().setUp() + self.node.provision_state = states.CLEANWAIT + self.node.target_provision_state = states.AVAILABLE + self.node.save() + + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning(self, status_mock, clean_mock): + # Test a successful execute clean step on the agent + self.node.clean_step = { + 'priority': 10, + 'interface': 'deploy', + 'step': 'erase_devices', + 'reboot_requested': False + } + self.node.save() + status_mock.return_value = [{ + 'command_status': 'SUCCEEDED', + 'command_name': 'execute_clean_step', + 'command_result': { + 'clean_step': self.node.clean_step + } + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.deploy.continue_cleaning(task) + clean_mock.assert_called_once_with(task) + self.assertEqual(states.CLEANING, task.node.provision_state) + self.assertEqual(states.AVAILABLE, + task.node.target_provision_state) + + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_manual_cleaning(self, status_mock, clean_mock): + self.node.target_provision_state = states.MANAGEABLE + self.node.clean_step = { + 'priority': 10, + 'interface': 'deploy', + 'step': 'erase_devices', + 'reboot_requested': False + } + self.node.save() + status_mock.return_value = [{ + 'command_status': 'SUCCEEDED', + 'command_name': 'execute_clean_step', + 'command_result': { + 'clean_step': self.node.clean_step + } + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.deploy.continue_cleaning(task) + clean_mock.assert_called_once_with(task) + self.assertEqual(states.CLEANING, task.node.provision_state) + self.assertEqual(states.MANAGEABLE, + task.node.target_provision_state) + + @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) + @mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', spec_set=True, + autospec=True) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_reboot( + self, status_mock, reboot_mock, mock_prepare, mock_build_opt): + # Test a successful execute clean step on the agent, with reboot + self.node.clean_step = { + 'priority': 42, + 'interface': 'deploy', + 'step': 'reboot_me_afterwards', + 'reboot_requested': True + } + self.node.save() + status_mock.return_value = [{ + 'command_status': 'SUCCEEDED', + 'command_name': 'execute_clean_step', + 'command_result': { + 'clean_step': self.node.clean_step + } + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.deploy.continue_cleaning(task) + reboot_mock.assert_called_once_with(task, states.REBOOT) + + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_after_reboot(self, status_mock, clean_mock): + # Test a successful execute clean step on the agent, with reboot + self.node.clean_step = { + 'priority': 42, + 'interface': 'deploy', + 'step': 'reboot_me_afterwards', + 'reboot_requested': True + } + driver_internal_info = self.node.driver_internal_info + driver_internal_info['cleaning_reboot'] = True + self.node.driver_internal_info = driver_internal_info + self.node.save() + # Represents a freshly booted agent with no commands + status_mock.return_value = [] + + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.deploy.continue_cleaning(task) + clean_mock.assert_called_once_with(task) + self.assertEqual(states.CLEANING, task.node.provision_state) + self.assertNotIn('cleaning_reboot', + task.node.driver_internal_info) + + @mock.patch.object(agent_base, + '_get_post_step_hook', autospec=True) + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_with_hook( + self, status_mock, clean_mock, get_hook_mock): + self.node.clean_step = { + 'priority': 10, + 'interface': 'raid', + 'step': 'create_configuration', + } + self.node.save() + command_status = { + 'command_status': 'SUCCEEDED', + 'command_name': 'execute_clean_step', + 'command_result': {'clean_step': self.node.clean_step}} + status_mock.return_value = [command_status] + hook_mock = mock.MagicMock(spec=types.FunctionType, __name__='foo') + get_hook_mock.return_value = hook_mock + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + self.deploy.continue_cleaning(task) + + get_hook_mock.assert_called_once_with(task.node, 'clean') + hook_mock.assert_called_once_with(task, command_status) + clean_mock.assert_called_once_with(task) + + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_base, + '_get_post_step_hook', autospec=True) + @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_with_hook_fails( + self, status_mock, error_handler_mock, get_hook_mock, + clean_mock, collect_logs_mock): + self.node.clean_step = { + 'priority': 10, + 'interface': 'raid', + 'step': 'create_configuration', + } + self.node.save() + command_status = { + 'command_status': 'SUCCEEDED', + 'command_name': 'execute_clean_step', + 'command_result': {'clean_step': self.node.clean_step}} + status_mock.return_value = [command_status] + hook_mock = mock.MagicMock(spec=types.FunctionType, __name__='foo') + hook_mock.side_effect = RuntimeError('error') + get_hook_mock.return_value = hook_mock + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + self.deploy.continue_cleaning(task) + + get_hook_mock.assert_called_once_with(task.node, 'clean') + hook_mock.assert_called_once_with(task, command_status) + error_handler_mock.assert_called_once_with(task, mock.ANY, + traceback=True) + self.assertFalse(clean_mock.called) + collect_logs_mock.assert_called_once_with(task.node, + label='cleaning') + + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_old_command(self, status_mock, clean_mock): + # Test when a second execute_clean_step happens to the agent, but + # the new step hasn't started yet. + self.node.clean_step = { + 'priority': 10, + 'interface': 'deploy', + 'step': 'erase_devices', + 'reboot_requested': False + } + self.node.save() + status_mock.return_value = [{ + 'command_status': 'SUCCEEDED', + 'command_name': 'execute_clean_step', + 'command_result': { + 'priority': 20, + 'interface': 'deploy', + 'step': 'update_firmware', + 'reboot_requested': False + } + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.deploy.continue_cleaning(task) + self.assertFalse(clean_mock.called) + + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_running(self, status_mock, clean_mock): + # Test that no action is taken while a clean step is executing + status_mock.return_value = [{ + 'command_status': 'RUNNING', + 'command_name': 'execute_clean_step', + 'command_result': None + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.deploy.continue_cleaning(task) + self.assertFalse(clean_mock.called) + + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_no_step_running(self, status_mock, clean_mock): + status_mock.return_value = [{ + 'command_status': 'SUCCEEDED', + 'command_name': 'get_clean_steps', + 'command_result': [] + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.deploy.continue_cleaning(task) + clean_mock.assert_called_once_with(task) + + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) + @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_fail(self, status_mock, error_mock, + collect_logs_mock): + # Test that a failure puts the node in CLEANFAIL + status_mock.return_value = [{ + 'command_status': 'FAILED', + 'command_name': 'execute_clean_step', + 'command_result': {} + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + task.node.clean_step = { + 'step': 'erase_devices', + 'interface': 'deploy', + } + self.deploy.continue_cleaning(task) + error_mock.assert_called_once_with(task, mock.ANY, traceback=False) + collect_logs_mock.assert_called_once_with(task.node, + label='cleaning') + + @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', + autospec=True) + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_base.AgentBaseMixin, 'refresh_steps', + autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def _test_continue_cleaning_clean_version_mismatch( + self, status_mock, refresh_steps_mock, clean_mock, steps_mock, + manual=False): + status_mock.return_value = [{ + 'command_status': 'CLEAN_VERSION_MISMATCH', + 'command_name': 'execute_clean_step', + }] + tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE + self.node.provision_state = states.CLEANWAIT + self.node.target_provision_state = tgt_prov_state + self.node.save() + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + self.deploy.continue_cleaning(task) + clean_mock.assert_called_once_with(task) + refresh_steps_mock.assert_called_once_with(mock.ANY, task, 'clean') + if manual: + self.assertFalse( + task.node.driver_internal_info['skip_current_clean_step']) + self.assertFalse(steps_mock.called) + else: + steps_mock.assert_called_once_with(task) + self.assertNotIn('skip_current_clean_step', + task.node.driver_internal_info) + + def test_continue_cleaning_automated_clean_version_mismatch(self): + self._test_continue_cleaning_clean_version_mismatch() + + def test_continue_cleaning_manual_clean_version_mismatch(self): + self._test_continue_cleaning_clean_version_mismatch(manual=True) + + @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) + @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', + autospec=True) + @mock.patch.object(cleaning, 'continue_node_clean', autospec=True) + @mock.patch.object(agent_base.AgentBaseMixin, 'refresh_steps', + autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_clean_version_mismatch_fail( + self, status_mock, refresh_steps_mock, clean_mock, steps_mock, + error_mock, manual=False): + status_mock.return_value = [{ + 'command_status': 'CLEAN_VERSION_MISMATCH', + 'command_name': 'execute_clean_step', + 'command_result': {'hardware_manager_version': {'Generic': '1'}} + }] + refresh_steps_mock.side_effect = exception.NodeCleaningFailure("boo") + tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE + self.node.provision_state = states.CLEANWAIT + self.node.target_provision_state = tgt_prov_state + self.node.save() + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + self.deploy.continue_cleaning(task) + + status_mock.assert_called_once_with(mock.ANY, task.node) + refresh_steps_mock.assert_called_once_with(mock.ANY, task, 'clean') + error_mock.assert_called_once_with(task, mock.ANY, traceback=True) + self.assertFalse(clean_mock.called) + self.assertFalse(steps_mock.called) + + @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_unknown(self, status_mock, error_mock): + # Test that unknown commands are treated as failures + status_mock.return_value = [{ + 'command_status': 'UNKNOWN', + 'command_name': 'execute_clean_step', + 'command_result': {} + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.deploy.continue_cleaning(task) + error_mock.assert_called_once_with(task, mock.ANY, traceback=False) + + class TestRefreshCleanSteps(AgentDeployMixinBaseTest): def setUp(self): diff --git a/releasenotes/notes/no-deploy-rpc-dec8ee1d0326d1ad.yaml b/releasenotes/notes/no-deploy-rpc-dec8ee1d0326d1ad.yaml new file mode 100644 index 0000000000..74f333b484 --- /dev/null +++ b/releasenotes/notes/no-deploy-rpc-dec8ee1d0326d1ad.yaml @@ -0,0 +1,5 @@ +--- +other: + - | + The agent deploy and cleaning code no longer uses an RPC call to the same + conductor when proceeding to the next deploy or clean step.