From 07fd5b92a5c1a47b35543b9d809cfac2d6a3bda7 Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Thu, 14 May 2020 14:52:16 +0200 Subject: [PATCH] Collect ramdisk logs also during cleaning Change-Id: Ia60996b4198e6fcfba6094af26498869589e175e (cherry picked from commit d31e71a736e18c4ec403d411bd1514808e9529a7) --- ironic/conductor/cleaning.py | 5 ++ ironic/drivers/modules/agent_base.py | 21 ++++--- ironic/drivers/utils.py | 10 +++- ironic/tests/unit/conductor/test_cleaning.py | 59 ++++++++++++++++++- .../unit/drivers/modules/test_agent_base.py | 11 +++- ironic/tests/unit/drivers/test_utils.py | 13 +++- .../notes/cleaning-logs-dc115b0926ae3982.yaml | 5 ++ 7 files changed, 106 insertions(+), 18 deletions(-) create mode 100644 releasenotes/notes/cleaning-logs-dc115b0926ae3982.yaml diff --git a/ironic/conductor/cleaning.py b/ironic/conductor/cleaning.py index 351c381211..6a188c7a16 100644 --- a/ironic/conductor/cleaning.py +++ b/ironic/conductor/cleaning.py @@ -21,6 +21,7 @@ from ironic.conductor import steps as conductor_steps from ironic.conductor import task_manager from ironic.conductor import utils from ironic.conf import CONF +from ironic.drivers import utils as driver_utils LOG = log.getLogger(__name__) @@ -182,6 +183,7 @@ def do_next_clean_step(task, step_index): {'node': node.uuid, 'exc': e, 'step': node.clean_step}) LOG.exception(msg) + driver_utils.collect_ramdisk_logs(task.node, label='cleaning') utils.cleaning_error_handler(task, msg) return @@ -206,6 +208,9 @@ def do_next_clean_step(task, step_index): LOG.info('Node %(node)s finished clean step %(step)s', {'node': node.uuid, 'step': step}) + if CONF.agent.deploy_logs_collect == 'always': + driver_utils.collect_ramdisk_logs(task.node, label='cleaning') + # Clear clean_step node.clean_step = None driver_internal_info = node.driver_internal_info diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py index 3678020934..0cdd854703 100644 --- a/ironic/drivers/modules/agent_base.py +++ b/ironic/drivers/modules/agent_base.py @@ -369,6 +369,15 @@ def execute_clean_step(task, step): return execute_step(task, step, 'clean') +def _step_failure_handler(task, msg, step_type): + driver_utils.collect_ramdisk_logs( + task.node, label='cleaning' if step_type == 'clean' else None) + if step_type == 'clean': + manager_utils.cleaning_error_handler(task, msg) + else: + manager_utils.deploying_error_handler(task, msg) + + class HeartbeatMixin(object): """Mixin class implementing heartbeat processing.""" @@ -922,7 +931,7 @@ class AgentDeployMixin(HeartbeatMixin): 'step': current_step, 'type': step_type}) LOG.error(msg) - return manager_utils.cleaning_error_handler(task, msg) + return _step_failure_handler(task, msg, step_type) # NOTE(dtantsur): VERSION_MISMATCH is a new alias for # CLEAN_VERSION_MISMATCH, remove the old one after IPA removes it. elif command.get('command_status') in ('CLEAN_VERSION_MISMATCH', @@ -950,10 +959,7 @@ class AgentDeployMixin(HeartbeatMixin): 'step': current_step, 'type': step_type}) LOG.exception(msg) - if step_type == 'clean': - return manager_utils.cleaning_error_handler(task, msg) - else: - return manager_utils.deploying_error_handler(task, msg) + return _step_failure_handler(task, msg, step_type) if current_step.get('reboot_requested'): _post_step_reboot(task, step_type) @@ -971,10 +977,7 @@ class AgentDeployMixin(HeartbeatMixin): 'step': current_step, 'type': step_type}) LOG.error(msg) - if step_type == 'clean': - return manager_utils.cleaning_error_handler(task, msg) - else: - return manager_utils.deploying_error_handler(task, msg) + return _step_failure_handler(task, msg, step_type) @METRICS.timer('AgentDeployMixin.reboot_and_finish_deploy') def reboot_and_finish_deploy(self, task): diff --git a/ironic/drivers/utils.py b/ironic/drivers/utils.py index 99b2a76657..e61d7ab866 100644 --- a/ironic/drivers/utils.py +++ b/ironic/drivers/utils.py @@ -323,7 +323,7 @@ def store_ramdisk_logs(node, logs, label=None): f.name, object_headers=object_headers) -def collect_ramdisk_logs(node): +def collect_ramdisk_logs(node, label=None): """Collect and store the system logs from the IPA ramdisk. Collect and store the system logs from the IPA ramdisk. This method @@ -331,8 +331,11 @@ def collect_ramdisk_logs(node): according to the configured storage backend. :param node: A node object. - + :param label: A string to label the log file such as a clean step name. """ + if CONF.agent.deploy_logs_collect == 'never': + return + client = agent_client.AgentClient() try: result = client.collect_system_logs(node) @@ -350,7 +353,8 @@ def collect_ramdisk_logs(node): return try: - store_ramdisk_logs(node, result['command_result']['system_logs']) + store_ramdisk_logs(node, result['command_result']['system_logs'], + label=label) except exception.SwiftOperationError as e: LOG.error('Failed to store the logs from the node %(node)s ' 'deployment in Swift. Error: %(error)s', diff --git a/ironic/tests/unit/conductor/test_cleaning.py b/ironic/tests/unit/conductor/test_cleaning.py index f48fee0477..d785b63282 100644 --- a/ironic/tests/unit/conductor/test_cleaning.py +++ b/ironic/tests/unit/conductor/test_cleaning.py @@ -572,12 +572,14 @@ class DoNodeCleanTestCase(db_base.DbTestCase): def test__do_next_clean_step_retired_last_step_change_tgt_state(self): self._do_next_clean_step_last_step_noop(retired=True) + @mock.patch('ironic.drivers.utils.collect_ramdisk_logs', autospec=True) @mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step', autospec=True) @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step', autospec=True) def _do_next_clean_step_all(self, mock_deploy_execute, - mock_power_execute, manual=False): + mock_power_execute, mock_collect_logs, + manual=False): # Run all steps from start to finish (all synchronous) tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE @@ -617,6 +619,7 @@ class DoNodeCleanTestCase(db_base.DbTestCase): mock_deploy_execute.assert_has_calls( [mock.call(mock.ANY, mock.ANY, self.clean_steps[0]), mock.call(mock.ANY, mock.ANY, self.clean_steps[2])]) + self.assertFalse(mock_collect_logs.called) def test_do_next_clean_step_automated_all(self): self._do_next_clean_step_all() @@ -624,11 +627,62 @@ class DoNodeCleanTestCase(db_base.DbTestCase): def test_do_next_clean_step_manual_all(self): self._do_next_clean_step_all(manual=True) + @mock.patch('ironic.drivers.utils.collect_ramdisk_logs', autospec=True) + @mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step', + autospec=True) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step', + autospec=True) + def test_do_next_clean_step_collect_logs(self, mock_deploy_execute, + mock_power_execute, + mock_collect_logs): + CONF.set_override('deploy_logs_collect', 'always', group='agent') + # Run all steps from start to finish (all synchronous) + tgt_prov_state = states.MANAGEABLE + + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.CLEANING, + target_provision_state=tgt_prov_state, + last_error=None, + driver_internal_info={'clean_steps': self.clean_steps, + 'clean_step_index': None}, + clean_step={}) + + def fake_deploy(conductor_obj, task, step): + driver_internal_info = task.node.driver_internal_info + driver_internal_info['goober'] = 'test' + task.node.driver_internal_info = driver_internal_info + task.node.save() + + mock_deploy_execute.side_effect = fake_deploy + mock_power_execute.return_value = None + + with task_manager.acquire( + self.context, node.uuid, shared=False) as task: + cleaning.do_next_clean_step(task, 0) + + node.refresh() + + # Cleaning should be complete + self.assertEqual(tgt_prov_state, node.provision_state) + self.assertEqual(states.NOSTATE, node.target_provision_state) + self.assertEqual({}, node.clean_step) + self.assertNotIn('clean_step_index', node.driver_internal_info) + self.assertEqual('test', node.driver_internal_info['goober']) + self.assertIsNone(node.driver_internal_info['clean_steps']) + mock_power_execute.assert_called_once_with(mock.ANY, mock.ANY, + self.clean_steps[1]) + mock_deploy_execute.assert_has_calls( + [mock.call(mock.ANY, mock.ANY, self.clean_steps[0]), + mock.call(mock.ANY, mock.ANY, self.clean_steps[2])]) + mock_collect_logs.assert_called_once_with(mock.ANY, label='cleaning') + + @mock.patch('ironic.drivers.utils.collect_ramdisk_logs', autospec=True) @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step', autospec=True) @mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True) def _do_next_clean_step_execute_fail(self, tear_mock, mock_execute, - manual=False): + mock_collect_logs, manual=False): # When a clean step fails, go to CLEANFAIL tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE @@ -658,6 +712,7 @@ class DoNodeCleanTestCase(db_base.DbTestCase): self.assertTrue(node.maintenance) mock_execute.assert_called_once_with( mock.ANY, mock.ANY, self.clean_steps[0]) + mock_collect_logs.assert_called_once_with(mock.ANY, label='cleaning') def test__do_next_clean_step_automated_execute_fail(self): self._do_next_clean_step_execute_fail() diff --git a/ironic/tests/unit/drivers/modules/test_agent_base.py b/ironic/tests/unit/drivers/modules/test_agent_base.py index 4f7ce9a1fe..8dc82e5a8c 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base.py @@ -1815,6 +1815,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): hook_mock.assert_called_once_with(task, command_status) notify_mock.assert_called_once_with(task, 'clean') + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) @mock.patch.object(agent_base, @@ -1824,7 +1825,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): autospec=True) def test_continue_cleaning_with_hook_fails( self, status_mock, error_handler_mock, get_hook_mock, - notify_mock): + notify_mock, collect_logs_mock): self.node.clean_step = { 'priority': 10, 'interface': 'raid', @@ -1847,6 +1848,8 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): hook_mock.assert_called_once_with(task, command_status) error_handler_mock.assert_called_once_with(task, mock.ANY) self.assertFalse(notify_mock.called) + collect_logs_mock.assert_called_once_with(task.node, + label='cleaning') @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) @@ -1908,10 +1911,12 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.deploy.continue_cleaning(task) notify_mock.assert_called_once_with(task, 'clean') + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) - def test_continue_cleaning_fail(self, status_mock, error_mock): + def test_continue_cleaning_fail(self, status_mock, error_mock, + collect_logs_mock): # Test that a failure puts the node in CLEANFAIL status_mock.return_value = [{ 'command_status': 'FAILED', @@ -1922,6 +1927,8 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): shared=False) as task: self.deploy.continue_cleaning(task) error_mock.assert_called_once_with(task, mock.ANY) + collect_logs_mock.assert_called_once_with(task.node, + label='cleaning') @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', autospec=True) diff --git a/ironic/tests/unit/drivers/test_utils.py b/ironic/tests/unit/drivers/test_utils.py index b09915704f..83fd8c5f0f 100644 --- a/ironic/tests/unit/drivers/test_utils.py +++ b/ironic/tests/unit/drivers/test_utils.py @@ -252,7 +252,16 @@ class UtilsRamdiskLogsTestCase(tests_base.TestCase): logs = 'Gary the Snail' mock_collect.return_value = {'command_result': {'system_logs': logs}} driver_utils.collect_ramdisk_logs(self.node) - mock_store.assert_called_once_with(self.node, logs) + mock_store.assert_called_once_with(self.node, logs, label=None) + + @mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True) + @mock.patch.object(agent_client.AgentClient, + 'collect_system_logs', autospec=True) + def test_collect_ramdisk_logs_with_label(self, mock_collect, mock_store): + logs = 'Gary the Snail' + mock_collect.return_value = {'command_result': {'system_logs': logs}} + driver_utils.collect_ramdisk_logs(self.node, label='logs') + mock_store.assert_called_once_with(self.node, logs, label='logs') @mock.patch.object(driver_utils.LOG, 'error', autospec=True) @mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True) @@ -286,7 +295,7 @@ class UtilsRamdiskLogsTestCase(tests_base.TestCase): logs = 'Gary the Snail' mock_collect.return_value = {'command_result': {'system_logs': logs}} driver_utils.collect_ramdisk_logs(self.node) - mock_store.assert_called_once_with(self.node, logs) + mock_store.assert_called_once_with(self.node, logs, label=None) @mock.patch.object(driver_utils.LOG, 'exception', autospec=True) def test_collect_ramdisk_logs_storage_fail_fs(self, mock_log): diff --git a/releasenotes/notes/cleaning-logs-dc115b0926ae3982.yaml b/releasenotes/notes/cleaning-logs-dc115b0926ae3982.yaml new file mode 100644 index 0000000000..c90e2edd46 --- /dev/null +++ b/releasenotes/notes/cleaning-logs-dc115b0926ae3982.yaml @@ -0,0 +1,5 @@ +--- +other: + - | + Ramdisk logs are now collected during cleaning the same way as during + deployment.