diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 6de308ae5c..be81483d77 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -75,6 +75,7 @@ from ironic.conductor import task_manager from ironic.conductor import utils from ironic.conf import CONF from ironic.drivers import base as drivers_base +from ironic.drivers import utils as driver_utils from ironic import objects from ironic.objects import base as objects_base from ironic.objects import fields @@ -1458,6 +1459,7 @@ class ConductorManager(base_manager.BaseConductorManager): {'node': node.uuid, 'exc': e, 'step': node.clean_step}) LOG.exception(msg) + driver_utils.collect_ramdisk_logs(task.node, label='cleaning') utils.cleaning_error_handler(task, msg) return @@ -1482,6 +1484,9 @@ class ConductorManager(base_manager.BaseConductorManager): LOG.info('Node %(node)s finished clean step %(step)s', {'node': node.uuid, 'step': step}) + if CONF.agent.deploy_logs_collect == 'always': + driver_utils.collect_ramdisk_logs(node, label='cleaning') + # Clear clean_step node.clean_step = None driver_internal_info = node.driver_internal_info diff --git a/ironic/drivers/modules/agent_base_vendor.py b/ironic/drivers/modules/agent_base_vendor.py index d6b4531b9d..9094d4374b 100644 --- a/ironic/drivers/modules/agent_base_vendor.py +++ b/ironic/drivers/modules/agent_base_vendor.py @@ -585,6 +585,7 @@ class AgentDeployMixin(HeartbeatMixin): 'err': command.get('command_error'), 'step': node.clean_step}) LOG.error(msg) + driver_utils.collect_ramdisk_logs(task.node, label='cleaning') return manager_utils.cleaning_error_handler(task, msg) elif command.get('command_status') == 'CLEAN_VERSION_MISMATCH': # Cache the new clean steps (and 'hardware_manager_version') @@ -649,6 +650,8 @@ class AgentDeployMixin(HeartbeatMixin): 'cls': e.__class__.__name__, 'step': node.clean_step}) LOG.exception(msg) + driver_utils.collect_ramdisk_logs(task.node, + label='cleaning') return manager_utils.cleaning_error_handler(task, msg) if task.node.clean_step.get('reboot_requested'): @@ -665,6 +668,7 @@ class AgentDeployMixin(HeartbeatMixin): 'err': command.get('command_status'), 'step': node.clean_step}) LOG.error(msg) + driver_utils.collect_ramdisk_logs(task.node, label='cleaning') return manager_utils.cleaning_error_handler(task, msg) @METRICS.timer('AgentDeployMixin.reboot_and_finish_deploy') diff --git a/ironic/drivers/utils.py b/ironic/drivers/utils.py index e0c69aa81b..4a496daec0 100644 --- a/ironic/drivers/utils.py +++ b/ironic/drivers/utils.py @@ -324,7 +324,7 @@ def store_ramdisk_logs(node, logs, label=None): f.name, object_headers=object_headers) -def collect_ramdisk_logs(node): +def collect_ramdisk_logs(node, label=None): """Collect and store the system logs from the IPA ramdisk. Collect and store the system logs from the IPA ramdisk. This method @@ -332,8 +332,11 @@ def collect_ramdisk_logs(node): according to the configured storage backend. :param node: A node object. - + :param label: A string to label the log file such as a clean step name. """ + if CONF.agent.deploy_logs_collect == 'never': + return + client = agent_client.AgentClient() try: result = client.collect_system_logs(node) @@ -351,7 +354,8 @@ def collect_ramdisk_logs(node): return try: - store_ramdisk_logs(node, result['command_result']['system_logs']) + store_ramdisk_logs(node, result['command_result']['system_logs'], + label=label) except exception.SwiftOperationError as e: LOG.error('Failed to store the logs from the node %(node)s ' 'deployment in Swift. Error: %(error)s', diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index 53cdba3abd..c4feb7407f 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -4275,12 +4275,14 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): def test__do_next_clean_step_manual_last_step_noop(self): self._do_next_clean_step_last_step_noop(manual=True) + @mock.patch('ironic.drivers.utils.collect_ramdisk_logs', autospec=True) @mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step', autospec=True) @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step', autospec=True) def _do_next_clean_step_all(self, mock_deploy_execute, - mock_power_execute, manual=False): + mock_power_execute, mock_collect_logs, + manual=False): # Run all steps from start to finish (all synchronous) tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE @@ -4322,6 +4324,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): mock_deploy_execute.assert_has_calls( [mock.call(mock.ANY, mock.ANY, self.clean_steps[0]), mock.call(mock.ANY, mock.ANY, self.clean_steps[2])]) + self.assertFalse(mock_collect_logs.called) def test_do_next_clean_step_automated_all(self): self._do_next_clean_step_all() @@ -4329,11 +4332,64 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): def test_do_next_clean_step_manual_all(self): self._do_next_clean_step_all(manual=True) + @mock.patch('ironic.drivers.utils.collect_ramdisk_logs', autospec=True) + @mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step', + autospec=True) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step', + autospec=True) + def test_do_next_clean_step_collect_logs(self, mock_deploy_execute, + mock_power_execute, + mock_collect_logs): + CONF.set_override('deploy_logs_collect', 'always', group='agent') + # Run all steps from start to finish (all synchronous) + tgt_prov_state = states.MANAGEABLE + + self._start_service() + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.CLEANING, + target_provision_state=tgt_prov_state, + last_error=None, + driver_internal_info={'clean_steps': self.clean_steps, + 'clean_step_index': None}, + clean_step={}) + + def fake_deploy(conductor_obj, task, step): + driver_internal_info = task.node.driver_internal_info + driver_internal_info['goober'] = 'test' + task.node.driver_internal_info = driver_internal_info + task.node.save() + + mock_deploy_execute.side_effect = fake_deploy + mock_power_execute.return_value = None + + with task_manager.acquire( + self.context, node.uuid, shared=False) as task: + self.service._do_next_clean_step(task, 0) + + self._stop_service() + node.refresh() + + # Cleaning should be complete + self.assertEqual(tgt_prov_state, node.provision_state) + self.assertEqual(states.NOSTATE, node.target_provision_state) + self.assertEqual({}, node.clean_step) + self.assertNotIn('clean_step_index', node.driver_internal_info) + self.assertEqual('test', node.driver_internal_info['goober']) + self.assertIsNone(node.driver_internal_info['clean_steps']) + mock_power_execute.assert_called_once_with(mock.ANY, mock.ANY, + self.clean_steps[1]) + mock_deploy_execute.assert_has_calls( + [mock.call(mock.ANY, mock.ANY, self.clean_steps[0]), + mock.call(mock.ANY, mock.ANY, self.clean_steps[2])]) + mock_collect_logs.assert_called_once_with(mock.ANY, label='cleaning') + + @mock.patch('ironic.drivers.utils.collect_ramdisk_logs', autospec=True) @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step', autospec=True) @mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True) def _do_next_clean_step_execute_fail(self, tear_mock, mock_execute, - manual=False): + mock_collect_logs, manual=False): # When a clean step fails, go to CLEANFAIL tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE @@ -4365,6 +4421,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): self.assertTrue(node.maintenance) mock_execute.assert_called_once_with( mock.ANY, mock.ANY, self.clean_steps[0]) + mock_collect_logs.assert_called_once_with(mock.ANY, label='cleaning') def test__do_next_clean_step_automated_execute_fail(self): self._do_next_clean_step_execute_fail() diff --git a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py index a9b77c0b08..509d17720d 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py @@ -1626,6 +1626,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): hook_mock.assert_called_once_with(task, command_status) notify_mock.assert_called_once_with(task) + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', autospec=True) @mock.patch.object(agent_base_vendor, @@ -1635,7 +1636,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): autospec=True) def test_continue_cleaning_with_hook_fails( self, status_mock, error_handler_mock, get_hook_mock, - notify_mock): + notify_mock, collect_logs_mock): self.node.clean_step = { 'priority': 10, 'interface': 'raid', @@ -1658,6 +1659,8 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): hook_mock.assert_called_once_with(task, command_status) error_handler_mock.assert_called_once_with(task, mock.ANY) self.assertFalse(notify_mock.called) + collect_logs_mock.assert_called_once_with(task.node, + label='cleaning') @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', autospec=True) @@ -1704,10 +1707,12 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.deploy.continue_cleaning(task) self.assertFalse(notify_mock.called) + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) - def test_continue_cleaning_fail(self, status_mock, error_mock): + def test_continue_cleaning_fail(self, status_mock, error_mock, + collect_logs_mock): # Test that a failure puts the node in CLEANFAIL status_mock.return_value = [{ 'command_status': 'FAILED', @@ -1718,6 +1723,8 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): shared=False) as task: self.deploy.continue_cleaning(task) error_mock.assert_called_once_with(task, mock.ANY) + collect_logs_mock.assert_called_once_with(task.node, + label='cleaning') @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', autospec=True) diff --git a/ironic/tests/unit/drivers/test_utils.py b/ironic/tests/unit/drivers/test_utils.py index b09915704f..83fd8c5f0f 100644 --- a/ironic/tests/unit/drivers/test_utils.py +++ b/ironic/tests/unit/drivers/test_utils.py @@ -252,7 +252,16 @@ class UtilsRamdiskLogsTestCase(tests_base.TestCase): logs = 'Gary the Snail' mock_collect.return_value = {'command_result': {'system_logs': logs}} driver_utils.collect_ramdisk_logs(self.node) - mock_store.assert_called_once_with(self.node, logs) + mock_store.assert_called_once_with(self.node, logs, label=None) + + @mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True) + @mock.patch.object(agent_client.AgentClient, + 'collect_system_logs', autospec=True) + def test_collect_ramdisk_logs_with_label(self, mock_collect, mock_store): + logs = 'Gary the Snail' + mock_collect.return_value = {'command_result': {'system_logs': logs}} + driver_utils.collect_ramdisk_logs(self.node, label='logs') + mock_store.assert_called_once_with(self.node, logs, label='logs') @mock.patch.object(driver_utils.LOG, 'error', autospec=True) @mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True) @@ -286,7 +295,7 @@ class UtilsRamdiskLogsTestCase(tests_base.TestCase): logs = 'Gary the Snail' mock_collect.return_value = {'command_result': {'system_logs': logs}} driver_utils.collect_ramdisk_logs(self.node) - mock_store.assert_called_once_with(self.node, logs) + mock_store.assert_called_once_with(self.node, logs, label=None) @mock.patch.object(driver_utils.LOG, 'exception', autospec=True) def test_collect_ramdisk_logs_storage_fail_fs(self, mock_log): diff --git a/releasenotes/notes/cleaning-logs-dc115b0926ae3982.yaml b/releasenotes/notes/cleaning-logs-dc115b0926ae3982.yaml new file mode 100644 index 0000000000..c90e2edd46 --- /dev/null +++ b/releasenotes/notes/cleaning-logs-dc115b0926ae3982.yaml @@ -0,0 +1,5 @@ +--- +other: + - | + Ramdisk logs are now collected during cleaning the same way as during + deployment.