Collect ramdisk logs also during cleaning

Change-Id: Ia60996b4198e6fcfba6094af26498869589e175e
(cherry picked from commit d31e71a736)
This commit is contained in:
Dmitry Tantsur 2020-05-14 14:52:16 +02:00
parent 48cfa42c34
commit 07fd5b92a5
7 changed files with 106 additions and 18 deletions

View File

@ -21,6 +21,7 @@ from ironic.conductor import steps as conductor_steps
from ironic.conductor import task_manager
from ironic.conductor import utils
from ironic.conf import CONF
from ironic.drivers import utils as driver_utils
LOG = log.getLogger(__name__)
@ -182,6 +183,7 @@ def do_next_clean_step(task, step_index):
{'node': node.uuid, 'exc': e,
'step': node.clean_step})
LOG.exception(msg)
driver_utils.collect_ramdisk_logs(task.node, label='cleaning')
utils.cleaning_error_handler(task, msg)
return
@ -206,6 +208,9 @@ def do_next_clean_step(task, step_index):
LOG.info('Node %(node)s finished clean step %(step)s',
{'node': node.uuid, 'step': step})
if CONF.agent.deploy_logs_collect == 'always':
driver_utils.collect_ramdisk_logs(task.node, label='cleaning')
# Clear clean_step
node.clean_step = None
driver_internal_info = node.driver_internal_info

View File

@ -369,6 +369,15 @@ def execute_clean_step(task, step):
return execute_step(task, step, 'clean')
def _step_failure_handler(task, msg, step_type):
driver_utils.collect_ramdisk_logs(
task.node, label='cleaning' if step_type == 'clean' else None)
if step_type == 'clean':
manager_utils.cleaning_error_handler(task, msg)
else:
manager_utils.deploying_error_handler(task, msg)
class HeartbeatMixin(object):
"""Mixin class implementing heartbeat processing."""
@ -922,7 +931,7 @@ class AgentDeployMixin(HeartbeatMixin):
'step': current_step,
'type': step_type})
LOG.error(msg)
return manager_utils.cleaning_error_handler(task, msg)
return _step_failure_handler(task, msg, step_type)
# NOTE(dtantsur): VERSION_MISMATCH is a new alias for
# CLEAN_VERSION_MISMATCH, remove the old one after IPA removes it.
elif command.get('command_status') in ('CLEAN_VERSION_MISMATCH',
@ -950,10 +959,7 @@ class AgentDeployMixin(HeartbeatMixin):
'step': current_step,
'type': step_type})
LOG.exception(msg)
if step_type == 'clean':
return manager_utils.cleaning_error_handler(task, msg)
else:
return manager_utils.deploying_error_handler(task, msg)
return _step_failure_handler(task, msg, step_type)
if current_step.get('reboot_requested'):
_post_step_reboot(task, step_type)
@ -971,10 +977,7 @@ class AgentDeployMixin(HeartbeatMixin):
'step': current_step,
'type': step_type})
LOG.error(msg)
if step_type == 'clean':
return manager_utils.cleaning_error_handler(task, msg)
else:
return manager_utils.deploying_error_handler(task, msg)
return _step_failure_handler(task, msg, step_type)
@METRICS.timer('AgentDeployMixin.reboot_and_finish_deploy')
def reboot_and_finish_deploy(self, task):

View File

@ -323,7 +323,7 @@ def store_ramdisk_logs(node, logs, label=None):
f.name, object_headers=object_headers)
def collect_ramdisk_logs(node):
def collect_ramdisk_logs(node, label=None):
"""Collect and store the system logs from the IPA ramdisk.
Collect and store the system logs from the IPA ramdisk. This method
@ -331,8 +331,11 @@ def collect_ramdisk_logs(node):
according to the configured storage backend.
:param node: A node object.
:param label: A string to label the log file such as a clean step name.
"""
if CONF.agent.deploy_logs_collect == 'never':
return
client = agent_client.AgentClient()
try:
result = client.collect_system_logs(node)
@ -350,7 +353,8 @@ def collect_ramdisk_logs(node):
return
try:
store_ramdisk_logs(node, result['command_result']['system_logs'])
store_ramdisk_logs(node, result['command_result']['system_logs'],
label=label)
except exception.SwiftOperationError as e:
LOG.error('Failed to store the logs from the node %(node)s '
'deployment in Swift. Error: %(error)s',

View File

@ -572,12 +572,14 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
def test__do_next_clean_step_retired_last_step_change_tgt_state(self):
self._do_next_clean_step_last_step_noop(retired=True)
@mock.patch('ironic.drivers.utils.collect_ramdisk_logs', autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step',
autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step',
autospec=True)
def _do_next_clean_step_all(self, mock_deploy_execute,
mock_power_execute, manual=False):
mock_power_execute, mock_collect_logs,
manual=False):
# Run all steps from start to finish (all synchronous)
tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE
@ -617,6 +619,7 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
mock_deploy_execute.assert_has_calls(
[mock.call(mock.ANY, mock.ANY, self.clean_steps[0]),
mock.call(mock.ANY, mock.ANY, self.clean_steps[2])])
self.assertFalse(mock_collect_logs.called)
def test_do_next_clean_step_automated_all(self):
self._do_next_clean_step_all()
@ -624,11 +627,62 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
def test_do_next_clean_step_manual_all(self):
self._do_next_clean_step_all(manual=True)
@mock.patch('ironic.drivers.utils.collect_ramdisk_logs', autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step',
autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step',
autospec=True)
def test_do_next_clean_step_collect_logs(self, mock_deploy_execute,
mock_power_execute,
mock_collect_logs):
CONF.set_override('deploy_logs_collect', 'always', group='agent')
# Run all steps from start to finish (all synchronous)
tgt_prov_state = states.MANAGEABLE
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.CLEANING,
target_provision_state=tgt_prov_state,
last_error=None,
driver_internal_info={'clean_steps': self.clean_steps,
'clean_step_index': None},
clean_step={})
def fake_deploy(conductor_obj, task, step):
driver_internal_info = task.node.driver_internal_info
driver_internal_info['goober'] = 'test'
task.node.driver_internal_info = driver_internal_info
task.node.save()
mock_deploy_execute.side_effect = fake_deploy
mock_power_execute.return_value = None
with task_manager.acquire(
self.context, node.uuid, shared=False) as task:
cleaning.do_next_clean_step(task, 0)
node.refresh()
# Cleaning should be complete
self.assertEqual(tgt_prov_state, node.provision_state)
self.assertEqual(states.NOSTATE, node.target_provision_state)
self.assertEqual({}, node.clean_step)
self.assertNotIn('clean_step_index', node.driver_internal_info)
self.assertEqual('test', node.driver_internal_info['goober'])
self.assertIsNone(node.driver_internal_info['clean_steps'])
mock_power_execute.assert_called_once_with(mock.ANY, mock.ANY,
self.clean_steps[1])
mock_deploy_execute.assert_has_calls(
[mock.call(mock.ANY, mock.ANY, self.clean_steps[0]),
mock.call(mock.ANY, mock.ANY, self.clean_steps[2])])
mock_collect_logs.assert_called_once_with(mock.ANY, label='cleaning')
@mock.patch('ironic.drivers.utils.collect_ramdisk_logs', autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step',
autospec=True)
@mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True)
def _do_next_clean_step_execute_fail(self, tear_mock, mock_execute,
manual=False):
mock_collect_logs, manual=False):
# When a clean step fails, go to CLEANFAIL
tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE
@ -658,6 +712,7 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
self.assertTrue(node.maintenance)
mock_execute.assert_called_once_with(
mock.ANY, mock.ANY, self.clean_steps[0])
mock_collect_logs.assert_called_once_with(mock.ANY, label='cleaning')
def test__do_next_clean_step_automated_execute_fail(self):
self._do_next_clean_step_execute_fail()

View File

@ -1815,6 +1815,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
hook_mock.assert_called_once_with(task, command_status)
notify_mock.assert_called_once_with(task, 'clean')
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(manager_utils, 'notify_conductor_resume_operation',
autospec=True)
@mock.patch.object(agent_base,
@ -1824,7 +1825,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
autospec=True)
def test_continue_cleaning_with_hook_fails(
self, status_mock, error_handler_mock, get_hook_mock,
notify_mock):
notify_mock, collect_logs_mock):
self.node.clean_step = {
'priority': 10,
'interface': 'raid',
@ -1847,6 +1848,8 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
hook_mock.assert_called_once_with(task, command_status)
error_handler_mock.assert_called_once_with(task, mock.ANY)
self.assertFalse(notify_mock.called)
collect_logs_mock.assert_called_once_with(task.node,
label='cleaning')
@mock.patch.object(manager_utils, 'notify_conductor_resume_operation',
autospec=True)
@ -1908,10 +1911,12 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
self.deploy.continue_cleaning(task)
notify_mock.assert_called_once_with(task, 'clean')
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_continue_cleaning_fail(self, status_mock, error_mock):
def test_continue_cleaning_fail(self, status_mock, error_mock,
collect_logs_mock):
# Test that a failure puts the node in CLEANFAIL
status_mock.return_value = [{
'command_status': 'FAILED',
@ -1922,6 +1927,8 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
shared=False) as task:
self.deploy.continue_cleaning(task)
error_mock.assert_called_once_with(task, mock.ANY)
collect_logs_mock.assert_called_once_with(task.node,
label='cleaning')
@mock.patch.object(conductor_steps, 'set_node_cleaning_steps',
autospec=True)

View File

@ -252,7 +252,16 @@ class UtilsRamdiskLogsTestCase(tests_base.TestCase):
logs = 'Gary the Snail'
mock_collect.return_value = {'command_result': {'system_logs': logs}}
driver_utils.collect_ramdisk_logs(self.node)
mock_store.assert_called_once_with(self.node, logs)
mock_store.assert_called_once_with(self.node, logs, label=None)
@mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True)
@mock.patch.object(agent_client.AgentClient,
'collect_system_logs', autospec=True)
def test_collect_ramdisk_logs_with_label(self, mock_collect, mock_store):
logs = 'Gary the Snail'
mock_collect.return_value = {'command_result': {'system_logs': logs}}
driver_utils.collect_ramdisk_logs(self.node, label='logs')
mock_store.assert_called_once_with(self.node, logs, label='logs')
@mock.patch.object(driver_utils.LOG, 'error', autospec=True)
@mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True)
@ -286,7 +295,7 @@ class UtilsRamdiskLogsTestCase(tests_base.TestCase):
logs = 'Gary the Snail'
mock_collect.return_value = {'command_result': {'system_logs': logs}}
driver_utils.collect_ramdisk_logs(self.node)
mock_store.assert_called_once_with(self.node, logs)
mock_store.assert_called_once_with(self.node, logs, label=None)
@mock.patch.object(driver_utils.LOG, 'exception', autospec=True)
def test_collect_ramdisk_logs_storage_fail_fs(self, mock_log):

View File

@ -0,0 +1,5 @@
---
other:
- |
Ramdisk logs are now collected during cleaning the same way as during
deployment.