Collect ramdisk logs also during cleaning

Change-Id: Ia60996b4198e6fcfba6094af26498869589e175e
This commit is contained in:
Dmitry Tantsur 2020-05-14 14:52:16 +02:00
parent a5d120c8c3
commit d31e71a736
7 changed files with 106 additions and 18 deletions

View File

@ -21,6 +21,7 @@ from ironic.conductor import steps as conductor_steps
from ironic.conductor import task_manager from ironic.conductor import task_manager
from ironic.conductor import utils from ironic.conductor import utils
from ironic.conf import CONF from ironic.conf import CONF
from ironic.drivers import utils as driver_utils
LOG = log.getLogger(__name__) LOG = log.getLogger(__name__)
@ -182,6 +183,7 @@ def do_next_clean_step(task, step_index):
{'node': node.uuid, 'exc': e, {'node': node.uuid, 'exc': e,
'step': node.clean_step}) 'step': node.clean_step})
LOG.exception(msg) LOG.exception(msg)
driver_utils.collect_ramdisk_logs(task.node, label='cleaning')
utils.cleaning_error_handler(task, msg) utils.cleaning_error_handler(task, msg)
return return
@ -206,6 +208,9 @@ def do_next_clean_step(task, step_index):
LOG.info('Node %(node)s finished clean step %(step)s', LOG.info('Node %(node)s finished clean step %(step)s',
{'node': node.uuid, 'step': step}) {'node': node.uuid, 'step': step})
if CONF.agent.deploy_logs_collect == 'always':
driver_utils.collect_ramdisk_logs(task.node, label='cleaning')
# Clear clean_step # Clear clean_step
node.clean_step = None node.clean_step = None
driver_internal_info = node.driver_internal_info driver_internal_info = node.driver_internal_info

View File

@ -369,6 +369,15 @@ def execute_clean_step(task, step):
return execute_step(task, step, 'clean') return execute_step(task, step, 'clean')
def _step_failure_handler(task, msg, step_type):
driver_utils.collect_ramdisk_logs(
task.node, label='cleaning' if step_type == 'clean' else None)
if step_type == 'clean':
manager_utils.cleaning_error_handler(task, msg)
else:
manager_utils.deploying_error_handler(task, msg)
class HeartbeatMixin(object): class HeartbeatMixin(object):
"""Mixin class implementing heartbeat processing.""" """Mixin class implementing heartbeat processing."""
@ -922,7 +931,7 @@ class AgentDeployMixin(HeartbeatMixin):
'step': current_step, 'step': current_step,
'type': step_type}) 'type': step_type})
LOG.error(msg) LOG.error(msg)
return manager_utils.cleaning_error_handler(task, msg) return _step_failure_handler(task, msg, step_type)
# NOTE(dtantsur): VERSION_MISMATCH is a new alias for # NOTE(dtantsur): VERSION_MISMATCH is a new alias for
# CLEAN_VERSION_MISMATCH, remove the old one after IPA removes it. # CLEAN_VERSION_MISMATCH, remove the old one after IPA removes it.
elif command.get('command_status') in ('CLEAN_VERSION_MISMATCH', elif command.get('command_status') in ('CLEAN_VERSION_MISMATCH',
@ -950,10 +959,7 @@ class AgentDeployMixin(HeartbeatMixin):
'step': current_step, 'step': current_step,
'type': step_type}) 'type': step_type})
LOG.exception(msg) LOG.exception(msg)
if step_type == 'clean': return _step_failure_handler(task, msg, step_type)
return manager_utils.cleaning_error_handler(task, msg)
else:
return manager_utils.deploying_error_handler(task, msg)
if current_step.get('reboot_requested'): if current_step.get('reboot_requested'):
_post_step_reboot(task, step_type) _post_step_reboot(task, step_type)
@ -971,10 +977,7 @@ class AgentDeployMixin(HeartbeatMixin):
'step': current_step, 'step': current_step,
'type': step_type}) 'type': step_type})
LOG.error(msg) LOG.error(msg)
if step_type == 'clean': return _step_failure_handler(task, msg, step_type)
return manager_utils.cleaning_error_handler(task, msg)
else:
return manager_utils.deploying_error_handler(task, msg)
@METRICS.timer('AgentDeployMixin.reboot_and_finish_deploy') @METRICS.timer('AgentDeployMixin.reboot_and_finish_deploy')
def reboot_and_finish_deploy(self, task): def reboot_and_finish_deploy(self, task):

View File

@ -323,7 +323,7 @@ def store_ramdisk_logs(node, logs, label=None):
f.name, object_headers=object_headers) f.name, object_headers=object_headers)
def collect_ramdisk_logs(node): def collect_ramdisk_logs(node, label=None):
"""Collect and store the system logs from the IPA ramdisk. """Collect and store the system logs from the IPA ramdisk.
Collect and store the system logs from the IPA ramdisk. This method Collect and store the system logs from the IPA ramdisk. This method
@ -331,8 +331,11 @@ def collect_ramdisk_logs(node):
according to the configured storage backend. according to the configured storage backend.
:param node: A node object. :param node: A node object.
:param label: A string to label the log file such as a clean step name.
""" """
if CONF.agent.deploy_logs_collect == 'never':
return
client = agent_client.AgentClient() client = agent_client.AgentClient()
try: try:
result = client.collect_system_logs(node) result = client.collect_system_logs(node)
@ -350,7 +353,8 @@ def collect_ramdisk_logs(node):
return return
try: try:
store_ramdisk_logs(node, result['command_result']['system_logs']) store_ramdisk_logs(node, result['command_result']['system_logs'],
label=label)
except exception.SwiftOperationError as e: except exception.SwiftOperationError as e:
LOG.error('Failed to store the logs from the node %(node)s ' LOG.error('Failed to store the logs from the node %(node)s '
'deployment in Swift. Error: %(error)s', 'deployment in Swift. Error: %(error)s',

View File

@ -573,12 +573,14 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
def test__do_next_clean_step_retired_last_step_change_tgt_state(self): def test__do_next_clean_step_retired_last_step_change_tgt_state(self):
self._do_next_clean_step_last_step_noop(retired=True) self._do_next_clean_step_last_step_noop(retired=True)
@mock.patch('ironic.drivers.utils.collect_ramdisk_logs', autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step', @mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step',
autospec=True) autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step', @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step',
autospec=True) autospec=True)
def _do_next_clean_step_all(self, mock_deploy_execute, def _do_next_clean_step_all(self, mock_deploy_execute,
mock_power_execute, manual=False): mock_power_execute, mock_collect_logs,
manual=False):
# Run all steps from start to finish (all synchronous) # Run all steps from start to finish (all synchronous)
tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE
@ -618,6 +620,7 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
mock_deploy_execute.assert_has_calls( mock_deploy_execute.assert_has_calls(
[mock.call(mock.ANY, mock.ANY, self.clean_steps[0]), [mock.call(mock.ANY, mock.ANY, self.clean_steps[0]),
mock.call(mock.ANY, mock.ANY, self.clean_steps[2])]) mock.call(mock.ANY, mock.ANY, self.clean_steps[2])])
self.assertFalse(mock_collect_logs.called)
def test_do_next_clean_step_automated_all(self): def test_do_next_clean_step_automated_all(self):
self._do_next_clean_step_all() self._do_next_clean_step_all()
@ -625,11 +628,62 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
def test_do_next_clean_step_manual_all(self): def test_do_next_clean_step_manual_all(self):
self._do_next_clean_step_all(manual=True) self._do_next_clean_step_all(manual=True)
@mock.patch('ironic.drivers.utils.collect_ramdisk_logs', autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step',
autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step',
autospec=True)
def test_do_next_clean_step_collect_logs(self, mock_deploy_execute,
mock_power_execute,
mock_collect_logs):
CONF.set_override('deploy_logs_collect', 'always', group='agent')
# Run all steps from start to finish (all synchronous)
tgt_prov_state = states.MANAGEABLE
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.CLEANING,
target_provision_state=tgt_prov_state,
last_error=None,
driver_internal_info={'clean_steps': self.clean_steps,
'clean_step_index': None},
clean_step={})
def fake_deploy(conductor_obj, task, step):
driver_internal_info = task.node.driver_internal_info
driver_internal_info['goober'] = 'test'
task.node.driver_internal_info = driver_internal_info
task.node.save()
mock_deploy_execute.side_effect = fake_deploy
mock_power_execute.return_value = None
with task_manager.acquire(
self.context, node.uuid, shared=False) as task:
cleaning.do_next_clean_step(task, 0)
node.refresh()
# Cleaning should be complete
self.assertEqual(tgt_prov_state, node.provision_state)
self.assertEqual(states.NOSTATE, node.target_provision_state)
self.assertEqual({}, node.clean_step)
self.assertNotIn('clean_step_index', node.driver_internal_info)
self.assertEqual('test', node.driver_internal_info['goober'])
self.assertIsNone(node.driver_internal_info['clean_steps'])
mock_power_execute.assert_called_once_with(mock.ANY, mock.ANY,
self.clean_steps[1])
mock_deploy_execute.assert_has_calls(
[mock.call(mock.ANY, mock.ANY, self.clean_steps[0]),
mock.call(mock.ANY, mock.ANY, self.clean_steps[2])])
mock_collect_logs.assert_called_once_with(mock.ANY, label='cleaning')
@mock.patch('ironic.drivers.utils.collect_ramdisk_logs', autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step', @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step',
autospec=True) autospec=True)
@mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True) @mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True)
def _do_next_clean_step_execute_fail(self, tear_mock, mock_execute, def _do_next_clean_step_execute_fail(self, tear_mock, mock_execute,
manual=False): mock_collect_logs, manual=False):
# When a clean step fails, go to CLEANFAIL # When a clean step fails, go to CLEANFAIL
tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE
@ -659,6 +713,7 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
self.assertTrue(node.maintenance) self.assertTrue(node.maintenance)
mock_execute.assert_called_once_with( mock_execute.assert_called_once_with(
mock.ANY, mock.ANY, self.clean_steps[0]) mock.ANY, mock.ANY, self.clean_steps[0])
mock_collect_logs.assert_called_once_with(mock.ANY, label='cleaning')
def test__do_next_clean_step_automated_execute_fail(self): def test__do_next_clean_step_automated_execute_fail(self):
self._do_next_clean_step_execute_fail() self._do_next_clean_step_execute_fail()

View File

@ -1815,6 +1815,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
hook_mock.assert_called_once_with(task, command_status) hook_mock.assert_called_once_with(task, command_status)
notify_mock.assert_called_once_with(task, 'clean') notify_mock.assert_called_once_with(task, 'clean')
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(manager_utils, 'notify_conductor_resume_operation', @mock.patch.object(manager_utils, 'notify_conductor_resume_operation',
autospec=True) autospec=True)
@mock.patch.object(agent_base, @mock.patch.object(agent_base,
@ -1824,7 +1825,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
autospec=True) autospec=True)
def test_continue_cleaning_with_hook_fails( def test_continue_cleaning_with_hook_fails(
self, status_mock, error_handler_mock, get_hook_mock, self, status_mock, error_handler_mock, get_hook_mock,
notify_mock): notify_mock, collect_logs_mock):
self.node.clean_step = { self.node.clean_step = {
'priority': 10, 'priority': 10,
'interface': 'raid', 'interface': 'raid',
@ -1847,6 +1848,8 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
hook_mock.assert_called_once_with(task, command_status) hook_mock.assert_called_once_with(task, command_status)
error_handler_mock.assert_called_once_with(task, mock.ANY) error_handler_mock.assert_called_once_with(task, mock.ANY)
self.assertFalse(notify_mock.called) self.assertFalse(notify_mock.called)
collect_logs_mock.assert_called_once_with(task.node,
label='cleaning')
@mock.patch.object(manager_utils, 'notify_conductor_resume_operation', @mock.patch.object(manager_utils, 'notify_conductor_resume_operation',
autospec=True) autospec=True)
@ -1908,10 +1911,12 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
self.deploy.continue_cleaning(task) self.deploy.continue_cleaning(task)
notify_mock.assert_called_once_with(task, 'clean') notify_mock.assert_called_once_with(task, 'clean')
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
@mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status', @mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True) autospec=True)
def test_continue_cleaning_fail(self, status_mock, error_mock): def test_continue_cleaning_fail(self, status_mock, error_mock,
collect_logs_mock):
# Test that a failure puts the node in CLEANFAIL # Test that a failure puts the node in CLEANFAIL
status_mock.return_value = [{ status_mock.return_value = [{
'command_status': 'FAILED', 'command_status': 'FAILED',
@ -1922,6 +1927,8 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
shared=False) as task: shared=False) as task:
self.deploy.continue_cleaning(task) self.deploy.continue_cleaning(task)
error_mock.assert_called_once_with(task, mock.ANY) error_mock.assert_called_once_with(task, mock.ANY)
collect_logs_mock.assert_called_once_with(task.node,
label='cleaning')
@mock.patch.object(conductor_steps, 'set_node_cleaning_steps', @mock.patch.object(conductor_steps, 'set_node_cleaning_steps',
autospec=True) autospec=True)

View File

@ -252,7 +252,16 @@ class UtilsRamdiskLogsTestCase(tests_base.TestCase):
logs = 'Gary the Snail' logs = 'Gary the Snail'
mock_collect.return_value = {'command_result': {'system_logs': logs}} mock_collect.return_value = {'command_result': {'system_logs': logs}}
driver_utils.collect_ramdisk_logs(self.node) driver_utils.collect_ramdisk_logs(self.node)
mock_store.assert_called_once_with(self.node, logs) mock_store.assert_called_once_with(self.node, logs, label=None)
@mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True)
@mock.patch.object(agent_client.AgentClient,
'collect_system_logs', autospec=True)
def test_collect_ramdisk_logs_with_label(self, mock_collect, mock_store):
logs = 'Gary the Snail'
mock_collect.return_value = {'command_result': {'system_logs': logs}}
driver_utils.collect_ramdisk_logs(self.node, label='logs')
mock_store.assert_called_once_with(self.node, logs, label='logs')
@mock.patch.object(driver_utils.LOG, 'error', autospec=True) @mock.patch.object(driver_utils.LOG, 'error', autospec=True)
@mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True) @mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True)
@ -286,7 +295,7 @@ class UtilsRamdiskLogsTestCase(tests_base.TestCase):
logs = 'Gary the Snail' logs = 'Gary the Snail'
mock_collect.return_value = {'command_result': {'system_logs': logs}} mock_collect.return_value = {'command_result': {'system_logs': logs}}
driver_utils.collect_ramdisk_logs(self.node) driver_utils.collect_ramdisk_logs(self.node)
mock_store.assert_called_once_with(self.node, logs) mock_store.assert_called_once_with(self.node, logs, label=None)
@mock.patch.object(driver_utils.LOG, 'exception', autospec=True) @mock.patch.object(driver_utils.LOG, 'exception', autospec=True)
def test_collect_ramdisk_logs_storage_fail_fs(self, mock_log): def test_collect_ramdisk_logs_storage_fail_fs(self, mock_log):

View File

@ -0,0 +1,5 @@
---
other:
- |
Ramdisk logs are now collected during cleaning the same way as during
deployment.