Don't collect logs from powered off nodes
The agent deploy mixin tries to collect logs from the deployment ramdisk after powering off a node. This is clearly not going to work, as it requires a call to the IPA API. This change avoids collecting logs from the ramdisk in failure scenarios where the node is powered off. Change-Id: I353e73e3612ece893276c683ce469f8561e9ebf9 Closes-Bug: #1732939
This commit is contained in:
parent
af050b5d69
commit
752c82ccd9
@ -203,10 +203,18 @@ def _get_completed_cleaning_command(task, commands):
|
||||
|
||||
|
||||
@METRICS.timer('log_and_raise_deployment_error')
|
||||
def log_and_raise_deployment_error(task, msg):
|
||||
"""Helper method to log the error and raise exception."""
|
||||
def log_and_raise_deployment_error(task, msg, collect_logs=True):
|
||||
"""Helper method to log the error and raise exception.
|
||||
|
||||
:param task: a TaskManager instance containing the node to act on.
|
||||
:param msg: the message to set in last_error of the node.
|
||||
:param collect_logs: Boolean indicating whether to attempt to collect
|
||||
logs from IPA-based ramdisk. Defaults to True.
|
||||
Actual log collection is also affected by
|
||||
CONF.agent.deploy_logs_collect config option.
|
||||
"""
|
||||
LOG.error(msg)
|
||||
deploy_utils.set_failed_state(task, msg)
|
||||
deploy_utils.set_failed_state(task, msg, collect_logs=collect_logs)
|
||||
raise exception.InstanceDeployFailure(msg)
|
||||
|
||||
|
||||
@ -590,7 +598,13 @@ class AgentDeployMixin(HeartbeatMixin):
|
||||
{'node': node.uuid, 'error': error})
|
||||
|
||||
manager_utils.node_power_action(task, states.POWER_OFF)
|
||||
except Exception as e:
|
||||
msg = (_('Error rebooting node %(node)s after deploy. '
|
||||
'Error: %(error)s') %
|
||||
{'node': node.uuid, 'error': e})
|
||||
log_and_raise_deployment_error(task, msg)
|
||||
|
||||
try:
|
||||
task.driver.network.remove_provisioning_network(task)
|
||||
task.driver.network.configure_tenant_networks(task)
|
||||
|
||||
@ -599,7 +613,9 @@ class AgentDeployMixin(HeartbeatMixin):
|
||||
msg = (_('Error rebooting node %(node)s after deploy. '
|
||||
'Error: %(error)s') %
|
||||
{'node': node.uuid, 'error': e})
|
||||
log_and_raise_deployment_error(task, msg)
|
||||
# NOTE(mgoddard): Don't collect logs since the node has been
|
||||
# powered off.
|
||||
log_and_raise_deployment_error(task, msg, collect_logs=False)
|
||||
|
||||
task.process_event('done')
|
||||
LOG.info('Deployment to node %s done', task.node.uuid)
|
||||
|
@ -504,7 +504,7 @@ def set_failed_state(task, msg, collect_logs=True):
|
||||
|
||||
:param task: a TaskManager instance containing the node to act on.
|
||||
:param msg: the message to set in last_error of the node.
|
||||
:param collect_logs: boolean indicating whether to attempt collect
|
||||
:param collect_logs: Boolean indicating whether to attempt to collect
|
||||
logs from IPA-based ramdisk. Defaults to True.
|
||||
Actual log collection is also affected by
|
||||
CONF.agent.deploy_logs_collect config option.
|
||||
|
@ -437,7 +437,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
|
||||
configure_tenant_net_mock.assert_called_once_with(mock.ANY, task)
|
||||
self.assertEqual(states.DEPLOYFAIL, task.node.provision_state)
|
||||
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
|
||||
mock_collect.assert_called_once_with(task.node)
|
||||
self.assertFalse(mock_collect.called)
|
||||
|
||||
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
|
||||
@mock.patch.object(time, 'sleep', lambda seconds: None)
|
||||
@ -446,7 +446,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
|
||||
spec=types.FunctionType)
|
||||
@mock.patch.object(agent_client.AgentClient, 'power_off',
|
||||
spec=types.FunctionType)
|
||||
def test_reboot_and_finish_deploy_power_action_fails(
|
||||
def test_reboot_and_finish_deploy_power_off_fails(
|
||||
self, power_off_mock, get_power_state_mock,
|
||||
node_power_action_mock, mock_collect):
|
||||
self.node.provision_state = states.DEPLOYING
|
||||
@ -467,6 +467,44 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
|
||||
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
|
||||
mock_collect.assert_called_once_with(task.node)
|
||||
|
||||
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
|
||||
@mock.patch.object(time, 'sleep', lambda seconds: None)
|
||||
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||
@mock.patch.object(fake.FakePower, 'get_power_state',
|
||||
spec=types.FunctionType)
|
||||
@mock.patch.object(agent_client.AgentClient, 'power_off',
|
||||
spec=types.FunctionType)
|
||||
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.'
|
||||
'remove_provisioning_network', spec_set=True, autospec=True)
|
||||
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.'
|
||||
'configure_tenant_networks', spec_set=True, autospec=True)
|
||||
def test_reboot_and_finish_deploy_power_on_fails(
|
||||
self, configure_tenant_net_mock, remove_provisioning_net_mock,
|
||||
power_off_mock, get_power_state_mock,
|
||||
node_power_action_mock, mock_collect):
|
||||
self.node.provision_state = states.DEPLOYING
|
||||
self.node.target_provision_state = states.ACTIVE
|
||||
self.node.save()
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=True) as task:
|
||||
get_power_state_mock.return_value = states.POWER_ON
|
||||
node_power_action_mock.side_effect = [None,
|
||||
RuntimeError("boom")]
|
||||
self.assertRaises(exception.InstanceDeployFailure,
|
||||
self.deploy.reboot_and_finish_deploy,
|
||||
task)
|
||||
power_off_mock.assert_called_once_with(task.node)
|
||||
self.assertEqual(7, get_power_state_mock.call_count)
|
||||
node_power_action_mock.assert_has_calls([
|
||||
mock.call(task, states.POWER_OFF),
|
||||
mock.call(task, states.POWER_ON)])
|
||||
remove_provisioning_net_mock.assert_called_once_with(mock.ANY,
|
||||
task)
|
||||
configure_tenant_net_mock.assert_called_once_with(mock.ANY, task)
|
||||
self.assertEqual(states.DEPLOYFAIL, task.node.provision_state)
|
||||
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
|
||||
self.assertFalse(mock_collect.called)
|
||||
|
||||
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
|
||||
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||
@mock.patch.object(agent_client.AgentClient, 'sync',
|
||||
|
@ -0,0 +1,8 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Fixes an issue where if a failure occurs during deployment, the Bare Metal
|
||||
service could attempt to collect logs from a node that had been powered
|
||||
off. This would result in a number of failed attempts to collect the logs
|
||||
before failing the deployment. See `bug 1732939
|
||||
<https://bugs.launchpad.net/ironic/+bug/1732939>`_ for details.
|
Loading…
Reference in New Issue
Block a user