Don't collect logs from powered off nodes
The agent deploy mixin tries to collect logs from the deployment ramdisk after powering off a node. This is clearly not going to work, as it requires a call to the IPA API. This change avoids collecting logs from the ramdisk in failure scenarios where the node is powered off. Change-Id: I353e73e3612ece893276c683ce469f8561e9ebf9 Closes-Bug: #1732939
This commit is contained in:
parent
af050b5d69
commit
752c82ccd9
@ -203,10 +203,18 @@ def _get_completed_cleaning_command(task, commands):
|
|||||||
|
|
||||||
|
|
||||||
@METRICS.timer('log_and_raise_deployment_error')
|
@METRICS.timer('log_and_raise_deployment_error')
|
||||||
def log_and_raise_deployment_error(task, msg):
|
def log_and_raise_deployment_error(task, msg, collect_logs=True):
|
||||||
"""Helper method to log the error and raise exception."""
|
"""Helper method to log the error and raise exception.
|
||||||
|
|
||||||
|
:param task: a TaskManager instance containing the node to act on.
|
||||||
|
:param msg: the message to set in last_error of the node.
|
||||||
|
:param collect_logs: Boolean indicating whether to attempt to collect
|
||||||
|
logs from IPA-based ramdisk. Defaults to True.
|
||||||
|
Actual log collection is also affected by
|
||||||
|
CONF.agent.deploy_logs_collect config option.
|
||||||
|
"""
|
||||||
LOG.error(msg)
|
LOG.error(msg)
|
||||||
deploy_utils.set_failed_state(task, msg)
|
deploy_utils.set_failed_state(task, msg, collect_logs=collect_logs)
|
||||||
raise exception.InstanceDeployFailure(msg)
|
raise exception.InstanceDeployFailure(msg)
|
||||||
|
|
||||||
|
|
||||||
@ -590,7 +598,13 @@ class AgentDeployMixin(HeartbeatMixin):
|
|||||||
{'node': node.uuid, 'error': error})
|
{'node': node.uuid, 'error': error})
|
||||||
|
|
||||||
manager_utils.node_power_action(task, states.POWER_OFF)
|
manager_utils.node_power_action(task, states.POWER_OFF)
|
||||||
|
except Exception as e:
|
||||||
|
msg = (_('Error rebooting node %(node)s after deploy. '
|
||||||
|
'Error: %(error)s') %
|
||||||
|
{'node': node.uuid, 'error': e})
|
||||||
|
log_and_raise_deployment_error(task, msg)
|
||||||
|
|
||||||
|
try:
|
||||||
task.driver.network.remove_provisioning_network(task)
|
task.driver.network.remove_provisioning_network(task)
|
||||||
task.driver.network.configure_tenant_networks(task)
|
task.driver.network.configure_tenant_networks(task)
|
||||||
|
|
||||||
@ -599,7 +613,9 @@ class AgentDeployMixin(HeartbeatMixin):
|
|||||||
msg = (_('Error rebooting node %(node)s after deploy. '
|
msg = (_('Error rebooting node %(node)s after deploy. '
|
||||||
'Error: %(error)s') %
|
'Error: %(error)s') %
|
||||||
{'node': node.uuid, 'error': e})
|
{'node': node.uuid, 'error': e})
|
||||||
log_and_raise_deployment_error(task, msg)
|
# NOTE(mgoddard): Don't collect logs since the node has been
|
||||||
|
# powered off.
|
||||||
|
log_and_raise_deployment_error(task, msg, collect_logs=False)
|
||||||
|
|
||||||
task.process_event('done')
|
task.process_event('done')
|
||||||
LOG.info('Deployment to node %s done', task.node.uuid)
|
LOG.info('Deployment to node %s done', task.node.uuid)
|
||||||
|
@ -504,7 +504,7 @@ def set_failed_state(task, msg, collect_logs=True):
|
|||||||
|
|
||||||
:param task: a TaskManager instance containing the node to act on.
|
:param task: a TaskManager instance containing the node to act on.
|
||||||
:param msg: the message to set in last_error of the node.
|
:param msg: the message to set in last_error of the node.
|
||||||
:param collect_logs: boolean indicating whether to attempt collect
|
:param collect_logs: Boolean indicating whether to attempt to collect
|
||||||
logs from IPA-based ramdisk. Defaults to True.
|
logs from IPA-based ramdisk. Defaults to True.
|
||||||
Actual log collection is also affected by
|
Actual log collection is also affected by
|
||||||
CONF.agent.deploy_logs_collect config option.
|
CONF.agent.deploy_logs_collect config option.
|
||||||
|
@ -437,7 +437,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
|
|||||||
configure_tenant_net_mock.assert_called_once_with(mock.ANY, task)
|
configure_tenant_net_mock.assert_called_once_with(mock.ANY, task)
|
||||||
self.assertEqual(states.DEPLOYFAIL, task.node.provision_state)
|
self.assertEqual(states.DEPLOYFAIL, task.node.provision_state)
|
||||||
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
|
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
|
||||||
mock_collect.assert_called_once_with(task.node)
|
self.assertFalse(mock_collect.called)
|
||||||
|
|
||||||
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
|
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
|
||||||
@mock.patch.object(time, 'sleep', lambda seconds: None)
|
@mock.patch.object(time, 'sleep', lambda seconds: None)
|
||||||
@ -446,7 +446,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
|
|||||||
spec=types.FunctionType)
|
spec=types.FunctionType)
|
||||||
@mock.patch.object(agent_client.AgentClient, 'power_off',
|
@mock.patch.object(agent_client.AgentClient, 'power_off',
|
||||||
spec=types.FunctionType)
|
spec=types.FunctionType)
|
||||||
def test_reboot_and_finish_deploy_power_action_fails(
|
def test_reboot_and_finish_deploy_power_off_fails(
|
||||||
self, power_off_mock, get_power_state_mock,
|
self, power_off_mock, get_power_state_mock,
|
||||||
node_power_action_mock, mock_collect):
|
node_power_action_mock, mock_collect):
|
||||||
self.node.provision_state = states.DEPLOYING
|
self.node.provision_state = states.DEPLOYING
|
||||||
@ -467,6 +467,44 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
|
|||||||
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
|
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
|
||||||
mock_collect.assert_called_once_with(task.node)
|
mock_collect.assert_called_once_with(task.node)
|
||||||
|
|
||||||
|
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
|
||||||
|
@mock.patch.object(time, 'sleep', lambda seconds: None)
|
||||||
|
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||||
|
@mock.patch.object(fake.FakePower, 'get_power_state',
|
||||||
|
spec=types.FunctionType)
|
||||||
|
@mock.patch.object(agent_client.AgentClient, 'power_off',
|
||||||
|
spec=types.FunctionType)
|
||||||
|
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.'
|
||||||
|
'remove_provisioning_network', spec_set=True, autospec=True)
|
||||||
|
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.'
|
||||||
|
'configure_tenant_networks', spec_set=True, autospec=True)
|
||||||
|
def test_reboot_and_finish_deploy_power_on_fails(
|
||||||
|
self, configure_tenant_net_mock, remove_provisioning_net_mock,
|
||||||
|
power_off_mock, get_power_state_mock,
|
||||||
|
node_power_action_mock, mock_collect):
|
||||||
|
self.node.provision_state = states.DEPLOYING
|
||||||
|
self.node.target_provision_state = states.ACTIVE
|
||||||
|
self.node.save()
|
||||||
|
with task_manager.acquire(self.context, self.node.uuid,
|
||||||
|
shared=True) as task:
|
||||||
|
get_power_state_mock.return_value = states.POWER_ON
|
||||||
|
node_power_action_mock.side_effect = [None,
|
||||||
|
RuntimeError("boom")]
|
||||||
|
self.assertRaises(exception.InstanceDeployFailure,
|
||||||
|
self.deploy.reboot_and_finish_deploy,
|
||||||
|
task)
|
||||||
|
power_off_mock.assert_called_once_with(task.node)
|
||||||
|
self.assertEqual(7, get_power_state_mock.call_count)
|
||||||
|
node_power_action_mock.assert_has_calls([
|
||||||
|
mock.call(task, states.POWER_OFF),
|
||||||
|
mock.call(task, states.POWER_ON)])
|
||||||
|
remove_provisioning_net_mock.assert_called_once_with(mock.ANY,
|
||||||
|
task)
|
||||||
|
configure_tenant_net_mock.assert_called_once_with(mock.ANY, task)
|
||||||
|
self.assertEqual(states.DEPLOYFAIL, task.node.provision_state)
|
||||||
|
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
|
||||||
|
self.assertFalse(mock_collect.called)
|
||||||
|
|
||||||
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
|
@mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True)
|
||||||
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||||
@mock.patch.object(agent_client.AgentClient, 'sync',
|
@mock.patch.object(agent_client.AgentClient, 'sync',
|
||||||
|
@ -0,0 +1,8 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Fixes an issue where if a failure occurs during deployment, the Bare Metal
|
||||||
|
service could attempt to collect logs from a node that had been powered
|
||||||
|
off. This would result in a number of failed attempts to collect the logs
|
||||||
|
before failing the deployment. See `bug 1732939
|
||||||
|
<https://bugs.launchpad.net/ironic/+bug/1732939>`_ for details.
|
Loading…
Reference in New Issue
Block a user