diff --git a/etc/ironic/ironic.conf.sample b/etc/ironic/ironic.conf.sample index cc651034df..7a53e1af54 100644 --- a/etc/ironic/ironic.conf.sample +++ b/etc/ironic/ironic.conf.sample @@ -919,6 +919,10 @@ # Deprecated group/name - [agent]/agent_erase_devices_iterations #erase_devices_iterations=1 +# Whether to power off a node after deploy failure. Defaults +# to True. (boolean value) +#power_off_after_deploy_failure=true + [dhcp] diff --git a/ironic/drivers/modules/deploy_utils.py b/ironic/drivers/modules/deploy_utils.py index 7b615a9b84..95e59cbc5f 100644 --- a/ironic/drivers/modules/deploy_utils.py +++ b/ironic/drivers/modules/deploy_utils.py @@ -71,6 +71,10 @@ deploy_opts = [ deprecated_group='agent', default=1, help=_('Number of iterations to be run for erasing devices.')), + cfg.BoolOpt('power_off_after_deploy_failure', + default=True, + help=_('Whether to power off a node after deploy failure. ' + 'Defaults to True.')), ] CONF = cfg.CONF CONF.register_opts(deploy_opts, group='deploy') @@ -492,15 +496,15 @@ def set_failed_state(task, msg): % {'node': node.uuid, 'state': node.provision_state}) LOG.exception(msg2) - try: - manager_utils.node_power_action(task, states.POWER_OFF) - except Exception: - msg2 = (_LE('Node %s failed to power off while handling deploy ' - 'failure. This may be a serious condition. Node ' - 'should be removed from Ironic or put in maintenance ' - 'mode until the problem is resolved.') % node.uuid) - LOG.exception(msg2) - + if CONF.deploy.power_off_after_deploy_failure: + try: + manager_utils.node_power_action(task, states.POWER_OFF) + except Exception: + msg2 = (_LE('Node %s failed to power off while handling deploy ' + 'failure. This may be a serious condition. Node ' + 'should be removed from Ironic or put in maintenance ' + 'mode until the problem is resolved.') % node.uuid) + LOG.exception(msg2) # NOTE(deva): node_power_action() erases node.last_error # so we need to set it here. node.last_error = msg diff --git a/ironic/tests/unit/drivers/modules/test_deploy_utils.py b/ironic/tests/unit/drivers/modules/test_deploy_utils.py index 8a916b11ac..8362c07bbf 100644 --- a/ironic/tests/unit/drivers/modules/test_deploy_utils.py +++ b/ironic/tests/unit/drivers/modules/test_deploy_utils.py @@ -1252,7 +1252,7 @@ class OtherFunctionTestCase(db_base.DbTestCase): autospec=True) def _test_set_failed_state(self, mock_event, mock_power, mock_log, event_value=None, power_value=None, - log_calls=None): + log_calls=None, poweroff=True): err_msg = 'some failure' mock_event.side_effect = event_value mock_power.side_effect = power_value @@ -1260,9 +1260,12 @@ class OtherFunctionTestCase(db_base.DbTestCase): shared=False) as task: utils.set_failed_state(task, err_msg) mock_event.assert_called_once_with(task, 'fail') - mock_power.assert_called_once_with(task, states.POWER_OFF) + if poweroff: + mock_power.assert_called_once_with(task, states.POWER_OFF) + else: + self.assertFalse(mock_power.called) self.assertEqual(err_msg, task.node.last_error) - if log_calls: + if (log_calls and poweroff): mock_log.exception.assert_has_calls(log_calls) else: self.assertFalse(mock_log.called) @@ -1283,6 +1286,23 @@ class OtherFunctionTestCase(db_base.DbTestCase): power_value=iter([exc_param] * len(calls)), log_calls=calls) + def test_set_failed_state_no_poweroff(self): + cfg.CONF.deploy.power_off_after_deploy_failure = False + exc_state = exception.InvalidState('invalid state') + exc_param = exception.InvalidParameterValue('invalid parameter') + mock_call = mock.call(mock.ANY) + self._test_set_failed_state(poweroff=False) + calls = [mock_call] + self._test_set_failed_state(event_value=iter([exc_state] * len(calls)), + log_calls=calls, poweroff=False) + calls = [mock_call] + self._test_set_failed_state(power_value=iter([exc_param] * len(calls)), + log_calls=calls, poweroff=False) + calls = [mock_call, mock_call] + self._test_set_failed_state(event_value=iter([exc_state] * len(calls)), + power_value=iter([exc_param] * len(calls)), + log_calls=calls, poweroff=False) + def test_get_boot_option(self): self.node.instance_info = {'capabilities': '{"boot_option": "local"}'} result = utils.get_boot_option(self.node) diff --git a/releasenotes/notes/add-support-for-no-poweroff-on-failure-86e43b3e39043990.yaml b/releasenotes/notes/add-support-for-no-poweroff-on-failure-86e43b3e39043990.yaml new file mode 100644 index 0000000000..4808ad400c --- /dev/null +++ b/releasenotes/notes/add-support-for-no-poweroff-on-failure-86e43b3e39043990.yaml @@ -0,0 +1,12 @@ +--- +features: + - Operators can now set + deploy.power_off_after_deploy_failure to leave nodes + powered on when a deployment fails. This is useful + for troubleshooting deployment issues. As a note, + Nova will still attempt to delete a node after a failed + deployment, so deploy.power_off_after_deploy_failure + may not be very effective in non-standalone + deployments until a similar patch to ironic's driver in + nova is proposed. +