diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index 2272c0df72..add9ee74de 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -499,6 +499,11 @@ def cleaning_error_handler(task, logmsg, errmsg=None, traceback=False, # NOTE(dtantsur): avoid overwriting existing maintenance_reason if not node.maintenance_reason and set_maintenance: node.maintenance_reason = errmsg + + if CONF.conductor.poweroff_in_cleanfail: + # NOTE(NobodyCam): Power off node in clean fail + node_power_action(task, states.POWER_OFF) + node.save() if set_fail_state and node.provision_state != states.CLEANFAIL: diff --git a/ironic/conf/conductor.py b/ironic/conf/conductor.py index 653e30f56d..2452fafe78 100644 --- a/ironic/conf/conductor.py +++ b/ironic/conf/conductor.py @@ -349,6 +349,14 @@ opts = [ 'is a global setting applying to all requests this ' 'conductor receives, regardless of access rights. ' 'The concurrent clean limit cannot be disabled.')), + + cfg.BoolOpt('poweroff_in_cleanfail', + default=False, + help=_('If True power off nodes in the ``clean failed`` ' + 'state. Default False. Option may be unsafe ' + 'when using Cleaning to perform ' + 'hardware-transformative actions such as ' + 'firmware upgrade.')), ] diff --git a/ironic/tests/unit/conductor/test_cleaning.py b/ironic/tests/unit/conductor/test_cleaning.py index 34e805deb7..cdfbf14ee6 100644 --- a/ironic/tests/unit/conductor/test_cleaning.py +++ b/ironic/tests/unit/conductor/test_cleaning.py @@ -436,6 +436,36 @@ class DoNodeCleanTestCase(db_base.DbTestCase): self.assertFalse(node.maintenance) self.assertIsNone(node.fault) + @mock.patch('ironic.drivers.modules.fake.FakePower.set_power_state', + autospec=True) + @mock.patch.object(n_flat.FlatNetwork, 'validate', autospec=True) + @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', + autospec=True) + def test_do_node_clean_steps_fail_poweroff(self, mock_steps, mock_validate, + mock_power, clean_steps=None, + invalid_exc=True): + if invalid_exc: + mock_steps.side_effect = exception.InvalidParameterValue('invalid') + else: + mock_steps.side_effect = exception.NodeCleaningFailure('failure') + tgt_prov_state = states.MANAGEABLE if clean_steps else states.AVAILABLE + self.config(poweroff_in_cleanfail=True, group='conductor') + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + uuid=uuidutils.generate_uuid(), + provision_state=states.CLEANING, + power_state=states.POWER_ON, + target_provision_state=tgt_prov_state) + with task_manager.acquire( + self.context, node.uuid, shared=False) as task: + cleaning.do_node_clean(task, clean_steps=clean_steps) + mock_validate.assert_called_once_with(mock.ANY, task) + node.refresh() + self.assertEqual(states.CLEANFAIL, node.provision_state) + self.assertEqual(tgt_prov_state, node.target_provision_state) + mock_steps.assert_called_once_with(mock.ANY, disable_ramdisk=False) + self.assertTrue(mock_power.called) + def test__do_node_clean_automated_steps_fail(self): for invalid in (True, False): self.__do_node_clean_steps_fail(invalid_exc=invalid) diff --git a/releasenotes/notes/Cleanfail-power-off-13b5fdcc2727866a.yaml b/releasenotes/notes/Cleanfail-power-off-13b5fdcc2727866a.yaml new file mode 100644 index 0000000000..2856bac062 --- /dev/null +++ b/releasenotes/notes/Cleanfail-power-off-13b5fdcc2727866a.yaml @@ -0,0 +1,8 @@ +--- +features: + - | + Add new conductor conf option: [conductor]poweroff_in_cleanfail + (default: False). when True nodes entering clean failed state + will be powered off. This option may be unsafe when using + Cleaning to perform hardware-transformative actions such as + firmware upgrade.