Actually ignore [inspector]power_off with fast track

The option's help says that the option is ignored when fast track is on,
but in reality it only happens with the legacy inspection.

The clean-up logic diverges between the two inspection implementation,
so change the new implementation to reuse the old clean-up code.

Add a few missing unit tests.

Change-Id: I2e84aa285b5673bcc911d35439ba80a739460f59
This commit is contained in:
Dmitry Tantsur 2024-10-18 16:15:08 +02:00
parent 6f4e679c4c
commit 4f40ddef26
No known key found for this signature in database
GPG Key ID: 315B2AF9FD216C60
5 changed files with 177 additions and 156 deletions
ironic
drivers/modules/inspector
tests/unit/drivers/modules/inspector
releasenotes/notes

View File

@ -79,7 +79,7 @@ class AgentInspect(common.Common):
if inspect_utils.clear_lookup_addresses(task.node): if inspect_utils.clear_lookup_addresses(task.node):
task.node.save() task.node.save()
common.clean_up(task, finish=False) common.clean_up(task, finish=False, always_power_off=True)
def continue_inspection(self, task, inventory, plugin_data): def continue_inspection(self, task, inventory, plugin_data):
"""Continue in-band hardware inspection. """Continue in-band hardware inspection.
@ -90,8 +90,7 @@ class AgentInspect(common.Common):
""" """
# Run the inspection hooks # Run the inspection hooks
run_inspection_hooks(task, inventory, plugin_data, self.hooks) run_inspection_hooks(task, inventory, plugin_data, self.hooks)
common.clean_up(task, finish=False, always_power_off=True)
common.clean_up(task, finish=False)
def _store_logs(plugin_data, node): def _store_logs(plugin_data, node):
@ -115,7 +114,6 @@ def run_inspection_hooks(task, inventory, plugin_data, hooks):
try: try:
_run_post_hooks(task, inventory, plugin_data, hooks) _run_post_hooks(task, inventory, plugin_data, hooks)
_power_off_node(task)
except exception.HardwareInspectionFailure: except exception.HardwareInspectionFailure:
with excutils.save_and_reraise_exception(): with excutils.save_and_reraise_exception():
_store_logs(plugin_data, task.node) _store_logs(plugin_data, task.node)
@ -180,20 +178,3 @@ def _run_post_hooks(task, inventory, plugin_data, hooks):
LOG.debug('Running inspection hook %(hook)s for node %(node)s', LOG.debug('Running inspection hook %(hook)s for node %(node)s',
{'hook': hook.name, 'node': task.node.uuid}) {'hook': hook.name, 'node': task.node.uuid})
hook.obj.__call__(task, inventory, plugin_data) hook.obj.__call__(task, inventory, plugin_data)
def _power_off_node(task):
power_off = CONF.inspector.power_off
if not power_off:
return
node = task.node
LOG.debug('Forcing power off of node %(node)s', {'node': node.uuid})
try:
cond_utils.node_power_action(task, states.POWER_OFF)
except Exception as exc:
msg = _("Failed to power off node %(node)s after inspection. Check "
"its power management configuration. Error: %(error)s" %
{'node': node.uuid, "error": exc})
raise exception.HardwareInspectionFailure(msg)
LOG.info('Node %(node)s powered off', {'node': node.uuid})

View File

@ -62,29 +62,30 @@ def _get_callback_endpoint(client):
parts.query, parts.fragment)) parts.query, parts.fragment))
def tear_down_managed_boot(task): def tear_down_managed_boot(task, always_power_off=False):
errors = [] errors = []
ironic_manages_boot = utils.pop_node_nested_field( ironic_manages_boot = utils.pop_node_nested_field(
task.node, 'driver_internal_info', _IRONIC_MANAGES_BOOT) task.node, 'driver_internal_info', _IRONIC_MANAGES_BOOT)
if not ironic_manages_boot:
return errors
try: if ironic_manages_boot:
task.driver.boot.clean_up_ramdisk(task) try:
except Exception as exc: task.driver.boot.clean_up_ramdisk(task)
errors.append(_('unable to clean up ramdisk boot: %s') % exc) except Exception as exc:
LOG.exception('Unable to clean up ramdisk boot for node %s', errors.append(_('unable to clean up ramdisk boot: %s') % exc)
task.node.uuid) LOG.exception('Unable to clean up ramdisk boot for node %s',
try: task.node.uuid)
with cond_utils.power_state_for_network_configuration(task): try:
task.driver.network.remove_inspection_network(task) with cond_utils.power_state_for_network_configuration(task):
except Exception as exc: task.driver.network.remove_inspection_network(task)
errors.append(_('unable to remove inspection ports: %s') % exc) except Exception as exc:
LOG.exception('Unable to remove inspection network for node %s', errors.append(_('unable to remove inspection ports: %s') % exc)
task.node.uuid) LOG.exception('Unable to remove inspection network for node %s',
task.node.uuid)
if CONF.inspector.power_off and not utils.fast_track_enabled(task.node): if ((ironic_manages_boot or always_power_off)
and CONF.inspector.power_off
and not utils.fast_track_enabled(task.node)):
try: try:
cond_utils.node_power_action(task, states.POWER_OFF) cond_utils.node_power_action(task, states.POWER_OFF)
except Exception as exc: except Exception as exc:
@ -363,8 +364,8 @@ def _check_status(task):
task.context) task.context)
def clean_up(task, finish=True): def clean_up(task, finish=True, always_power_off=False):
errors = tear_down_managed_boot(task) errors = tear_down_managed_boot(task, always_power_off=always_power_off)
if errors: if errors:
errors = ', '.join(errors) errors = ', '.join(errors)
LOG.error('Inspection clean up failed for node %(uuid)s: %(err)s', LOG.error('Inspection clean up failed for node %(uuid)s: %(err)s',

View File

@ -20,7 +20,9 @@ from ironic.conf import CONF
from ironic.drivers.modules import deploy_utils from ironic.drivers.modules import deploy_utils
from ironic.drivers.modules import inspect_utils from ironic.drivers.modules import inspect_utils
from ironic.drivers.modules.inspector import agent as inspector from ironic.drivers.modules.inspector import agent as inspector
from ironic.drivers.modules.inspector.hooks import base as hooks_base
from ironic.drivers.modules.inspector import interface as common from ironic.drivers.modules.inspector import interface as common
from ironic.drivers import utils as driver_utils
from ironic.tests.unit.db import base as db_base from ironic.tests.unit.db import base as db_base
from ironic.tests.unit.objects import utils as obj_utils from ironic.tests.unit.objects import utils as obj_utils
@ -132,12 +134,56 @@ class ContinueInspectionTestCase(db_base.DbTestCase):
mock_inspection_hooks.assert_called_once_with( mock_inspection_hooks.assert_called_once_with(
task, mock.sentinel.inventory, mock.sentinel.plugin_data, task, mock.sentinel.inventory, mock.sentinel.plugin_data,
self.iface.hooks) self.iface.hooks)
mock_tear_down.assert_called_once_with(task) mock_tear_down.assert_called_once_with(task, always_power_off=True)
self.assertEqual(states.INSPECTING, task.node.provision_state) self.assertEqual(states.INSPECTING, task.node.provision_state)
self.assertIsNone(result) self.assertIsNone(result)
@mock.patch.object(driver_utils, 'store_ramdisk_logs', autospec=True)
class RunInspectionHooksTestCase(db_base.DbTestCase):
def setUp(self):
super().setUp()
CONF.set_override('enabled_inspect_interfaces',
['agent', 'no-inspect'])
self.node = obj_utils.create_test_node(
self.context,
inspect_interface='agent',
provision_state=states.INSPECTING)
self.task = mock.Mock(spec=task_manager.TaskManager, node=self.node)
self.hooks = [
mock.Mock(name=str(i),
obj=mock.MagicMock(spec=hooks_base.InspectionHook))
for i in range(2)
]
self.inventory = {"interfaces": [42]}
self.plugin_data = {"logs": "abcd"}
def test(self, mock_ramdisk_logs):
inspector.run_inspection_hooks(self.task, self.inventory,
self.plugin_data, self.hooks)
for hook in self.hooks:
hook.obj.preprocess.assert_called_once_with(
self.task, self.inventory, self.plugin_data)
hook.obj.assert_called_once_with(
self.task, self.inventory, self.plugin_data)
mock_ramdisk_logs.assert_not_called()
def test_always_collect_logs(self, mock_ramdisk_logs):
CONF.set_override('deploy_logs_collect', 'always', group='agent')
inspector.run_inspection_hooks(self.task, self.inventory,
self.plugin_data, self.hooks)
for hook in self.hooks:
hook.obj.preprocess.assert_called_once_with(
self.task, self.inventory, self.plugin_data)
hook.obj.assert_called_once_with(
self.task, self.inventory, self.plugin_data)
mock_ramdisk_logs.assert_called_once_with(
self.node, "abcd", label="inspect")
@mock.patch.object(common, 'tear_down_managed_boot', autospec=True) @mock.patch.object(common, 'tear_down_managed_boot', autospec=True)
class AbortInspectionTestCase(db_base.DbTestCase): class AbortInspectionTestCase(db_base.DbTestCase):
def setUp(self): def setUp(self):
@ -157,7 +203,7 @@ class AbortInspectionTestCase(db_base.DbTestCase):
mock_tear_down.return_value = None mock_tear_down.return_value = None
with task_manager.acquire(self.context, self.node.id) as task: with task_manager.acquire(self.context, self.node.id) as task:
self.iface.abort(task) self.iface.abort(task)
mock_tear_down.assert_called_once_with(task) mock_tear_down.assert_called_once_with(task, always_power_off=True)
self.node.refresh() self.node.refresh()
# Keep the state - it will be changed on the conductor level # Keep the state - it will be changed on the conductor level
@ -169,7 +215,7 @@ class AbortInspectionTestCase(db_base.DbTestCase):
mock_tear_down.return_value = ['boom'] mock_tear_down.return_value = ['boom']
with task_manager.acquire(self.context, self.node.id) as task: with task_manager.acquire(self.context, self.node.id) as task:
self.iface.abort(task) self.iface.abort(task)
mock_tear_down.assert_called_once_with(task) mock_tear_down.assert_called_once_with(task, always_power_off=True)
self.node.refresh() self.node.refresh()
self.assertEqual(states.INSPECTFAIL, self.node.provision_state) self.assertEqual(states.INSPECTFAIL, self.node.provision_state)

View File

@ -323,6 +323,89 @@ class InspectHardwareTestCase(BaseTestCase):
self.task, 'power off', timeout=None) self.task, 'power off', timeout=None)
class TearDownManagedInspectionTestCase(BaseTestCase):
def test_unmanaged(self):
inspector.tear_down_managed_boot(self.task)
self.assertFalse(self.driver.network.remove_inspection_network.called)
self.assertFalse(self.driver.boot.clean_up_ramdisk.called)
self.assertFalse(self.driver.power.set_power_state.called)
def test_unmanaged_force_power_off(self):
inspector.tear_down_managed_boot(self.task, always_power_off=True)
self.assertFalse(self.driver.network.remove_inspection_network.called)
self.assertFalse(self.driver.boot.clean_up_ramdisk.called)
self.driver.power.set_power_state.assert_called_once_with(
self.task, 'power off', timeout=None)
def test_managed(self):
utils.set_node_nested_field(self.node, 'driver_internal_info',
'inspector_manage_boot', True)
self.node.save()
inspector.tear_down_managed_boot(self.task)
self.driver.network.remove_inspection_network.assert_called_once_with(
self.task)
self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task)
self.driver.power.set_power_state.assert_called_once_with(
self.task, 'power off', timeout=None)
def test_managed_no_power_off(self):
CONF.set_override('power_off', False, group='inspector')
utils.set_node_nested_field(self.node, 'driver_internal_info',
'inspector_manage_boot', True)
self.node.save()
inspector.tear_down_managed_boot(self.task)
self.driver.network.remove_inspection_network.assert_called_once_with(
self.task)
self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task)
self.assertFalse(self.driver.power.set_power_state.called)
def test_managed_no_power_off_on_fast_track(self):
CONF.set_override('fast_track', True, group='deploy')
utils.set_node_nested_field(self.node, 'driver_internal_info',
'inspector_manage_boot', True)
self.node.save()
inspector.tear_down_managed_boot(self.task)
self.driver.network.remove_inspection_network.assert_called_once_with(
self.task)
self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task)
self.assertFalse(self.driver.power.set_power_state.called)
def _test_clean_up_failed(self):
utils.set_node_nested_field(self.node, 'driver_internal_info',
'inspector_manage_boot', True)
self.node.save()
result = inspector.tear_down_managed_boot(self.task)
self.assertIn("boom", result[0])
def test_boot_clean_up_failed(self):
self.driver.boot.clean_up_ramdisk.side_effect = RuntimeError('boom')
self._test_clean_up_failed()
self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task)
def test_network_clean_up_failed(self):
self.driver.network.remove_inspection_network.side_effect = \
RuntimeError('boom')
self._test_clean_up_failed()
self.driver.network.remove_inspection_network.assert_called_once_with(
self.task)
self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task)
@mock.patch.object(client, 'get_client', autospec=True) @mock.patch.object(client, 'get_client', autospec=True)
class CheckStatusTestCase(BaseTestCase): class CheckStatusTestCase(BaseTestCase):
def setUp(self): def setUp(self):
@ -363,119 +446,34 @@ class CheckStatusTestCase(BaseTestCase):
mock_get.assert_called_once_with(self.node.uuid) mock_get.assert_called_once_with(self.node.uuid)
self.assertFalse(self.task.process_event.called) self.assertFalse(self.task.process_event.called)
def test_status_ok(self, mock_client): @mock.patch.object(inspector, 'tear_down_managed_boot', autospec=True)
def test_status_ok(self, mock_tear_down, mock_client):
mock_get = mock_client.return_value.get_introspection mock_get = mock_client.return_value.get_introspection
mock_get.return_value = mock.Mock(is_finished=True, mock_get.return_value = mock.Mock(is_finished=True,
error=None, error=None,
spec=['is_finished', 'error']) spec=['is_finished', 'error'])
mock_tear_down.return_value = []
inspector._check_status(self.task) inspector._check_status(self.task)
mock_get.assert_called_once_with(self.node.uuid) mock_get.assert_called_once_with(self.node.uuid)
self.task.process_event.assert_called_once_with('done') self.task.process_event.assert_called_once_with('done')
self.assertFalse(self.driver.network.remove_inspection_network.called) mock_tear_down.assert_called_once_with(
self.assertFalse(self.driver.boot.clean_up_ramdisk.called) self.task, always_power_off=False)
self.assertFalse(self.driver.power.set_power_state.called)
def test_status_ok_managed(self, mock_client): @mock.patch.object(inspector, 'tear_down_managed_boot', autospec=True)
utils.set_node_nested_field(self.node, 'driver_internal_info', def test_status_error(self, mock_tear_down, mock_client):
'inspector_manage_boot', True)
self.node.save()
mock_get = mock_client.return_value.get_introspection
mock_get.return_value = mock.Mock(is_finished=True,
error=None,
spec=['is_finished', 'error'])
inspector._check_status(self.task)
mock_get.assert_called_once_with(self.node.uuid)
self.task.process_event.assert_called_once_with('done')
self.driver.network.remove_inspection_network.assert_called_once_with(
self.task)
self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task)
self.driver.power.set_power_state.assert_called_once_with(
self.task, 'power off', timeout=None)
def test_status_ok_managed_no_power_off(self, mock_client):
CONF.set_override('power_off', False, group='inspector')
utils.set_node_nested_field(self.node, 'driver_internal_info',
'inspector_manage_boot', True)
self.node.save()
mock_get = mock_client.return_value.get_introspection
mock_get.return_value = mock.Mock(is_finished=True,
error=None,
spec=['is_finished', 'error'])
inspector._check_status(self.task)
mock_get.assert_called_once_with(self.node.uuid)
self.task.process_event.assert_called_once_with('done')
self.driver.network.remove_inspection_network.assert_called_once_with(
self.task)
self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task)
self.assertFalse(self.driver.power.set_power_state.called)
def test_status_ok_managed_no_power_off_on_fast_track(self, mock_client):
CONF.set_override('fast_track', True, group='deploy')
utils.set_node_nested_field(self.node, 'driver_internal_info',
'inspector_manage_boot', True)
self.node.save()
mock_get = mock_client.return_value.get_introspection
mock_get.return_value = mock.Mock(is_finished=True,
error=None,
spec=['is_finished', 'error'])
inspector._check_status(self.task)
mock_get.assert_called_once_with(self.node.uuid)
self.task.process_event.assert_called_once_with('done')
self.driver.network.remove_inspection_network.assert_called_once_with(
self.task)
self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task)
self.assertFalse(self.driver.power.set_power_state.called)
def test_status_error(self, mock_client):
mock_get = mock_client.return_value.get_introspection mock_get = mock_client.return_value.get_introspection
mock_get.return_value = mock.Mock(is_finished=True, mock_get.return_value = mock.Mock(is_finished=True,
error='boom', error='boom',
spec=['is_finished', 'error']) spec=['is_finished', 'error'])
mock_tear_down.return_value = []
inspector._check_status(self.task) inspector._check_status(self.task)
mock_get.assert_called_once_with(self.node.uuid) mock_get.assert_called_once_with(self.node.uuid)
self.task.process_event.assert_called_once_with('fail') self.task.process_event.assert_called_once_with('fail')
self.assertIn('boom', self.node.last_error) self.assertIn('boom', self.node.last_error)
self.assertFalse(self.driver.network.remove_inspection_network.called) mock_tear_down.assert_called_once_with(self.task)
self.assertFalse(self.driver.boot.clean_up_ramdisk.called)
self.assertFalse(self.driver.power.set_power_state.called)
def test_status_error_managed(self, mock_client): @mock.patch.object(inspector, 'tear_down_managed_boot', autospec=True)
utils.set_node_nested_field(self.node, 'driver_internal_info', def test_status_clean_up_failed(self, mock_tear_down, mock_client):
'inspector_manage_boot', True)
self.node.save()
mock_get = mock_client.return_value.get_introspection
mock_get.return_value = mock.Mock(is_finished=True,
error='boom',
spec=['is_finished', 'error'])
inspector._check_status(self.task)
mock_get.assert_called_once_with(self.node.uuid)
self.task.process_event.assert_called_once_with('fail')
self.assertIn('boom', self.node.last_error)
self.driver.network.remove_inspection_network.assert_called_once_with(
self.task)
self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task)
self.driver.power.set_power_state.assert_called_once_with(
self.task, 'power off', timeout=None)
def test_status_error_managed_no_power_off(self, mock_client):
CONF.set_override('power_off', False, group='inspector')
utils.set_node_nested_field(self.node, 'driver_internal_info',
'inspector_manage_boot', True)
self.node.save()
mock_get = mock_client.return_value.get_introspection
mock_get.return_value = mock.Mock(is_finished=True,
error='boom',
spec=['is_finished', 'error'])
inspector._check_status(self.task)
mock_get.assert_called_once_with(self.node.uuid)
self.task.process_event.assert_called_once_with('fail')
self.assertIn('boom', self.node.last_error)
self.driver.network.remove_inspection_network.assert_called_once_with(
self.task)
self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task)
self.assertFalse(self.driver.power.set_power_state.called)
def _test_status_clean_up_failed(self, mock_client):
utils.set_node_nested_field(self.node, 'driver_internal_info', utils.set_node_nested_field(self.node, 'driver_internal_info',
'inspector_manage_boot', True) 'inspector_manage_boot', True)
self.node.save() self.node.save()
@ -483,27 +481,13 @@ class CheckStatusTestCase(BaseTestCase):
mock_get.return_value = mock.Mock(is_finished=True, mock_get.return_value = mock.Mock(is_finished=True,
error=None, error=None,
spec=['is_finished', 'error']) spec=['is_finished', 'error'])
mock_tear_down.return_value = ["boom"]
inspector._check_status(self.task) inspector._check_status(self.task)
mock_get.assert_called_once_with(self.node.uuid) mock_get.assert_called_once_with(self.node.uuid)
self.task.process_event.assert_called_once_with('fail') self.task.process_event.assert_called_once_with('fail')
self.assertIn('boom', self.node.last_error) self.assertIn('boom', self.node.last_error)
mock_tear_down.assert_called_once_with(
def test_status_boot_clean_up_failed(self, mock_client): self.task, always_power_off=False)
self.driver.boot.clean_up_ramdisk.side_effect = RuntimeError('boom')
self._test_status_clean_up_failed(mock_client)
self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task)
def test_status_network_clean_up_failed(self, mock_client):
self.driver.network.remove_inspection_network.side_effect = \
RuntimeError('boom')
self._test_status_clean_up_failed(mock_client)
self.driver.network.remove_inspection_network.assert_called_once_with(
self.task)
self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task)
@mock.patch.object(inspect_utils, 'store_inspection_data', autospec=True) @mock.patch.object(inspect_utils, 'store_inspection_data', autospec=True)
def test_status_ok_store_inventory(self, mock_store_data, mock_client): def test_status_ok_store_inventory(self, mock_store_data, mock_client):

View File

@ -0,0 +1,9 @@
---
fixes:
- |
The configuration option ``[inspector]power_off`` is now actually ignored
for nodes with fast track enabled, as documented in its help.
- |
Fixes the built-in in-band inspection implementation to power off the
node after aborting inspection on user's request, unless the node is
in the fast track mode or ``[inspector]power_off`` is set to ``False``.