From 8a6b5eb8c3dc385b49bb8a7811029a2a60b57e71 Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Tue, 24 Sep 2024 10:27:18 +0200 Subject: [PATCH] Fix double transition to INSPECTFAIL on aborting in-band inspection Both the driver and the conductor code try to transition the node to INSPECTFAIL, with the 2nd attempt failing. Rework the driver code to only do implementation-specific clean-up. Also safeguard the conductor code against this case. Change-Id: Ie1c64b4807ecf29fa0da54501798d363675977c8 --- ironic/conductor/inspection.py | 5 ++- ironic/drivers/modules/inspector/agent.py | 8 ++-- ironic/tests/unit/conductor/test_manager.py | 42 ++++++++++++++----- .../drivers/modules/inspector/test_agent.py | 40 ++++++++++++++++++ .../notes/inspect-abort-8add5e6e6b599357.yaml | 5 +++ 5 files changed, 84 insertions(+), 16 deletions(-) create mode 100644 releasenotes/notes/inspect-abort-8add5e6e6b599357.yaml diff --git a/ironic/conductor/inspection.py b/ironic/conductor/inspection.py index 6236397437..fb8f7f5d55 100644 --- a/ironic/conductor/inspection.py +++ b/ironic/conductor/inspection.py @@ -104,7 +104,10 @@ def abort_inspection(task): error=True, user=task.context.user_id) utils.wipe_token_and_url(task) - task.process_event('abort') + if task.node.provision_state != states.INSPECTFAIL: + task.process_event('abort') + else: + task.node.save() LOG.info('Successfully aborted inspection of node %(node)s', {'node': node.uuid}) diff --git a/ironic/drivers/modules/inspector/agent.py b/ironic/drivers/modules/inspector/agent.py index 727058b315..b253481075 100644 --- a/ironic/drivers/modules/inspector/agent.py +++ b/ironic/drivers/modules/inspector/agent.py @@ -76,10 +76,10 @@ class AgentInspect(common.Common): :param task: a task from TaskManager. """ - error = _("By request, the inspection operation has been aborted") - inspect_utils.clear_lookup_addresses(task.node) - common.inspection_error_handler(task, error, raise_exc=False, - clean_up=True) + if inspect_utils.clear_lookup_addresses(task.node): + task.node.save() + + common.clean_up(task, finish=False) def continue_inspection(self, task, inventory, plugin_data): """Continue in-band hardware inspection. diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index a258f5a9af..1067805d98 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -8500,14 +8500,14 @@ class NodeTraitsTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): @mgr_utils.mock_record_keepalive +@mock.patch('ironic.drivers.modules.fake.FakeInspect.abort', autospec=True) +@mock.patch('ironic.conductor.task_manager.acquire', autospec=True) class DoNodeInspectAbortTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): @mock.patch.object(inspection, 'LOG', autospec=True) - @mock.patch('ironic.drivers.modules.fake.FakeInspect.abort', autospec=True) - @mock.patch('ironic.conductor.task_manager.acquire', autospec=True) - def test_do_inspect_abort_interface_not_support(self, mock_acquire, - mock_abort, mock_log): + def test_do_inspect_abort_interface_not_support(self, mock_log, + mock_acquire, mock_abort): node = obj_utils.create_test_node(self.context, driver='fake-hardware', provision_state=states.INSPECTWAIT) @@ -8525,10 +8525,9 @@ class DoNodeInspectAbortTestCase(mgr_utils.CommonMixIn, self.assertTrue(mock_log.error.called) @mock.patch.object(inspection, 'LOG', autospec=True) - @mock.patch('ironic.drivers.modules.fake.FakeInspect.abort', autospec=True) - @mock.patch('ironic.conductor.task_manager.acquire', autospec=True) - def test_do_inspect_abort_interface_return_failed(self, mock_acquire, - mock_abort, mock_log): + def test_do_inspect_abort_interface_return_failed(self, mock_log, + mock_acquire, + mock_abort): mock_abort.side_effect = exception.IronicException('Oops') self._start_service() node = obj_utils.create_test_node(self.context, @@ -8544,8 +8543,6 @@ class DoNodeInspectAbortTestCase(mgr_utils.CommonMixIn, self.assertTrue(mock_log.exception.called) self.assertIn('Failed to abort inspection', node.last_error) - @mock.patch('ironic.drivers.modules.fake.FakeInspect.abort', autospec=True) - @mock.patch('ironic.conductor.task_manager.acquire', autospec=True) def test_do_inspect_abort_succeeded(self, mock_acquire, mock_abort): self._start_service() node = obj_utils.create_test_node( @@ -8559,7 +8556,30 @@ class DoNodeInspectAbortTestCase(mgr_utils.CommonMixIn, self.service.do_provisioning_action(self.context, task.node.uuid, "abort") node.refresh() - self.assertEqual('inspect failed', node.provision_state) + self.assertEqual(states.INSPECTFAIL, node.provision_state) + self.assertIn('Inspection was aborted', node.last_error) + self.assertNotIn('agent_url', node.driver_internal_info) + self.assertNotIn('agent_secret_token', node.driver_internal_info) + + def test_do_inspect_abort_state_set_by_driver(self, mock_acquire, + mock_abort): + def abort(driver, task): + task.process_event('abort') + + mock_abort.side_effect = abort + self._start_service() + node = obj_utils.create_test_node( + self.context, + driver='fake-hardware', + provision_state=states.INSPECTWAIT, + driver_internal_info={'agent_url': 'url', + 'agent_secret_token': 'token'}) + task = task_manager.TaskManager(self.context, node.uuid) + mock_acquire.side_effect = self._get_acquire_side_effect(task) + self.service.do_provisioning_action(self.context, task.node.uuid, + "abort") + node.refresh() + self.assertEqual(states.INSPECTFAIL, node.provision_state) self.assertIn('Inspection was aborted', node.last_error) self.assertNotIn('agent_url', node.driver_internal_info) self.assertNotIn('agent_secret_token', node.driver_internal_info) diff --git a/ironic/tests/unit/drivers/modules/inspector/test_agent.py b/ironic/tests/unit/drivers/modules/inspector/test_agent.py index 452d2259ed..8aab6004c4 100644 --- a/ironic/tests/unit/drivers/modules/inspector/test_agent.py +++ b/ironic/tests/unit/drivers/modules/inspector/test_agent.py @@ -136,3 +136,43 @@ class ContinueInspectionTestCase(db_base.DbTestCase): self.assertEqual(states.INSPECTING, task.node.provision_state) self.assertIsNone(result) + + +@mock.patch.object(common, 'tear_down_managed_boot', autospec=True) +class AbortInspectionTestCase(db_base.DbTestCase): + def setUp(self): + super().setUp() + CONF.set_override('enabled_inspect_interfaces', + ['agent', 'no-inspect']) + self.node = obj_utils.create_test_node( + self.context, + driver_internal_info={ + 'lookup_bmc_addresses': [42], + }, + inspect_interface='agent', + provision_state=states.INSPECTWAIT) + self.iface = inspector.AgentInspect() + + def test_success(self, mock_tear_down): + mock_tear_down.return_value = None + with task_manager.acquire(self.context, self.node.id) as task: + self.iface.abort(task) + mock_tear_down.assert_called_once_with(task) + + self.node.refresh() + # Keep the state - it will be changed on the conductor level + self.assertEqual(states.INSPECTWAIT, self.node.provision_state) + self.assertNotIn('lookup_bmc_addresses', + self.node.driver_internal_info) + + def test_cleanup_failed(self, mock_tear_down): + mock_tear_down.return_value = ['boom'] + with task_manager.acquire(self.context, self.node.id) as task: + self.iface.abort(task) + mock_tear_down.assert_called_once_with(task) + + self.node.refresh() + self.assertEqual(states.INSPECTFAIL, self.node.provision_state) + self.assertIn('boom', self.node.last_error) + self.assertNotIn('lookup_bmc_addresses', + self.node.driver_internal_info) diff --git a/releasenotes/notes/inspect-abort-8add5e6e6b599357.yaml b/releasenotes/notes/inspect-abort-8add5e6e6b599357.yaml new file mode 100644 index 0000000000..2bdea92e61 --- /dev/null +++ b/releasenotes/notes/inspect-abort-8add5e6e6b599357.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes aborting in-band inspection. Previously, it would fail with + ``Can not transition from state 'inspect failed' on event 'abort'``.