From ccbe3ec3b812face313071fa2889ac927d955e47 Mon Sep 17 00:00:00 2001 From: Alfredo Moralejo Date: Tue, 24 Feb 2026 12:17:32 +0100 Subject: [PATCH] Check Audit state in the decision-engine before starting the execution In some cases, a oneshot audit may be CANCELLED before being executed and before it's moved to ONGOING. In those cases, the decision-engine is moving the audit to ONGOING as it is not checking the state. This patch is implementing Audit state check right before executing it to make sure it has not been cancelled before. Closes-Bug: 2134046 Change-Id: I14a59e897dc5c522fd7f7b21eee99e3f4b70a392 Signed-off-by: Alfredo Moralejo --- .../notes/bug-2134046-7daa7debfc302122.yaml | 10 ++++++++ watcher/common/exception.py | 4 +++ watcher/decision_engine/audit/base.py | 8 ++++++ .../audit/test_audit_handlers.py | 25 +++++++++++++++++++ 4 files changed, 47 insertions(+) create mode 100644 releasenotes/notes/bug-2134046-7daa7debfc302122.yaml diff --git a/releasenotes/notes/bug-2134046-7daa7debfc302122.yaml b/releasenotes/notes/bug-2134046-7daa7debfc302122.yaml new file mode 100644 index 000000000..587e4ab09 --- /dev/null +++ b/releasenotes/notes/bug-2134046-7daa7debfc302122.yaml @@ -0,0 +1,10 @@ +--- +fixes: + - | + The decision-engine could move a oneshot audit to ONGOING and start + execution even when the audit had already been CANCELLED (e.g. before + it reached ONGOING). The decision-engine now checks the audit state + immediately before execution, so cancelled audits are no longer + executed or transitioned to ONGOING. + + For more info see: https://bugs.launchpad.net/watcher/+bug/2134046 diff --git a/watcher/common/exception.py b/watcher/common/exception.py index de1928841..86c961144 100644 --- a/watcher/common/exception.py +++ b/watcher/common/exception.py @@ -277,6 +277,10 @@ class AuditReferenced(Invalid): "plans") +class AuditCancelled(WatcherException): + msg_fmt = _("Audit with UUID %(uuid)s is cancelled") + + class ActionSkipped(WatcherException): pass diff --git a/watcher/decision_engine/audit/base.py b/watcher/decision_engine/audit/base.py index abd2207fd..f48d34b44 100644 --- a/watcher/decision_engine/audit/base.py +++ b/watcher/decision_engine/audit/base.py @@ -127,6 +127,12 @@ class AuditHandler(BaseAuditHandler, metaclass=abc.ABCMeta): # despite of ongoing actionplan if not audit.force: self.check_ongoing_action_plans(request_context) + # check current state of the audit right before moving to ONGOING + # to avoid race conditions + audit = objects.Audit.get_by_uuid( + request_context, audit.uuid, eager=True) + if audit.state == objects.audit.State.CANCELLED: + raise exception.AuditCancelled(uuid=audit.uuid) # Write hostname that will execute this audit. audit.hostname = CONF.host # change state of the audit to ONGOING @@ -147,6 +153,8 @@ class AuditHandler(BaseAuditHandler, metaclass=abc.ABCMeta): LOG.warning(e) if audit.audit_type == objects.audit.AuditType.ONESHOT.value: self.update_audit_state(audit, objects.audit.State.CANCELLED) + except exception.AuditCancelled as e: + LOG.info(e) except Exception as e: LOG.exception(e) self.update_audit_state(audit, objects.audit.State.FAILED) diff --git a/watcher/tests/unit/decision_engine/audit/test_audit_handlers.py b/watcher/tests/unit/decision_engine/audit/test_audit_handlers.py index 20c19acf5..581508fbe 100644 --- a/watcher/tests/unit/decision_engine/audit/test_audit_handlers.py +++ b/watcher/tests/unit/decision_engine/audit/test_audit_handlers.py @@ -164,6 +164,31 @@ class TestOneShotAuditHandler(base.DbTestCase): expected_calls, self.m_audit_notifications.send_action_notification.call_args_list) + @mock.patch.object(oneshot.OneShotAuditHandler, "update_audit_state", + autospec=True) + def test_pre_execute_audit_cancelled(self, m_update_audit_state): + audit_handler = oneshot.OneShotAuditHandler() + self.audit.state = objects.audit.State.CANCELLED + self.audit.save() + self.assertRaises(exception.AuditCancelled, + audit_handler.pre_execute, self.audit, self.context) + + m_update_audit_state.assert_not_called() + + @mock.patch.object(oneshot.OneShotAuditHandler, "do_execute", + autospec=True) + @mock.patch.object(oneshot.OneShotAuditHandler, "post_execute", + autospec=True) + def test_cancelled_audit_is_not_executed(self, m_post_execute, + m_do_execute): + self.audit.state = objects.audit.State.CANCELLED + self.audit.save() + audit_handler = oneshot.OneShotAuditHandler() + audit_handler.execute(self.audit, self.context) + m_do_execute.assert_not_called() + m_post_execute.assert_not_called() + self.assertEqual(objects.audit.State.CANCELLED, self.audit.state) + class TestAutoTriggerActionPlan(base.DbTestCase):