diff --git a/releasenotes/notes/bug-2134046-7daa7debfc302122.yaml b/releasenotes/notes/bug-2134046-7daa7debfc302122.yaml new file mode 100644 index 000000000..587e4ab09 --- /dev/null +++ b/releasenotes/notes/bug-2134046-7daa7debfc302122.yaml @@ -0,0 +1,10 @@ +--- +fixes: + - | + The decision-engine could move a oneshot audit to ONGOING and start + execution even when the audit had already been CANCELLED (e.g. before + it reached ONGOING). The decision-engine now checks the audit state + immediately before execution, so cancelled audits are no longer + executed or transitioned to ONGOING. + + For more info see: https://bugs.launchpad.net/watcher/+bug/2134046 diff --git a/watcher/common/exception.py b/watcher/common/exception.py index de1928841..86c961144 100644 --- a/watcher/common/exception.py +++ b/watcher/common/exception.py @@ -277,6 +277,10 @@ class AuditReferenced(Invalid): "plans") +class AuditCancelled(WatcherException): + msg_fmt = _("Audit with UUID %(uuid)s is cancelled") + + class ActionSkipped(WatcherException): pass diff --git a/watcher/decision_engine/audit/base.py b/watcher/decision_engine/audit/base.py index abd2207fd..f48d34b44 100644 --- a/watcher/decision_engine/audit/base.py +++ b/watcher/decision_engine/audit/base.py @@ -127,6 +127,12 @@ class AuditHandler(BaseAuditHandler, metaclass=abc.ABCMeta): # despite of ongoing actionplan if not audit.force: self.check_ongoing_action_plans(request_context) + # check current state of the audit right before moving to ONGOING + # to avoid race conditions + audit = objects.Audit.get_by_uuid( + request_context, audit.uuid, eager=True) + if audit.state == objects.audit.State.CANCELLED: + raise exception.AuditCancelled(uuid=audit.uuid) # Write hostname that will execute this audit. audit.hostname = CONF.host # change state of the audit to ONGOING @@ -147,6 +153,8 @@ class AuditHandler(BaseAuditHandler, metaclass=abc.ABCMeta): LOG.warning(e) if audit.audit_type == objects.audit.AuditType.ONESHOT.value: self.update_audit_state(audit, objects.audit.State.CANCELLED) + except exception.AuditCancelled as e: + LOG.info(e) except Exception as e: LOG.exception(e) self.update_audit_state(audit, objects.audit.State.FAILED) diff --git a/watcher/tests/unit/decision_engine/audit/test_audit_handlers.py b/watcher/tests/unit/decision_engine/audit/test_audit_handlers.py index 20c19acf5..581508fbe 100644 --- a/watcher/tests/unit/decision_engine/audit/test_audit_handlers.py +++ b/watcher/tests/unit/decision_engine/audit/test_audit_handlers.py @@ -164,6 +164,31 @@ class TestOneShotAuditHandler(base.DbTestCase): expected_calls, self.m_audit_notifications.send_action_notification.call_args_list) + @mock.patch.object(oneshot.OneShotAuditHandler, "update_audit_state", + autospec=True) + def test_pre_execute_audit_cancelled(self, m_update_audit_state): + audit_handler = oneshot.OneShotAuditHandler() + self.audit.state = objects.audit.State.CANCELLED + self.audit.save() + self.assertRaises(exception.AuditCancelled, + audit_handler.pre_execute, self.audit, self.context) + + m_update_audit_state.assert_not_called() + + @mock.patch.object(oneshot.OneShotAuditHandler, "do_execute", + autospec=True) + @mock.patch.object(oneshot.OneShotAuditHandler, "post_execute", + autospec=True) + def test_cancelled_audit_is_not_executed(self, m_post_execute, + m_do_execute): + self.audit.state = objects.audit.State.CANCELLED + self.audit.save() + audit_handler = oneshot.OneShotAuditHandler() + audit_handler.execute(self.audit, self.context) + m_do_execute.assert_not_called() + m_post_execute.assert_not_called() + self.assertEqual(objects.audit.State.CANCELLED, self.audit.state) + class TestAutoTriggerActionPlan(base.DbTestCase):