diff --git a/masakari/engine/drivers/taskflow/driver.py b/masakari/engine/drivers/taskflow/driver.py index b060a01e..618a9d9d 100644 --- a/masakari/engine/drivers/taskflow/driver.py +++ b/masakari/engine/drivers/taskflow/driver.py @@ -184,7 +184,19 @@ class TaskFlowDriver(driver.NotificationDriver): # taskflow sends out and redirect them to a more useful log for # masakari's debugging (or error reporting) usage. with base.DynamicLogListener(flow_engine, logger=LOG): - flow_engine.run() + try: + flow_engine.run() + except Exception as exc: + with excutils.save_and_reraise_exception(reraise=False) as e: + if isinstance( + exc, (exception.SkipInstanceRecoveryException, + exception.IgnoreInstanceRecoveryException, + exception.InstanceRecoveryFailureException)): + e.reraise = True + return + msg = _("Failed to execute instance failure flow for " + "notification '%s'.") % notification_uuid + raise exception.MasakariException(msg) def execute_process_failure(self, context, process_name, host_name, notification_uuid): @@ -218,7 +230,17 @@ class TaskFlowDriver(driver.NotificationDriver): # taskflow sends out and redirect them to a more useful log for # masakari's debugging (or error reporting) usage. 
with base.DynamicLogListener(flow_engine, logger=LOG): - flow_engine.run() + try: + flow_engine.run() + except Exception as exc: + with excutils.save_and_reraise_exception(reraise=False) as e: + if isinstance( + exc, exception.ProcessRecoveryFailureException): + e.reraise = True + return + msg = _("Failed to execute instance failure flow for " + "notification '%s'.") % notification_uuid + raise exception.MasakariException(msg) @contextlib.contextmanager def upgrade_backend(self, persistence_backend): diff --git a/releasenotes/notes/fix-notification-stuck-problem-fdb84bad8641384b.yaml b/releasenotes/notes/fix-notification-stuck-problem-fdb84bad8641384b.yaml new file mode 100644 index 00000000..c10d34f9 --- /dev/null +++ b/releasenotes/notes/fix-notification-stuck-problem-fdb84bad8641384b.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Fixes an issue where a failure notification got stuck in the running + status when a timeout occurred. `LP#1996835 + <https://bugs.launchpad.net/masakari/+bug/1996835>`__