Merge "Ensure workflow check is retried on DB error"

2018-03-21 08:58:59 +00:00 · 2018-03-21 08:58:59 +00:00 · 1361e8d2ad
commit 1361e8d2ad
parent 34cdee8bf2 a59af3f247
1 changed files with 19 additions and 24 deletions
--- a/mistral/engine/workflow_handler.py
+++ b/mistral/engine/workflow_handler.py
@ -101,12 +101,28 @@ def _check_and_complete(wf_ex_id):
        wf = workflows.Workflow(wf_ex=wf_ex)
        incomplete_tasks_count = 0
        try:
            check_and_fix_integrity(wf_ex)
-            incomplete_tasks_count = wf.check_and_complete()
+            num_incomplete_tasks = wf.check_and_complete()
            if not states.is_completed(wf_ex.state):
                delay = (
                    2 + int(num_incomplete_tasks * 0.1) if num_incomplete_tasks
                    else 4
                )
                # Rescheduling this check may not happen if erros are
                # raised in the business logic. If the error is DB related
                # and not considered fatal (e.g. disconnect, deadlock), the
                # retry annotation around the method will ensure that the
                # whole method is retried in a new transaction. On fatal
                # errors, the check should not be rescheduled as it could
                # result in undesired consequences.
                # In case there are some errors that should not be
                # considered fatal, those should be handled explicitly.
                _schedule_check_and_complete(wf_ex, delay)
        except exc.MistralException as e:
            msg = (
                "Failed to check and complete [wf_ex_id=%s, wf_name=%s]:"
@ -117,27 +133,6 @@ def _check_and_complete(wf_ex_id):
            force_fail_workflow(wf.wf_ex, msg)
            return
        finally:
            if states.is_completed(wf_ex.state):
                return
            # Let's assume that a task takes 0.01 sec in average to complete
            # and based on this assumption calculate a time of the next check.
            # The estimation is very rough but this delay will be decreasing
            # as tasks will be completing which will give a decent
            # approximation.
            # For example, if a workflow has 100 incomplete tasks then the
            # next check call will happen in 1 second. For 500 tasks it will
            # be 5 seconds. The larger the workflow is, the more beneficial
            # this mechanism will be.
            delay = (
                int(incomplete_tasks_count * 0.01) if incomplete_tasks_count
                else 4
            )
            _schedule_check_and_complete(wf_ex, delay)
@profiler.trace('workflow-handler-check-and-fix-integrity')
 def check_and_fix_integrity(wf_ex):