Merge "Optimize final workflow context evaluation with a batch request"
This commit is contained in:
commit
644f705345
@ -312,6 +312,10 @@ def get_completed_task_executions(**kwargs):
|
|||||||
return IMPL.get_completed_task_executions(**kwargs)
|
return IMPL.get_completed_task_executions(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def get_completed_task_executions_as_batches(**kwargs):
|
||||||
|
return IMPL.get_completed_task_executions_as_batches(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
def get_incomplete_task_executions(**kwargs):
|
def get_incomplete_task_executions(**kwargs):
|
||||||
return IMPL.get_incomplete_task_executions(**kwargs)
|
return IMPL.get_incomplete_task_executions(**kwargs)
|
||||||
|
|
||||||
|
@ -876,6 +876,30 @@ def get_completed_task_executions(session=None, **kwargs):
|
|||||||
return query.all()
|
return query.all()
|
||||||
|
|
||||||
|
|
||||||
|
@b.session_aware()
|
||||||
|
def get_completed_task_executions_as_batches(session=None, **kwargs):
|
||||||
|
# NOTE: Using batch querying seriously allows to optimize memory
|
||||||
|
# consumption on operations when we need to iterate through
|
||||||
|
# a list of task executions and do some processing like merging
|
||||||
|
# their inbound contexts. If we don't use batches Mistral has to
|
||||||
|
# hold all the collection (that can be large) in memory.
|
||||||
|
# Using a generator that returns batches lets GC to collect a
|
||||||
|
# batch of task executions that has already been processed.
|
||||||
|
query = _get_completed_task_executions_query(kwargs)
|
||||||
|
|
||||||
|
# Batch size 20 may be arguable but still seems reasonable: it's big
|
||||||
|
# enough to keep the total number of DB hops small (say for 100 tasks
|
||||||
|
# we'll need only 5) and small enough not to drastically increase
|
||||||
|
# memory footprint if the number of tasks is big like several hundreds.
|
||||||
|
batch_size = 20
|
||||||
|
idx = 0
|
||||||
|
|
||||||
|
while idx < query.count():
|
||||||
|
yield query.slice(idx, idx + batch_size).all()
|
||||||
|
|
||||||
|
idx += batch_size
|
||||||
|
|
||||||
|
|
||||||
def _get_incomplete_task_executions_query(kwargs):
|
def _get_incomplete_task_executions_query(kwargs):
|
||||||
query = b.model_query(models.TaskExecution)
|
query = b.model_query(models.TaskExecution)
|
||||||
|
|
||||||
|
@ -945,9 +945,9 @@ class DirectWorkflowEngineTest(base.EngineTestCase):
|
|||||||
with db_api.transaction():
|
with db_api.transaction():
|
||||||
wf_ex = db_api.get_workflow_execution(wf_ex.id)
|
wf_ex = db_api.get_workflow_execution(wf_ex.id)
|
||||||
|
|
||||||
tasks = wf_ex.task_executions
|
task_execs = wf_ex.task_executions
|
||||||
|
|
||||||
self.assertEqual(task_cnt + 3, len(tasks))
|
self.assertEqual(task_cnt + 3, len(task_execs))
|
||||||
|
|
||||||
self._assert_single_item(tasks, name='task0')
|
self._assert_single_item(task_execs, name='task0')
|
||||||
self._assert_single_item(tasks, name='task{}'.format(task_cnt))
|
self._assert_single_item(task_execs, name='task{}'.format(task_cnt))
|
||||||
|
@ -168,11 +168,15 @@ class DirectWorkflowController(base.WorkflowController):
|
|||||||
def evaluate_workflow_final_context(self):
|
def evaluate_workflow_final_context(self):
|
||||||
ctx = {}
|
ctx = {}
|
||||||
|
|
||||||
for t_ex in self._find_end_task_executions():
|
for batch in self._find_end_task_executions_as_batches():
|
||||||
ctx = utils.merge_dicts(
|
if not batch:
|
||||||
ctx,
|
break
|
||||||
data_flow.evaluate_task_outbound_context(t_ex)
|
|
||||||
)
|
for t_ex in batch:
|
||||||
|
ctx = utils.merge_dicts(
|
||||||
|
ctx,
|
||||||
|
data_flow.evaluate_task_outbound_context(t_ex)
|
||||||
|
)
|
||||||
|
|
||||||
return ctx
|
return ctx
|
||||||
|
|
||||||
@ -208,7 +212,7 @@ class DirectWorkflowController(base.WorkflowController):
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _find_end_task_executions(self):
|
def _find_end_task_executions_as_batches(self):
|
||||||
def is_end_task(t_ex):
|
def is_end_task(t_ex):
|
||||||
try:
|
try:
|
||||||
return not self._has_outbound_tasks(t_ex)
|
return not self._has_outbound_tasks(t_ex)
|
||||||
@ -219,13 +223,15 @@ class DirectWorkflowController(base.WorkflowController):
|
|||||||
# of given task also.
|
# of given task also.
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return list(
|
batches = lookup_utils.find_completed_task_executions_as_batches(
|
||||||
filter(
|
self.wf_ex.id
|
||||||
is_end_task,
|
|
||||||
lookup_utils.find_completed_task_executions(self.wf_ex.id)
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
for batch in batches:
|
||||||
|
yield list(
|
||||||
|
filter(is_end_task, batch)
|
||||||
|
)
|
||||||
|
|
||||||
def _has_outbound_tasks(self, task_ex):
|
def _has_outbound_tasks(self, task_ex):
|
||||||
# In order to determine if there are outbound tasks we just need
|
# In order to determine if there are outbound tasks we just need
|
||||||
# to calculate next task names (based on task outbound context)
|
# to calculate next task names (based on task outbound context)
|
||||||
|
@ -151,6 +151,12 @@ def find_completed_task_executions(wf_ex_id):
|
|||||||
return db_api.get_completed_task_executions(workflow_execution_id=wf_ex_id)
|
return db_api.get_completed_task_executions(workflow_execution_id=wf_ex_id)
|
||||||
|
|
||||||
|
|
||||||
|
def find_completed_task_executions_as_batches(wf_ex_id):
|
||||||
|
return db_api.get_completed_task_executions_as_batches(
|
||||||
|
workflow_execution_id=wf_ex_id
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_task_execution_cache_size():
|
def get_task_execution_cache_size():
|
||||||
return len(_TASK_EX_CACHE)
|
return len(_TASK_EX_CACHE)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user