From 8d75784356c948bb6fcabbb6e3e0b6b59a82bcf6 Mon Sep 17 00:00:00 2001 From: Renat Akhmerov Date: Tue, 11 Feb 2020 11:18:50 +0700 Subject: [PATCH] Add "convert_output_data" config property for YAQL * Adding "convert_output_data" config property gives an opportunity to increase overal performance. If YAQL always converts an expression result, it often takes significant CPU time and overall workflow execution time increases. It is especially important when a workflow publishes lots of data into the context and uses big workflow environments. It's been tested on a very big workflow (~7k tasks) with a big workflow environment (~2.5mb) that often uses the YAQL function "<% env() %>". This function basically just returns the workflow environment. * Created all necessary unit tests. * Other style fixes. Change-Id: Ie3169ec884ec9a0e7e50327dd03cd78dcda0a39b --- mistral/config.py | 27 ++++- mistral/engine/engine_server.py | 11 ++ mistral/engine/tasks.py | 44 ++++--- mistral/expressions/yaql_expression.py | 18 ++- mistral/lang/v2/tasks.py | 14 ++- .../engine/test_disabled_yaql_conversion.py | 111 ++++++++++++++++++ .../unit/expressions/test_yaql_expression.py | 3 + 7 files changed, 197 insertions(+), 31 deletions(-) create mode 100644 mistral/tests/unit/engine/test_disabled_yaql_conversion.py diff --git a/mistral/config.py b/mistral/config.py index e8e3d8716..9b614dc7b 100644 --- a/mistral/config.py +++ b/mistral/config.py @@ -615,7 +615,7 @@ yaql_opts = [ 'convert_input_data', default=True, help=_('Enables input data conversion for YAQL expressions. If set ' - 'to True then YAQL will convert mutable data structures ' + 'to True, YAQL will convert mutable data structures ' '(lists, dicts, sets) into their immutable versions. That ' 'will allow them to work with some constructs that require ' 'hashable types even if elements are not hashable. For ' @@ -628,23 +628,38 @@ yaql_opts = [ 'performance boost if the input data for an expression is ' 'large.') ), + cfg.BoolOpt( + 'convert_output_data', + default=True, + help=_('Enables output data conversion for YAQL expressions.' + 'If set to False, it is possible that YAQL will generate ' + 'an output that will be not JSON-serializable. For example, ' + 'if an expression has ".toSet()" in the end to convert a list ' + 'into a set. It does not mean though that such functions ' + 'cannot be used, they can still be used in expressions but ' + 'user has to keep in mind of what type a result will be, ' + 'whereas if the value of ths property is True YAQL will ' + 'convert the result to a JSON-compatible type.') + ), cfg.BoolOpt( 'convert_tuples_to_lists', default=True, - help=_('When set to true, yaql converts all tuples in the expression ' - 'result to lists.') + help=_('When set to True, yaql converts all tuples in the expression ' + 'result to lists. It works only if "convert_output_data" is ' + 'set to True.') ), cfg.BoolOpt( 'convert_sets_to_lists', default=False, - help=_('When set to true, yaql converts all sets in the expression ' + help=_('When set to True, yaql converts all sets in the expression ' 'result to lists. Otherwise the produced result may contain ' - 'sets that are not JSON-serializable.') + 'sets that are not JSON-serializable. It works only if ' + '"convert_output_data" is set to True.') ), cfg.BoolOpt( 'iterable_dicts', default=False, - help=_('When set to true, dictionaries are considered to be iterable ' + help=_('When set to True, dictionaries are considered to be iterable ' 'and iteration over dictionaries produces their keys (as in ' 'Python and yaql 0.2).') ), diff --git a/mistral/engine/engine_server.py b/mistral/engine/engine_server.py index 2be3390a7..3f8ff9b1d 100644 --- a/mistral/engine/engine_server.py +++ b/mistral/engine/engine_server.py @@ -17,6 +17,7 @@ from oslo_log import log as logging from mistral import config as cfg from mistral.db.v2 import api as db_api from mistral.engine import default_engine +from mistral import exceptions as exc from mistral.rpc import base as rpc from mistral.scheduler import base as sched_base from mistral.service import base as service_base @@ -31,6 +32,14 @@ LOG = logging.getLogger(__name__) CONF = cfg.CONF +def _validate_config(): + if not CONF.yaql.convert_output_data and CONF.yaql.convert_input_data: + raise exc.MistralError( + "The config property 'yaql.convert_output_data' is set to False " + "so 'yaql.convert_input_data' must also be set to False." + ) + + class EngineServer(service_base.MistralService): """Engine server. @@ -50,6 +59,8 @@ class EngineServer(service_base.MistralService): def start(self): super(EngineServer, self).start() + _validate_config() + db_api.setup_db() self._scheduler = sched_base.get_system_scheduler() diff --git a/mistral/engine/tasks.py b/mistral/engine/tasks.py index 780c3ceac..5b2bf0795 100644 --- a/mistral/engine/tasks.py +++ b/mistral/engine/tasks.py @@ -16,6 +16,7 @@ # limitations under the License. import abc +import collections import copy import json from oslo_config import cfg @@ -780,8 +781,6 @@ class WithItemsTask(RegularTask): with_items_values = self._get_with_items_values() if self._is_new(): - self._validate_values(with_items_values) - action_count = len(six.next(iter(with_items_values.values()))) self._prepare_runtime_context(action_count) @@ -830,25 +829,36 @@ class WithItemsTask(RegularTask): :return: Evaluated 'with-items' expression values. """ - return self._evaluate_expression(self.task_spec.get_with_items()) + exp_res = self._evaluate_expression(self.task_spec.get_with_items()) - def _validate_values(self, with_items_values): - # Take only mapped values and check them. - values = list(with_items_values.values()) + # Expression result may contain iterables instead of lists in the + # dictionary values. So we need to convert them into lists and + # perform all needed checks. - if not all([isinstance(v, list) for v in values]): - raise exc.InputException( - "Wrong input format for: %s. List type is" - " expected for each value." % with_items_values - ) + result = {} - required_len = len(values[0]) + required_len = -1 - if not all(len(v) == required_len for v in values): - raise exc.InputException( - "Wrong input format for: %s. All arrays must" - " have the same length." % with_items_values - ) + for var, items in exp_res.items(): + if not isinstance(items, collections.Iterable): + raise exc.InputException( + "Wrong input format for: %s. Iterable type is" + " expected for each value." % result + ) + + items_list = list(items) + + result[var] = items_list + + if required_len < 0: + required_len = len(items_list) + elif len(items_list) != required_len: + raise exc.InputException( + "Wrong input format for: %s. All arrays must" + " have the same length." % exp_res + ) + + return result def _get_input_dicts(self, with_items_values): """Calculate input dictionaries for another portion of actions. diff --git a/mistral/expressions/yaql_expression.py b/mistral/expressions/yaql_expression.py index 2cab850a4..2a8f75302 100644 --- a/mistral/expressions/yaql_expression.py +++ b/mistral/expressions/yaql_expression.py @@ -22,6 +22,7 @@ from oslo_log import log as logging import six from yaql.language import exceptions as yaql_exc from yaql.language import factory +from yaql.language import utils as yaql_utils from mistral.config import cfg from mistral import exceptions as exc @@ -41,7 +42,7 @@ def get_yaql_engine_options(): "yaql.convertTuplesToLists": _YAQL_CONF.convert_tuples_to_lists, "yaql.convertSetsToLists": _YAQL_CONF.convert_sets_to_lists, "yaql.iterableDicts": _YAQL_CONF.iterable_dicts, - "yaql.convertOutputData": True + "yaql.convertOutputData": _YAQL_CONF.convert_output_data } @@ -73,6 +74,19 @@ LOG.info( INLINE_YAQL_REGEXP = '<%.*?%>' +def _sanitize_yaql_result(result): + # Expression output conversion can be disabled but we can still + # do some basic unboxing if we got an internal YAQL type. + # TODO(rakhmerov): FrozenDict doesn't provide any public method + # or property to access a regular dict that it wraps so ideally + # we need to add it to YAQL. Once it's there we need to make a + # fix here. + if isinstance(result, yaql_utils.FrozenDict): + return result._d + + return result if not inspect.isgenerator(result) else list(result) + + class YAQLEvaluator(Evaluator): @classmethod def validate(cls, expression): @@ -113,7 +127,7 @@ class YAQLEvaluator(Evaluator): ", data=%s]" % (expression, str(e), data_context) ) - return result if not inspect.isgenerator(result) else list(result) + return _sanitize_yaql_result(result) @classmethod def is_expression(cls, s): diff --git a/mistral/lang/v2/tasks.py b/mistral/lang/v2/tasks.py index a40d37e75..1258e5dc6 100644 --- a/mistral/lang/v2/tasks.py +++ b/mistral/lang/v2/tasks.py @@ -113,7 +113,7 @@ class TaskSpec(base.BaseSpec): self._workflow = data.get('workflow') self._tags = data.get('tags', []) self._input = data.get('input', {}) - self._with_items = self._transform_with_items() + self._with_items = self._get_with_items_as_dict() self._publish = data.get('publish', {}) self._publish_on_error = data.get('publish-on-error', {}) self._policies = self._group_spec( @@ -159,8 +159,9 @@ class TaskSpec(base.BaseSpec): "The length of a '{0}' task name must not exceed {1}" " symbols".format(task_name, MAX_LENGTH_TASK_NAME)) - def _transform_with_items(self): + def _get_with_items_as_dict(self): raw = self._data.get('with-items', []) + with_items = {} if isinstance(raw, six.string_types): @@ -175,10 +176,11 @@ class TaskSpec(base.BaseSpec): match = re.match(WITH_ITEMS_PTRN, item) if not match: - msg = ("Wrong format of 'with-items' property. Please use " - "format 'var in {[some, list] | <%% $.array %%> }: " - "%s" % self._data) - raise exc.InvalidModelException(msg) + raise exc.InvalidModelException( + "Wrong format of 'with-items' property. Please use " + "format 'var in {[some, list] | <%% $.array %%> }: " + "%s" % self._data + ) match_groups = match.groups() var_name = match_groups[0] diff --git a/mistral/tests/unit/engine/test_disabled_yaql_conversion.py b/mistral/tests/unit/engine/test_disabled_yaql_conversion.py new file mode 100644 index 000000000..56bd4e9c6 --- /dev/null +++ b/mistral/tests/unit/engine/test_disabled_yaql_conversion.py @@ -0,0 +1,111 @@ +# Copyright 2014 - Mirantis, Inc. +# Copyright 2015 - StackStorm, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from mistral.db.v2 import api as db_api +from mistral.engine import engine_server +from mistral import exceptions as exc +from mistral.services import workflows as wf_service +from mistral.tests.unit.engine import base as engine_test_base + + +class DisabledYAQLConversionTest(engine_test_base.EngineTestCase): + def setUp(self): + super(DisabledYAQLConversionTest, self).setUp() + + self.override_config('auth_enable', False, 'pecan') + + def test_disabled_yaql_output_conversion(self): + """Test YAQL expressions with disabled data conversion. + + The test is needed to make sure that if we disable YAQL data + conversion (for both input and output), then Mistral will handle + YAQL internal data types properly if they sneak into the Mistral + logic as part of an expression result. Particularly, we need to + make sure that the ORM framework (SQLAlchemy) will also be able + to save data properly if it comes across such a type. + + NOTE: + - set() and toSet() functions produce "frozenset" type + internally within YAQL and it should be handled properly + everywhere in the code including SQLAlchemy. + - dict() produces "FrozenDict" internally but we unwrap the + top most dict after evaluating an expression on the Mistral + side. + """ + + # Both input and output data conversion in YAQL need to be disabled + # so that we're sure that there won't be any surprises from YAQL + # like some YAQL internal types included in expression results. + self.override_config('convert_input_data', False, 'yaql') + self.override_config('convert_output_data', False, 'yaql') + + wf_text = """--- + version: '2.0' + + wf: + tasks: + task1: + publish: + var1: <% range(0,10) %> + var2: <% set(15) %> + var3: <% [4, 5, 6].toSet() %> + var4: <% {k1 => v1, k2 => v2} %> + var5: <% dict([['a', 2], ['b', 4]]) %> + var6: <% [1, dict(k3 => v3, k4 => v4), 3] %> + """ + + wf_service.create_workflows(wf_text) + + # Start workflow. + wf_ex = self.engine.start_workflow('wf') + + self.await_workflow_success(wf_ex.id) + + with db_api.transaction(): + # Note: We need to reread execution to access related tasks. + wf_ex = db_api.get_workflow_execution(wf_ex.id) + + tasks = wf_ex.task_executions + + t_ex = self._assert_single_item(tasks, name='task1') + + self.assertDictEqual( + { + 'var1': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + 'var2': [15], + 'var3': [4, 5, 6], + 'var4': {'k1': 'v1', 'k2': 'v2'}, + 'var5': {'a': 2, 'b': 4}, + 'var6': [1, {'k3': 'v3', 'k4': 'v4'}, 3], + }, + t_ex.published + ) + + def test_configuration_check(self): + # Kill all the threads started by default and try to start an + # instance of engine server again with the wrong configuration. + self.kill_threads() + + self.override_config('convert_input_data', True, 'yaql') + self.override_config('convert_output_data', False, 'yaql') + + eng_svc = engine_server.get_oslo_service(setup_profiler=False) + + self.assertRaisesWithMessage( + exc.MistralError, + "The config property 'yaql.convert_output_data' is set to False " + "so 'yaql.convert_input_data' must also be set to False.", + eng_svc.start + ) diff --git a/mistral/tests/unit/expressions/test_yaql_expression.py b/mistral/tests/unit/expressions/test_yaql_expression.py index be7d697e2..3506dcfb9 100644 --- a/mistral/tests/unit/expressions/test_yaql_expression.py +++ b/mistral/tests/unit/expressions/test_yaql_expression.py @@ -327,6 +327,9 @@ class InlineYAQLEvaluatorTest(base.BaseTest): {'a': 1}) def test_set_of_dicts(self): + # This test makes sense only if YAQL expression output conversion + # is enabled. + self.override_config('convert_output_data', True, 'yaql') self.override_config('convert_sets_to_lists', True, 'yaql') def _restore_engine(old_engine):