From 8d75784356c948bb6fcabbb6e3e0b6b59a82bcf6 Mon Sep 17 00:00:00 2001
From: Renat Akhmerov <renat.akhmerov@gmail.com>
Date: Tue, 11 Feb 2020 11:18:50 +0700
Subject: [PATCH] Add "convert_output_data" config property for YAQL

* Adding "convert_output_data" config property gives an opportunity
  to increase overal performance. If YAQL always converts an expression
  result, it often takes significant CPU time and overall workflow
  execution time increases. It is especially important when a workflow
  publishes lots of data into the context and uses big workflow
  environments. It's been tested on a very big workflow (~7k tasks)
  with a big workflow environment (~2.5mb) that often uses the YAQL
  function "<% env() %>". This function basically just returns the
  workflow environment.
* Created all necessary unit tests.
* Other style fixes.

Change-Id: Ie3169ec884ec9a0e7e50327dd03cd78dcda0a39b
---
 mistral/config.py                             |  27 ++++-
 mistral/engine/engine_server.py               |  11 ++
 mistral/engine/tasks.py                       |  44 ++++---
 mistral/expressions/yaql_expression.py        |  18 ++-
 mistral/lang/v2/tasks.py                      |  14 ++-
 .../engine/test_disabled_yaql_conversion.py   | 111 ++++++++++++++++++
 .../unit/expressions/test_yaql_expression.py  |   3 +
 7 files changed, 197 insertions(+), 31 deletions(-)
 create mode 100644 mistral/tests/unit/engine/test_disabled_yaql_conversion.py

diff --git a/mistral/config.py b/mistral/config.py
index e8e3d8716..9b614dc7b 100644
--- a/mistral/config.py
+++ b/mistral/config.py
@@ -615,7 +615,7 @@ yaql_opts = [
         'convert_input_data',
         default=True,
         help=_('Enables input data conversion for YAQL expressions. If set '
-               'to True then YAQL will convert mutable data structures '
+               'to True, YAQL will convert mutable data structures '
                '(lists, dicts, sets) into their immutable versions. That '
                'will allow them to work with some constructs that require '
                'hashable types even if elements are not hashable. For '
@@ -628,23 +628,38 @@ yaql_opts = [
                'performance boost if the input data for an expression is '
                'large.')
     ),
+    cfg.BoolOpt(
+        'convert_output_data',
+        default=True,
+        help=_('Enables output data conversion for YAQL expressions.'
+               'If set to False, it is possible that YAQL will generate '
+               'an output that will be not JSON-serializable. For example, '
+               'if an expression has ".toSet()" in the end to convert a list '
+               'into a set. It does not mean though that such functions '
+               'cannot be used, they can still be used in expressions but '
+               'user has to keep in mind of what type a result will be, '
+               'whereas if the value of ths property is True YAQL will '
+               'convert the result to a JSON-compatible type.')
+    ),
     cfg.BoolOpt(
         'convert_tuples_to_lists',
         default=True,
-        help=_('When set to true, yaql converts all tuples in the expression '
-               'result to lists.')
+        help=_('When set to True, yaql converts all tuples in the expression '
+               'result to lists. It works only if "convert_output_data" is '
+               'set to True.')
     ),
     cfg.BoolOpt(
         'convert_sets_to_lists',
         default=False,
-        help=_('When set to true, yaql converts all sets in the expression '
+        help=_('When set to True, yaql converts all sets in the expression '
                'result to lists. Otherwise the produced result may contain '
-               'sets that are not JSON-serializable.')
+               'sets that are not JSON-serializable. It works only if '
+               '"convert_output_data" is set to True.')
     ),
     cfg.BoolOpt(
         'iterable_dicts',
         default=False,
-        help=_('When set to true, dictionaries are considered to be iterable '
+        help=_('When set to True, dictionaries are considered to be iterable '
                'and iteration over dictionaries produces their keys (as in '
                'Python and yaql 0.2).')
     ),
diff --git a/mistral/engine/engine_server.py b/mistral/engine/engine_server.py
index 2be3390a7..3f8ff9b1d 100644
--- a/mistral/engine/engine_server.py
+++ b/mistral/engine/engine_server.py
@@ -17,6 +17,7 @@ from oslo_log import log as logging
 from mistral import config as cfg
 from mistral.db.v2 import api as db_api
 from mistral.engine import default_engine
+from mistral import exceptions as exc
 from mistral.rpc import base as rpc
 from mistral.scheduler import base as sched_base
 from mistral.service import base as service_base
@@ -31,6 +32,14 @@ LOG = logging.getLogger(__name__)
 CONF = cfg.CONF
 
 
+def _validate_config():
+    if not CONF.yaql.convert_output_data and CONF.yaql.convert_input_data:
+        raise exc.MistralError(
+            "The config property 'yaql.convert_output_data' is set to False "
+            "so 'yaql.convert_input_data' must also be set to False."
+        )
+
+
 class EngineServer(service_base.MistralService):
     """Engine server.
 
@@ -50,6 +59,8 @@ class EngineServer(service_base.MistralService):
     def start(self):
         super(EngineServer, self).start()
 
+        _validate_config()
+
         db_api.setup_db()
 
         self._scheduler = sched_base.get_system_scheduler()
diff --git a/mistral/engine/tasks.py b/mistral/engine/tasks.py
index 780c3ceac..5b2bf0795 100644
--- a/mistral/engine/tasks.py
+++ b/mistral/engine/tasks.py
@@ -16,6 +16,7 @@
 #    limitations under the License.
 
 import abc
+import collections
 import copy
 import json
 from oslo_config import cfg
@@ -780,8 +781,6 @@ class WithItemsTask(RegularTask):
         with_items_values = self._get_with_items_values()
 
         if self._is_new():
-            self._validate_values(with_items_values)
-
             action_count = len(six.next(iter(with_items_values.values())))
 
             self._prepare_runtime_context(action_count)
@@ -830,25 +829,36 @@ class WithItemsTask(RegularTask):
         :return: Evaluated 'with-items' expression values.
         """
 
-        return self._evaluate_expression(self.task_spec.get_with_items())
+        exp_res = self._evaluate_expression(self.task_spec.get_with_items())
 
-    def _validate_values(self, with_items_values):
-        # Take only mapped values and check them.
-        values = list(with_items_values.values())
+        # Expression result may contain iterables instead of lists in the
+        # dictionary values. So we need to convert them into lists and
+        # perform all needed checks.
 
-        if not all([isinstance(v, list) for v in values]):
-            raise exc.InputException(
-                "Wrong input format for: %s. List type is"
-                " expected for each value." % with_items_values
-            )
+        result = {}
 
-        required_len = len(values[0])
+        required_len = -1
 
-        if not all(len(v) == required_len for v in values):
-            raise exc.InputException(
-                "Wrong input format for: %s. All arrays must"
-                " have the same length." % with_items_values
-            )
+        for var, items in exp_res.items():
+            if not isinstance(items, collections.Iterable):
+                raise exc.InputException(
+                    "Wrong input format for: %s. Iterable type is"
+                    " expected for each value." % result
+                )
+
+            items_list = list(items)
+
+            result[var] = items_list
+
+            if required_len < 0:
+                required_len = len(items_list)
+            elif len(items_list) != required_len:
+                raise exc.InputException(
+                    "Wrong input format for: %s. All arrays must"
+                    " have the same length." % exp_res
+                )
+
+        return result
 
     def _get_input_dicts(self, with_items_values):
         """Calculate input dictionaries for another portion of actions.
diff --git a/mistral/expressions/yaql_expression.py b/mistral/expressions/yaql_expression.py
index 2cab850a4..2a8f75302 100644
--- a/mistral/expressions/yaql_expression.py
+++ b/mistral/expressions/yaql_expression.py
@@ -22,6 +22,7 @@ from oslo_log import log as logging
 import six
 from yaql.language import exceptions as yaql_exc
 from yaql.language import factory
+from yaql.language import utils as yaql_utils
 
 from mistral.config import cfg
 from mistral import exceptions as exc
@@ -41,7 +42,7 @@ def get_yaql_engine_options():
         "yaql.convertTuplesToLists": _YAQL_CONF.convert_tuples_to_lists,
         "yaql.convertSetsToLists": _YAQL_CONF.convert_sets_to_lists,
         "yaql.iterableDicts": _YAQL_CONF.iterable_dicts,
-        "yaql.convertOutputData": True
+        "yaql.convertOutputData": _YAQL_CONF.convert_output_data
     }
 
 
@@ -73,6 +74,19 @@ LOG.info(
 INLINE_YAQL_REGEXP = '<%.*?%>'
 
 
+def _sanitize_yaql_result(result):
+    # Expression output conversion can be disabled but we can still
+    # do some basic unboxing if we got an internal YAQL type.
+    # TODO(rakhmerov): FrozenDict doesn't provide any public method
+    # or property to access a regular dict that it wraps so ideally
+    # we need to add it to YAQL. Once it's there we need to make a
+    # fix here.
+    if isinstance(result, yaql_utils.FrozenDict):
+        return result._d
+
+    return result if not inspect.isgenerator(result) else list(result)
+
+
 class YAQLEvaluator(Evaluator):
     @classmethod
     def validate(cls, expression):
@@ -113,7 +127,7 @@ class YAQLEvaluator(Evaluator):
                 ", data=%s]" % (expression, str(e), data_context)
             )
 
-        return result if not inspect.isgenerator(result) else list(result)
+        return _sanitize_yaql_result(result)
 
     @classmethod
     def is_expression(cls, s):
diff --git a/mistral/lang/v2/tasks.py b/mistral/lang/v2/tasks.py
index a40d37e75..1258e5dc6 100644
--- a/mistral/lang/v2/tasks.py
+++ b/mistral/lang/v2/tasks.py
@@ -113,7 +113,7 @@ class TaskSpec(base.BaseSpec):
         self._workflow = data.get('workflow')
         self._tags = data.get('tags', [])
         self._input = data.get('input', {})
-        self._with_items = self._transform_with_items()
+        self._with_items = self._get_with_items_as_dict()
         self._publish = data.get('publish', {})
         self._publish_on_error = data.get('publish-on-error', {})
         self._policies = self._group_spec(
@@ -159,8 +159,9 @@ class TaskSpec(base.BaseSpec):
                 "The length of a '{0}' task name must not exceed {1}"
                 " symbols".format(task_name, MAX_LENGTH_TASK_NAME))
 
-    def _transform_with_items(self):
+    def _get_with_items_as_dict(self):
         raw = self._data.get('with-items', [])
+
         with_items = {}
 
         if isinstance(raw, six.string_types):
@@ -175,10 +176,11 @@ class TaskSpec(base.BaseSpec):
             match = re.match(WITH_ITEMS_PTRN, item)
 
             if not match:
-                msg = ("Wrong format of 'with-items' property. Please use "
-                       "format 'var in {[some, list] | <%% $.array %%> }: "
-                       "%s" % self._data)
-                raise exc.InvalidModelException(msg)
+                raise exc.InvalidModelException(
+                    "Wrong format of 'with-items' property. Please use "
+                    "format 'var in {[some, list] | <%% $.array %%> }: "
+                    "%s" % self._data
+                )
 
             match_groups = match.groups()
             var_name = match_groups[0]
diff --git a/mistral/tests/unit/engine/test_disabled_yaql_conversion.py b/mistral/tests/unit/engine/test_disabled_yaql_conversion.py
new file mode 100644
index 000000000..56bd4e9c6
--- /dev/null
+++ b/mistral/tests/unit/engine/test_disabled_yaql_conversion.py
@@ -0,0 +1,111 @@
+# Copyright 2014 - Mirantis, Inc.
+# Copyright 2015 - StackStorm, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+
+from mistral.db.v2 import api as db_api
+from mistral.engine import engine_server
+from mistral import exceptions as exc
+from mistral.services import workflows as wf_service
+from mistral.tests.unit.engine import base as engine_test_base
+
+
+class DisabledYAQLConversionTest(engine_test_base.EngineTestCase):
+    def setUp(self):
+        super(DisabledYAQLConversionTest, self).setUp()
+
+        self.override_config('auth_enable', False, 'pecan')
+
+    def test_disabled_yaql_output_conversion(self):
+        """Test YAQL expressions with disabled data conversion.
+
+        The test is needed to make sure that if we disable YAQL data
+        conversion (for both input and output), then Mistral will handle
+        YAQL internal data types properly if they sneak into the Mistral
+        logic as part of an expression result. Particularly, we need to
+        make sure that the ORM framework (SQLAlchemy) will also be able
+        to save data properly if it comes across such a type.
+
+        NOTE:
+            - set() and toSet() functions produce "frozenset" type
+              internally within YAQL and it should be handled properly
+              everywhere in the code including SQLAlchemy.
+            - dict() produces "FrozenDict" internally but we unwrap the
+              top most dict after evaluating an expression on the Mistral
+              side.
+        """
+
+        # Both input and output data conversion in YAQL need to be disabled
+        # so that we're sure that there won't be any surprises from YAQL
+        # like some YAQL internal types included in expression results.
+        self.override_config('convert_input_data', False, 'yaql')
+        self.override_config('convert_output_data', False, 'yaql')
+
+        wf_text = """---
+        version: '2.0'
+
+        wf:
+          tasks:
+            task1:
+              publish:
+                var1: <% range(0,10) %>
+                var2: <% set(15) %>
+                var3: <% [4, 5, 6].toSet() %>
+                var4: <% {k1 => v1, k2 => v2} %>
+                var5: <% dict([['a', 2], ['b', 4]]) %>
+                var6: <% [1, dict(k3 => v3, k4 => v4), 3] %>
+        """
+
+        wf_service.create_workflows(wf_text)
+
+        # Start workflow.
+        wf_ex = self.engine.start_workflow('wf')
+
+        self.await_workflow_success(wf_ex.id)
+
+        with db_api.transaction():
+            # Note: We need to reread execution to access related tasks.
+            wf_ex = db_api.get_workflow_execution(wf_ex.id)
+
+            tasks = wf_ex.task_executions
+
+            t_ex = self._assert_single_item(tasks, name='task1')
+
+            self.assertDictEqual(
+                {
+                    'var1': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+                    'var2': [15],
+                    'var3': [4, 5, 6],
+                    'var4': {'k1': 'v1', 'k2': 'v2'},
+                    'var5': {'a': 2, 'b': 4},
+                    'var6': [1, {'k3': 'v3', 'k4': 'v4'}, 3],
+                },
+                t_ex.published
+            )
+
+    def test_configuration_check(self):
+        # Kill all the threads started by default and try to start an
+        # instance of engine server again with the wrong configuration.
+        self.kill_threads()
+
+        self.override_config('convert_input_data', True, 'yaql')
+        self.override_config('convert_output_data', False, 'yaql')
+
+        eng_svc = engine_server.get_oslo_service(setup_profiler=False)
+
+        self.assertRaisesWithMessage(
+            exc.MistralError,
+            "The config property 'yaql.convert_output_data' is set to False "
+            "so 'yaql.convert_input_data' must also be set to False.",
+            eng_svc.start
+        )
diff --git a/mistral/tests/unit/expressions/test_yaql_expression.py b/mistral/tests/unit/expressions/test_yaql_expression.py
index be7d697e2..3506dcfb9 100644
--- a/mistral/tests/unit/expressions/test_yaql_expression.py
+++ b/mistral/tests/unit/expressions/test_yaql_expression.py
@@ -327,6 +327,9 @@ class InlineYAQLEvaluatorTest(base.BaseTest):
                           {'a': 1})
 
     def test_set_of_dicts(self):
+        # This test makes sense only if YAQL expression output conversion
+        # is enabled.
+        self.override_config('convert_output_data', True, 'yaql')
         self.override_config('convert_sets_to_lists', True, 'yaql')
 
         def _restore_engine(old_engine):