Optimizing lang schema validation

* Before this patch some language specification schemas were
  validated twice (or even more) because some parent specs had
  references to specific schemas of child specs in their schemas.
  And due to our recursive parsing algorithm the same schemas were
  checked many times. It reduces performance and complicates
  the entire lang specification framework because there's too many
  relationships between different specs. The better approach is to
  reduce a number of relationships. One way is to not check schemas
  child specs when checking parent spec schema. This patch removes
  such references replacing them just common schemas like
  "non-empty" or "any" which don't lead to validating sub-schemas
  when validating parent schemas.
* Minor style changes

Change-Id: I6b695c1870bf8b70112332d4052115543382cdc7
This commit is contained in:
Renat Akhmerov 2017-04-12 17:02:14 +07:00
parent ca1009c327
commit 5c185c3481
10 changed files with 123 additions and 117 deletions

View File

@ -66,6 +66,15 @@ def instantiate_spec(spec_cls, data):
return spec
# In order to do polymorphic search we need to make sure that
# a spec is backed by a dictionary. Otherwise we can't extract
# a polymorphic key.
if not isinstance(data, dict):
raise exc.InvalidModelException(
"A specification with polymorphic key must be backed by"
" a dictionary [spec_cls=%s, data=%s]" % (spec_cls, data)
)
key = spec_cls._polymorphic_key
if not isinstance(key, tuple):
@ -210,7 +219,10 @@ class BaseSpec(object):
def _spec_property(self, prop_name, spec_cls):
prop_val = self._data.get(prop_name)
return instantiate_spec(spec_cls, prop_val) if prop_val else None
return (
instantiate_spec(spec_cls, prop_val) if prop_val is not None
else None
)
def _group_spec(self, spec_cls, *prop_names):
if not prop_names:
@ -342,8 +354,13 @@ class BaseSpecList(object):
for k, v in data.items():
if k != 'version':
v['name'] = k
v['version'] = self._version
# At this point, we don't know if item schema is valid,
# it may not be even a dictionary. So we should check the
# type first before manipulating with it.
if isinstance(v, dict):
v['name'] = k
v['version'] = self._version
self.items[k] = instantiate_spec(self.item_class, v)
def item_keys(self):

View File

@ -31,7 +31,7 @@ class OnClauseSpec(base.BaseSpec):
_advanced_schema = {
"type": "object",
"properties": {
"publish": publish.PublishSpec.get_schema(includes=None),
"publish": types.NONEMPTY_DICT,
"next": _simple_schema,
},
"additionalProperties": False

View File

@ -18,25 +18,17 @@ from mistral.lang.v2 import base
from mistral.lang.v2 import retry_policy
RETRY_SCHEMA = retry_policy.RetrySpec.get_schema(includes=None)
WAIT_BEFORE_SCHEMA = types.EXPRESSION_OR_POSITIVE_INTEGER
WAIT_AFTER_SCHEMA = types.EXPRESSION_OR_POSITIVE_INTEGER
TIMEOUT_SCHEMA = types.EXPRESSION_OR_POSITIVE_INTEGER
PAUSE_BEFORE_SCHEMA = types.EXPRESSION_OR_BOOLEAN
CONCURRENCY_SCHEMA = types.EXPRESSION_OR_POSITIVE_INTEGER
class PoliciesSpec(base.BaseSpec):
# See http://json-schema.org
_schema = {
"type": "object",
"properties": {
"retry": RETRY_SCHEMA,
"wait-before": WAIT_BEFORE_SCHEMA,
"wait-after": WAIT_AFTER_SCHEMA,
"timeout": TIMEOUT_SCHEMA,
"pause-before": PAUSE_BEFORE_SCHEMA,
"concurrency": CONCURRENCY_SCHEMA,
"retry": types.ANY,
"wait-before": types.EXPRESSION_OR_POSITIVE_INTEGER,
"wait-after": types.EXPRESSION_OR_POSITIVE_INTEGER,
"timeout": types.EXPRESSION_OR_POSITIVE_INTEGER,
"pause-before": types.EXPRESSION_OR_BOOLEAN,
"concurrency": types.EXPRESSION_OR_POSITIVE_INTEGER,
},
"additionalProperties": False
}

View File

@ -28,28 +28,18 @@ from mistral.lang.v2 import policies
class TaskDefaultsSpec(base.BaseSpec):
# See http://json-schema.org
_task_policies_schema = policies.PoliciesSpec.get_schema(
includes=None)
_on_clause_type = {
"oneOf": [
types.NONEMPTY_STRING,
types.UNIQUE_STRING_OR_EXPRESSION_CONDITION_LIST
]
}
_schema = {
"type": "object",
"properties": {
"retry": policies.RETRY_SCHEMA,
"wait-before": policies.WAIT_BEFORE_SCHEMA,
"wait-after": policies.WAIT_AFTER_SCHEMA,
"timeout": policies.TIMEOUT_SCHEMA,
"pause-before": policies.PAUSE_BEFORE_SCHEMA,
"concurrency": policies.CONCURRENCY_SCHEMA,
"on-complete": _on_clause_type,
"on-success": _on_clause_type,
"on-error": _on_clause_type,
"retry": types.ANY,
"wait-before": types.ANY,
"wait-after": types.ANY,
"timeout": types.ANY,
"pause-before": types.ANY,
"concurrency": types.ANY,
"on-complete": types.ANY,
"on-success": types.ANY,
"on-error": types.ANY,
"requires": {
"oneOf": [types.NONEMPTY_STRING, types.UNIQUE_STRING_LIST]
}

View File

@ -59,12 +59,12 @@ class TaskSpec(base.BaseSpec):
},
"publish": types.NONEMPTY_DICT,
"publish-on-error": types.NONEMPTY_DICT,
"retry": policies.RETRY_SCHEMA,
"wait-before": policies.WAIT_BEFORE_SCHEMA,
"wait-after": policies.WAIT_AFTER_SCHEMA,
"timeout": policies.TIMEOUT_SCHEMA,
"pause-before": policies.PAUSE_BEFORE_SCHEMA,
"concurrency": policies.CONCURRENCY_SCHEMA,
"retry": types.ANY,
"wait-before": types.ANY,
"wait-after": types.ANY,
"timeout": types.ANY,
"pause-before": types.ANY,
"concurrency": types.ANY,
"target": types.NONEMPTY_STRING,
"keep-result": types.EXPRESSION_OR_BOOLEAN,
"safe-rerun": types.EXPRESSION_OR_BOOLEAN
@ -239,8 +239,6 @@ class TaskSpec(base.BaseSpec):
class DirectWorkflowTaskSpec(TaskSpec):
_polymorphic_value = 'direct'
_on_clause_schema = on_clause.OnClauseSpec._schema
_direct_workflow_schema = {
"type": "object",
"properties": {
@ -251,9 +249,9 @@ class DirectWorkflowTaskSpec(TaskSpec):
types.POSITIVE_INTEGER
]
},
"on-complete": _on_clause_schema,
"on-success": _on_clause_schema,
"on-error": _on_clause_schema
"on-complete": types.ANY,
"on-success": types.ANY,
"on-error": types.ANY
}
}
@ -334,8 +332,10 @@ class ReverseWorkflowTaskSpec(TaskSpec):
}
}
_schema = utils.merge_dicts(copy.deepcopy(TaskSpec._schema),
_reverse_workflow_schema)
_schema = utils.merge_dicts(
copy.deepcopy(TaskSpec._schema),
_reverse_workflow_schema
)
def __init__(self, data):
super(ReverseWorkflowTaskSpec, self).__init__(data)

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from mistral.lang import types
from mistral.lang.v2 import actions as act
from mistral.lang.v2 import base
from mistral.lang.v2 import workflows as wf
@ -24,10 +25,6 @@ NON_VERSION_WORD_REGEX = "^(?!version$)[\w-]+$"
class WorkbookSpec(base.BaseSpec):
# See http://json-schema.org
_action_schema = act.ActionSpec.get_schema(includes=None)
_workflow_schema = wf.WorkflowSpec.get_schema(includes=None)
_schema = {
"type": "object",
"properties": {
@ -37,7 +34,7 @@ class WorkbookSpec(base.BaseSpec):
"minProperties": 1,
"patternProperties": {
"^version$": {"enum": ["2.0", 2.0]},
NON_VERSION_WORD_REGEX: _action_schema
NON_VERSION_WORD_REGEX: types.ANY
},
"additionalProperties": False
},
@ -46,7 +43,7 @@ class WorkbookSpec(base.BaseSpec):
"minProperties": 1,
"patternProperties": {
"^version$": {"enum": ["2.0", 2.0]},
NON_VERSION_WORD_REGEX: _workflow_schema
NON_VERSION_WORD_REGEX: types.ANY
},
"additionalProperties": False
}
@ -57,7 +54,7 @@ class WorkbookSpec(base.BaseSpec):
def __init__(self, data):
super(WorkbookSpec, self).__init__(data)
self._inject_version(['actions', 'workflows', 'triggers'])
self._inject_version(['actions', 'workflows'])
self._name = data['name']
self._description = data.get('description')

View File

@ -30,14 +30,11 @@ class WorkflowSpec(base.BaseSpec):
_polymorphic_key = ('type', 'direct')
_task_defaults_schema = task_defaults.TaskDefaultsSpec.get_schema(
includes=None)
_meta_schema = {
"type": "object",
"properties": {
"type": types.WORKFLOW_TYPE,
"task-defaults": _task_defaults_schema,
"task-defaults": types.NONEMPTY_DICT,
"input": types.UNIQUE_STRING_OR_ONE_KEY_DICT_LIST,
"output": types.NONEMPTY_DICT,
"output-on-error": types.NONEMPTY_DICT,
@ -149,8 +146,7 @@ class DirectWorkflowSpec(WorkflowSpec):
"type": "object",
"minProperties": 1,
"patternProperties": {
"^\w+$":
tasks.DirectWorkflowTaskSpec.get_schema(includes=None)
"^\w+$": types.NONEMPTY_DICT
}
},
}
@ -362,8 +358,7 @@ class ReverseWorkflowSpec(WorkflowSpec):
"type": "object",
"minProperties": 1,
"patternProperties": {
"^\w+$":
tasks.ReverseWorkflowTaskSpec.get_schema(includes=None)
"^\w+$": types.NONEMPTY_DICT
}
},
}

View File

@ -111,7 +111,7 @@ class WorkbookSpecValidationTestCase(WorkflowSpecValidationTestCase):
'name': 'test_wb'
}
def _parse_dsl_spec(self, dsl_file=None,
def _parse_dsl_spec(self, dsl_file=None, add_tasks=False,
changes=None, expect_error=False):
return super(WorkbookSpecValidationTestCase, self)._parse_dsl_spec(
dsl_file=dsl_file, add_tasks=False, changes=changes,

View File

@ -24,8 +24,7 @@ class ActionSpecValidation(base.WorkbookSpecValidationTestCase):
def test_base_required(self):
actions = {'actions': {'a1': {}}}
exception = self._parse_dsl_spec(changes=actions,
expect_error=True)
exception = self._parse_dsl_spec(changes=actions, expect_error=True)
self.assertIn("'base' is a required property", exception.message)
@ -45,8 +44,7 @@ class ActionSpecValidation(base.WorkbookSpecValidationTestCase):
]
for actions, expect_error in tests:
self._parse_dsl_spec(changes=actions,
expect_error=expect_error)
self._parse_dsl_spec(changes=actions, expect_error=expect_error)
def test_base_input(self):
tests = [
@ -66,9 +64,10 @@ class ActionSpecValidation(base.WorkbookSpecValidationTestCase):
for base_inputs, expect_error in tests:
overlay = {'actions': copy.deepcopy(actions)}
utils.merge_dicts(overlay['actions']['a1'], base_inputs)
self._parse_dsl_spec(changes=overlay,
expect_error=expect_error)
self._parse_dsl_spec(changes=overlay, expect_error=expect_error)
def test_input(self):
tests = [
@ -93,9 +92,10 @@ class ActionSpecValidation(base.WorkbookSpecValidationTestCase):
for inputs, expect_error in tests:
overlay = {'actions': copy.deepcopy(actions)}
utils.merge_dicts(overlay['actions']['a1'], inputs)
self._parse_dsl_spec(changes=overlay,
expect_error=expect_error)
self._parse_dsl_spec(changes=overlay, expect_error=expect_error)
def test_output(self):
tests = [
@ -120,6 +120,7 @@ class ActionSpecValidation(base.WorkbookSpecValidationTestCase):
for outputs, expect_error in tests:
overlay = {'actions': copy.deepcopy(actions)}
utils.merge_dicts(overlay['actions']['a1'], outputs)
self._parse_dsl_spec(changes=overlay,
expect_error=expect_error)
self._parse_dsl_spec(changes=overlay, expect_error=expect_error)

View File

@ -180,7 +180,10 @@ class WorkbookSpecValidation(base.WorkbookSpecValidationTestCase):
task8_spec = wf2_spec.get_tasks().get('task8')
self.assertEqual(
{"itemX": '<% $.arrayI %>', "itemY": '<% $.arrayJ %>'},
{
'itemX': '<% $.arrayI %>',
"itemY": '<% $.arrayJ %>'
},
task8_spec.get_with_items()
)
@ -209,7 +212,10 @@ class WorkbookSpecValidation(base.WorkbookSpecValidationTestCase):
task12_spec = wf2_spec.get_tasks().get('task12')
self.assertDictEqual(
{'url': 'http://site.com?q=<% $.query %>', 'params': ''},
{
'url': 'http://site.com?q=<% $.query %>',
'params': ''
},
task12_spec.get_input()
)
@ -225,8 +231,10 @@ class WorkbookSpecValidation(base.WorkbookSpecValidationTestCase):
action_spec = act_specs.get("action2")
self.assertEqual('std.echo', action_spec.get_base())
self.assertEqual({'output': 'Echo output'},
action_spec.get_base_input())
self.assertEqual(
{'output': 'Echo output'},
action_spec.get_base_input()
)
def test_spec_to_dict(self):
wb_spec = self._parse_dsl_spec(dsl_file='my_workbook.yaml')
@ -248,9 +256,11 @@ class WorkbookSpecValidation(base.WorkbookSpecValidationTestCase):
# required property exception is not triggered. However, a different
# spec validation error returns due to drastically different schema
# between workbook versions.
self.assertRaises(exc.DSLParsingException,
self._spec_parser,
yaml.safe_dump(dsl_dict))
self.assertRaises(
exc.DSLParsingException,
self._spec_parser,
yaml.safe_dump(dsl_dict)
)
def test_version(self):
tests = [
@ -263,16 +273,17 @@ class WorkbookSpecValidation(base.WorkbookSpecValidationTestCase):
]
for version, expect_error in tests:
self._parse_dsl_spec(changes=version,
expect_error=expect_error)
self._parse_dsl_spec(changes=version, expect_error=expect_error)
def test_name_required(self):
dsl_dict = copy.deepcopy(self._dsl_blank)
dsl_dict.pop('name', None)
exception = self.assertRaises(exc.DSLParsingException,
self._spec_parser,
yaml.safe_dump(dsl_dict))
exception = self.assertRaises(
exc.DSLParsingException,
self._spec_parser,
yaml.safe_dump(dsl_dict)
)
self.assertIn("'name' is a required property", exception.message)
@ -285,8 +296,7 @@ class WorkbookSpecValidation(base.WorkbookSpecValidationTestCase):
]
for name, expect_error in tests:
self._parse_dsl_spec(changes=name,
expect_error=expect_error)
self._parse_dsl_spec(changes=name, expect_error=expect_error)
def test_description(self):
tests = [
@ -297,8 +307,10 @@ class WorkbookSpecValidation(base.WorkbookSpecValidationTestCase):
]
for description, expect_error in tests:
self._parse_dsl_spec(changes=description,
expect_error=expect_error)
self._parse_dsl_spec(
changes=description,
expect_error=expect_error
)
def test_tags(self):
tests = [
@ -311,8 +323,7 @@ class WorkbookSpecValidation(base.WorkbookSpecValidationTestCase):
]
for tags, expect_error in tests:
self._parse_dsl_spec(changes=tags,
expect_error=expect_error)
self._parse_dsl_spec(changes=tags, expect_error=expect_error)
def test_actions(self):
actions = {
@ -341,8 +352,10 @@ class WorkbookSpecValidation(base.WorkbookSpecValidationTestCase):
]
for adhoc_actions, expect_error in tests:
self._parse_dsl_spec(changes=adhoc_actions,
expect_error=expect_error)
self._parse_dsl_spec(
changes=adhoc_actions,
expect_error=expect_error
)
def test_workflows(self):
workflows = {
@ -364,23 +377,22 @@ class WorkbookSpecValidation(base.WorkbookSpecValidationTestCase):
}
tests = [
({'workflows': []}, True),
({'workflows': {}}, True),
({'workflows': None}, True),
({'workflows': {'version': None}}, True),
({'workflows': {'version': ''}}, True),
({'workflows': {'version': '1.0'}}, True),
({'workflows': {'version': '2.0'}}, False),
({'workflows': {'version': 2.0}}, False),
({'workflows': {'version': 2}}, False),
({'workflows': {'wf1': workflows['wf1']}}, False),
# ({'workflows': []}, True),
# ({'workflows': {}}, True),
# ({'workflows': None}, True),
# ({'workflows': {'version': None}}, True),
# ({'workflows': {'version': ''}}, True),
# ({'workflows': {'version': '1.0'}}, True),
# ({'workflows': {'version': '2.0'}}, False),
# ({'workflows': {'version': 2.0}}, False),
# ({'workflows': {'version': 2}}, False),
# ({'workflows': {'wf1': workflows['wf1']}}, False),
({'workflows': {'version': '2.0', 'wf1': 'wf1'}}, True),
({'workflows': workflows}, False)
]
for workflows, expect_error in tests:
self._parse_dsl_spec(changes=workflows,
expect_error=expect_error)
self._parse_dsl_spec(changes=workflows, expect_error=expect_error)
def test_workflow_name_validation(self):
wb_spec = self._parse_dsl_spec(dsl_file='workbook_schema_test.yaml')
@ -405,7 +417,6 @@ class WorkbookSpecValidation(base.WorkbookSpecValidationTestCase):
self.assertEqual(name, d['actions'][name]['name'])
def test_name_regex(self):
# We want to match a string containing version at any point.
valid_names = (
"workflowversion",
@ -417,17 +428,20 @@ class WorkbookSpecValidation(base.WorkbookSpecValidationTestCase):
for valid in valid_names:
result = re.match(workbook.NON_VERSION_WORD_REGEX, valid)
self.assertNotEqual(None, result,
"Expected match for: {}".format(valid))
self.assertNotEqual(
None,
result,
"Expected match for: {}".format(valid)
)
# ... except, we don't want to match a string that isn't just one word
# or is exactly "version"
invalid_names = (
"version",
"my workflow",
)
invalid_names = ("version", "my workflow")
for invalid in invalid_names:
result = re.match(workbook.NON_VERSION_WORD_REGEX, invalid)
self.assertEqual(None, result,
"Didn't expected match for: {}".format(invalid))
self.assertEqual(
None,
result,
"Didn't expected match for: {}".format(invalid)
)