From f1597d77e6cba3bd9dbd20f966656203e619ad25 Mon Sep 17 00:00:00 2001 From: Kurt Griffiths Date: Wed, 17 May 2017 09:47:10 -0600 Subject: [PATCH] feat(CompiledRouter): Add an intermediate AST step to the compiler (#1040) Rather than compiling the routing tree directly to Python code, first generate an AST and then use it to produce the code. This provides several benefits, including: * It makes the compilation process easier to reason about. * It makes it easier to keep track of indentation and whitespace. * It sets us up for being able to make transformations that we will need to do to support URI template filters, etc. in the future. --- falcon/routing/compiled.py | 258 ++++++++++++++++++++++++++++------- tests/test_custom_router.py | 1 - tests/test_default_router.py | 26 +++- 3 files changed, 229 insertions(+), 56 deletions(-) diff --git a/falcon/routing/compiled.py b/falcon/routing/compiled.py index 0148893..2c6a53f 100644 --- a/falcon/routing/compiled.py +++ b/falcon/routing/compiled.py @@ -18,7 +18,7 @@ import keyword import re -_FIELD_REGEX = re.compile('{([^}]*)}') +_FIELD_PATTERN = re.compile('{([^}]*)}') _TAB_STR = ' ' * 4 @@ -35,14 +35,26 @@ class CompiledRouter(object): processing quite fast. """ + __slots__ = ( + '_ast', + '_find', + '_finder_src', + '_patterns', + '_return_values', + '_roots', + ) + def __init__(self): self._roots = [] self._find = self._compile() - self._code_lines = None - self._src = None - self._expressions = None + self._finder_src = None + self._patterns = None self._return_values = None + @property + def finder_src(self): + return self._finder_src + def add_route(self, uri_template, method_map, resource): """Adds a route between a URI path template and a resource. @@ -66,7 +78,7 @@ class CompiledRouter(object): # values from more shallow nodes. # 2. For complex nodes, re.compile() raises a nasty error # - fields = _FIELD_REGEX.findall(uri_template) + fields = _FIELD_PATTERN.findall(uri_template) used_names = set() for name in fields: is_identifier = re.match('[A-Za-z_][A-Za-z0-9_]*$', name) @@ -145,28 +157,28 @@ class CompiledRouter(object): path = uri.lstrip('/').split('/') params = {} - node = self._find(path, self._return_values, self._expressions, params) + node = self._find(path, self._return_values, self._patterns, params) if node is not None: return node.resource, node.method_map, params, node.uri_template else: return None - def _compile_tree(self, nodes, indent=1, level=0, fast_return=True): - """Generates Python code for a routing tree or subtree.""" + # ----------------------------------------------------------------- + # Private + # ----------------------------------------------------------------- - def line(text, indent_offset=0): - pad = _TAB_STR * (indent + indent_offset) - self._code_lines.append(pad + text) + def _generate_ast(self, nodes, parent, return_values, patterns, level=0, fast_return=True): + """Generates a coarse AST for the router.""" # NOTE(kgriffs): Base case if not nodes: return - line('if path_len > %d:' % level) - indent += 1 + outer_parent = _CxIfPathLength('>', level) + parent.append(outer_parent) + parent = outer_parent - level_indent = indent found_simple = False # NOTE(kgriffs & philiptzou): Sort nodes in this sequence: @@ -195,20 +207,18 @@ class CompiledRouter(object): # contain anything more than a single literal or variable, # and they need to be checked using a pre-compiled regular # expression. - expression_idx = len(self._expressions) - self._expressions.append(node.var_regex) + pattern_idx = len(patterns) + patterns.append(node.var_pattern) - line('match = expressions[%d].match(path[%d]) # %s' % ( - expression_idx, level, node.var_regex.pattern)) - - line('if match is not None:') - indent += 1 - line('params.update(match.groupdict())') + construct = _CxIfPathSegmentPattern(level, pattern_idx, + node.var_pattern.pattern) + parent.append(construct) + parent = construct else: # NOTE(kgriffs): Simple nodes just capture the entire path # segment as the value for the param. - line('params["%s"] = path[%d]' % (node.var_name, level)) + parent.append(_CxSetParam(node.var_name, level)) # NOTE(kgriffs): We don't allow multiple simple var nodes # to exist at the same level, e.g.: @@ -222,59 +232,78 @@ class CompiledRouter(object): else: # NOTE(kgriffs): Not a param, so must match exactly - line('if path[%d] == "%s":' % (level, node.raw_segment)) - indent += 1 + construct = _CxIfPathSegmentLiteral(level, node.raw_segment) + parent.append(construct) + parent = construct if node.resource is not None: # NOTE(kgriffs): This is a valid route, so we will want to # return the relevant information. - resource_idx = len(self._return_values) - self._return_values.append(node) + resource_idx = len(return_values) + return_values.append(node) - self._compile_tree(node.children, indent, level + 1, fast_return) + self._generate_ast( + node.children, + parent, + return_values, + patterns, + level + 1, + fast_return + ) if node.resource is None: if fast_return: - line('return None') + parent.append(_CxReturnNone()) else: # NOTE(kgriffs): Make sure that we have consumed all of # the segments for the requested route; otherwise we could # mistakenly match "/foo/23/bar" against "/foo/{id}". - line('if path_len == %d:' % (level + 1)) - line('return return_values[%d]' % resource_idx, 1) + construct = _CxIfPathLength('==', level + 1) + construct.append(_CxReturnValue(resource_idx)) + parent.append(construct) if fast_return: - line('return None') + parent.append(_CxReturnNone()) - indent = level_indent + parent = outer_parent if not found_simple and fast_return: - line('return None') + parent.append(_CxReturnNone()) def _compile(self): - """Generates Python code for entire routing tree. + """Generates Python code for the entire routing tree. - The generated code is compiled and the resulting Python method is - returned. + The generated code is compiled and the resulting Python method + is returned. """ - self._return_values = [] - self._expressions = [] - self._code_lines = [ - 'def find(path, return_values, expressions, params):', + + src_lines = [ + 'def find(path, return_values, patterns, params):', _TAB_STR + 'path_len = len(path)', ] - self._compile_tree(self._roots) + self._return_values = [] + self._patterns = [] - self._code_lines.append( + self._ast = _CxParent() + self._generate_ast( + self._roots, + self._ast, + self._return_values, + self._patterns + ) + + src_lines.append(self._ast.src(0)) + + src_lines.append( # PERF(kgriffs): Explicit return of None is faster than implicit _TAB_STR + 'return None' ) - self._src = '\n'.join(self._code_lines) + self._finder_src = '\n'.join(src_lines) scope = {} - exec(compile(self._src, '', 'exec'), scope) + exec(compile(self._finder_src, '', 'exec'), scope) return scope['find'] @@ -294,11 +323,12 @@ class CompiledRouterNode(object): self.is_var = False self.is_complex = False self.var_name = None + self.var_pattern = None # NOTE(kgriffs): CompiledRouter.add_route validates field names, # so here we can just assume they are OK and use the simple - # _FIELD_REGEX to match them. - matches = list(_FIELD_REGEX.finditer(raw_segment)) + # _FIELD_PATTERN to match them. + matches = list(_FIELD_PATTERN.finditer(raw_segment)) if not matches: self.is_var = False @@ -334,11 +364,11 @@ class CompiledRouterNode(object): # trick the parser into doing the right thing. escaped_segment = re.sub(r'[\.\(\)\[\]\?\$\*\+\^\|]', r'\\\g<0>', raw_segment) - seg_pattern = _FIELD_REGEX.sub(r'(?P<\1>.+)', escaped_segment) - seg_pattern = '^' + seg_pattern + '$' + pattern_text = _FIELD_PATTERN.sub(r'(?P<\1>.+)', escaped_segment) + pattern_text = '^' + pattern_text + '$' self.is_complex = True - self.var_regex = re.compile(seg_pattern) + self.var_pattern = re.compile(pattern_text) def matches(self, segment): """Returns True if this node matches the supplied template segment.""" @@ -360,7 +390,7 @@ class CompiledRouterNode(object): # simple, complex ==> False # simple, string ==> False # complex, simple ==> False - # complex, complex ==> (Depend) + # complex, complex ==> (Maybe) # complex, string ==> False # string, simple ==> False # string, complex ==> False @@ -389,8 +419,8 @@ class CompiledRouterNode(object): # if self.is_complex: if other.is_complex: - return (_FIELD_REGEX.sub('v', self.raw_segment) == - _FIELD_REGEX.sub('v', segment)) + return (_FIELD_PATTERN.sub('v', self.raw_segment) == + _FIELD_PATTERN.sub('v', segment)) return False else: @@ -399,3 +429,125 @@ class CompiledRouterNode(object): # NOTE(kgriffs): If self is a static string match, then all the cases # for other are False, so no need to check. return False + + +# -------------------------------------------------------------------- +# AST Constructs +# +# NOTE(kgriffs): These constructs are used to create a very coarse +# AST that can then be used to generate Python source code for the +# router. Using an AST like this makes it easier to reason about +# the compilation process, and affords syntactical transformations +# that would otherwise be at best confusing and at worst extremely +# tedious and error-prone if they were to be attempted directly +# against the Python source code. +# -------------------------------------------------------------------- + + +class _CxParent(object): + def __init__(self): + self._children = [] + + def append(self, construct): + self._children.append(construct) + + def src(self, indentation): + return self._children_src(indentation + 1) + + def _children_src(self, indentation): + src_lines = [ + child.src(indentation) + for child in self._children + ] + + return '\n'.join(src_lines) + + +class _CxIfPathLength(_CxParent): + def __init__(self, comparison, length): + super(_CxIfPathLength, self).__init__() + self._comparison = comparison + self._length = length + + def src(self, indentation): + template = '{0}if path_len {1} {2}:\n{3}' + return template.format( + _TAB_STR * indentation, + self._comparison, + self._length, + self._children_src(indentation + 1) + ) + + +class _CxIfPathSegmentLiteral(_CxParent): + def __init__(self, segment_idx, literal): + super(_CxIfPathSegmentLiteral, self).__init__() + self._segment_idx = segment_idx + self._literal = literal + + def src(self, indentation): + template = "{0}if path[{1}] == '{2}':\n{3}" + return template.format( + _TAB_STR * indentation, + self._segment_idx, + self._literal, + self._children_src(indentation + 1) + ) + + +class _CxIfPathSegmentPattern(_CxParent): + def __init__(self, segment_idx, pattern_idx, pattern_text): + super(_CxIfPathSegmentPattern, self).__init__() + self._segment_idx = segment_idx + self._pattern_idx = pattern_idx + self._pattern_text = pattern_text + + def src(self, indentation): + lines = [] + + lines.append( + '{0}match = patterns[{1}].match(path[{2}]) # {3}'.format( + _TAB_STR * indentation, + self._pattern_idx, + self._segment_idx, + self._pattern_text, + ) + ) + + lines.append('{0}if match is not None:'.format(_TAB_STR * indentation)) + lines.append('{0}params.update(match.groupdict())'.format( + _TAB_STR * (indentation + 1) + )) + + lines.append(self._children_src(indentation + 1)) + + return '\n'.join(lines) + + +class _CxReturnNone(object): + def src(self, indentation): + return '{0}return None'.format(_TAB_STR * indentation) + + +class _CxReturnValue(object): + def __init__(self, value_idx): + self._value_idx = value_idx + + def src(self, indentation): + return '{0}return return_values[{1}]'.format( + _TAB_STR * indentation, + self._value_idx + ) + + +class _CxSetParam(object): + def __init__(self, param_name, segment_idx): + self._param_name = param_name + self._segment_idx = segment_idx + + def src(self, indentation): + return "{0}params['{1}'] = path[{2}]".format( + _TAB_STR * indentation, + self._param_name, + self._segment_idx, + ) diff --git a/tests/test_custom_router.py b/tests/test_custom_router.py index 29b43a6..b0610aa 100644 --- a/tests/test_custom_router.py +++ b/tests/test_custom_router.py @@ -5,7 +5,6 @@ from falcon import testing class TestCustomRouter(testing.TestBase): def test_custom_router_add_route_should_be_used(self): - check = [] class CustomRouter(object): diff --git a/tests/test_default_router.py b/tests/test_default_router.py index 39fdd07..60fe9af 100644 --- a/tests/test_default_router.py +++ b/tests/test_default_router.py @@ -1,3 +1,5 @@ +import textwrap + import pytest import falcon @@ -224,6 +226,20 @@ def test_root_path(): resource, __, __, __ = router.find('/') assert resource.resource_id == 42 + expected_src = textwrap.dedent(""" + def find(path, return_values, patterns, params): + path_len = len(path) + if path_len > 0: + if path[0] == '': + if path_len == 1: + return return_values[0] + return None + return None + return None + """).strip() + + assert router.finder_src == expected_src + @pytest.mark.parametrize('uri_template', [ '/{field}{field}', @@ -309,8 +325,14 @@ def test_invalid_field_name(router, uri_template): router.add_route(uri_template, {}, ResourceWithId(-1)) -def test_dump(router): - print(router._src) +def test_print_src(router): + """Diagnostic test that simply prints the router's find() source code. + + Example: + + $ tox -e py27_debug -- -k test_print_src -s + """ + print('\n\n' + router.finder_src + '\n') def test_override(router):