perf: Further optimizations for falcon.uri

kgriffs
2014-01-14 14:59:30 -06:00
parent 067e1b4ef2
commit 89e8b31bb2
3 changed files with 19 additions and 17 deletions


@@ -128,7 +128,8 @@ def queues_env():
     path = ('/v1/852809/queues/0fd4c8c6-bd72-11e2-8e47-db5ebd4c8125'
             '/claims/db5ebd4c8125')
-    return helpers.create_environ(path, query_string='limit=10&thing=a%20b',
+    qs = 'limit=10&thing=a%20b&x=%23%24'
+    return helpers.create_environ(path, query_string=qs,
                                   headers=request_headers)
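
The extra x=%23%24 term gives the benchmark a query string with escapes that genuinely need percent-decoding. As a minimal sanity-check sketch (standard library only, not Falcon's helpers), this is what the new string decodes to:

# Illustrative only: show what the benchmark's new query string contains
# once decoded, using the stdlib parser rather than falcon.util.uri.
try:
    from urllib.parse import parse_qs   # Python 3
except ImportError:
    from urlparse import parse_qs       # Python 2

qs = 'limit=10&thing=a%20b&x=%23%24'
print(parse_qs(qs))
# -> {'limit': ['10'], 'thing': ['a b'], 'x': ['#$']}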


@@ -104,7 +104,7 @@ def http_date_to_dt(http_date):
 def to_query_str(params):
-    """Converts a dict of params to afaln actual query string.
+    """Converts a dict of params to an actual query string.
 
     Args:
         params: dict of simple key-value types, where key is a string and
@@ -129,8 +129,6 @@ def to_query_str(params):
         elif v is False:
             v = 'false'
         elif isinstance(v, list):
-            # PERF(kgriffs): map is faster than list comprehension in
-            # py26 and py33. No significant different in py27
             v = ','.join(map(str, v))
         else:
             v = str(v)
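
For context, here is a simplified, standalone sketch of the serialization rules this hunk touches (booleans become 'true'/'false', lists are comma-joined via map(str, ...)). It is a stand-in for illustration, not Falcon's actual to_query_str:

# Simplified stand-in illustrating the value-serialization rules shown in
# the hunk above; the real helper in falcon.util handles additional details.
def to_query_str_sketch(params):
    pairs = []
    for k, v in params.items():
        if v is True:
            v = 'true'
        elif v is False:
            v = 'false'
        elif isinstance(v, list):
            # Per the removed comment, map() benchmarked faster than a
            # list comprehension on py26/py33.
            v = ','.join(map(str, v))
        else:
            v = str(v)
        pairs.append(k + '=' + v)
    return '&'.join(pairs)

print(to_query_str_sketch({'limit': 10, 'echo': True, 'ids': [1, 2, 3]}))
# e.g. limit=10&echo=true&ids=1,2,3 (key order may vary on older Pythons)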


@@ -190,21 +190,20 @@ if six.PY2: # pragma: no cover
         # do, let's encode in a non-lossy format.
         decoded_uri = decoded_uri.encode('utf-8')
-        # PERF(kgriffs): Use a closure instead of a class.
-        only_ascii = [True]
+        only_ascii = True
-        def unescape(matchobj):
-            # NOTE(kgriffs): Strip '%' and convert the hex number
-            char, byte = _HEX_TO_BYTE[matchobj.group(0)[1:]]
-            only_ascii[0] = only_ascii[0] and (byte <= _UTF8_MAX)
+        tokens = decoded_uri.split('%')
+        decoded_uri = tokens[0]
+        for token in tokens[1:]:
+            char, byte = _HEX_TO_BYTE[token[:2]]
+            decoded_uri += char + token[2:]
-            return char
-        decoded_uri = _ESCAPE_SEQUENCE.sub(unescape, decoded_uri)
+            if only_ascii:
+                only_ascii = (byte <= 127)
         # PERF(kgriffs): Only spend the time to do this if there
         # were non-ascii bytes found in the string.
-        if not only_ascii[0]:
+        if not only_ascii:
             decoded_uri = decoded_uri.decode('utf-8', 'replace')
         return decoded_uri
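
To make the new PY2 code path easier to follow in isolation, here is a condensed stand-in rewritten to run on Python 3. The _HEX_TO_BYTE table built here is a local approximation (all-lower and all-upper escapes only); the real module builds its own table. The only_ascii flag mirrors the optimization above: under Python 2 an all-ASCII byte string is already a usable str, so the UTF-8 decode can be skipped.

# Illustrative stand-in for the split-based decoder above, adapted so it
# runs under Python 3; not the module's actual implementation.
_HEX_TO_BYTE = {}
for i in range(256):
    for hx in ('%02x' % i, '%02X' % i):
        _HEX_TO_BYTE[hx.encode()] = (bytes([i]), i)

def decode_sketch(encoded_uri):
    data = encoded_uri.encode('utf-8')
    if b'%' not in data:
        return encoded_uri

    only_ascii = True
    tokens = data.split(b'%')
    decoded = tokens[0]
    for token in tokens[1:]:
        # Each token begins with the two hex digits of an escape sequence.
        char, byte = _HEX_TO_BYTE[token[:2]]
        decoded += char + token[2:]

        if only_ascii:
            only_ascii = (byte <= 127)

    # On py2 an all-ASCII result could be returned as-is; on py3 we always
    # have to convert the byte string back to str.
    return decoded.decode('utf-8', 'replace')

print(decode_sketch('caf%C3%A9%20menu'))   # -> café menu
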
@@ -253,10 +252,14 @@ else: # pragma: no cover
         # do, let's encode into a non-lossy format.
         decoded_uri = decoded_uri.encode('utf-8')
-        # Replace escape sequences
-        decoded_uri = _ESCAPE_SEQUENCE.sub(_unescape, decoded_uri)
+        # PERF(kgriffs): This was found to be faster than using
+        # a regex sub call or list comprehension with a join.
+        tokens = decoded_uri.split(b'%')
+        decoded_uri = tokens[0]
+        for token in tokens[1:]:
+            decoded_uri += _HEX_TO_BYTE[token[:2]] + token[2:]
-        # Back to str
+        # Convert back to str
        return decoded_uri.decode('utf-8', 'replace')
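
The PERF note claims the split-and-concatenate loop beats a regex sub. A rough, hypothetical micro-benchmark of that comparison is sketched below (Python 3); _ESCAPE_SEQUENCE, _HEX_TO_BYTE, and _unescape are local stand-ins for the module-level helpers referenced in the diff, and actual timings will vary by interpreter and input:

# Hypothetical micro-benchmark comparing the two decoding strategies named
# in the PERF comment; all helpers here are local stand-ins.
import re
import timeit

_HEX_TO_BYTE = {}
for i in range(256):
    for hx in ('%02x' % i, '%02X' % i):
        _HEX_TO_BYTE[hx.encode()] = bytes([i])

_ESCAPE_SEQUENCE = re.compile(b'%[0-9A-Fa-f]{2}')

def _unescape(matchobj):
    # Regex variant: map each %XX match to its byte.
    return _HEX_TO_BYTE[matchobj.group(0)[1:]]

def decode_regex(data):
    return _ESCAPE_SEQUENCE.sub(_unescape, data).decode('utf-8', 'replace')

def decode_split(data):
    tokens = data.split(b'%')
    decoded = tokens[0]
    for token in tokens[1:]:
        decoded += _HEX_TO_BYTE[token[:2]] + token[2:]
    return decoded.decode('utf-8', 'replace')

sample = b'limit=10&thing=a%20b&x=%23%24'
assert decode_regex(sample) == decode_split(sample)

for fn in (decode_regex, decode_split):
    print(fn.__name__, timeit.timeit(lambda: fn(sample), number=100000))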