From 89e8b31bb23412c9d601cb50e83fa582cb921141 Mon Sep 17 00:00:00 2001 From: kgriffs Date: Tue, 14 Jan 2014 14:59:30 -0600 Subject: [PATCH] perf: Further optimizations for falcon.uri --- falcon/bench/bench.py | 3 ++- falcon/util/misc.py | 4 +--- falcon/util/uri.py | 29 ++++++++++++++++------------- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/falcon/bench/bench.py b/falcon/bench/bench.py index 298a5c4..b38296b 100755 --- a/falcon/bench/bench.py +++ b/falcon/bench/bench.py @@ -128,7 +128,8 @@ def queues_env(): path = ('/v1/852809/queues/0fd4c8c6-bd72-11e2-8e47-db5ebd4c8125' '/claims/db5ebd4c8125') - return helpers.create_environ(path, query_string='limit=10&thing=a%20b', + qs = 'limit=10&thing=a%20b&x=%23%24' + return helpers.create_environ(path, query_string=qs, headers=request_headers) diff --git a/falcon/util/misc.py b/falcon/util/misc.py index 85d0879..bb57794 100644 --- a/falcon/util/misc.py +++ b/falcon/util/misc.py @@ -104,7 +104,7 @@ def http_date_to_dt(http_date): def to_query_str(params): - """Converts a dict of params to afaln actual query string. + """Converts a dict of params to an actual query string. Args: params: dict of simple key-value types, where key is a string and @@ -129,8 +129,6 @@ def to_query_str(params): elif v is False: v = 'false' elif isinstance(v, list): - # PERF(kgriffs): map is faster than list comprehension in - # py26 and py33. No significant different in py27 v = ','.join(map(str, v)) else: v = str(v) diff --git a/falcon/util/uri.py b/falcon/util/uri.py index aa5e6c2..ec9b895 100644 --- a/falcon/util/uri.py +++ b/falcon/util/uri.py @@ -190,21 +190,20 @@ if six.PY2: # pragma: no cover # do, let's encode in a non-lossy format. decoded_uri = decoded_uri.encode('utf-8') - # PERF(kgriffs): Use a closure instead of a class. - only_ascii = [True] + only_ascii = True - def unescape(matchobj): - # NOTE(kgriffs): Strip '%' and convert the hex number - char, byte = _HEX_TO_BYTE[matchobj.group(0)[1:]] - only_ascii[0] = only_ascii[0] and (byte <= _UTF8_MAX) + tokens = decoded_uri.split('%') + decoded_uri = tokens[0] + for token in tokens[1:]: + char, byte = _HEX_TO_BYTE[token[:2]] + decoded_uri += char + token[2:] - return char - - decoded_uri = _ESCAPE_SEQUENCE.sub(unescape, decoded_uri) + if only_ascii: + only_ascii = (byte <= 127) # PERF(kgriffs): Only spend the time to do this if there # were non-ascii bytes found in the string. - if not only_ascii[0]: + if not only_ascii: decoded_uri = decoded_uri.decode('utf-8', 'replace') return decoded_uri @@ -253,10 +252,14 @@ else: # pragma: no cover # do, let's encode into a non-lossy format. decoded_uri = decoded_uri.encode('utf-8') - # Replace escape sequences - decoded_uri = _ESCAPE_SEQUENCE.sub(_unescape, decoded_uri) + # PERF(kgriffs): This was found to be faster than using + # a regex sub call or list comprehension with a join. + tokens = decoded_uri.split(b'%') + decoded_uri = tokens[0] + for token in tokens[1:]: + decoded_uri += _HEX_TO_BYTE[token[:2]] + token[2:] - # Back to str + # Convert back to str return decoded_uri.decode('utf-8', 'replace')