perf(Request): Optimize percent_unescape
Do less work by checking for existence of '+' before creating a new string, and also only decoding from UTF-8 when there is a chance the source string has some multibyte sequences in it.
This commit is contained in:
@@ -35,8 +35,10 @@ import six
|
||||
|
||||
from falcon.exceptions import HTTPBadRequest
|
||||
from falcon import util
|
||||
from falcon.util import uri
|
||||
from falcon import request_helpers as helpers
|
||||
|
||||
|
||||
DEFAULT_ERROR_LOG_FORMAT = (u'{0:%Y-%m-%d %H:%M:%S} [FALCON] [ERROR]'
|
||||
u' {1} {2}{3} => ')
|
||||
|
||||
@@ -108,7 +110,18 @@ class Request(object):
|
||||
# QUERY_STRING isn't required to be in env, so let's check
|
||||
# PERF: if...in is faster than using env.get(...)
|
||||
if 'QUERY_STRING' in env and env['QUERY_STRING']:
|
||||
self.query_string = util.percent_unescape(env['QUERY_STRING'])
|
||||
|
||||
# TODO(kgriffs): Should this escape individual values instead
|
||||
# of the entire string? The way it is now, this:
|
||||
#
|
||||
# x=ab%2Bcd%3D42%2C9
|
||||
#
|
||||
# becomes this:
|
||||
#
|
||||
# x=ab+cd=42,9
|
||||
#
|
||||
self.query_string = uri.decode(env['QUERY_STRING'])
|
||||
|
||||
else:
|
||||
self.query_string = six.text_type()
|
||||
|
||||
|
||||
@@ -18,15 +18,9 @@ limitations under the License.
|
||||
|
||||
import datetime
|
||||
import functools
|
||||
import six
|
||||
import inspect
|
||||
import warnings
|
||||
|
||||
if six.PY3: # pragma nocover
|
||||
import urllib.parse as urllib # pylint: disable=E0611
|
||||
else: # pragma nocover
|
||||
import urllib
|
||||
|
||||
from falcon.util import uri
|
||||
|
||||
|
||||
@@ -149,18 +143,5 @@ def to_query_str(params):
|
||||
# TODO(kgriffs): Remove this alias in Falcon v0.2.0
|
||||
percent_escape = uri.encode
|
||||
|
||||
|
||||
# TODO(kgriffs): Move this to falcon.uri.decode in Falcon v0.2.0
|
||||
def percent_unescape(nstr):
    """Percent-unescape a native-string URL and return it as text.

    The unescaping is delegated to ``urllib.unquote_plus``.

    Args:
        nstr: A URL as a native string (code points U+0000 through U+00FF).

    Returns:
        The unescaped URL. On Python 3 this is the value returned by
        ``unquote_plus``; otherwise that value is decoded as UTF-8, with
        undecodable bytes replaced.

    """

    unquoted = urllib.unquote_plus(nstr)

    if six.PY3:
        # No extra decoding step is applied on Python 3.
        return unquoted

    # On Python 2 the unquoted value is decoded as UTF-8; 'replace' keeps
    # malformed sequences from raising.
    return unquoted.decode('utf-8', 'replace')
|
||||
# TODO(kgriffs): Remove this alias in Falcon v0.2.0
|
||||
percent_unescape = uri.decode
|
||||
|
||||
@@ -18,6 +18,11 @@ limitations under the License.
|
||||
|
||||
import six
|
||||
|
||||
if six.PY3: # pragma nocover
|
||||
import urllib.parse as urllib # pylint: disable=E0611
|
||||
else: # pragma nocover
|
||||
import urllib
|
||||
|
||||
|
||||
# NOTE(kgriffs): See also RFC 3986
|
||||
_UNRESERVED = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
||||
@@ -134,3 +139,57 @@ Returns:
|
||||
have been percent-encoded.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def decode(uri):
    """Decode any percent-encoded characters in a URI or query string.

    Every '+' is replaced with a space, and percent-escapes are expanded
    via ``urllib.unquote``.

    Args:
        uri: An encoded URI (full or partial). If of type str on Python 2,
            UTF-8 is assumed.

    Returns:
        A decoded URL. On Python 2, a `str` input comes back as `unicode`
        only when it contained a '%' (in which case there is a chance the
        escapes held multibyte UTF-8 sequences); otherwise the result has
        the same type as `uri`.

    """

    # PERF(kgriffs): unquote_plus could take care of the plusses too, but
    # when the string contains *only* plusses and no '%', a plain replace
    # saves a lot of time.
    decoded = uri.replace('+', ' ') if '+' in uri else uri

    # Nothing is percent-encoded, so there is nothing left to expand.
    if '%' not in decoded:
        return decoded

    decoded = urllib.unquote(decoded)

    # TODO(kgriffs): urllib is broken when it comes to decoding non-ASCII
    # strings on Python 2. Given a str, it does not even try to decode the
    # character set; given a unicode, it simply decodes code points as
    # latin1. Only the str case is corrected below; a unicode result is
    # returned exactly as urllib produced it.
    #
    # PERF(kgriffs): This step is reached only on the '%' path, since that
    # is the only time multibyte, UTF-8 encoded sequences could have been
    # percent-encoded.
    if six.PY2 and isinstance(decoded, str):  # pragma nocover
        decoded = decoded.decode('utf-8', 'replace')

    return decoded
|
||||
|
||||
@@ -73,19 +73,34 @@ class TestFalconUtils(testtools.TestCase):
|
||||
|
||||
self.assertEqual(expected, garbage_out)
|
||||
|
||||
def test_percent_escape(self):
|
||||
def test_uri_encode(self):
|
||||
url = 'http://example.com/v1/fizbit/messages?limit=3&echo=true'
|
||||
self.assertEqual(falcon.percent_escape(url), url)
|
||||
self.assertEqual(uri.encode(url), url)
|
||||
|
||||
url2a = u'http://example.com/v1/fizbit/messages?limit=3&e\u00e7ho=true'
|
||||
url2b = 'http://example.com/v1/fizbit/messages?limit=3&e%C3%A7ho=true'
|
||||
self.assertEqual(falcon.percent_escape(url2a), url2b)
|
||||
url = 'http://example.com/v1/fiz bit/messages'
|
||||
expected = 'http://example.com/v1/fiz%20bit/messages'
|
||||
self.assertEqual(uri.encode(url), expected)
|
||||
|
||||
def test_decode_value(self):
|
||||
url = u'http://example.com/v1/fizbit/messages?limit=3&e\u00e7ho=true'
|
||||
expected = ('http://example.com/v1/fizbit/messages'
|
||||
'?limit=3&e%C3%A7ho=true')
|
||||
self.assertEqual(uri.encode(url), expected)
|
||||
|
||||
def test_uri_encode_value(self):
    # Each pair is (raw value, expected result of uri.encode_value).
    # The pairs are checked in order, and the first mismatch fails the
    # test.
    cases = (
        ('abcd', 'abcd'),
        (u'abcd', u'abcd'),
        (u'ab cd', u'ab%20cd'),
        (u'\u00e7', u'%C3%A7'),
        (u'\u00e7', '%C3%A7'),
        ('ab/cd', 'ab%2Fcd'),
        ('ab+cd=42,9', 'ab%2Bcd%3D42%2C9'),
    )

    for raw, expected in cases:
        self.assertEqual(uri.encode_value(raw), expected)
|
||||
|
||||
def test_uri_decode(self):
    # Each pair is (encoded input, expected result of uri.decode).
    # The pairs are checked in order, and the first mismatch fails the
    # test.
    cases = (
        ('abcd', 'abcd'),
        (u'abcd', u'abcd'),
        (u'ab%20cd', u'ab cd'),
        ('%C3%A7', u'\u00e7'),
        ('ab%2Fcd', 'ab/cd'),
        ('http://example.com?x=ab%2Bcd%3D42%2C9',
         'http://example.com?x=ab+cd=42,9'),
    )

    for encoded, expected in cases:
        self.assertEqual(uri.decode(encoded), expected)
|
||||
|
||||
Reference in New Issue
Block a user