From 4fe2879017f3db7dfdc08919840102148565deb2 Mon Sep 17 00:00:00 2001 From: Zhihao Yuan Date: Fri, 11 Oct 2013 16:30:44 -0400 Subject: [PATCH] feat(uri): decode percent encoded query string As RFC 3986 requires, we percent-decode the query string, and we additionally apply the '+' -> ' ' conversion. We also assume the source string was UTF-8 before it was percent-encoded, so we decode the result as UTF-8 as well. Related bug: https://bugs.launchpad.net/marconi/+bug/1237591 --- falcon/request.py | 6 ++++-- falcon/tests/test_query_params.py | 8 ++++++++ falcon/util.py | 21 ++++++++++++++++++--- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/falcon/request.py b/falcon/request.py index 1d73498..a02ecef 100644 --- a/falcon/request.py +++ b/falcon/request.py @@ -18,6 +18,8 @@ limitations under the License. from datetime import datetime +import six + try: # NOTE(kgrifs): In Python 2.6 and 2.7, socket._fileobject is a # standard way of exposing a socket as a file-like object, and @@ -108,9 +110,9 @@ class Request(object): # QUERY_STRING isn't required to be in env, so let's check # PERF: if...in is faster than using env.get(...) if 'QUERY_STRING' in env: - self.query_string = env['QUERY_STRING'] + self.query_string = util.percent_unescape(env['QUERY_STRING']) else: - self.query_string = '' + self.query_string = six.text_type() # PERF: Don't parse it if we don't have to! 
if self.query_string: diff --git a/falcon/tests/test_query_params.py b/falcon/tests/test_query_params.py index 5693701..2734892 100644 --- a/falcon/tests/test_query_params.py +++ b/falcon/tests/test_query_params.py @@ -45,6 +45,14 @@ class TestQueryParams(testing.TestBase): self.assertEquals(store['marker'], 'deadbeef') self.assertEquals(store['limit'], '25') + def test_percent_encoded(self): + query_string = 'id=23%2c42&q=%e8%b1%86+%e7%93%a3' + self.simulate_request('/', query_string=query_string) + + req = self.resource.req + self.assertEquals(req.get_param('id'), u'23,42') + self.assertEquals(req.get_param('q'), u'\u8c46 \u74e3') + def test_allowed_names(self): query_string = ('p=0&p1=23&2p=foo&some-thing=that&blank=&some_thing=x&' '-bogus=foo&more.things=blah') diff --git a/falcon/util.py b/falcon/util.py index 3dd61b4..3710750 100644 --- a/falcon/util.py +++ b/falcon/util.py @@ -20,9 +20,9 @@ import datetime import six if six.PY3: # pragma nocover - from urllib.parse import quote as url_quote + import urllib.parse as urllib else: # pragma nocover - from urllib import quote as url_quote + import urllib __all__ = ('dt_to_http', 'http_date_to_dt', 'to_query_str', 'percent_escape') @@ -112,4 +112,19 @@ def percent_escape(url): if not six.PY3 and isinstance(url, six.text_type): # pragma nocover url = url.encode('utf-8') - return url_quote(url, safe='/:,=?&-_') + return urllib.quote(url, safe='/:,=?&-_') + + +def percent_unescape(nstr): + """Percent-decode a native string, also converting '+' to ' ' (uses unquote_plus). + + Args: + nstr: A URL or query string as a native string (code points U+0000 - U+00FF). + + Returns: + The unescaped text: the unquote_plus result as-is on Python 3, or that result decoded as UTF-8 with invalid sequences replaced on Python 2. NOTE(review): Request applies this to the whole QUERY_STRING before the parse step, so an escaped '&' or '=' may become indistinguishable from a delimiter - confirm against the parser. + """ + + s = urllib.unquote_plus(nstr) + + return s if six.PY3 else s.decode('utf-8', 'replace')