feat(uri): decode percent encoded query string

Per RFC 3986, we now decode the percent-encoded query string; in addition, '+' is converted to ' '. We also assume the original string was UTF-8 before it was percent-encoded, so the unescaped bytes are decoded as UTF-8 as well. Related bug: https://bugs.launchpad.net/marconi/+bug/1237591
2013-10-11 16:30:44 -04:00
parent 82649d5c6d
commit 4fe2879017
3 changed files with 30 additions and 5 deletions
--- a/falcon/request.py
+++ b/falcon/request.py
@@ -18,6 +18,8 @@ limitations under the License.

 from datetime import datetime

+import six
+
 try:
    # NOTE(kgrifs): In Python 2.6 and 2.7, socket._fileobject is a
    # standard way of exposing a socket as a file-like object, and
@@ -108,9 +110,9 @@ class Request(object):
        # QUERY_STRING isn't required to be in env, so let's check
        # PERF: if...in is faster than using env.get(...)
        if 'QUERY_STRING' in env:
-            self.query_string = env['QUERY_STRING']
+            self.query_string = util.percent_unescape(env['QUERY_STRING'])
        else:
-            self.query_string = ''
+            self.query_string = six.text_type()

        # PERF: Don't parse it if we don't have to!
        if self.query_string:
--- a/falcon/tests/test_query_params.py
+++ b/falcon/tests/test_query_params.py
@@ -45,6 +45,14 @@ class TestQueryParams(testing.TestBase):
        self.assertEquals(store['marker'], 'deadbeef')
        self.assertEquals(store['limit'], '25')

+    def test_percent_encoded(self):
+        query_string = 'id=23%2c42&q=%e8%b1%86+%e7%93%a3'  # %2c is ','
+        self.simulate_request('/', query_string=query_string)
+
+        req = self.resource.req
+        self.assertEquals(req.get_param('id'), u'23,42')
+        self.assertEquals(req.get_param('q'), u'\u8c46 \u74e3')  # '+' -> space
+
+
    def test_allowed_names(self):
        query_string = ('p=0&p1=23&2p=foo&some-thing=that&blank=&some_thing=x&'
                        '-bogus=foo&more.things=blah')
--- a/falcon/util.py
+++ b/falcon/util.py
@@ -20,9 +20,9 @@ import datetime
 import six

 if six.PY3:  # pragma nocover
-    from urllib.parse import quote as url_quote
+    import urllib.parse as urllib
 else:  # pragma nocover
-    from urllib import quote as url_quote
+    import urllib


 __all__ = ('dt_to_http', 'http_date_to_dt', 'to_query_str', 'percent_escape')
@@ -112,4 +112,19 @@ def percent_escape(url):
    if not six.PY3 and isinstance(url, six.text_type):  # pragma nocover
        url = url.encode('utf-8')

-    return url_quote(url, safe='/:,=?&-_')
+    return urllib.quote(url, safe='/:,=?&-_')
+
+
+def percent_unescape(nstr):
+    """Percent-unescape a native string, also converting '+' to ' '.
+
+    Args:
+        nstr: A percent-encoded native string (code points U+0000-U+00FF).
+
+    Returns:
+        The unescaped text, with the escaped bytes decoded as UTF-8.
+    """
+
+    s = urllib.unquote_plus(nstr)  # handles both %XX escapes and '+'
+
+    return s if six.PY3 else s.decode('utf-8', 'replace')  # PY2: bytes