feat(uri): decode percent encoded query string

As required by RFC 3986, we now percent-decode the query string. We
also convert '+' to ' ' (the form-encoding convention). We assume the
original string was UTF-8 encoded before it was percent-encoded, so
the decoded bytes are interpreted as UTF-8 as well.

Related bug: https://bugs.launchpad.net/marconi/+bug/1237591
This commit is contained in:
Zhihao Yuan
2013-10-11 16:30:44 -04:00
parent 82649d5c6d
commit 4fe2879017
3 changed files with 30 additions and 5 deletions

View File

@@ -18,6 +18,8 @@ limitations under the License.
from datetime import datetime
import six
try:
# NOTE(kgrifs): In Python 2.6 and 2.7, socket._fileobject is a
# standard way of exposing a socket as a file-like object, and
@@ -108,9 +110,9 @@ class Request(object):
# QUERY_STRING isn't required to be in env, so let's check
# PERF: if...in is faster than using env.get(...)
if 'QUERY_STRING' in env:
self.query_string = env['QUERY_STRING']
self.query_string = util.percent_unescape(env['QUERY_STRING'])
else:
self.query_string = ''
self.query_string = six.text_type()
# PERF: Don't parse it if we don't have to!
if self.query_string:

View File

@@ -45,6 +45,14 @@ class TestQueryParams(testing.TestBase):
self.assertEquals(store['marker'], 'deadbeef')
self.assertEquals(store['limit'], '25')
def test_percent_encoded(self):
    # 'id' carries an escaped comma (%2c); 'q' carries two UTF-8
    # encoded characters separated by '+', which stands for a space.
    self.simulate_request(
        '/', query_string='id=23%2c42&q=%e8%b1%86+%e7%93%a3')

    request = self.resource.req
    expected_params = (
        ('id', u'23,42'),
        ('q', u'\u8c46 \u74e3'),
    )

    # Each parameter must come back fully unescaped, as text.
    for name, value in expected_params:
        self.assertEquals(request.get_param(name), value)
def test_allowed_names(self):
query_string = ('p=0&p1=23&2p=foo&some-thing=that&blank=&some_thing=x&'
'-bogus=foo&more.things=blah')

View File

@@ -20,9 +20,9 @@ import datetime
import six
if six.PY3: # pragma nocover
from urllib.parse import quote as url_quote
import urllib.parse as urllib
else: # pragma nocover
from urllib import quote as url_quote
import urllib
# Public API of this module. percent_unescape is listed alongside
# percent_escape because other modules call it as util.percent_unescape.
__all__ = ('dt_to_http', 'http_date_to_dt', 'to_query_str',
           'percent_escape', 'percent_unescape')
@@ -112,4 +112,19 @@ def percent_escape(url):
if not six.PY3 and isinstance(url, six.text_type): # pragma nocover
url = url.encode('utf-8')
return url_quote(url, safe='/:,=?&-_')
return urllib.quote(url, safe='/:,=?&-_')
def percent_unescape(nstr):
    """Decode a percent-encoded native string into text.

    Percent escapes (``%XX``) are resolved and every '+' is turned
    into a space; the unescaped octets are then read as UTF-8.

    Args:
        nstr: A percent-encoded URL or query string, given as a
            native string (code points U+0000 - U+00FF).

    Returns:
        The unescaped URL as text, decoded as UTF-8.
    """
    unquoted = urllib.unquote_plus(nstr)

    if six.PY3:
        # On Python 3 the unquoted result is returned as-is; no
        # further decoding step is applied here.
        return unquoted

    # On Python 2 the unquoted result is decoded explicitly; invalid
    # UTF-8 sequences are replaced rather than raising an error.
    return unquoted.decode('utf-8', 'replace')