diff --git a/falcon/testing/helpers.py b/falcon/testing/helpers.py index 649c76c..bc20ee8 100644 --- a/falcon/testing/helpers.py +++ b/falcon/testing/helpers.py @@ -130,6 +130,23 @@ def create_environ(path='/', query_string='', protocol='HTTP/1.1', # the paths before setting PATH_INFO path = uri.decode(path) + if six.PY3: + # NOTE(kgriffs): The decoded path may contain UTF-8 characters. + # But according to the WSGI spec, no strings can contain chars + # outside ISO-8859-1. Therefore, to reconcile the URI + # encoding standard that allows UTF-8 with the WSGI spec + # that does not, WSGI servers tunnel the string via + # ISO-8859-1. falcon.testing.create_environ() mimics this + # behavior, e.g.: + # + # tunnelled_path = path.encode('utf-8').decode('iso-8859-1') + # + # falcon.Request does the following to reverse the process: + # + # path = tunnelled_path.encode('iso-8859-1').decode('utf-8', 'replace') + # + path = path.encode('utf-8').decode('iso-8859-1') + if six.PY2 and isinstance(path, six.text_type): path = path.encode('utf-8') diff --git a/tests/test_req_vars.py b/tests/test_req_vars.py index 985a3a2..4e48134 100644 --- a/tests/test_req_vars.py +++ b/tests/test_req_vars.py @@ -7,6 +7,7 @@ import testtools import falcon from falcon.request import Request import falcon.testing as testing +import falcon.uri @ddt.ddt @@ -109,15 +110,35 @@ class TestReqVars(testing.TestBase): self.assertEqual(expected_uri, req.uri) + @ddt.data( + u'/hello_\u043f\u0440\u0438\u0432\u0435\u0442', + u'/test/%E5%BB%B6%E5%AE%89', + u'/test/%C3%A4%C3%B6%C3%BC%C3%9F%E2%82%AC', + ) @testtools.skipUnless(six.PY3, 'Test only applies to Python 3') - def test_nonlatin_path(self): - cyrillic_path = u'/hello_\u043f\u0440\u0438\u0432\u0435\u0442' - cyrillic_path_decoded = cyrillic_path.encode('utf-8').decode('latin1') + def test_nonlatin_path(self, test_path): + # NOTE(kgriffs): When a request comes in, web servers decode + # the path. The decoded path may contain UTF-8 characters, + # but according to the WSGI spec, no strings can contain chars + # outside ISO-8859-1. Therefore, to reconcile the URI + # encoding standard that allows UTF-8 with the WSGI spec + # that does not, WSGI servers tunnel the string via + # ISO-8859-1. falcon.testing.create_environ() mimics this + # behavior, e.g.: + # + # tunnelled_path = path.encode('utf-8').decode('iso-8859-1') + # + # falcon.Request does the following to reverse the process: + # + # path = tunnelled_path.encode('iso-8859-1').decode('utf-8', 'replace') + # + req = Request(testing.create_environ( host='com', - path=cyrillic_path_decoded, + path=test_path, headers=self.headers)) - self.assertEqual(req.path, cyrillic_path) + + self.assertEqual(req.path, falcon.uri.decode(test_path)) def test_uri(self): uri = ('http://' + testing.DEFAULT_HOST + ':8080' + diff --git a/tox.ini b/tox.ini index bb34d61..b4e6afc 100644 --- a/tox.ini +++ b/tox.ini @@ -61,6 +61,7 @@ deps = {[with-debug-tools]deps} funcsigs [testenv:py34_debug] +basepython = python3.4 deps = {[with-debug-tools]deps} # --------------------------------------------------------------------