fix: Unicode resp.body not encoded to a byte string

Required per PEP 333.

Fixes #51
This commit is contained in:
Kurt Griffiths
2013-02-04 15:37:55 -05:00
parent 57c03d7929
commit 850fe333a4
7 changed files with 89 additions and 25 deletions

View File

@@ -16,7 +16,7 @@ This requires some discipline on the part of the developer.
* Python automagically converts comma-delimited query param values to lists
* For 204, just set the status and no body. Falcon will ignore the body even if you set it.
* Falcon doesn't officially support Python 3; it's on our TODO list.
* Falcon is based on byte strings (str in Python 2, bytes in Python 3), and does no conversions to UTF-16 (for example). If your app needs to use wide strings, you'll need to do the conversion manually. However, we recommend just keeping everything UTF-8 as much as possible for efficiency's sake.
* If you set resp.body to a Unicode string, Falcon will encode it as UTF-8 before sending the content to the WSGI server (as required by PEP-333). If you already have encoded data (or it's a binary blob), use resp.data instead.
* Default content type for responses is 'application/json; charset=utf-8', and the default status is '200 OK'.
* resp.set_header assumes both params are strings. App may crash otherwise. Falcon trusts the caller. You *are* testing all your code paths, aren't you?
* If you need the protocol (http vs https) to construct hrefs in your responses (hypermedia is good, trust me), you can get it from req.scheme
@@ -28,7 +28,6 @@ This requires some discipline on the part of the developer.
* Don't set content-length. It will only be overridden.
* The order in which header fields are sent in the response is undefined. Headers are not grouped according to the recommendation in [RFC 2616](http://tools.ietf.org/html/rfc2616#section-4.2) in order to generate responses as quickly as possible.
* Header names are case-insensitive in req.get_header
* Set body to a byte string, as per PEP 333 - http://www.python.org/dev/peps/pep-0333/#unicode-issues - if it is textual, it's up to the app to set the proper media type
* For streaming large items, assign a generator or IO object to resp.stream. If you know the file size in advance, assign it to stream\_len. For dynamically-generated content, leave off stream\_len, and Falcon will then leave off the Content-Length header, and hopefully your WSGI server will do the right thing, assuming you've told it to enable keep-alive (PEP-333 prohibits apps from setting hop-by-hop headers, such as Transfer-Encoding, themselves).

View File

@@ -31,7 +31,7 @@ def bench(name, iterations=10000):
def create_bench(name):
srmock = helpers.StartResponseMock()
env = helpers.create_environ('/hello/584/test', query_string='limit=10')
body = helpers.rand_string(10240, 10240)
body = helpers.rand_string(0, 10240)
headers = {'X-Test': 'Funky Chicken'}
app = eval('create_{0}(body, headers)'.format(name.lower()))
@@ -55,6 +55,9 @@ if __name__ == '__main__':
if args.frameworks:
frameworks = args.frameworks
else:
# wheezy.http isn't really a framework - doesn't even have a router
del frameworks[frameworks.index('Wheezy')]
random.shuffle(frameworks)

View File

@@ -1,17 +1,19 @@
import sys
import re
import six
import wheezy.http as wheezy
from wheezy.core.collections import last_item_adapter
import bottle
import flask
import werkzeug.wrappers as werkzeug
from werkzeug.routing import Map, Rule
if not six.PY3:
import flask
import werkzeug.wrappers as werkzeug
from werkzeug.routing import Map, Rule
sys.path.append('./nuts/nuts')
import app as nuts
del sys.path[-1]
sys.path.append('./nuts/nuts')
import app as nuts
del sys.path[-1]
sys.path.append('..')
import falcon
@@ -25,7 +27,11 @@ def create_falcon(body, headers):
class HelloResource:
def on_get(self, req, resp, account_id):
limit = req.get_param('limit', '10')
resp.body = body
if six.PY3:
resp.body = body
else:
resp.data = body
resp.set_header('Content-Type', 'text/plain')
resp.set_headers(headers)

View File

@@ -99,12 +99,16 @@ class API(object):
# Return an iterable for the body, per the WSGI spec
if use_body:
if resp.body:
return [resp.body]
body = resp.body
if body is not None:
return [encode_body(body)]
elif resp.data is not None:
return [resp.data]
elif resp.stream is not None:
return resp.stream
# Default to returning an empty body
# Default: return an empty body
return []
def add_route(self, uri_template, resource):

View File

@@ -17,6 +17,8 @@ limitations under the License.
"""
import re
import six
from falcon import responders
HTTP_METHODS = (
@@ -82,6 +84,38 @@ def set_content_length(resp):
# No body given
resp.set_header('Content-Length', '0')
# Text (Unicode) string type: ``unicode`` on Python 2, ``str`` on Python 3.
# Resolved once at import time so encode_body() stays cheap per request.
_TEXT_TYPE = type(u'')


def encode_body(body):
    """Encodes body to a byte string, as required by PEP 333

    A single implementation is used for both Python 2 and Python 3 so
    that a body which is already a byte string is handled the same way
    on both: previously the Python 3 variant called ``body.encode()``
    unconditionally, which raises AttributeError for ``bytes``.

    Args:
        body: String to encode

    Returns:
        If body was a Unicode string, returns the string encoded as
        UTF-8. On the other hand, if body is already a byte string (or
        any other non-text object), no encoding is performed and it is
        returned as-is.

    """

    if isinstance(body, _TEXT_TYPE):
        return body.encode('utf-8')

    return body
def compile_uri_template(template):
"""Compile the given URI template string into a pattern matcher.

View File

@@ -23,7 +23,7 @@ CONTENT_TYPE_NAMES = set(['Content-Type', 'content-type', 'CONTENT-TYPE'])
class Response(object):
"""Represents an HTTP response to a client request"""
__slots__ = ('status', '_headers', 'body', 'stream', 'stream_len')
__slots__ = ('status', '_headers', 'body', 'data', 'stream', 'stream_len')
def __init__(self):
"""Initialize response attributes to default values
@@ -37,6 +37,7 @@ class Response(object):
self._headers = []
self.body = None
self.data = None
self.stream = None
self.stream_len = None

View File

@@ -4,11 +4,15 @@ import falcon
import io
from . import helpers
import six
class HelloResource:
sample_status = '200 OK'
sample_body = 'Hello World! ' + helpers.rand_string(0, 256 * 1024)
raw_body = sample_body.encode('utf-8')
sample_unicode = (u'Hello World! \x80' +
six.text_type(helpers.rand_string(0, 0)))
sample_utf8 = sample_unicode.encode('utf-8')
def __init__(self, mode):
self.called = False
@@ -22,13 +26,16 @@ class HelloResource:
resp.status = falcon.HTTP_200
if 'stream' in self.mode:
resp.stream = io.BytesIO(self.raw_body)
resp.stream = io.BytesIO(self.sample_utf8)
if 'stream_len' in self.mode:
resp.stream_len = len(self.raw_body)
resp.stream_len = len(self.sample_utf8)
if 'body' in self.mode:
resp.body = self.sample_body
if 'bytes' in self.mode:
resp.body = self.sample_utf8
else:
resp.body = self.sample_unicode
def on_head(self, req, resp):
self.on_get(req, resp)
@@ -45,6 +52,9 @@ class TestHelloWorld(helpers.TestSuite):
self.resource = HelloResource('body')
self.api.add_route(self.test_route, self.resource)
self.bytes_resource = HelloResource('body, bytes')
self.api.add_route('/bytes', self.bytes_resource)
self.chunked_resource = HelloResource('stream')
self.api.add_route('/chunked-stream', self.chunked_resource)
@@ -75,8 +85,17 @@ class TestHelloWorld(helpers.TestSuite):
self.assertEquals(self.srmock.status, self.resource.sample_status)
self.assertEquals(resp.status, self.resource.sample_status)
self.assertEquals(resp.body, self.resource.sample_body)
self.assertEquals(body, [self.resource.sample_body])
self.assertEquals(resp.body, self.resource.sample_unicode)
self.assertEquals(body, [self.resource.sample_utf8])
def test_body_bytes(self):
    """A body that is already a UTF-8 byte string is returned unchanged"""
    wsgi_result = self._simulate_request('/bytes')
    response = self.bytes_resource.resp

    # Expected values are class-level samples on the resource
    expected_status = self.resource.sample_status
    expected_bytes = self.resource.sample_utf8

    self.assertEquals(self.srmock.status, expected_status)
    self.assertEquals(response.status, expected_status)
    self.assertEquals(response.body, expected_bytes)
    self.assertEquals(wsgi_result, [expected_bytes])
def test_no_body_on_head(self):
body = self._simulate_request(self.test_route, method='HEAD')
@@ -90,8 +109,7 @@ class TestHelloWorld(helpers.TestSuite):
for chunk in src:
dest.write(chunk)
self.assertEqual(dest.getvalue().decode('utf-8'),
self.chunked_resource.sample_body)
self.assertEqual(dest.getvalue(), self.chunked_resource.sample_utf8)
for header in self.srmock.headers:
self.assertNotEqual(header[0].lower(), 'content-length')
@@ -108,8 +126,7 @@ class TestHelloWorld(helpers.TestSuite):
self.assertThat(self.srmock.headers, Contains(content_length))
self.assertEqual(dest.tell(), expected_len)
self.assertEqual(dest.getvalue().decode('utf-8'),
self.chunked_resource.sample_body)
self.assertEqual(dest.getvalue(), self.chunked_resource.sample_utf8)
def test_status_not_set(self):
body = self._simulate_request('/nostatus')