swob: Stop auto-encoding unicode bodies

Instead, require that callers provide an encoding.

Related-Change: I31408f525ba9836f634a35581d4aee6fa2c9428f
Change-Id: I3e5ed9e4401eea76c375bb43ad4afc58b1d8006a
This commit is contained in:
Tim Burke 2018-06-26 15:56:56 -07:00
parent bea74a3260
commit d03fc9bc54
9 changed files with 99 additions and 72 deletions

View File

@ -24,7 +24,7 @@ from six.moves import urllib
from swift.common import utils, exceptions
from swift.common.swob import HTTPBadRequest, HTTPLengthRequired, \
HTTPRequestEntityTooLarge, HTTPPreconditionFailed, HTTPNotImplemented, \
HTTPException, wsgi_to_str
HTTPException, wsgi_to_str, wsgi_to_bytes
MAX_FILE_SIZE = 5368709122
MAX_META_NAME_LENGTH = 128
@ -132,38 +132,39 @@ def check_metadata(req, target_type):
if (isinstance(value, six.string_types)
and len(value) > MAX_HEADER_SIZE):
return HTTPBadRequest(body='Header value too long: %s' %
key[:MAX_META_NAME_LENGTH],
return HTTPBadRequest(body=b'Header value too long: %s' %
wsgi_to_bytes(key[:MAX_META_NAME_LENGTH]),
request=req, content_type='text/plain')
if not key.lower().startswith(prefix):
continue
key = key[len(prefix):]
if not key:
return HTTPBadRequest(body='Metadata name cannot be empty',
return HTTPBadRequest(body=b'Metadata name cannot be empty',
request=req, content_type='text/plain')
bad_key = not check_utf8(wsgi_to_str(key))
bad_value = value and not check_utf8(wsgi_to_str(value))
if target_type in ('account', 'container') and (bad_key or bad_value):
return HTTPBadRequest(body='Metadata must be valid UTF-8',
return HTTPBadRequest(body=b'Metadata must be valid UTF-8',
request=req, content_type='text/plain')
meta_count += 1
meta_size += len(key) + len(value)
if len(key) > MAX_META_NAME_LENGTH:
return HTTPBadRequest(
body='Metadata name too long: %s%s' % (prefix, key),
body=wsgi_to_bytes('Metadata name too long: %s%s' % (
prefix, key)),
request=req, content_type='text/plain')
if len(value) > MAX_META_VALUE_LENGTH:
return HTTPBadRequest(
body='Metadata value longer than %d: %s%s' % (
MAX_META_VALUE_LENGTH, prefix, key),
body=wsgi_to_bytes('Metadata value longer than %d: %s%s' % (
MAX_META_VALUE_LENGTH, prefix, key)),
request=req, content_type='text/plain')
if meta_count > MAX_META_COUNT:
return HTTPBadRequest(
body='Too many metadata items; max %d' % MAX_META_COUNT,
body=b'Too many metadata items; max %d' % MAX_META_COUNT,
request=req, content_type='text/plain')
if meta_size > MAX_META_OVERALL_SIZE:
return HTTPBadRequest(
body='Total metadata too large; max %d'
body=b'Total metadata too large; max %d'
% MAX_META_OVERALL_SIZE,
request=req, content_type='text/plain')
return None
@ -186,28 +187,28 @@ def check_object_creation(req, object_name):
ml = req.message_length()
except ValueError as e:
return HTTPBadRequest(request=req, content_type='text/plain',
body=str(e))
body=str(e).encode('ascii'))
except AttributeError as e:
return HTTPNotImplemented(request=req, content_type='text/plain',
body=str(e))
body=str(e).encode('ascii'))
if ml is not None and ml > MAX_FILE_SIZE:
return HTTPRequestEntityTooLarge(body='Your request is too large.',
return HTTPRequestEntityTooLarge(body=b'Your request is too large.',
request=req,
content_type='text/plain')
if req.content_length is None and \
req.headers.get('transfer-encoding') != 'chunked':
return HTTPLengthRequired(body='Missing Content-Length header.',
return HTTPLengthRequired(body=b'Missing Content-Length header.',
request=req,
content_type='text/plain')
if len(object_name) > MAX_OBJECT_NAME_LENGTH:
return HTTPBadRequest(body='Object name length of %d longer than %d' %
return HTTPBadRequest(body=b'Object name length of %d longer than %d' %
(len(object_name), MAX_OBJECT_NAME_LENGTH),
request=req, content_type='text/plain')
if 'Content-Type' not in req.headers:
return HTTPBadRequest(request=req, content_type='text/plain',
body='No content type')
body=b'No content type')
try:
req = check_delete_headers(req)
@ -216,7 +217,7 @@ def check_object_creation(req, object_name):
content_type='text/plain')
if not check_utf8(wsgi_to_str(req.headers['Content-Type'])):
return HTTPBadRequest(request=req, body='Invalid Content-Type',
return HTTPBadRequest(request=req, body=b'Invalid Content-Type',
content_type='text/plain')
return check_metadata(req, 'object')
@ -299,7 +300,7 @@ def valid_timestamp(request):
try:
return request.timestamp
except exceptions.InvalidTimestamp as e:
raise HTTPBadRequest(body=str(e), request=request,
raise HTTPBadRequest(body=str(e).encode('ascii'), request=request,
content_type='text/plain')
@ -324,13 +325,13 @@ def check_delete_headers(request):
except ValueError:
raise HTTPBadRequest(request=request,
content_type='text/plain',
body='Non-integer X-Delete-After')
body=b'Non-integer X-Delete-After')
actual_del_time = utils.normalize_delete_at_timestamp(
now + x_delete_after)
if int(actual_del_time) <= now:
raise HTTPBadRequest(request=request,
content_type='text/plain',
body='X-Delete-After in past')
body=b'X-Delete-After in past')
request.headers['x-delete-at'] = actual_del_time
del request.headers['x-delete-after']
@ -340,12 +341,12 @@ def check_delete_headers(request):
int(request.headers['x-delete-at'])))
except ValueError:
raise HTTPBadRequest(request=request, content_type='text/plain',
body='Non-integer X-Delete-At')
body=b'Non-integer X-Delete-At')
if x_delete_at <= now and not utils.config_true_value(
request.headers.get('x-backend-replication', 'f')):
raise HTTPBadRequest(request=request, content_type='text/plain',
body='X-Delete-At in past')
body=b'X-Delete-At in past')
return request
@ -406,19 +407,19 @@ def check_name_format(req, name, target_type):
if not name:
raise HTTPPreconditionFailed(
request=req,
body='%s name cannot be empty' % target_type)
body=b'%s name cannot be empty' % target_type)
if isinstance(name, six.text_type):
name = name.encode('utf-8')
if b'/' in name:
raise HTTPPreconditionFailed(
request=req,
body='%s name cannot contain slashes' % target_type)
body=b'%s name cannot contain slashes' % target_type)
return name
check_account_format = functools.partial(check_name_format,
target_type='Account')
target_type=b'Account')
check_container_format = functools.partial(check_name_format,
target_type='Container')
target_type=b'Container')
def valid_api_version(version):

View File

@ -158,7 +158,7 @@ class DomainRemapMiddleware(object):
container, account = None, parts_to_parse[0]
else:
resp = HTTPBadRequest(request=Request(env),
body='Bad domain in host header',
body=b'Bad domain in host header',
content_type='text/plain')
return resp(env, start_response)
if len(self.reseller_prefixes) > 0:

View File

@ -398,7 +398,7 @@ def parse_and_validate_input(req_body, req_path):
errors = []
for seg_index, seg_dict in enumerate(parsed_data):
if not isinstance(seg_dict, dict):
errors.append("Index %d: not a JSON object" % seg_index)
errors.append(b"Index %d: not a JSON object" % seg_index)
continue
for required in SLO_KEYS:
@ -407,36 +407,36 @@ def parse_and_validate_input(req_body, req_path):
break
else:
errors.append(
"Index %d: expected keys to include one of %s"
b"Index %d: expected keys to include one of %s"
% (seg_index,
" or ".join(repr(required) for required in SLO_KEYS)))
b" or ".join(repr(required) for required in SLO_KEYS)))
continue
allowed_keys = SLO_KEYS[segment_type].union([segment_type])
extraneous_keys = [k for k in seg_dict if k not in allowed_keys]
if extraneous_keys:
errors.append(
"Index %d: extraneous keys %s"
b"Index %d: extraneous keys %s"
% (seg_index,
", ".join('"%s"' % (ek,)
for ek in sorted(extraneous_keys))))
b", ".join(json.dumps(ek).encode('ascii')
for ek in sorted(extraneous_keys))))
continue
if segment_type == 'path':
if not isinstance(seg_dict['path'], six.string_types):
errors.append("Index %d: \"path\" must be a string" %
errors.append(b"Index %d: \"path\" must be a string" %
seg_index)
continue
if not (seg_dict.get('etag') is None or
isinstance(seg_dict['etag'], six.string_types)):
errors.append('Index %d: "etag" must be a string or null '
'(if provided)' % seg_index)
errors.append(b'Index %d: "etag" must be a string or null '
b'(if provided)' % seg_index)
continue
if '/' not in seg_dict['path'].strip('/'):
errors.append(
"Index %d: path does not refer to an object. Path must "
"be of the form /container/object." % seg_index)
b"Index %d: path does not refer to an object. Path must "
b"be of the form /container/object." % seg_index)
continue
seg_size = seg_dict.get('size_bytes')
@ -445,11 +445,11 @@ def parse_and_validate_input(req_body, req_path):
seg_size = int(seg_size)
seg_dict['size_bytes'] = seg_size
except (TypeError, ValueError):
errors.append("Index %d: invalid size_bytes" % seg_index)
errors.append(b"Index %d: invalid size_bytes" % seg_index)
continue
if seg_size < 1 and seg_index != (len(parsed_data) - 1):
errors.append("Index %d: too small; each segment must be "
"at least 1 byte."
errors.append(b"Index %d: too small; each segment must be "
b"at least 1 byte."
% (seg_index,))
continue
@ -457,7 +457,7 @@ def parse_and_validate_input(req_body, req_path):
seg_dict['path'].lstrip('/')])
if req_path == quote(obj_path):
errors.append(
"Index %d: manifest must not include itself as a segment"
b"Index %d: manifest must not include itself as a segment"
% seg_index)
continue
@ -465,12 +465,12 @@ def parse_and_validate_input(req_body, req_path):
try:
seg_dict['range'] = Range('bytes=%s' % seg_dict['range'])
except ValueError:
errors.append("Index %d: invalid range" % seg_index)
errors.append(b"Index %d: invalid range" % seg_index)
continue
if len(seg_dict['range'].ranges) > 1:
errors.append("Index %d: multiple ranges "
"(only one allowed)" % seg_index)
errors.append(b"Index %d: multiple ranges "
b"(only one allowed)" % seg_index)
continue
# If the user *told* us the object's size, we can check range
@ -478,7 +478,7 @@ def parse_and_validate_input(req_body, req_path):
# fail that validation later.
if (seg_size is not None and 1 != len(
seg_dict['range'].ranges_for_length(seg_size))):
errors.append("Index %d: unsatisfiable range" % seg_index)
errors.append(b"Index %d: unsatisfiable range" % seg_index)
continue
elif segment_type == 'data':
@ -487,22 +487,22 @@ def parse_and_validate_input(req_body, req_path):
data = strict_b64decode(seg_dict['data'])
except ValueError:
errors.append(
"Index %d: data must be valid base64" % seg_index)
b"Index %d: data must be valid base64" % seg_index)
continue
if len(data) < 1:
errors.append("Index %d: too small; each segment must be "
"at least 1 byte."
errors.append(b"Index %d: too small; each segment must be "
b"at least 1 byte."
% (seg_index,))
continue
# re-encode to normalize padding
seg_dict['data'] = base64.b64encode(data)
if parsed_data and all('data' in d for d in parsed_data):
errors.append("Inline data segments require at least one "
"object-backed segment.")
errors.append(b"Inline data segments require at least one "
b"object-backed segment.")
if errors:
error_message = "".join(e + "\n" for e in errors)
error_message = b"".join(e + b"\n" for e in errors)
raise HTTPBadRequest(error_message,
headers={"Content-Type": "text/plain"})

View File

@ -174,6 +174,7 @@ To generate a curl command line from the above::
from __future__ import print_function
import json
from time import time
from traceback import format_exc
from uuid import uuid4
@ -506,16 +507,19 @@ class TempAuth(object):
# on ACLs, TempAuth is not such an auth system. At this point,
# it thinks it is authoritative.
if key not in tempauth_acl_keys:
return "Key '%s' not recognized" % key
return "Key %s not recognized" % json.dumps(
key).encode('ascii')
for key in tempauth_acl_keys:
if key not in result:
continue
if not isinstance(result[key], list):
return "Value for key '%s' must be a list" % key
return "Value for key %s must be a list" % json.dumps(
key).encode('ascii')
for grantee in result[key]:
if not isinstance(grantee, six.string_types):
return "Elements of '%s' list must be strings" % key
return "Elements of %s list must be strings" % json.dumps(
key).encode('ascii')
# Everything looks fine, no errors found
internal_hdr = get_sys_meta_prefix('account') + 'core-access-control'

View File

@ -281,16 +281,28 @@ class HeaderEnvironProxy(MutableMapping):
return keys
def wsgi_to_bytes(wsgi_str):
if six.PY2:
return wsgi_str
return wsgi_str.encode('latin1')
def wsgi_to_str(wsgi_str):
if six.PY2:
return wsgi_str
return wsgi_str.encode('latin1').decode('utf8', errors='surrogateescape')
return wsgi_to_bytes(wsgi_str).decode('utf8', errors='surrogateescape')
def bytes_to_wsgi(byte_str):
if six.PY2:
return byte_str
return byte_str.decode('latin1')
def str_to_wsgi(native_str):
if six.PY2:
return native_str
return native_str.encode('utf8', errors='surrogateescape').decode('latin1')
return bytes_to_wsgi(native_str.encode('utf8', errors='surrogateescape'))
def _resp_status_property():
@ -334,7 +346,7 @@ def _resp_body_property():
def setter(self, value):
if isinstance(value, six.text_type):
value = value.encode('utf-8')
raise TypeError('WSGI responses must be bytes')
if isinstance(value, six.binary_type):
self.content_length = len(value)
self._app_iter = None

View File

@ -29,7 +29,7 @@ class FakeApp(object):
def __call__(self, env, start_response):
self.req = Request(env)
return Response(request=self.req, body='FAKE APP',
return Response(request=self.req, body=b'FAKE APP',
headers=self.headers)(env, start_response)
@ -224,7 +224,7 @@ class TestGatekeeper(unittest.TestCase):
class SelfishApp(FakeApp):
def __call__(self, env, start_response):
self.req = Request(env)
resp = Response(request=self.req, body='FAKE APP',
resp = Response(request=self.req, body=b'FAKE APP',
headers=self.headers)
# like webob, middlewares in the pipeline may rewrite
# location header from relative to absolute

View File

@ -25,7 +25,7 @@ from swift.common.middleware import healthcheck
class FakeApp(object):
def __call__(self, env, start_response):
req = Request(env)
return Response(request=req, body='FAKE APP')(
return Response(request=req, body=b'FAKE APP')(
env, start_response)

View File

@ -1342,7 +1342,15 @@ class TestAccountAcls(unittest.TestCase):
resp = req.get_response(test_auth)
self.assertEqual(resp.status_int, 400)
self.assertTrue(resp.body.startswith(
errmsg % "Key 'other-auth-system' not recognized"), resp.body)
errmsg % 'Key "other-auth-system" not recognized'), resp.body)
# and do something sane with crazy data
update = {'x-account-access-control': u'{"\u1234": []}'.encode('utf8')}
req = self._make_request(target, headers=dict(good_headers, **update))
resp = req.get_response(test_auth)
self.assertEqual(resp.status_int, 400)
self.assertTrue(resp.body.startswith(
errmsg % 'Key "\\u1234" not recognized'), resp.body)
# acls with good keys but bad values also get a 400
update = {'x-account-access-control': bad_value_acl}
@ -1350,7 +1358,7 @@ class TestAccountAcls(unittest.TestCase):
resp = req.get_response(test_auth)
self.assertEqual(resp.status_int, 400)
self.assertTrue(resp.body.startswith(
errmsg % "Value for key 'admin' must be a list"), resp.body)
errmsg % 'Value for key "admin" must be a list'), resp.body)
# acls with non-string-types in list also get a 400
update = {'x-account-access-control': bad_list_types}
@ -1358,7 +1366,7 @@ class TestAccountAcls(unittest.TestCase):
resp = req.get_response(test_auth)
self.assertEqual(resp.status_int, 400)
self.assertTrue(resp.body.startswith(
errmsg % "Elements of 'read-only' list must be strings"),
errmsg % 'Elements of "read-only" list must be strings'),
resp.body)
# acls with wrong json structure also get a 400

View File

@ -832,7 +832,7 @@ class TestRequest(unittest.TestCase):
@swift.common.swob.wsgify
def _wsgi_func(req):
used_req.append(req)
return swift.common.swob.Response('200 OK')
return swift.common.swob.Response(b'200 OK')
req = swift.common.swob.Request.blank('/hi/there')
resp = req.get_response(_wsgi_func)
@ -1117,13 +1117,15 @@ class TestResponse(unittest.TestCase):
def test_empty_body(self):
resp = self._get_response()
resp.body = ''
resp.body = b''
self.assertEqual(resp.body, b'')
def test_unicode_body(self):
resp = self._get_response()
resp.body = u'\N{SNOWMAN}'
self.assertEqual(resp.body, u'\N{SNOWMAN}'.encode('utf-8'))
with self.assertRaises(TypeError) as catcher:
resp.body = u'\N{SNOWMAN}'
self.assertEqual(str(catcher.exception),
'WSGI responses must be bytes')
def test_call_reifies_request_if_necessary(self):
"""
@ -1388,7 +1390,7 @@ class TestResponse(unittest.TestCase):
'/', headers={'Range': 'bytes=1-3'})
resp = swift.common.swob.Response(
body='1234567890', request=req,
body=b'1234567890', request=req,
conditional_response=True)
body = b''.join(resp({}, start_response))
self.assertEqual(body, b'234')
@ -1408,7 +1410,7 @@ class TestResponse(unittest.TestCase):
self.assertEqual(resp.content_range, 'bytes */10')
resp = swift.common.swob.Response(
body='1234567890', request=req,
body=b'1234567890', request=req,
conditional_response=True)
body = b''.join(resp({}, start_response))
self.assertIn(b'The Range requested is not available', body)
@ -1426,7 +1428,7 @@ class TestResponse(unittest.TestCase):
self.assertNotIn('Content-Range', resp.headers)
resp = swift.common.swob.Response(
body='1234567890', request=req,
body=b'1234567890', request=req,
conditional_response=True)
body = b''.join(resp({}, start_response))
self.assertEqual(body, b'1234567890')
@ -1575,7 +1577,7 @@ class TestResponse(unittest.TestCase):
# body, headers with content_length and app_iter exist
resp = swift.common.swob.Response(
body='ok', headers={'Content-Length': '5'}, app_iter=iter([]))
body=b'ok', headers={'Content-Length': '5'}, app_iter=iter([]))
self.assertEqual(resp.content_length, 5)
self.assertEqual(resp.body, b'')