s3api: Fix non-ascii MPUs

Previous problems included:
  - returning wsgi strings quoted assuming UTF-8 on py3 when initiating
    or completing multipart uploads
  - trying to str() some unicode on py2 when listing parts, leading to
    UnicodeEncodeErrors

Change-Id: Ibc1d42c8deffe41c557350a574ae80751e9bd565
Tim Burke 2022-01-25 11:35:42 -08:00
parent 8ef530d795
commit 5f25e1cc77
4 changed files with 38 additions and 13 deletions
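
The core of both problems is the gap between WSGI strings and real text: on py3 a WSGI path component is a str whose code points are UTF-8 bytes decoded as Latin-1, while on py2 it is already a UTF-8 byte string. The snippet below is a minimal sketch that mirrors the behaviour of swob's wsgi_to_str, to show why emitting req.object_name without conversion mangles a non-ASCII key; it is illustrative only, not the Swift implementation.

    # -*- coding: utf-8 -*-
    # Illustrative sketch; mirrors the semantics of
    # swift.common.swob.wsgi_to_str without importing Swift.
    import six

    def wsgi_to_str(wsgi_str):
        if six.PY2:
            # py2 WSGI strings are already UTF-8-encoded byte strings
            return wsgi_str
        # py3 WSGI strings hold UTF-8 bytes that were decoded as Latin-1;
        # undo that to recover the real text
        return wsgi_str.encode('latin-1').decode('utf-8', 'surrogateescape')

    if not six.PY2:
        # 'obj1☃' as it would appear in req.object_name on py3
        wsgi_key = b'obj1\xe2\x98\x83'.decode('latin-1')
        print(repr(wsgi_key))               # mojibake if written to XML as-is
        print(repr(wsgi_to_str(wsgi_key)))  # 'obj1☃', the real key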

@@ -144,7 +144,7 @@ def _make_complete_body(req, s3_etag, yielded_anything):
     SubElement(result_elem, 'Location').text = host_url + req.path
     SubElement(result_elem, 'Bucket').text = req.container_name
-    SubElement(result_elem, 'Key').text = req.object_name
+    SubElement(result_elem, 'Key').text = wsgi_to_str(req.object_name)
     SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
     body = tostring(result_elem, xml_declaration=not yielded_anything)
     if yielded_anything:
@@ -452,7 +452,7 @@ class UploadsController(Controller):
         result_elem = Element('InitiateMultipartUploadResult')
         SubElement(result_elem, 'Bucket').text = req.container_name
-        SubElement(result_elem, 'Key').text = req.object_name
+        SubElement(result_elem, 'Key').text = wsgi_to_str(req.object_name)
         SubElement(result_elem, 'UploadId').text = upload_id
         body = tostring(result_elem)
@@ -498,9 +498,10 @@ class UploadController(Controller):
         part_num_marker = req.get_validated_param(
             'part-number-marker', 0)
+        object_name = wsgi_to_str(req.object_name)
         query = {
             'format': 'json',
-            'prefix': '%s/%s/' % (req.object_name, upload_id),
+            'prefix': '%s/%s/' % (object_name, upload_id),
             'delimiter': '/',
             'marker': '',
         }
@@ -516,7 +517,10 @@ class UploadController(Controller):
             if not new_objects:
                 break
             objects.extend(new_objects)
-            query['marker'] = new_objects[-1]['name']
+            if six.PY2:
+                query['marker'] = new_objects[-1]['name'].encode('utf-8')
+            else:
+                query['marker'] = new_objects[-1]['name']
         last_part = 0
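
Why the py2-only encode above matters: the segment listing is JSON, so names come back as unicode on both Pythons, but on py2 the marker eventually gets str()'d while the follow-up listing request is built, which is the UnicodeEncodeError the commit message describes. A self-contained illustration (the part name and upload id are made up):

    # -*- coding: utf-8 -*-
    # Not Swift code: shows the py2 failure mode and the fix.
    import json
    import six

    listing = json.loads(u'[{"name": "obj1\\u2603/X/1"}]')
    marker = listing[-1]['name']         # unicode on py2 and py3
    if six.PY2:
        try:
            str(marker)                  # ascii codec -> UnicodeEncodeError
        except UnicodeEncodeError:
            pass
        marker = marker.encode('utf-8')  # what the diff does instead
    print(repr(marker))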
@@ -542,10 +546,9 @@ class UploadController(Controller):
         result_elem = Element('ListPartsResult')
         SubElement(result_elem, 'Bucket').text = req.container_name
-        name = req.object_name
         if encoding_type == 'url':
-            name = quote(name)
-        SubElement(result_elem, 'Key').text = name
+            object_name = quote(object_name)
+        SubElement(result_elem, 'Key').text = object_name
         SubElement(result_elem, 'UploadId').text = upload_id
         initiator_elem = SubElement(result_elem, 'Initiator')
@@ -613,7 +616,10 @@
                 container = req.container_name + MULTIUPLOAD_SUFFIX
                 obj = bytes_to_wsgi(o['name'].encode('utf-8'))
                 req.get_response(self.app, container=container, obj=obj)
-            query['marker'] = objects[-1]['name']
+            if six.PY2:
+                query['marker'] = objects[-1]['name'].encode('utf-8')
+            else:
+                query['marker'] = objects[-1]['name']
             resp = req.get_response(self.app, 'GET', container, '',
                                     query=query)
             objects = json.loads(resp.body)

@@ -626,6 +626,7 @@ class S3Request(swob.Request):
         return None
     def _parse_uri(self):
+        # NB: returns WSGI strings
         if not check_utf8(swob.wsgi_to_str(self.environ['PATH_INFO'])):
             raise InvalidURI(self.path)

@@ -98,6 +98,13 @@ class Connection(object):
                 upload.cancel_upload()
             for obj in bucket.list_versions():
+                if six.PY2:
+                    if not isinstance(obj.name, bytes):
+                        obj.name = obj.name.encode('utf-8')
+                    if obj.version_id is not None and \
+                            not isinstance(obj.version_id, bytes):
+                        obj.version_id = \
+                            obj.version_id.encode('utf-8')
                 bucket.delete_key(
                     obj.name, version_id=obj.version_id)
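
The test-client change follows the same pattern: on py2, boto 2 hands back unicode key names and version ids, and building the cleanup DELETE from them can hit the same ascii-codec errors, so they are coerced to UTF-8 bytes first. A hypothetical helper capturing the idea (not part of the Swift test client):

    # -*- coding: utf-8 -*-
    import six

    def to_py2_native(value):
        """Coerce text to a py2-native (byte) string; leave None alone."""
        if six.PY2 and value is not None and not isinstance(value, bytes):
            return value.encode('utf-8')
        return value

    # e.g. before calling bucket.delete_key(name, version_id=version_id)
    name = to_py2_native(u'obj1\N{SNOWMAN}')
    version_id = to_py2_native(None)    # version_id may legitimately be None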

@@ -23,6 +23,7 @@ import boto
 # pylint: disable-msg=E0611,F0401
 from distutils.version import StrictVersion
+import six
 from six.moves import zip, zip_longest
 import test.functional as tf
@@ -111,7 +112,7 @@ class TestS3ApiMultiUpload(S3ApiBase):
     def test_object_multi_upload(self):
         bucket = 'bucket'
-        keys = ['obj1', 'obj2', 'obj3']
+        keys = [u'obj1\N{SNOWMAN}', 'obj2', 'obj3']
         bad_content_md5 = base64.b64encode(b'a' * 16).strip().decode('ascii')
         headers = [{'Content-Type': 'foo/bar', 'x-amz-meta-baz': 'quux'},
                    {'Content-MD5': bad_content_md5},
@@ -133,6 +134,8 @@ class TestS3ApiMultiUpload(S3ApiBase):
             elem = fromstring(body, 'InitiateMultipartUploadResult')
             self.assertEqual(elem.find('Bucket').text, bucket)
             key = elem.find('Key').text
+            if six.PY2:
+                expected_key = expected_key.encode('utf-8')
             self.assertEqual(expected_key, key)
             upload_id = elem.find('UploadId').text
             self.assertIsNotNone(upload_id)
@@ -200,7 +203,9 @@ class TestS3ApiMultiUpload(S3ApiBase):
         # prepare src obj
         self.conn.make_request('PUT', src_bucket)
-        self.conn.make_request('PUT', src_bucket, src_obj, body=src_content)
+        with self.quiet_boto_logging():
+            self.conn.make_request('PUT', src_bucket, src_obj,
+                                   body=src_content)
         _, headers, _ = self.conn.make_request('HEAD', src_bucket, src_obj)
         self.assertCommonResponseHeaders(headers)
@@ -305,7 +310,8 @@ class TestS3ApiMultiUpload(S3ApiBase):
         self.assertTrue(lines[0].endswith(b'?>'), body)
         elem = fromstring(body, 'CompleteMultipartUploadResult')
         self.assertEqual(
-            '%s/bucket/obj1' % tf.config['s3_storage_url'].rstrip('/'),
+            '%s/bucket/obj1%%E2%%98%%83' %
+            tf.config['s3_storage_url'].rstrip('/'),
             elem.find('Location').text)
         self.assertEqual(elem.find('Bucket').text, bucket)
         self.assertEqual(elem.find('Key').text, key)
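
The doubled percent signs in the expected Location are just %-formatting escapes; after interpolation the test expects the UTF-8, URL-encoded form of the snowman key. A quick illustrative check (the storage URL is a made-up example):

    # -*- coding: utf-8 -*-
    from six.moves.urllib.parse import quote

    key = u'obj1\N{SNOWMAN}'
    print(quote(key.encode('utf-8')))   # obj1%E2%98%83

    storage_url = 'http://saio:8080/v1/AUTH_test'   # made-up example
    expected = '%s/bucket/obj1%%E2%%98%%83' % storage_url.rstrip('/')
    print(expected)   # http://saio:8080/v1/AUTH_test/bucket/obj1%E2%98%83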
@@ -346,7 +352,8 @@ class TestS3ApiMultiUpload(S3ApiBase):
         self.assertTrue(lines[0].endswith(b'?>'), body)
         elem = fromstring(body, 'CompleteMultipartUploadResult')
         self.assertEqual(
-            '%s/bucket/obj1' % tf.config['s3_storage_url'].rstrip('/'),
+            '%s/bucket/obj1%%E2%%98%%83' %
+            tf.config['s3_storage_url'].rstrip('/'),
             elem.find('Location').text)
         self.assertEqual(elem.find('Bucket').text, bucket)
         self.assertEqual(elem.find('Key').text, key)
@@ -448,7 +455,11 @@ class TestS3ApiMultiUpload(S3ApiBase):
         resp_objects = list(elem.findall('./Contents'))
         self.assertEqual(len(resp_objects), 1)
         o = resp_objects[0]
-        self.assertEqual(o.find('Key').text, keys[0])
+        if six.PY2:
+            expected_key = keys[0].encode('utf-8')
+        else:
+            expected_key = keys[0]
+        self.assertEqual(o.find('Key').text, expected_key)
         self.assertIsNotNone(o.find('LastModified').text)
         self.assertRegexpMatches(
             o.find('LastModified').text,