swift/test/functional/s3api/test_multi_upload.py

# Copyright (c) 2015 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import binascii
import unittest
import six
from six.moves import urllib, zip, zip_longest
import test.functional as tf
from swift.common.middleware.s3api.etree import fromstring, tostring, \
Element, SubElement
from swift.common.middleware.s3api.utils import MULTIUPLOAD_SUFFIX, mktime, \
S3Timestamp
from swift.common.utils import md5
from test.functional.s3api import S3ApiBase, SigV4Mixin, \
skip_boto2_sort_header_bug
from test.functional.s3api.s3_test_client import Connection
from test.functional.s3api.utils import get_error_code, get_error_msg, \
calculate_md5
from test.functional.swift_test_client import Connection as SwiftConnection
def setUpModule():
tf.setup_package()
def tearDownModule():
tf.teardown_package()
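# Functional tests for the S3 multipart upload (MPU) API: initiate,
# upload part, upload part copy, list uploads/parts, complete and abort.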
class TestS3ApiMultiUpload(S3ApiBase):
def setUp(self):
super(TestS3ApiMultiUpload, self).setUp()
if not tf.cluster_info['s3api'].get('allow_multipart_uploads', False):
self.skipTest('multipart upload is not enabled')
self.min_segment_size = int(tf.cluster_info['s3api'].get(
'min_segment_size', 5242880))
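# Build a CompleteMultipartUpload XML body from a list of part ETags;
# part numbers are 1-based and spaced by `step` (step=2 gives 1, 3, 5...).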
def _gen_comp_xml(self, etags, step=1):
elem = Element('CompleteMultipartUpload')
for i, etag in enumerate(etags):
elem_part = SubElement(elem, 'Part')
SubElement(elem_part, 'PartNumber').text = str(i * step + 1)
SubElement(elem_part, 'ETag').text = etag
return tostring(elem)
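# Initiate one multipart upload per key (POST ?uploads), optionally with
# per-key headers, yielding each (status, headers, body) response.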
def _initiate_multi_uploads_result_generator(self, bucket, keys,
headers=None, trials=1):
if headers is None:
headers = [None] * len(keys)
self.conn.make_request('PUT', bucket)
query = 'uploads'
for key, key_headers in zip_longest(keys, headers):
for i in range(trials):
status, resp_headers, body = \
self.conn.make_request('POST', bucket, key,
headers=key_headers, query=query)
yield status, resp_headers, body
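# Upload a single part; the body defaults to min_segment_size bytes of 'a'.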
def _upload_part(self, bucket, key, upload_id, content=None, part_num=1):
query = 'partNumber=%s&uploadId=%s' % (part_num, upload_id)
content = content if content else b'a' * self.min_segment_size
with self.quiet_boto_logging():
status, headers, body = self.conn.make_request(
'PUT', bucket, key, body=content, query=query)
return status, headers, body
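# Upload a part by server-side copy (X-Amz-Copy-Source), optionally
# restricted to a byte range or a specific source version; returns the
# ETag parsed from the CopyPartResult body.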
def _upload_part_copy(self, src_bucket, src_obj, dst_bucket, dst_key,
upload_id, part_num=1, src_range=None,
src_version_id=None):
src_path = '%s/%s' % (src_bucket, src_obj)
if src_version_id:
src_path += '?versionId=%s' % src_version_id
query = 'partNumber=%s&uploadId=%s' % (part_num, upload_id)
req_headers = {'X-Amz-Copy-Source': src_path}
if src_range:
req_headers['X-Amz-Copy-Source-Range'] = src_range
status, headers, body = \
self.conn.make_request('PUT', dst_bucket, dst_key,
headers=req_headers,
query=query)
elem = fromstring(body, 'CopyPartResult')
etag = elem.find('ETag').text.strip('"')
return status, headers, body, etag
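# Finish an upload by POSTing the CompleteMultipartUpload XML.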
def _complete_multi_upload(self, bucket, key, upload_id, xml):
query = 'uploadId=%s' % upload_id
status, headers, body = \
self.conn.make_request('POST', bucket, key, body=xml,
query=query)
return status, headers, body
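# End-to-end MPU flow: initiate uploads (including non-ASCII keys and
# assorted request headers), list uploads, upload and copy parts, list
# parts, complete, then verify the object, conditionals and listings.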
def test_object_multi_upload(self):
bucket = 'bucket'
keys = [u'obj1\N{SNOWMAN}', u'obj2\N{SNOWMAN}', 'obj3']
bad_content_md5 = base64.b64encode(b'a' * 16).strip().decode('ascii')
headers = [{'Content-Type': 'foo/bar', 'x-amz-meta-baz': 'quux',
'Content-Encoding': 'gzip', 'Content-Language': 'en-US',
'Expires': 'Thu, 01 Dec 1994 16:00:00 GMT',
'Cache-Control': 'no-cache',
'Content-Disposition': 'attachment'},
{'Content-MD5': bad_content_md5},
{'Etag': 'nonsense'}]
uploads = []
results_generator = self._initiate_multi_uploads_result_generator(
bucket, keys, headers=headers)
# Initiate Multipart Upload
for expected_key, (status, headers, body) in \
zip(keys, results_generator):
self.assertEqual(status, 200, body)
self.assertCommonResponseHeaders(headers)
self.assertIn('content-type', headers)
self.assertEqual(headers['content-type'], 'application/xml')
self.assertIn('content-length', headers)
self.assertEqual(headers['content-length'], str(len(body)))
elem = fromstring(body, 'InitiateMultipartUploadResult')
self.assertEqual(elem.find('Bucket').text, bucket)
key = elem.find('Key').text
if six.PY2:
expected_key = expected_key.encode('utf-8')
self.assertEqual(expected_key, key)
upload_id = elem.find('UploadId').text
self.assertIsNotNone(upload_id)
self.assertNotIn((key, upload_id), uploads)
uploads.append((key, upload_id))
self.assertEqual(len(uploads), len(keys)) # sanity
# List Multipart Uploads
expected_uploads_list = [uploads]
for upload in uploads:
expected_uploads_list.append([upload])
for expected_uploads in expected_uploads_list:
query = 'uploads'
if len(expected_uploads) == 1:
query += '&' + urllib.parse.urlencode(
{'prefix': expected_uploads[0][0]})
status, headers, body = \
self.conn.make_request('GET', bucket, query=query)
self.assertEqual(status, 200)
self.assertCommonResponseHeaders(headers)
self.assertTrue('content-type' in headers)
self.assertEqual(headers['content-type'], 'application/xml')
self.assertTrue('content-length' in headers)
self.assertEqual(headers['content-length'], str(len(body)))
elem = fromstring(body, 'ListMultipartUploadsResult')
self.assertEqual(elem.find('Bucket').text, bucket)
self.assertIsNone(elem.find('KeyMarker').text)
if len(expected_uploads) > 1:
self.assertEqual(elem.find('NextKeyMarker').text,
expected_uploads[-1][0])
else:
self.assertIsNone(elem.find('NextKeyMarker').text)
self.assertIsNone(elem.find('UploadIdMarker').text)
if len(expected_uploads) > 1:
self.assertEqual(elem.find('NextUploadIdMarker').text,
expected_uploads[-1][1])
else:
self.assertIsNone(elem.find('NextUploadIdMarker').text)
self.assertEqual(elem.find('MaxUploads').text, '1000')
self.assertTrue(elem.find('EncodingType') is None)
self.assertEqual(elem.find('IsTruncated').text, 'false')
self.assertEqual(len(elem.findall('Upload')),
len(expected_uploads))
for (expected_key, expected_upload_id), u in \
zip(expected_uploads, elem.findall('Upload')):
key = u.find('Key').text
upload_id = u.find('UploadId').text
self.assertEqual(expected_key, key)
self.assertEqual(expected_upload_id, upload_id)
self.assertEqual(u.find('Initiator/ID').text,
self.conn.user_id)
self.assertEqual(u.find('Initiator/DisplayName').text,
self.conn.user_id)
self.assertEqual(u.find('Owner/ID').text, self.conn.user_id)
self.assertEqual(u.find('Owner/DisplayName').text,
self.conn.user_id)
self.assertEqual(u.find('StorageClass').text, 'STANDARD')
self.assertTrue(u.find('Initiated').text is not None)
# Upload Part
key, upload_id = uploads[0]
content = b'a' * self.min_segment_size
etag = md5(content, usedforsecurity=False).hexdigest()
status, headers, body = \
self._upload_part(bucket, key, upload_id, content)
self.assertEqual(status, 200)
self.assertCommonResponseHeaders(headers, etag)
self.assertTrue('content-type' in headers)
self.assertEqual(headers['content-type'], 'text/html; charset=UTF-8')
self.assertTrue('content-length' in headers)
self.assertEqual(headers['content-length'], '0')
expected_parts_list = [(headers['etag'],
mktime(headers['last-modified']))]
# Upload Part Copy
key, upload_id = uploads[1]
src_bucket = 'bucket2'
src_obj = 'obj3'
src_content = b'b' * self.min_segment_size
etag = md5(src_content, usedforsecurity=False).hexdigest()
# prepare src obj
self.conn.make_request('PUT', src_bucket)
with self.quiet_boto_logging():
self.conn.make_request('PUT', src_bucket, src_obj,
body=src_content)
_, headers, _ = self.conn.make_request('HEAD', src_bucket, src_obj)
self.assertCommonResponseHeaders(headers)
status, headers, body, resp_etag = \
self._upload_part_copy(src_bucket, src_obj, bucket,
key, upload_id)
self.assertEqual(status, 200)
self.assertCommonResponseHeaders(headers)
self.assertTrue('content-type' in headers)
self.assertEqual(headers['content-type'], 'application/xml')
self.assertTrue('content-length' in headers)
self.assertEqual(headers['content-length'], str(len(body)))
self.assertTrue('etag' not in headers)
elem = fromstring(body, 'CopyPartResult')
copy_resp_last_modified = elem.find('LastModified').text
self.assertIsNotNone(copy_resp_last_modified)
self.assertEqual(resp_etag, etag)
# Check last-modified timestamp
key, upload_id = uploads[1]
query = 'uploadId=%s' % upload_id
status, headers, body = \
self.conn.make_request('GET', bucket, key, query=query)
self.assertEqual(200, status)
elem = fromstring(body, 'ListPartsResult')
listing_last_modified = [p.find('LastModified').text
for p in elem.iterfind('Part')]
# There should be *exactly* one part in the result
self.assertEqual(listing_last_modified, [copy_resp_last_modified])
# List Parts
key, upload_id = uploads[0]
query = 'uploadId=%s' % upload_id
status, headers, body = \
self.conn.make_request('GET', bucket, key, query=query)
self.assertEqual(status, 200)
self.assertCommonResponseHeaders(headers)
self.assertTrue('content-type' in headers)
self.assertEqual(headers['content-type'], 'application/xml')
self.assertTrue('content-length' in headers)
self.assertEqual(headers['content-length'], str(len(body)))
elem = fromstring(body, 'ListPartsResult')
self.assertEqual(elem.find('Bucket').text, bucket)
self.assertEqual(elem.find('Key').text, key)
self.assertEqual(elem.find('UploadId').text, upload_id)
self.assertEqual(elem.find('Initiator/ID').text, self.conn.user_id)
self.assertEqual(elem.find('Initiator/DisplayName').text,
self.conn.user_id)
self.assertEqual(elem.find('Owner/ID').text, self.conn.user_id)
self.assertEqual(elem.find('Owner/DisplayName').text,
self.conn.user_id)
self.assertEqual(elem.find('StorageClass').text, 'STANDARD')
self.assertEqual(elem.find('PartNumberMarker').text, '0')
self.assertEqual(elem.find('NextPartNumberMarker').text, '1')
self.assertEqual(elem.find('MaxParts').text, '1000')
self.assertEqual(elem.find('IsTruncated').text, 'false')
self.assertEqual(len(elem.findall('Part')), 1)
# etags will be used to generate xml for Complete Multipart Upload
etags = []
for (expected_etag, expected_date), p in \
zip(expected_parts_list, elem.findall('Part')):
last_modified = p.find('LastModified').text
self.assertIsNotNone(last_modified)
last_modified_from_xml = S3Timestamp.from_s3xmlformat(
last_modified)
self.assertEqual(expected_date, float(last_modified_from_xml))
self.assertEqual(expected_etag, p.find('ETag').text)
self.assertEqual(self.min_segment_size, int(p.find('Size').text))
etags.append(p.find('ETag').text)
# Complete Multipart Upload
key, upload_id = uploads[0]
xml = self._gen_comp_xml(etags)
status, headers, body = \
self._complete_multi_upload(bucket, key, upload_id, xml)
self.assertEqual(status, 200)
self.assertCommonResponseHeaders(headers)
self.assertIn('content-type', headers)
self.assertEqual(headers['content-type'], 'application/xml')
if 'content-length' in headers:
self.assertEqual(headers['content-length'], str(len(body)))
else:
self.assertIn('transfer-encoding', headers)
self.assertEqual(headers['transfer-encoding'], 'chunked')
lines = body.split(b'\n')
self.assertTrue(lines[0].startswith(b'<?xml'), body)
self.assertTrue(lines[0].endswith(b'?>'), body)
elem = fromstring(body, 'CompleteMultipartUploadResult')
self.assertEqual(
'%s/bucket/obj1%%E2%%98%%83' %
tf.config['s3_storage_url'].rstrip('/'),
elem.find('Location').text)
self.assertEqual(elem.find('Bucket').text, bucket)
self.assertEqual(elem.find('Key').text, key)
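# AWS-style multipart ETag: hex MD5 over the concatenated *binary* part
# digests, suffixed with '-<part count>'.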
concatted_etags = b''.join(
etag.strip('"').encode('ascii') for etag in etags)
exp_etag = '"%s-%s"' % (
md5(binascii.unhexlify(concatted_etags),
usedforsecurity=False).hexdigest(), len(etags))
etag = elem.find('ETag').text
self.assertEqual(etag, exp_etag)
exp_size = self.min_segment_size * len(etags)
status, headers, body = \
self.conn.make_request('HEAD', bucket, key)
self.assertEqual(status, 200)
self.assertEqual(headers['content-length'], str(exp_size))
self.assertEqual(headers['content-type'], 'foo/bar')
self.assertEqual(headers['content-encoding'], 'gzip')
self.assertEqual(headers['content-language'], 'en-US')
self.assertEqual(headers['content-disposition'], 'attachment')
self.assertEqual(headers['expires'], 'Thu, 01 Dec 1994 16:00:00 GMT')
self.assertEqual(headers['cache-control'], 'no-cache')
self.assertEqual(headers['x-amz-meta-baz'], 'quux')
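# Swift SLO-style ETag: MD5 over the concatenated *hex* part digests.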
swift_etag = '"%s"' % md5(
concatted_etags, usedforsecurity=False).hexdigest()
# TODO: GET via swift api, check against swift_etag
# Should be safe to retry
status, headers, body = \
self._complete_multi_upload(bucket, key, upload_id, xml)
self.assertEqual(status, 200)
self.assertCommonResponseHeaders(headers)
self.assertIn('content-type', headers)
self.assertEqual(headers['content-type'], 'application/xml')
if 'content-length' in headers:
self.assertEqual(headers['content-length'], str(len(body)))
else:
self.assertIn('transfer-encoding', headers)
self.assertEqual(headers['transfer-encoding'], 'chunked')
lines = body.split(b'\n')
self.assertTrue(lines[0].startswith(b'<?xml'), body)
self.assertTrue(lines[0].endswith(b'?>'), body)
elem = fromstring(body, 'CompleteMultipartUploadResult')
self.assertEqual(
'%s/bucket/obj1%%E2%%98%%83' %
tf.config['s3_storage_url'].rstrip('/'),
elem.find('Location').text)
self.assertEqual(elem.find('Bucket').text, bucket)
self.assertEqual(elem.find('Key').text, key)
self.assertEqual(elem.find('ETag').text, exp_etag)
status, headers, body = \
self.conn.make_request('HEAD', bucket, key)
self.assertEqual(status, 200)
self.assertEqual(headers['content-length'], str(exp_size))
self.assertEqual(headers['content-type'], 'foo/bar')
self.assertEqual(headers['x-amz-meta-baz'], 'quux')
# Upload Part Copy -- MU as source
key, upload_id = uploads[1]
status, headers, body, resp_etag = \
self._upload_part_copy(bucket, keys[0], bucket,
key, upload_id, part_num=2)
self.assertEqual(status, 200)
self.assertCommonResponseHeaders(headers)
self.assertIn('content-type', headers)
self.assertEqual(headers['content-type'], 'application/xml')
self.assertIn('content-length', headers)
self.assertEqual(headers['content-length'], str(len(body)))
self.assertNotIn('etag', headers)
elem = fromstring(body, 'CopyPartResult')
last_modified = elem.find('LastModified').text
self.assertIsNotNone(last_modified)
exp_content = b'a' * self.min_segment_size
etag = md5(exp_content, usedforsecurity=False).hexdigest()
self.assertEqual(resp_etag, etag)
# Also check that the etag is correct in part listings
query = 'uploadId=%s' % upload_id
status, headers, body = \
self.conn.make_request('GET', bucket, key, query=query)
self.assertEqual(status, 200)
self.assertCommonResponseHeaders(headers)
self.assertTrue('content-type' in headers)
self.assertEqual(headers['content-type'], 'application/xml')
self.assertTrue('content-length' in headers)
self.assertEqual(headers['content-length'], str(len(body)))
elem = fromstring(body, 'ListPartsResult')
self.assertEqual(len(elem.findall('Part')), 2)
self.assertEqual(elem.findall('Part')[1].find('PartNumber').text, '2')
self.assertEqual(elem.findall('Part')[1].find('ETag').text,
'"%s"' % etag)
# Abort Multipart Uploads
# note that uploads[1] has part data while uploads[2] does not
sw_conn = SwiftConnection(tf.config)
sw_conn.authenticate()
for key, upload_id in uploads[1:]:
query = 'uploadId=%s' % upload_id
status, headers, body = \
self.conn.make_request('DELETE', bucket, key, query=query)
self.assertEqual(status, 204)
self.assertCommonResponseHeaders(headers)
self.assertTrue('content-type' in headers)
self.assertEqual(headers['content-type'],
'text/html; charset=UTF-8')
self.assertTrue('content-length' in headers)
self.assertEqual(headers['content-length'], '0')
# Check if all parts have been deleted
segments = sw_conn.get_account().container(
bucket + MULTIUPLOAD_SUFFIX).files(
parms={'prefix': '%s/%s' % (key, upload_id)})
self.assertFalse(segments)
# Check object
def check_obj(req_headers, exp_status):
status, headers, body = \
self.conn.make_request('HEAD', bucket, keys[0], req_headers)
self.assertEqual(status, exp_status)
self.assertCommonResponseHeaders(headers)
self.assertIn('content-length', headers)
if exp_status == 412:
self.assertNotIn('etag', headers)
self.assertEqual(headers['content-length'], '0')
else:
self.assertIn('etag', headers)
self.assertEqual(headers['etag'], exp_etag)
if exp_status == 304:
self.assertEqual(headers['content-length'], '0')
else:
self.assertEqual(headers['content-length'], str(exp_size))
check_obj({}, 200)
# Sanity check conditionals
check_obj({'If-Match': 'some other thing'}, 412)
check_obj({'If-None-Match': 'some other thing'}, 200)
# More interesting conditional cases
check_obj({'If-Match': exp_etag}, 200)
check_obj({'If-Match': swift_etag}, 412)
check_obj({'If-None-Match': swift_etag}, 200)
check_obj({'If-None-Match': exp_etag}, 304)
# Check listings
status, headers, body = self.conn.make_request('GET', bucket)
self.assertEqual(status, 200)
elem = fromstring(body, 'ListBucketResult')
resp_objects = list(elem.findall('./Contents'))
self.assertEqual(len(resp_objects), 1)
o = resp_objects[0]
if six.PY2:
expected_key = keys[0].encode('utf-8')
else:
expected_key = keys[0]
self.assertEqual(o.find('Key').text, expected_key)
self.assertIsNotNone(o.find('LastModified').text)
self.assertRegex(
o.find('LastModified').text,
r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.000Z$')
self.assertEqual(o.find('ETag').text, exp_etag)
self.assertEqual(o.find('Size').text, str(exp_size))
self.assertIsNotNone(o.find('StorageClass').text)
self.assertEqual(o.find('Owner/ID').text, self.conn.user_id)
self.assertEqual(o.find('Owner/DisplayName').text,
self.conn.user_id)
def test_initiate_multi_upload_error(self):
bucket = 'bucket'
key = 'obj'
self.conn.make_request('PUT', bucket)
query = 'uploads'
auth_error_conn = Connection(tf.config['s3_access_key'], 'invalid')
status, headers, body = \
auth_error_conn.make_request('POST', bucket, key, query=query)
self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch')
status, resp_headers, body = \
self.conn.make_request('POST', 'nothing', key, query=query)
self.assertEqual(get_error_code(body), 'NoSuchBucket')
status, resp_headers, body = self.conn.make_request(
'POST', bucket,
'x' * (tf.cluster_info['swift']['max_object_name_length'] + 1),
query=query)
self.assertEqual(get_error_code(body), 'KeyTooLongError')
def test_list_multi_uploads_error(self):
bucket = 'bucket'
self.conn.make_request('PUT', bucket)
query = 'uploads'
auth_error_conn = Connection(tf.config['s3_access_key'], 'invalid')
status, headers, body = \
auth_error_conn.make_request('GET', bucket, query=query)
self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch')
status, headers, body = \
self.conn.make_request('GET', 'nothing', query=query)
self.assertEqual(get_error_code(body), 'NoSuchBucket')
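# Error cases for Upload Part: bad signature, missing bucket, unknown
# upload id and an out-of-range part number.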
def test_upload_part_error(self):
bucket = 'bucket'
self.conn.make_request('PUT', bucket)
query = 'uploads'
key = 'obj'
status, headers, body = \
self.conn.make_request('POST', bucket, key, query=query)
elem = fromstring(body, 'InitiateMultipartUploadResult')
upload_id = elem.find('UploadId').text
query = 'partNumber=%s&uploadId=%s' % (1, upload_id)
auth_error_conn = Connection(tf.config['s3_access_key'], 'invalid')
status, headers, body = \
auth_error_conn.make_request('PUT', bucket, key, query=query)
self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch')
status, headers, body = \
self.conn.make_request('PUT', 'nothing', key, query=query)
self.assertEqual(get_error_code(body), 'NoSuchBucket')
query = 'partNumber=%s&uploadId=%s' % (1, 'nothing')
status, headers, body = \
self.conn.make_request('PUT', bucket, key, query=query)
self.assertEqual(get_error_code(body), 'NoSuchUpload')
query = 'partNumber=%s&uploadId=%s' % (0, upload_id)
status, headers, body = \
self.conn.make_request('PUT', bucket, key, query=query)
self.assertEqual(get_error_code(body), 'InvalidArgument')
err_msg = 'Part number must be an integer between 1 and'
self.assertTrue(err_msg in get_error_msg(body))
def test_upload_part_copy_error(self):
src_bucket = 'src'
src_obj = 'src'
self.conn.make_request('PUT', src_bucket)
self.conn.make_request('PUT', src_bucket, src_obj)
src_path = '%s/%s' % (src_bucket, src_obj)
bucket = 'bucket'
self.conn.make_request('PUT', bucket)
key = 'obj'
query = 'uploads'
status, headers, body = \
self.conn.make_request('POST', bucket, key, query=query)
elem = fromstring(body, 'InitiateMultipartUploadResult')
upload_id = elem.find('UploadId').text
query = 'partNumber=%s&uploadId=%s' % (1, upload_id)
auth_error_conn = Connection(tf.config['s3_access_key'], 'invalid')
status, headers, body = \
auth_error_conn.make_request('PUT', bucket, key,
headers={
'X-Amz-Copy-Source': src_path
},
query=query)
self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch')
status, headers, body = \
self.conn.make_request('PUT', 'nothing', key,
headers={'X-Amz-Copy-Source': src_path},
query=query)
self.assertEqual(get_error_code(body), 'NoSuchBucket')
query = 'partNumber=%s&uploadId=%s' % (1, 'nothing')
status, headers, body = \
self.conn.make_request('PUT', bucket, key,
headers={'X-Amz-Copy-Source': src_path},
query=query)
self.assertEqual(get_error_code(body), 'NoSuchUpload')
src_path = '%s/%s' % (src_bucket, 'nothing')
query = 'partNumber=%s&uploadId=%s' % (1, upload_id)
status, headers, body = \
self.conn.make_request('PUT', bucket, key,
headers={'X-Amz-Copy-Source': src_path},
query=query)
self.assertEqual(get_error_code(body), 'NoSuchKey')
def test_list_parts_error(self):
bucket = 'bucket'
self.conn.make_request('PUT', bucket)
key = 'obj'
query = 'uploads'
status, headers, body = \
self.conn.make_request('POST', bucket, key, query=query)
elem = fromstring(body, 'InitiateMultipartUploadResult')
upload_id = elem.find('UploadId').text
query = 'uploadId=%s' % upload_id
auth_error_conn = Connection(tf.config['s3_access_key'], 'invalid')
status, headers, body = \
auth_error_conn.make_request('GET', bucket, key, query=query)
self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch')
status, headers, body = \
self.conn.make_request('GET', 'nothing', key, query=query)
self.assertEqual(get_error_code(body), 'NoSuchBucket')
query = 'uploadId=%s' % 'nothing'
status, headers, body = \
self.conn.make_request('GET', bucket, key, query=query)
self.assertEqual(get_error_code(body), 'NoSuchUpload')
def test_abort_multi_upload_error(self):
bucket = 'bucket'
self.conn.make_request('PUT', bucket)
key = 'obj'
query = 'uploads'
status, headers, body = \
self.conn.make_request('POST', bucket, key, query=query)
elem = fromstring(body, 'InitiateMultipartUploadResult')
upload_id = elem.find('UploadId').text
self._upload_part(bucket, key, upload_id)
query = 'uploadId=%s' % upload_id
auth_error_conn = Connection(tf.config['s3_access_key'], 'invalid')
status, headers, body = \
auth_error_conn.make_request('DELETE', bucket, key, query=query)
self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch')
status, headers, body = \
self.conn.make_request('DELETE', 'nothing', key, query=query)
self.assertEqual(get_error_code(body), 'NoSuchBucket')
status, headers, body = \
self.conn.make_request('DELETE', bucket, 'nothing', query=query)
self.assertEqual(get_error_code(body), 'NoSuchUpload')
query = 'uploadId=%s' % 'nothing'
status, headers, body = \
self.conn.make_request('DELETE', bucket, key, query=query)
self.assertEqual(get_error_code(body), 'NoSuchUpload')
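# Error cases for Complete Multipart Upload: undersized part, bad
# signature, missing bucket or upload id, malformed XML and invalid ETags.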
def test_complete_multi_upload_error(self):
bucket = 'bucket'
keys = ['obj', 'obj2']
self.conn.make_request('PUT', bucket)
query = 'uploads'
status, headers, body = \
self.conn.make_request('POST', bucket, keys[0], query=query)
elem = fromstring(body, 'InitiateMultipartUploadResult')
upload_id = elem.find('UploadId').text
etags = []
for i in range(1, 3):
query = 'partNumber=%s&uploadId=%s' % (i, upload_id)
status, headers, body = \
self.conn.make_request('PUT', bucket, keys[0], query=query)
etags.append(headers['etag'])
xml = self._gen_comp_xml(etags)
# part 1 too small
query = 'uploadId=%s' % upload_id
status, headers, body = \
self.conn.make_request('POST', bucket, keys[0], body=xml,
query=query)
self.assertEqual(get_error_code(body), 'EntityTooSmall')
# invalid credentials
auth_error_conn = Connection(tf.config['s3_access_key'], 'invalid')
status, headers, body = \
auth_error_conn.make_request('POST', bucket, keys[0], body=xml,
query=query)
self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch')
# wrong/missing bucket
status, headers, body = \
self.conn.make_request('POST', 'nothing', keys[0], query=query)
self.assertEqual(get_error_code(body), 'NoSuchBucket')
# wrong upload ID
query = 'uploadId=%s' % 'nothing'
status, headers, body = \
self.conn.make_request('POST', bucket, keys[0], body=xml,
query=query)
self.assertEqual(get_error_code(body), 'NoSuchUpload')
# without Part tag in xml
query = 'uploadId=%s' % upload_id
xml = self._gen_comp_xml([])
status, headers, body = \
self.conn.make_request('POST', bucket, keys[0], body=xml,
query=query)
self.assertEqual(get_error_code(body), 'MalformedXML')
# with invalid etag in xml
invalid_etag = 'invalid'
xml = self._gen_comp_xml([invalid_etag])
status, headers, body = \
self.conn.make_request('POST', bucket, keys[0], body=xml,
query=query)
self.assertEqual(get_error_code(body), 'InvalidPart')
# without part in Swift
query = 'uploads'
status, headers, body = \
self.conn.make_request('POST', bucket, keys[1], query=query)
elem = fromstring(body, 'InitiateMultipartUploadResult')
upload_id = elem.find('UploadId').text
query = 'uploadId=%s' % upload_id
xml = self._gen_comp_xml([etags[0]])
status, headers, body = \
self.conn.make_request('POST', bucket, keys[1], body=xml,
query=query)
self.assertEqual(get_error_code(body), 'InvalidPart')
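# Only the final part of an upload may be smaller than min_segment_size;
# an undersized earlier part makes completion fail with EntityTooSmall.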
def test_complete_upload_min_segment_size(self):
bucket = 'bucket'
key = 'obj'
self.conn.make_request('PUT', bucket)
query = 'uploads'
status, headers, body = \
self.conn.make_request('POST', bucket, key, query=query)
elem = fromstring(body, 'InitiateMultipartUploadResult')
upload_id = elem.find('UploadId').text
# multi parts with no body
etags = []
for i in range(1, 3):
query = 'partNumber=%s&uploadId=%s' % (i, upload_id)
status, headers, body = \
self.conn.make_request('PUT', bucket, key, query=query)
etags.append(headers['etag'])
xml = self._gen_comp_xml(etags)
query = 'uploadId=%s' % upload_id
status, headers, body = \
self.conn.make_request('POST', bucket, key, body=xml,
query=query)
self.assertEqual(get_error_code(body), 'EntityTooSmall')
# multi parts with all parts less than min segment size
etags = []
for i in range(1, 3):
query = 'partNumber=%s&uploadId=%s' % (i, upload_id)
status, headers, body = \
self.conn.make_request('PUT', bucket, key, query=query,
body='AA')
etags.append(headers['etag'])
xml = self._gen_comp_xml(etags)
query = 'uploadId=%s' % upload_id
status, headers, body = \
self.conn.make_request('POST', bucket, key, body=xml,
query=query)
self.assertEqual(get_error_code(body), 'EntityTooSmall')
# one part and less than min segment size
etags = []
query = 'partNumber=1&uploadId=%s' % upload_id
status, headers, body = \
self.conn.make_request('PUT', bucket, key, query=query,
body='AA')
etags.append(headers['etag'])
xml = self._gen_comp_xml(etags)
query = 'uploadId=%s' % upload_id
status, headers, body = \
self.conn.make_request('POST', bucket, key, body=xml,
query=query)
self.assertEqual(status, 200)
# multi parts with all parts except the first part less than min
# segment size
query = 'uploads'
status, headers, body = \
self.conn.make_request('POST', bucket, key, query=query)
elem = fromstring(body, 'InitiateMultipartUploadResult')
upload_id = elem.find('UploadId').text
etags = []
body_size = [self.min_segment_size, self.min_segment_size - 1, 2]
for i in range(1, 3):
query = 'partNumber=%s&uploadId=%s' % (i, upload_id)
status, headers, body = \
self.conn.make_request('PUT', bucket, key, query=query,
body=b'A' * body_size[i])
etags.append(headers['etag'])
xml = self._gen_comp_xml(etags)
query = 'uploadId=%s' % upload_id
status, headers, body = \
self.conn.make_request('POST', bucket, key, body=xml,
query=query)
self.assertEqual(get_error_code(body), 'EntityTooSmall')
# multi parts with all parts except last part more than min segment
# size
query = 'uploads'
status, headers, body = \
self.conn.make_request('POST', bucket, key, query=query)
elem = fromstring(body, 'InitiateMultipartUploadResult')
upload_id = elem.find('UploadId').text
etags = []
body_size = [self.min_segment_size, self.min_segment_size, 2]
for i in range(1, 3):
query = 'partNumber=%s&uploadId=%s' % (i, upload_id)
status, headers, body = \
self.conn.make_request('PUT', bucket, key, query=query,
body=b'A' * body_size[i])
etags.append(headers['etag'])
xml = self._gen_comp_xml(etags)
query = 'uploadId=%s' % upload_id
status, headers, body = \
self.conn.make_request('POST', bucket, key, body=xml,
query=query)
self.assertEqual(status, 200)
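# Completion may reference a subset of the uploaded parts: parts 1, 3 and
# 5 are uploaded, but the upload is completed with only parts 1 and 3.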
def test_complete_upload_with_fewer_etags(self):
bucket = 'bucket'
key = 'obj'
self.conn.make_request('PUT', bucket)
query = 'uploads'
status, headers, body = \
self.conn.make_request('POST', bucket, key, query=query)
elem = fromstring(body, 'InitiateMultipartUploadResult')
upload_id = elem.find('UploadId').text
etags = []
for i in range(1, 4):
query = 'partNumber=%s&uploadId=%s' % (2 * i - 1, upload_id)
status, headers, body = self.conn.make_request(
'PUT', bucket, key, body=b'A' * 1024 * 1024 * 5,
query=query)
etags.append(headers['etag'])
query = 'uploadId=%s' % upload_id
xml = self._gen_comp_xml(etags[:-1], step=2)
status, headers, body = \
self.conn.make_request('POST', bucket, key, body=xml,
query=query)
self.assertEqual(status, 200)
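# Initiate a single multipart upload and return its upload id.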
def _initiate_mpu_upload(self, bucket, key):
keys = [key]
uploads = []
results_generator = self._initiate_multi_uploads_result_generator(
bucket, keys)
# Initiate Multipart Upload
for expected_key, (status, headers, body) in \
zip(keys, results_generator):
self.assertEqual(status, 200)
self.assertCommonResponseHeaders(headers)
self.assertTrue('content-type' in headers)
self.assertEqual(headers['content-type'], 'application/xml')
self.assertTrue('content-length' in headers)
self.assertEqual(headers['content-length'], str(len(body)))
elem = fromstring(body, 'InitiateMultipartUploadResult')
self.assertEqual(elem.find('Bucket').text, bucket)
key = elem.find('Key').text
self.assertEqual(expected_key, key)
upload_id = elem.find('UploadId').text
self.assertTrue(upload_id is not None)
self.assertTrue((key, upload_id) not in uploads)
uploads.append((key, upload_id))
# sanity, there's just one multi-part upload
self.assertEqual(1, len(uploads))
self.assertEqual(1, len(keys))
_, upload_id = uploads[0]
return upload_id
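# Create a fresh source object, then copy its first min_segment_size
# bytes in as part 1 via X-Amz-Copy-Source-Range; returns the part ETag.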
def _copy_part_from_new_src_range(self, bucket, key, upload_id):
src_bucket = 'bucket2'
src_obj = 'obj4'
src_content = b'y' * (self.min_segment_size // 2) + b'z' * \
self.min_segment_size
src_range = 'bytes=0-%d' % (self.min_segment_size - 1)
etag = md5(
src_content[:self.min_segment_size],
usedforsecurity=False).hexdigest()
# prepare src obj
self.conn.make_request('PUT', src_bucket)
self.conn.make_request('PUT', src_bucket, src_obj, body=src_content)
_, headers, _ = self.conn.make_request('HEAD', src_bucket, src_obj)
self.assertCommonResponseHeaders(headers)
status, headers, body, resp_etag = \
self._upload_part_copy(src_bucket, src_obj, bucket,
key, upload_id, 1, src_range)
self.assertEqual(status, 200)
self.assertCommonResponseHeaders(headers)
self.assertTrue('content-type' in headers)
self.assertEqual(headers['content-type'], 'application/xml')
self.assertTrue('content-length' in headers)
self.assertEqual(headers['content-length'], str(len(body)))
self.assertTrue('etag' not in headers)
elem = fromstring(body, 'CopyPartResult')
etags = [elem.find('ETag').text]
copy_resp_last_modified = elem.find('LastModified').text
self.assertIsNotNone(copy_resp_last_modified)
self.assertEqual(resp_etag, etag)
# Check last-modified timestamp
query = 'uploadId=%s' % upload_id
status, headers, body = \
self.conn.make_request('GET', bucket, key, query=query)
elem = fromstring(body, 'ListPartsResult')
listing_last_modified = [p.find('LastModified').text
for p in elem.iterfind('Part')]
# There should be *exactly* one part in the result
self.assertEqual(listing_last_modified, [copy_resp_last_modified])
# sanity, there's just one etag
self.assertEqual(1, len(etags))
return etags[0]
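# Complete the MPU and verify the AWS-style '-<part count>' composite
# ETag in the CompleteMultipartUploadResult.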
def _complete_mpu_upload(self, bucket, key, upload_id, etags):
# Complete Multipart Upload
query = 'uploadId=%s' % upload_id
xml = self._gen_comp_xml(etags)
status, headers, body = \
self.conn.make_request('POST', bucket, key, body=xml,
query=query)
self.assertEqual(status, 200)
self.assertCommonResponseHeaders(headers)
self.assertTrue('content-type' in headers)
self.assertEqual(headers['content-type'], 'application/xml')
if 'content-length' in headers:
self.assertEqual(headers['content-length'], str(len(body)))
else:
self.assertIn('transfer-encoding', headers)
self.assertEqual(headers['transfer-encoding'], 'chunked')
lines = body.split(b'\n')
self.assertTrue(lines[0].startswith(b'<?xml'), body)
self.assertTrue(lines[0].endswith(b'?>'), body)
elem = fromstring(body, 'CompleteMultipartUploadResult')
self.assertEqual(
'%s/%s/%s' %
(tf.config['s3_storage_url'].rstrip('/'), bucket, key),
elem.find('Location').text)
self.assertEqual(elem.find('Bucket').text, bucket)
self.assertEqual(elem.find('Key').text, key)
concatted_etags = b''.join(
etag.strip('"').encode('ascii') for etag in etags)
exp_etag = '"%s-%s"' % (
md5(binascii.unhexlify(concatted_etags),
usedforsecurity=False).hexdigest(), len(etags))
etag = elem.find('ETag').text
self.assertEqual(etag, exp_etag)
@skip_boto2_sort_header_bug
def test_mpu_copy_part_from_range_then_complete(self):
bucket = 'mpu-copy-range'
key = 'obj-complete'
upload_id = self._initiate_mpu_upload(bucket, key)
etag = self._copy_part_from_new_src_range(bucket, key, upload_id)
self._complete_mpu_upload(bucket, key, upload_id, [etag])
@skip_boto2_sort_header_bug
def test_mpu_copy_part_from_range_then_abort(self):
bucket = 'mpu-copy-range'
key = 'obj-abort'
upload_id = self._initiate_mpu_upload(bucket, key)
self._copy_part_from_new_src_range(bucket, key, upload_id)
# Abort Multipart Upload
query = 'uploadId=%s' % upload_id
status, headers, body = \
self.conn.make_request('DELETE', bucket, key, query=query)
# sanity checks
self.assertEqual(status, 204)
self.assertCommonResponseHeaders(headers)
self.assertTrue('content-type' in headers)
self.assertEqual(headers['content-type'], 'text/html; charset=UTF-8')
self.assertTrue('content-length' in headers)
self.assertEqual(headers['content-length'], '0')
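# Complete a two-part MPU to act as the copy source, then range-copy its
# first part into the destination upload; returns the copied part's ETag.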
def _copy_part_from_new_mpu_range(self, bucket, key, upload_id):
src_bucket = 'bucket2'
src_obj = 'mpu2'
src_upload_id = self._initiate_mpu_upload(src_bucket, src_obj)
# upload parts
etags = []
for part_num in range(2):
# Upload Part
content = (chr(97 + part_num) * self.min_segment_size).encode()
etag = md5(content, usedforsecurity=False).hexdigest()
status, headers, body = \
self._upload_part(src_bucket, src_obj, src_upload_id,
content, part_num=part_num + 1)
self.assertEqual(status, 200)
self.assertCommonResponseHeaders(headers, etag)
self.assertTrue('content-type' in headers)
self.assertEqual(headers['content-type'],
'text/html; charset=UTF-8')
self.assertTrue('content-length' in headers)
self.assertEqual(headers['content-length'], '0')
self.assertEqual(headers['etag'], '"%s"' % etag)
etags.append(etag)
self._complete_mpu_upload(src_bucket, src_obj, src_upload_id, etags)
# Upload Part Copy -- MPU as source
src_range = 'bytes=0-%d' % (self.min_segment_size - 1)
status, headers, body, resp_etag = \
self._upload_part_copy(src_bucket, src_obj, bucket,
key, upload_id, part_num=1,
src_range=src_range)
self.assertEqual(status, 200)
self.assertCommonResponseHeaders(headers)
self.assertIn('content-type', headers)
self.assertEqual(headers['content-type'], 'application/xml')
self.assertIn('content-length', headers)
self.assertEqual(headers['content-length'], str(len(body)))
self.assertNotIn('etag', headers)
elem = fromstring(body, 'CopyPartResult')
last_modified = elem.find('LastModified').text
self.assertIsNotNone(last_modified)
# the copied src_range corresponds to part 1 of the source MPU, so the
# ETags should match
self.assertEqual(resp_etag, etags[0])
return resp_etag
@skip_boto2_sort_header_bug
def test_mpu_copy_part_from_mpu_part_number_then_complete(self):
bucket = 'mpu-copy-range'
key = 'obj-complete'
upload_id = self._initiate_mpu_upload(bucket, key)
etag = self._copy_part_from_new_mpu_range(bucket, key, upload_id)
self._complete_mpu_upload(bucket, key, upload_id, [etag])
@skip_boto2_sort_header_bug
def test_mpu_copy_part_from_mpu_part_number_then_abort(self):
bucket = 'mpu-copy-range'
key = 'obj-abort'
upload_id = self._initiate_mpu_upload(bucket, key)
self._copy_part_from_new_mpu_range(bucket, key, upload_id)
# Abort Multipart Upload
query = 'uploadId=%s' % upload_id
status, headers, body = \
self.conn.make_request('DELETE', bucket, key, query=query)
# sanity checks
self.assertEqual(status, 204)
self.assertCommonResponseHeaders(headers)
self.assertTrue('content-type' in headers)
self.assertEqual(headers['content-type'], 'text/html; charset=UTF-8')
self.assertTrue('content-length' in headers)
self.assertEqual(headers['content-length'], '0')
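# Upload Part Copy where the source is a specific object version
# (X-Amz-Copy-Source plus ?versionId=...).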
def test_object_multi_upload_part_copy_version(self):
if 'object_versioning' not in tf.cluster_info:
self.skipTest('Object Versioning not enabled')
bucket = 'bucket'
keys = ['obj1']
uploads = []
results_generator = self._initiate_multi_uploads_result_generator(
bucket, keys)
# Initiate Multipart Upload
for expected_key, (status, headers, body) in \
zip(keys, results_generator):
self.assertEqual(status, 200)
self.assertCommonResponseHeaders(headers)
            self.assertIn('content-type', headers)
            self.assertEqual(headers['content-type'], 'application/xml')
            self.assertIn('content-length', headers)
            self.assertEqual(headers['content-length'], str(len(body)))
elem = fromstring(body, 'InitiateMultipartUploadResult')
self.assertEqual(elem.find('Bucket').text, bucket)
key = elem.find('Key').text
self.assertEqual(expected_key, key)
upload_id = elem.find('UploadId').text
            self.assertIsNotNone(upload_id)
            self.assertNotIn((key, upload_id), uploads)
uploads.append((key, upload_id))
self.assertEqual(len(uploads), len(keys)) # sanity
key, upload_id = uploads[0]
src_bucket = 'bucket2'
src_obj = 'obj4'
src_content = b'y' * (self.min_segment_size // 2) + b'z' * \
self.min_segment_size
etags = [md5(src_content, usedforsecurity=False).hexdigest()]
# prepare null-version src obj
self.conn.make_request('PUT', src_bucket)
self.conn.make_request('PUT', src_bucket, src_obj, body=src_content)
_, headers, _ = self.conn.make_request('HEAD', src_bucket, src_obj)
self.assertCommonResponseHeaders(headers)
# Turn on versioning
elem = Element('VersioningConfiguration')
SubElement(elem, 'Status').text = 'Enabled'
xml = tostring(elem)
status, headers, body = self.conn.make_request(
'PUT', src_bucket, body=xml, query='versioning')
self.assertEqual(status, 200)
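        # PUTs from here on get real version ids; the earlier PUT of
        # src_obj remains accessible as the 'null' version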
src_obj2 = 'obj5'
src_content2 = b'stub'
etags.append(md5(src_content2, usedforsecurity=False).hexdigest())
# prepare src obj w/ real version
self.conn.make_request('PUT', src_bucket, src_obj2, body=src_content2)
_, headers, _ = self.conn.make_request('HEAD', src_bucket, src_obj2)
self.assertCommonResponseHeaders(headers)
version_id2 = headers['x-amz-version-id']
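        # copy the pre-versioning ('null' version) src_obj as part 1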
status, headers, body, resp_etag = \
self._upload_part_copy(src_bucket, src_obj, bucket,
key, upload_id, 1,
src_version_id='null')
self.assertEqual(status, 200)
self.assertCommonResponseHeaders(headers)
        self.assertIn('content-type', headers)
        self.assertEqual(headers['content-type'], 'application/xml')
        self.assertIn('content-length', headers)
        self.assertEqual(headers['content-length'], str(len(body)))
        self.assertNotIn('etag', headers)
elem = fromstring(body, 'CopyPartResult')
copy_resp_last_modifieds = [elem.find('LastModified').text]
        self.assertIsNotNone(copy_resp_last_modifieds[0])
self.assertEqual(resp_etag, etags[0])
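        # copy the explicitly versioned src_obj2 as part 2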
status, headers, body, resp_etag = \
self._upload_part_copy(src_bucket, src_obj2, bucket,
key, upload_id, 2,
src_version_id=version_id2)
self.assertEqual(status, 200)
self.assertCommonResponseHeaders(headers)
        self.assertIn('content-type', headers)
        self.assertEqual(headers['content-type'], 'application/xml')
        self.assertIn('content-length', headers)
        self.assertEqual(headers['content-length'], str(len(body)))
        self.assertNotIn('etag', headers)
elem = fromstring(body, 'CopyPartResult')
copy_resp_last_modifieds.append(elem.find('LastModified').text)
        self.assertIsNotNone(copy_resp_last_modifieds[1])
self.assertEqual(resp_etag, etags[1])
# Check last-modified timestamp
key, upload_id = uploads[0]
query = 'uploadId=%s' % upload_id
status, headers, body = \
self.conn.make_request('GET', bucket, key, query=query)
elem = fromstring(body, 'ListPartsResult')
listing_last_modified = [p.find('LastModified').text
for p in elem.iterfind('Part')]
self.assertEqual(listing_last_modified, copy_resp_last_modifieds)
# Abort Multipart Upload
key, upload_id = uploads[0]
query = 'uploadId=%s' % upload_id
status, headers, body = \
self.conn.make_request('DELETE', bucket, key, query=query)
# sanity checks
self.assertEqual(status, 204)
self.assertCommonResponseHeaders(headers)
        self.assertIn('content-type', headers)
        self.assertEqual(headers['content-type'], 'text/html; charset=UTF-8')
        self.assertIn('content-length', headers)
        self.assertEqual(headers['content-length'], '0')

    def test_delete_bucket_multi_upload_object_existing(self):
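        """DELETE on a non-empty bucket returns 409 Conflict; the completed
        MPU object must be removed before the bucket can be deleted.
        """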
bucket = 'bucket'
keys = ['obj1']
uploads = []
results_generator = self._initiate_multi_uploads_result_generator(
bucket, keys)
# Initiate Multipart Upload
for expected_key, (status, _, body) in \
zip(keys, results_generator):
self.assertEqual(status, 200) # sanity
elem = fromstring(body, 'InitiateMultipartUploadResult')
key = elem.find('Key').text
self.assertEqual(expected_key, key) # sanity
upload_id = elem.find('UploadId').text
            self.assertIsNotNone(upload_id)  # sanity
            self.assertNotIn((key, upload_id), uploads)
uploads.append((key, upload_id))
self.assertEqual(len(uploads), len(keys)) # sanity
# Upload Part
key, upload_id = uploads[0]
content = b'a' * self.min_segment_size
status, headers, body = \
self._upload_part(bucket, key, upload_id, content)
self.assertEqual(status, 200)
# Complete Multipart Upload
key, upload_id = uploads[0]
etags = [md5(content, usedforsecurity=False).hexdigest()]
xml = self._gen_comp_xml(etags)
status, headers, body = \
self._complete_multi_upload(bucket, key, upload_id, xml)
self.assertEqual(status, 200) # sanity
# GET multipart object
status, headers, body = \
self.conn.make_request('GET', bucket, key)
self.assertEqual(status, 200) # sanity
self.assertEqual(content, body) # sanity
        # DELETE bucket while the object still exists
status, headers, body = \
self.conn.make_request('DELETE', bucket)
self.assertEqual(status, 409) # sanity
# The object must still be there.
status, headers, body = \
self.conn.make_request('GET', bucket, key)
self.assertEqual(status, 200) # sanity
self.assertEqual(content, body) # sanity
# Can delete it with DeleteMultipleObjects request
elem = Element('Delete')
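        # Quiet mode: success results are omitted from the response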
SubElement(elem, 'Quiet').text = 'true'
obj_elem = SubElement(elem, 'Object')
SubElement(obj_elem, 'Key').text = key
body = tostring(elem, use_s3ns=False)
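        # Multi-Object Delete requires a Content-MD5 header over the body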
status, headers, body = self.conn.make_request(
'POST', bucket, body=body, query='delete',
headers={'Content-MD5': calculate_md5(body)})
self.assertEqual(status, 200)
self.assertCommonResponseHeaders(headers)
status, headers, body = \
self.conn.make_request('GET', bucket, key)
self.assertEqual(status, 404) # sanity
# Now we can delete
status, headers, body = \
self.conn.make_request('DELETE', bucket)
self.assertEqual(status, 204) # sanity


class TestS3ApiMultiUploadSigV4(TestS3ApiMultiUpload, SigV4Mixin):
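    """Re-run the multi-upload tests above with SigV4 request signing."""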
pass


if __name__ == '__main__':
unittest.main()
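
# A sketch of how these tests are typically invoked, assuming a running
# cluster and the usual functional-test config (paths and the -k filter
# below are illustrative, not fixed):
#   python -m pytest test/functional/s3api/test_multi_upload.py -k copy_part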