From 1a27d1b83ff6567742478a0e4860cd0995938203 Mon Sep 17 00:00:00 2001 From: Alistair Coles Date: Wed, 11 Jun 2025 18:59:13 +0100 Subject: [PATCH] s3api: fix multi-upload BadDigest error S3 includes the expected base64 digest in a BadDigest response to a multipart complete POST request. Co-Authored-By: Tim Burke Change-Id: Ie20ccf10846854f375c29be1b0b00b8eaacc9afa --- .../s3api/controllers/multi_upload.py | 7 +- test/s3api/test_input_errors.py | 149 ++++++++++++++++-- 2 files changed, 144 insertions(+), 12 deletions(-) diff --git a/swift/common/middleware/s3api/controllers/multi_upload.py b/swift/common/middleware/s3api/controllers/multi_upload.py index 565c625351..edb4bb2f51 100644 --- a/swift/common/middleware/s3api/controllers/multi_upload.py +++ b/swift/common/middleware/s3api/controllers/multi_upload.py @@ -700,9 +700,10 @@ class UploadController(Controller): if 'content-md5' in req.headers: # If an MD5 was provided, we need to verify it. # Note that S3Request already took care of translating to ETag - if req.headers['etag'] != md5( - xml, usedforsecurity=False).hexdigest(): - raise BadDigest(content_md5=req.headers['content-md5']) + md5_body = md5(xml, usedforsecurity=False).hexdigest() + if req.headers['etag'] != md5_body: + raise BadDigest( + expected_digest=req.headers['content-md5']) # We're only interested in the body here, in the # multipart-upload controller -- *don't* let it get # plumbed down to the object-server diff --git a/test/s3api/test_input_errors.py b/test/s3api/test_input_errors.py index b93f885f6c..998cf8ec52 100644 --- a/test/s3api/test_input_errors.py +++ b/test/s3api/test_input_errors.py @@ -29,7 +29,7 @@ from urllib.parse import urlsplit, urlunsplit, quote from swift.common import bufferedhttp from swift.common.utils.ipaddrs import parse_socket_string -from test.s3api import BaseS3TestCaseWithBucket, get_opt +from test.s3api import BaseS3TestCaseWithBucket, get_opt, get_s3_client def _hmac(key, message, digest): @@ -65,6 +65,7 @@ EPOCH = datetime.datetime.fromtimestamp(0, datetime.timezone.utc) class S3Session(object): bucket_in_host = False default_expiration = 900 # 15 min + ignored_auth_query_params = frozenset() def __init__( self, @@ -202,10 +203,16 @@ class S3SessionV2(S3Session): string_to_sign_lines.extend('%s:%s' % (h, v) for h, v in amz_headers) - string_to_sign_lines.append( - ('/' + request['bucket'] if self.bucket_in_host else '') - + request['path'] - ) + resource = '/' + request['bucket'] if self.bucket_in_host else '' + resource += request['path'] + query_to_sign = {k: v for k, v in request['query'].items() + if k not in self.ignored_auth_query_params} + if query_to_sign: + resource += '?' + '&'.join( + '%s=%s' % (k, v) + for k, v in sorted(query_to_sign.items())) + string_to_sign_lines.append(resource) + signature = base64.b64encode(_hmac( self.secret_key, '\n'.join(string_to_sign_lines), @@ -233,6 +240,9 @@ class S3SessionV2Headers(S3SessionV2): class S3SessionV2Query(S3SessionV2): + ignored_auth_query_params = frozenset({ + 'Expires', 'AWSAccessKeyId', 'Signature'}) + def build_request( self, bucket=None, @@ -415,6 +425,7 @@ class S3SessionV4Headers(S3SessionV4): class S3SessionV4Query(S3SessionV4): + # Note that v4 doesn't ignore any auth query params when signing def build_request( self, bucket=None, @@ -546,7 +557,19 @@ class InputErrorsMixin(object): # self.assertIn('%s' % md5_in_headers, # respbody) - def assertBadDigest(self, resp, md5_in_headers, md5_of_body): + def assertBadDigest(self, resp, md5_in_headers, md5_of_body, + expected_digest_should_be_hex=True): + """ + Check that the response is a well-formed BadDigest error + + :param resp: the ``requests`` response + :param md5_in_headers: the base64-encoded content-md5 sent in the + request headers + :param md5_of_body: the base64-encoded MD5 of the actual request body + :param expected_digest_should_be_hex: whether the in + the response should be hex-encoded; if not, expect it to be + base64-encoded + """ respbody = resp.content if not isinstance(respbody, str): respbody = respbody.decode('utf8') @@ -558,9 +581,15 @@ class InputErrorsMixin(object): self.assertIn("The Content-MD5 you specified did not match " "what we received.", respbody) - # Yes, really -- AWS needs b64 in headers, but reflects back hex - self.assertIn('%s' % binascii.hexlify( - base64.b64decode(md5_in_headers)).decode('ascii'), respbody) + exp_digest = md5_in_headers + if expected_digest_should_be_hex: + # AWS needs b64 in headers, but is inconsistent in what it returns + # in the element: sometimes hex, sometimes b64 + exp_digest = binascii.hexlify( + base64.b64decode(exp_digest)).decode('ascii') + self.assertIn('%s' % exp_digest, + respbody) + # AWS always returns b64 in the element # TODO: AWS provides this, but swift doesn't (yet) # self.assertIn('%s' # % md5_of_body, respbody) @@ -1233,6 +1262,108 @@ class InputErrorsMixin(object): headers={'x-amz-content-sha256': 'UNSIGNED-PAYLOAD'}) self.assertOK(resp) + def _setup_mpu(self, key): + # create an mpu and upload a part + client = get_s3_client(1) + create_mpu_resp = client.create_multipart_upload( + Bucket=self.bucket_name, Key=key) + self.assertEqual(200, create_mpu_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + upload_id = create_mpu_resp['UploadId'] + part_resp = client.upload_part( + Body='x' * 1024, + Bucket=self.bucket_name, + Key=key, + PartNumber=1, + UploadId=upload_id) + self.assertEqual(200, part_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + complete_request_body = ( + '\n' + '' + '' + '1' + '%s' + '' + '' % part_resp['ETag'] + ).encode('utf-8') + return upload_id, complete_request_body + + def test_good_md5_good_sha_good_crc_header_mpu(self): + key = 'mpu-name' + upload_id, complete_mpu_body = self._setup_mpu(key) + resp = self.conn.make_request( + self.bucket_name, + key, + query={'uploadId': upload_id}, + method='POST', + body=complete_mpu_body, + headers={ + 'content-md5': _md5(complete_mpu_body), + 'x-amz-content-sha256': _sha256(complete_mpu_body), + 'x-amz-checksum-crc32': _crc32(complete_mpu_body), + } + ) + self.assertEqual(resp.status_code, 200, resp.content) + self.assertIn(b'CompleteMultipartUploadResult', resp.content) + + def test_bad_md5_good_sha_good_crc_header_mpu(self): + key = 'mpu-name' + upload_id, complete_mpu_body = self._setup_mpu(key) + resp = self.conn.make_request( + self.bucket_name, + key, + query={'uploadId': upload_id}, + method='POST', + body=complete_mpu_body, + headers={ + 'content-md5': _md5(b'not the body'), + 'x-amz-content-sha256': _sha256(complete_mpu_body), + 'x-amz-checksum-crc32': _crc32(complete_mpu_body), + } + ) + self.assertBadDigest( + resp, _md5(b'not the body'), _md5(complete_mpu_body), + expected_digest_should_be_hex=False) + + def test_good_md5_bad_sha_good_crc_header_mpu(self): + key = 'mpu-name' + upload_id, complete_mpu_body = self._setup_mpu(key) + resp = self.conn.make_request( + self.bucket_name, + key, + query={'uploadId': upload_id}, + method='POST', + body=complete_mpu_body, + headers={ + 'content-md5': _md5(complete_mpu_body), + 'x-amz-content-sha256': _sha256(b'not the body'), + 'x-amz-checksum-crc32': _crc32(complete_mpu_body), + } + ) + self.assertSHA256Mismatch( + resp, _sha256(b'not the body'), _sha256(complete_mpu_body)) + + def test_good_md5_good_sha_bad_crc_header_mpu(self): + key = 'mpu-name' + upload_id, complete_mpu_body = self._setup_mpu(key) + resp = self.conn.make_request( + self.bucket_name, + key, + query={'uploadId': upload_id}, + method='POST', + body=complete_mpu_body, + headers={ + 'content-md5': _md5(complete_mpu_body), + 'x-amz-content-sha256': _sha256(complete_mpu_body), + 'x-amz-checksum-crc32': _crc32(b'not the body'), + } + ) + # Despite the bad checksum, we complete successfully! + self.assertEqual(resp.status_code, 200, resp.content) + self.assertIn(b'CompleteMultipartUploadResult', resp.content) + class TestV4AuthHeaders(InputErrorsMixin, BaseS3TestCaseWithBucket): session_cls = S3SessionV4Headers