From be56c1e25822a23a6f6ad30d190782a395f3149f Mon Sep 17 00:00:00 2001 From: Tim Burke Date: Fri, 11 Apr 2025 16:58:30 -0700 Subject: [PATCH] s3api: Validate additional checksums on upload See https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html for some background. This covers both "normal" objects and part-uploads for MPUs. Note that because we don't write down any client-provided checksums during initiate-MPU calls, we can't do any verification during complete-MPU calls. crc64nvme checksums are not yet supported; clients attempting to use them will get back 501s. Adds crt as a boto3 extra to test-requirements. The extra lib provides crc32c and crc64nvme checksum support in boto3. Co-Authored-By: Ashwin Nair Co-Authored-By: Alistair Coles Signed-off-by: Tim Burke Signed-off-by: Alistair Coles Change-Id: Id39fd71bc59875a5b88d1d012542136acf880019 --- doc/s3api/rnc/complete_multipart_upload.rnc | 7 +- swift/common/middleware/s3api/exception.py | 29 + swift/common/middleware/s3api/s3api.py | 3 +- swift/common/middleware/s3api/s3request.py | 300 ++++++- .../schema/complete_multipart_upload.rng | 25 + swift/common/utils/__init__.py | 16 +- swift/common/utils/checksum.py | 5 + test/s3api/test_input_errors.py | 777 +++++++++++++++++- test/s3api/test_object_checksums.py | 578 +++++++++++++ .../common/middleware/s3api/test_s3api.py | 14 + .../common/middleware/s3api/test_s3request.py | 470 +++++++++-- test/unit/common/test_utils.py | 22 + 12 files changed, 2155 insertions(+), 91 deletions(-) create mode 100644 test/s3api/test_object_checksums.py diff --git a/doc/s3api/rnc/complete_multipart_upload.rnc b/doc/s3api/rnc/complete_multipart_upload.rnc index dee60e5440..2a8459ef62 100644 --- a/doc/s3api/rnc/complete_multipart_upload.rnc +++ b/doc/s3api/rnc/complete_multipart_upload.rnc @@ -2,6 +2,11 @@ start = element CompleteMultipartUpload { element Part { element PartNumber { xsd:int } & - element ETag { xsd:string } + element ETag { xsd:string 
} & + element ChecksumCRC32 { xsd:string }? & + element ChecksumCRC32C { xsd:string }? & + element ChecksumCRC64NVME { xsd:string }? & + element ChecksumSHA1 { xsd:string }? & + element ChecksumSHA256 { xsd:string }? }+ } diff --git a/swift/common/middleware/s3api/exception.py b/swift/common/middleware/s3api/exception.py index ca30681927..2e4e9a8064 100644 --- a/swift/common/middleware/s3api/exception.py +++ b/swift/common/middleware/s3api/exception.py @@ -79,3 +79,32 @@ class S3InputMissingSecret(S3InputError): This happens if the auth middleware responsible for the user never called the provided ``check_signature`` callback. """ + + +class S3InputSHA256Mismatch(S3InputError): + """ + Client provided an X-Amz-Content-SHA256, but it doesn't match the data. + + This should result in a BadDigest going back to the client. + """ + def __init__(self, expected, computed): + self.expected = expected + self.computed = computed + + +class S3InputChecksumMismatch(S3InputError): + """ + Client provided an X-Amz-Checksum-* header, but it doesn't match the data. + + This should result in a BadDigest going back to the client. + """ + + +class S3InputChecksumTrailerInvalid(S3InputError): + """ + Client provided an X-Amz-Checksum-* trailer, but it is not a valid format. + + This should result in an InvalidRequest going back to the client. 
+ """ + def __init__(self, trailer_name): + self.trailer = trailer_name diff --git a/swift/common/middleware/s3api/s3api.py b/swift/common/middleware/s3api/s3api.py index dfef93a0d3..5d3f80f50a 100644 --- a/swift/common/middleware/s3api/s3api.py +++ b/swift/common/middleware/s3api/s3api.py @@ -159,7 +159,7 @@ from swift.common.middleware.s3api.s3response import ErrorResponse, \ InternalError, MethodNotAllowed, S3ResponseBase, S3NotImplemented from swift.common.utils import get_logger, config_true_value, \ config_positive_int_value, split_path, closing_if_possible, \ - list_from_csv, parse_header + list_from_csv, parse_header, checksum from swift.common.middleware.s3api.utils import Config from swift.common.middleware.s3api.acl_handlers import get_acl_handler from swift.common.registry import register_swift_info, \ @@ -300,6 +300,7 @@ class S3ApiMiddleware(object): self.logger = get_logger( wsgi_conf, log_route='s3api', statsd_tail_prefix='s3api') self.check_pipeline(wsgi_conf) + checksum.log_selected_implementation(self.logger) def is_s3_cors_preflight(self, env): if env['REQUEST_METHOD'] != 'OPTIONS' or not env.get('HTTP_ORIGIN'): diff --git a/swift/common/middleware/s3api/s3request.py b/swift/common/middleware/s3api/s3request.py index adce490a15..ff088a1b83 100644 --- a/swift/common/middleware/s3api/s3request.py +++ b/swift/common/middleware/s3api/s3request.py @@ -26,7 +26,8 @@ from urllib.parse import quote, unquote, parse_qsl import string from swift.common.utils import split_path, json, md5, streq_const_time, \ - close_if_possible, InputProxy, get_policy_index, list_from_csv + close_if_possible, InputProxy, get_policy_index, list_from_csv, \ + strict_b64decode, base64_str, checksum from swift.common.registry import get_swift_info from swift.common import swob from swift.common.http import HTTP_OK, HTTP_CREATED, HTTP_ACCEPTED, \ @@ -62,7 +63,9 @@ from swift.common.middleware.s3api.s3response import AccessDenied, \ from swift.common.middleware.s3api.exception 
import NotS3Request, \ S3InputError, S3InputSizeError, S3InputIncomplete, \ S3InputChunkSignatureMismatch, S3InputChunkTooSmall, \ - S3InputMalformedTrailer, S3InputMissingSecret + S3InputMalformedTrailer, S3InputMissingSecret, \ + S3InputSHA256Mismatch, S3InputChecksumMismatch, \ + S3InputChecksumTrailerInvalid from swift.common.middleware.s3api.utils import utf8encode, \ S3Timestamp, mktime, MULTIUPLOAD_SUFFIX from swift.common.middleware.s3api.subresource import decode_acl, encode_acl @@ -91,6 +94,39 @@ SIGV4_CHUNK_MIN_SIZE = 8192 SERVICE = 's3' # useful for mocking out in tests +CHECKSUMS_BY_HEADER = { + 'x-amz-checksum-crc32': checksum.crc32, + 'x-amz-checksum-crc32c': checksum.crc32c, + 'x-amz-checksum-crc64nvme': checksum.crc64nvme, + 'x-amz-checksum-sha1': sha1, + 'x-amz-checksum-sha256': sha256, +} + + +def _get_checksum_hasher(header): + try: + return CHECKSUMS_BY_HEADER[header]() + except (KeyError, NotImplementedError): + raise S3NotImplemented('The %s algorithm is not supported.' % header) + + +def _validate_checksum_value(checksum_hasher, b64digest): + return strict_b64decode( + b64digest, + exact_size=checksum_hasher.digest_size, + ) + + +def _validate_checksum_header_cardinality(num_checksum_headers, + headers_and_trailer=False): + if num_checksum_headers > 1: + # inconsistent messaging for AWS compatibility... + msg = 'Expecting a single x-amz-checksum- header' + if not headers_and_trailer: + msg += '. Multiple checksum Types are not allowed.' + raise InvalidRequest(msg) + + def _is_streaming(aws_sha256): return aws_sha256 in ( 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', @@ -135,20 +171,6 @@ def _header_acl_property(resource): doc='Get and set the %s acl property' % resource) -class S3InputSHA256Mismatch(BaseException): - """ - Client provided a X-Amz-Content-SHA256, but it doesn't match the data. 
- - Inherit from BaseException (rather than Exception) so it cuts from the - proxy-server app (which will presumably be the one reading the input) - through all the layers of the pipeline back to us. It should never escape - the s3api middleware. - """ - def __init__(self, expected, computed): - self.expected = expected - self.computed = computed - - class HashingInput(InputProxy): """ wsgi.input wrapper to verify the SHA256 of the input as it's read. @@ -168,6 +190,8 @@ class HashingInput(InputProxy): ) def chunk_update(self, chunk, eof, *args, **kwargs): + # Note that "chunk" is just whatever was read from the input; this + # says nothing about whether the underlying stream uses aws-chunked self._hasher.update(chunk) if self.bytes_received < self._expected_length: @@ -187,10 +211,64 @@ class HashingInput(InputProxy): return chunk +class ChecksummingInput(InputProxy): + """ + wsgi.input wrapper to calculate the X-Amz-Checksum-* of the input as it's + read. The calculated value is checked against an expected value that is + sent in either the request headers or trailers. To allow for the latter, + the expected value is lazy fetched once the input has been read. + + :param wsgi_input: file-like object to be wrapped. + :param content_length: the expected number of bytes to be read. + :param checksum_hasher: a hasher to calculate the checksum of read bytes. + :param checksum_key: the name of the header or trailer that will have + the expected checksum value to be checked. + :param checksum_source: a dict that will have the ``checksum_key``. 
+ """ + + def __init__(self, wsgi_input, content_length, checksum_hasher, + checksum_key, checksum_source): + super().__init__(wsgi_input) + self._expected_length = content_length + self._checksum_hasher = checksum_hasher + self._checksum_key = checksum_key + self._checksum_source = checksum_source + + def chunk_update(self, chunk, eof, *args, **kwargs): + # Note that "chunk" is just whatever was read from the input; this + # says nothing about whether the underlying stream uses aws-chunked + self._checksum_hasher.update(chunk) + if self.bytes_received < self._expected_length: + error = eof + elif self.bytes_received == self._expected_length: + # Lazy fetch checksum value because it may have come in trailers + b64digest = self._checksum_source.get(self._checksum_key) + try: + expected_raw_checksum = _validate_checksum_value( + self._checksum_hasher, b64digest) + except ValueError: + # If the checksum value came in a header then it would have + # been validated before the body was read, so if the validation + # fails here then we can infer that the checksum value came in + # a trailer. The S3InputChecksumTrailerInvalid raised here will + # propagate all the way back up the middleware stack to s3api + # where it is caught and translated to an InvalidRequest. + raise S3InputChecksumTrailerInvalid(self._checksum_key) + error = self._checksum_hasher.digest() != expected_raw_checksum + else: + error = True + + if error: + self.close() + # Since we don't return the last chunk, the PUT never completes + raise S3InputChecksumMismatch(self._checksum_hasher.name.upper()) + return chunk + + class ChunkReader(InputProxy): """ - wsgi.input wrapper to read a single chunk from a chunked input and validate - its signature. + wsgi.input wrapper to read a single chunk from an aws-chunked input and + validate its signature. :param wsgi_input: a wsgi input. :param chunk_size: number of bytes to read. 
@@ -237,6 +315,7 @@ class ChunkReader(InputProxy): return super().readline(size) def chunk_update(self, chunk, eof, *args, **kwargs): + # Note that "chunk" is just whatever was read from the input self._sha256.update(chunk) if self.bytes_received == self.chunk_size: if self._validator and not self._validator( @@ -496,9 +575,8 @@ class SigCheckerV2(BaseSigChecker): return b'\n'.join(buf) def _check_signature(self): - valid_signature = base64.b64encode(hmac.new( - self._secret, self.string_to_sign, sha1 - ).digest()).strip().decode('ascii') + valid_signature = base64_str( + hmac.new(self._secret, self.string_to_sign, sha1).digest()) return streq_const_time(self.signature, valid_signature) @@ -985,10 +1063,30 @@ class S3Request(swob.Request): self.sig_checker = SigCheckerV2(self) aws_sha256 = self.headers.get('x-amz-content-sha256') if self.method in ('PUT', 'POST'): + checksum_hasher, checksum_header, checksum_trailer = \ + self._validate_checksum_headers() if _is_streaming(aws_sha256): - self._install_streaming_input_wrapper(aws_sha256) + if checksum_trailer: + streaming_input = self._install_streaming_input_wrapper( + aws_sha256, checksum_trailer=checksum_trailer) + checksum_key = checksum_trailer + checksum_source = streaming_input.trailers + else: + self._install_streaming_input_wrapper(aws_sha256) + checksum_key = checksum_header + checksum_source = self.headers + elif checksum_trailer: + raise MalformedTrailerError else: self._install_non_streaming_input_wrapper(aws_sha256) + checksum_key = checksum_header + checksum_source = self.headers + + # S3 doesn't check the checksum against the request body for at + # least some POSTs (e.g. 
MPU complete) so restrict this to PUTs + if checksum_key and self.method == 'PUT': + self._install_checksumming_input_wrapper( + checksum_hasher, checksum_key, checksum_source) # Lock in string-to-sign now, before we start messing with query params self.environ['s3api.auth_details'] = { @@ -1308,23 +1406,24 @@ class S3Request(swob.Request): # used to be, AWS would store '', but not any more self.headers['Content-Encoding'] = new_enc - def _install_streaming_input_wrapper(self, aws_sha256): + def _install_streaming_input_wrapper(self, aws_sha256, + checksum_trailer=None): + """ + Wrap the wsgi input with a reader that parses an aws-chunked body. + + :param aws_sha256: the value of the 'x-amz-content-sha256' header. + :param checksum_trailer: the name of an 'x-amz-checksum-*' trailer + (if any) that is to be expected at the end of the body. + :return: an instance of StreamingInput. + """ self._cleanup_content_encoding() self.content_length = int(self.headers.get( 'x-amz-decoded-content-length')) expected_trailers = set() if aws_sha256 == 'STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER': expected_trailers.add('x-amz-trailer-signature') - trailer = self.headers.get('x-amz-trailer', '') - trailer_list = [ - v.strip() for v in trailer.rstrip(',').split(',') - ] if trailer.strip() else [] - if len(trailer_list) > 1: - raise InvalidRequest( - 'Expecting a single x-amz-checksum- header. Multiple ' - 'checksum Types are not allowed.') - else: - expected_trailers.update(trailer_list) + if checksum_trailer: + expected_trailers.add(checksum_trailer) streaming_input = StreamingInput( self.environ['wsgi.input'], self.content_length, @@ -1346,6 +1445,131 @@ class S3Request(swob.Request): # reject with length-required and we'll translate back to # MissingContentLength) + def _validate_x_amz_checksum_headers(self): + """ + Validate and return a header that specifies a checksum value. A valid + header must be named x-amz-checksum-<algorithm> where <algorithm> is + one of the supported checksum algorithms. 
+ + :raises: InvalidRequest if more than one checksum header is found or if + an invalid algorithm is specified. + :return: a dict containing at most a single checksum header name:value + pair. + """ + checksum_headers = { + h.lower(): v + for h, v in self.headers.items() + if (h.lower().startswith('x-amz-checksum-') + and h.lower() not in ('x-amz-checksum-algorithm', + 'x-amz-checksum-type')) + } + if any(h not in CHECKSUMS_BY_HEADER + for h in checksum_headers): + raise InvalidRequest('The algorithm type you specified in ' + 'x-amz-checksum- header is invalid.') + _validate_checksum_header_cardinality(len(checksum_headers)) + return checksum_headers + + def _validate_x_amz_trailer_header(self): + """ + Validate and return the name of a checksum trailer that is declared by + an ``x-amz-trailer`` header. A valid trailer must be named + x-amz-checksum-<algorithm> where <algorithm> is one of the supported + checksum algorithms. + + :raises: InvalidRequest if more than one checksum trailer is declared + by the ``x-amz-trailer`` header, or if an invalid algorithm is + specified. + :return: a list containing at most a single checksum header name. + """ + header = self.headers.get('x-amz-trailer', '').strip() + checksum_headers = [ + v.strip() for v in header.rstrip(',').split(',') + ] if header else [] + if any(h not in CHECKSUMS_BY_HEADER + for h in checksum_headers): + raise InvalidRequest('The value specified in the x-amz-trailer ' + 'header is not supported') + _validate_checksum_header_cardinality(len(checksum_headers)) + return checksum_headers + + def _validate_checksum_headers(self): + """ + A checksum for the request is specified by a checksum header of the + form: + + x-amz-checksum-<algorithm>: <checksum> + + where <algorithm> is one of the supported checksum algorithms and + <checksum> is the value to be checked. A checksum header may be sent in + either the headers or the trailers. An ``x-amz-trailer`` header is used + to declare that a checksum header is to be expected in the trailers. 
+ + At most one checksum header is allowed in the headers or trailers. If + this condition is met, this method returns the name of the checksum + header or trailer and a hasher for the checksum algorithm that it + declares. + + :raises InvalidRequest: if any of the following conditions occur: more + than one checksum header is declared; the checksum header specifies + an invalid algorithm; the algorithm does not match the value of any + ``x-amz-sdk-checksum-algorithm`` header that is also present; the + checksum value is invalid. + :raises S3NotImplemented: if the declared algorithm is valid but not + supported. + :return: a tuple of + (hasher, checksum header name, checksum trailer name) where at + least one of (checksum header name, checksum trailer name) will be + None. + """ + checksum_headers = self._validate_x_amz_checksum_headers() + checksum_trailer_headers = self._validate_x_amz_trailer_header() + _validate_checksum_header_cardinality( + len(checksum_headers) + len(checksum_trailer_headers), + headers_and_trailer=True + ) + + if checksum_headers: + checksum_trailer = None + checksum_header, b64digest = list(checksum_headers.items())[0] + checksum_hasher = _get_checksum_hasher(checksum_header) + try: + # early check on the value... + _validate_checksum_value(checksum_hasher, b64digest) + except ValueError: + raise InvalidRequest( + 'Value for %s header is invalid.' 
% checksum_header) + elif checksum_trailer_headers: + checksum_header = None + checksum_trailer = checksum_trailer_headers[0] + checksum_hasher = _get_checksum_hasher(checksum_trailer) + # checksum should appear at end of request in trailers + else: + checksum_hasher = checksum_header = checksum_trailer = None + + checksum_algo = self.headers.get('x-amz-sdk-checksum-algorithm') + if checksum_algo: + if not checksum_hasher: + raise InvalidRequest( + 'x-amz-sdk-checksum-algorithm specified, but no ' + 'corresponding x-amz-checksum-* or x-amz-trailer ' + 'headers were found.') + if checksum_algo.lower() != checksum_hasher.name: + raise InvalidRequest('Value for x-amz-sdk-checksum-algorithm ' + 'header is invalid.') + + return checksum_hasher, checksum_header, checksum_trailer + + def _install_checksumming_input_wrapper( + self, checksum_hasher, checksum_key, checksum_source): + self.environ['wsgi.input'] = ChecksummingInput( + self.environ['wsgi.input'], + self.content_length, + checksum_hasher, + checksum_key, + checksum_source + ) + def _validate_headers(self): if 'CONTENT_LENGTH' in self.environ: try: @@ -1463,8 +1687,7 @@ class S3Request(swob.Request): raise InvalidRequest('Missing required header for this request: ' 'Content-MD5') - digest = base64.b64encode(md5( - body, usedforsecurity=False).digest()).strip().decode('ascii') + digest = base64_str(md5(body, usedforsecurity=False).digest()) if self.environ['HTTP_CONTENT_MD5'] != digest: raise BadDigest(content_md5=self.environ['HTTP_CONTENT_MD5']) @@ -1891,6 +2114,13 @@ class S3Request(swob.Request): client_computed_content_s_h_a256=err.expected, s3_computed_content_s_h_a256=err.computed, ) + except S3InputChecksumMismatch as e: + raise BadDigest( + 'The %s you specified did not ' + 'match the calculated checksum.' % e.args[0]) + except S3InputChecksumTrailerInvalid as e: + raise InvalidRequest( + 'Value for %s trailing header is invalid.' 
% e.trailer) except S3InputChunkSignatureMismatch: raise SignatureDoesNotMatch( **self.signature_does_not_match_kwargs()) diff --git a/swift/common/middleware/s3api/schema/complete_multipart_upload.rng b/swift/common/middleware/s3api/schema/complete_multipart_upload.rng index d7ba2569b4..55aefa464e 100644 --- a/swift/common/middleware/s3api/schema/complete_multipart_upload.rng +++ b/swift/common/middleware/s3api/schema/complete_multipart_upload.rng @@ -11,6 +11,31 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/swift/common/utils/__init__.py b/swift/common/utils/__init__.py index 908e1458de..7e59f9ca23 100644 --- a/swift/common/utils/__init__.py +++ b/swift/common/utils/__init__.py @@ -4619,7 +4619,7 @@ def safe_json_loads(value): return None -def strict_b64decode(value, allow_line_breaks=False): +def strict_b64decode(value, allow_line_breaks=False, exact_size=None): ''' Validate and decode Base64-encoded data. @@ -4628,6 +4628,8 @@ def strict_b64decode(value, allow_line_breaks=False): :param value: some base64-encoded data :param allow_line_breaks: if True, ignore carriage returns and newlines + :param exact_size: if provided, the exact size of the decoded bytes + expected; also enforces round-trip checks :returns: the decoded data :raises ValueError: if ``value`` is not a string, contains invalid characters, or has insufficient padding @@ -4648,7 +4650,17 @@ def strict_b64decode(value, allow_line_breaks=False): strip_chars += '\r\n' if any(c not in valid_chars for c in value.strip(strip_chars)): raise ValueError - return base64.b64decode(value) + ret_val = base64.b64decode(value) + if exact_size is not None: + if len(ret_val) != exact_size: + raise ValueError + if base64_str(ret_val) != value: + raise ValueError + return ret_val + + +def base64_str(value): + return base64.b64encode(value).decode('ascii') def cap_length(value, max_length): diff --git a/swift/common/utils/checksum.py b/swift/common/utils/checksum.py index 7e1a434ed0..14e3a158b6 
100644 --- a/swift/common/utils/checksum.py +++ b/swift/common/utils/checksum.py @@ -186,6 +186,11 @@ def crc32c(data=None, initial_value=0): initial_value=initial_value) +def crc64nvme(data=None, initial_value=0): + '''Stub for s3api''' + raise NotImplementedError + + def log_selected_implementation(logger): try: impl = _select_crc32c_impl() diff --git a/test/s3api/test_input_errors.py b/test/s3api/test_input_errors.py index 998cf8ec52..d5205948b6 100644 --- a/test/s3api/test_input_errors.py +++ b/test/s3api/test_input_errors.py @@ -290,7 +290,7 @@ class S3SessionV4(S3Session): for k, v in sorted(request['query'].items())), ] canonical_request_lines.extend( - '%s:%s' % (h, request['headers'][h]) + '%s:%s' % (h, request['headers'][h].strip()) for h in request['signed_headers']) canonical_request_lines.extend([ '', @@ -838,6 +838,297 @@ class InputErrorsMixin(object): self.assertSHA256Mismatch( resp, EMPTY_SHA256.upper(), _sha256(TEST_BODY)) + def test_good_md5_good_sha_good_crc(self): + resp = self.conn.make_request( + self.bucket_name, + 'good-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'content-md5': _md5(TEST_BODY), + 'x-amz-content-sha256': _sha256(TEST_BODY), + 'x-amz-checksum-crc32': _crc32(TEST_BODY), + }) + self.assertOK(resp) + + def test_good_md5_good_sha_good_crc_declared(self): + resp = self.conn.make_request( + self.bucket_name, + 'good-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'content-md5': _md5(TEST_BODY), + 'x-amz-content-sha256': _sha256(TEST_BODY), + # can flag that you're going to send it + 'x-amz-sdk-checksum-algorithm': 'CRC32', + 'x-amz-checksum-crc32': _crc32(TEST_BODY), + }) + self.assertOK(resp) + + def test_good_md5_good_sha_no_crc_but_declared(self): + resp = self.conn.make_request( + self.bucket_name, + 'missing-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'content-md5': _md5(TEST_BODY), + 'x-amz-content-sha256': _sha256(TEST_BODY), + # but if you flag it, you gotta send it + 
'x-amz-sdk-checksum-algorithm': 'CRC32', + }) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + self.assertIn(b'x-amz-sdk-checksum-algorithm specified, but ' + b'no corresponding x-amz-checksum-* or x-amz-trailer ' + b'headers were found.', resp.content) + + def test_good_md5_good_sha_good_crc_algo_mismatch(self): + resp = self.conn.make_request( + self.bucket_name, + 'good-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'content-md5': _md5(TEST_BODY), + 'x-amz-content-sha256': _sha256(TEST_BODY), + 'x-amz-sdk-checksum-algorithm': 'CRC32C', + 'x-amz-checksum-crc32': _crc32(TEST_BODY), + }) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + # Note that if there's a mismatch between what you flag and what you + # send, the message isn't super clear + self.assertIn(b'Value for x-amz-sdk-checksum-algorithm ' + b'header is invalid.', resp.content) + + def test_good_md5_good_sha_invalid_crc_header(self): + resp = self.conn.make_request( + self.bucket_name, + 'invalid-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'content-md5': _md5(TEST_BODY), + 'x-amz-content-sha256': _sha256(TEST_BODY), + 'x-amz-checksum-crc32': 'bad'}) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + self.assertIn(b'Value for x-amz-checksum-crc32 header is ' + b'invalid.', resp.content) + + def test_good_md5_good_sha_bad_crc_header(self): + resp = self.conn.make_request( + self.bucket_name, + 'bad-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'content-md5': _md5(TEST_BODY), + 'x-amz-content-sha256': _sha256(TEST_BODY), + 'x-amz-checksum-crc32': _crc32(b'not the body')}) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'BadDigest', resp.content) + self.assertIn(b'The CRC32 you specified did not match the ' + b'calculated checksum.', resp.content) + + def 
test_good_md5_bad_sha_bad_crc_header(self): + resp = self.conn.make_request( + self.bucket_name, + 'bad-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'content-md5': _md5(TEST_BODY), + 'x-amz-content-sha256': _sha256(b'not the body'), + 'x-amz-checksum-crc32': _crc32(b'not the body')}) + # SHA256 trumps checksum + self.assertSHA256Mismatch( + resp, _sha256(b'not the body'), _sha256(TEST_BODY)) + + def test_no_md5_good_sha_good_crc_header(self): + resp = self.conn.make_request( + self.bucket_name, + 'bad-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'x-amz-content-sha256': _sha256(TEST_BODY), + 'x-amz-checksum-crc32': _crc32(TEST_BODY)}) + self.assertOK(resp) + + def test_no_md5_good_sha_unsupported_crc_header(self): + resp = self.conn.make_request( + self.bucket_name, + 'bad-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'x-amz-content-sha256': _sha256(TEST_BODY), + 'x-amz-checksum-bad': _crc32(TEST_BODY)}) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + self.assertIn(b'The algorithm type you specified in ' + b'x-amz-checksum- header is invalid.', + resp.content) + + def test_no_md5_good_sha_multiple_crc_in_headers(self): + resp = self.conn.make_request( + self.bucket_name, + 'bad-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'x-amz-content-sha256': _sha256(TEST_BODY), + 'x-amz-checksum-crc32c': _crc32(TEST_BODY), + 'x-amz-checksum-crc32': _crc32(TEST_BODY)}) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + self.assertIn(b'Expecting a single x-amz-checksum- header. 
' + b'Multiple checksum Types are not allowed.', + resp.content) + + def test_no_md5_good_sha_multiple_crc_in_headers_algo_mismatch(self): + # repeats trump the algo mismatch + resp = self.conn.make_request( + self.bucket_name, + 'bad-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'x-amz-sdk-checksum-algorithm': 'sha256', + 'x-amz-content-sha256': _sha256(TEST_BODY), + 'x-amz-checksum-crc32c': _crc32(TEST_BODY), + 'x-amz-checksum-crc32': _crc32(TEST_BODY)}) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + self.assertIn(b'Expecting a single x-amz-checksum- header. ' + b'Multiple checksum Types are not allowed.', + resp.content) + + def test_no_md5_good_sha_crc_in_trailer_but_not_streaming(self): + resp = self.conn.make_request( + self.bucket_name, + 'bad-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'x-amz-sdk-checksum-algorithm': 'crc32', + 'x-amz-content-sha256': _sha256(TEST_BODY), + 'x-amz-trailer': 'x-amz-checksum-crc32'}) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'MalformedTrailerError', resp.content) + self.assertIn(b'The request contained trailing data that was ' + b'not well-formed or did not conform to our published ' + b'schema.', resp.content) + + def test_no_md5_good_sha_duplicated_crc_in_trailer_algo_mismatch(self): + # repeats trump the algo mismatch + resp = self.conn.make_request( + self.bucket_name, + 'bad-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'x-amz-sdk-checksum-algorithm': 'sha256', + 'x-amz-content-sha256': _sha256(TEST_BODY), + 'x-amz-checksum-crc32': _crc32(TEST_BODY), + 'x-amz-trailer': 'x-amz-checksum-crc32, x-amz-checksum-crc32'}) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + self.assertIn(b'Expecting a single x-amz-checksum- header. 
' + b'Multiple checksum Types are not allowed.', + resp.content) + + def test_no_md5_good_sha_multiple_crc_in_trailer_algo_mismatch(self): + # repeats trump the algo mismatch + resp = self.conn.make_request( + self.bucket_name, + 'bad-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'x-amz-sdk-checksum-algorithm': 'sha256', + 'x-amz-content-sha256': _sha256(TEST_BODY), + 'x-amz-checksum-crc32': _crc32(TEST_BODY), + 'x-amz-trailer': 'x-amz-checksum-crc32, x-amz-checksum-crc32c'} + ) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + self.assertIn(b'Expecting a single x-amz-checksum- header. ' + b'Multiple checksum Types are not allowed.', + resp.content) + + def test_no_md5_good_sha_different_crc_in_trailer_and_header(self): + resp = self.conn.make_request( + self.bucket_name, + 'bad-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'x-amz-sdk-checksum-algorithm': 'crc32', + 'x-amz-content-sha256': _sha256(TEST_BODY), + 'x-amz-checksum-crc32': _crc32(TEST_BODY), + 'x-amz-trailer': 'x-amz-checksum-crc32c'}) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + self.assertIn(b'Expecting a single x-amz-checksum- header' + b'', resp.content) + + def test_no_md5_good_sha_same_crc_in_trailer_and_header(self): + resp = self.conn.make_request( + self.bucket_name, + 'bad-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'x-amz-sdk-checksum-algorithm': 'crc32', + 'x-amz-content-sha256': _sha256(TEST_BODY), + 'x-amz-checksum-crc32': _crc32(TEST_BODY), + 'x-amz-trailer': 'x-amz-checksum-crc32'}) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + self.assertIn(b'Expecting a single x-amz-checksum- header' + b'', resp.content) + + def test_no_md5_good_sha_multiple_crc_in_trailer_and_header(self): + resp = self.conn.make_request( + self.bucket_name, + 'bad-checksum', + method='PUT', + 
body=TEST_BODY, + headers={ + 'x-amz-sdk-checksum-algorithm': 'crc32', + 'x-amz-content-sha256': _sha256(TEST_BODY), + 'x-amz-checksum-crc32': _crc32(TEST_BODY), + 'x-amz-trailer': 'x-amz-checksum-crc32, x-amz-checksum-crc32c'} + ) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + self.assertIn(b'Expecting a single x-amz-checksum- header. ' + b'Multiple checksum Types are not allowed.', + resp.content) + + def test_no_md5_good_sha_multiple_crc_in_header_and_trailer(self): + resp = self.conn.make_request( + self.bucket_name, + 'bad-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'x-amz-sdk-checksum-algorithm': 'crc32', + 'x-amz-content-sha256': _sha256(TEST_BODY), + 'x-amz-checksum-crc32': _crc32(TEST_BODY), + 'x-amz-checksum-sha256': _sha256(TEST_BODY), + 'x-amz-trailer': 'x-amz-checksum-crc32'} + ) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + self.assertIn(b'Expecting a single x-amz-checksum- header. 
' + b'Multiple checksum Types are not allowed.', + resp.content) + def test_no_md5_bad_sha_empty_body(self): resp = self.conn.make_request( self.bucket_name, @@ -1426,6 +1717,13 @@ class TestV4AuthHeaders(InputErrorsMixin, BaseS3TestCaseWithBucket): 'not well-formed or did not conform to our published ' 'schema.', respbody) + def assertUnsupportedTrailerHeader(self, resp): + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + self.assertIn(b'The value specified in the x-amz-trailer ' + b'header is not supported', + resp.content) + def test_get_service_no_sha(self): resp = self.conn.make_request() self.assertMissingSHA256(resp) @@ -1528,6 +1826,19 @@ class TestV4AuthHeaders(InputErrorsMixin, BaseS3TestCaseWithBucket): headers={'x-amz-content-sha256': 'unsigned-payload'}) self.assertInvalidSHA256(resp, 'unsigned-payload') + def test_no_md5_no_sha_good_crc(self): + resp = self.conn.make_request( + self.bucket_name, + 'bad-checksum', + method='PUT', + body=TEST_BODY, + headers={ + 'x-amz-checksum-crc32': _crc32(TEST_BODY)}) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + self.assertIn(b'Missing required header for this request: ' + b'x-amz-content-sha256', resp.content) + def test_strm_unsgnd_pyld_trl_not_encoded(self): resp = self.conn.make_request( self.bucket_name, @@ -1610,6 +1921,140 @@ class TestV4AuthHeaders(InputErrorsMixin, BaseS3TestCaseWithBucket): self.assertIn(b'You must provide the Content-Length HTTP ' b'header.', resp.content) + def test_strm_unsgnd_pyld_trl_crc_header_ok(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b'']) + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-checksum-crc32': _crc32(TEST_BODY), + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 
'x-amz-decoded-content-length': str(len(TEST_BODY))}) + self.assertOK(resp) + + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='GET', + headers={'x-amz-content-sha256': 'UNSIGNED-PAYLOAD'}) + self.assertOK(resp, TEST_BODY) + self.assertNotIn('Content-Encoding', resp.headers) + + def test_strm_unsgnd_pyld_trl_crc_header_x_amz_checksum_type_ok(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b'']) + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-checksum-crc32': _crc32(TEST_BODY), + # unexpected with a PUT but tolerated... + 'x-amz-checksum-type': 'COMPOSITE', + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY))}) + self.assertOK(resp) + + def test_strm_unsgnd_pyld_trl_crc_header_x_amz_checksum_algorithm_ok(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b'']) + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-checksum-crc32': _crc32(TEST_BODY), + # unexpected with a PUT but tolerated... 
+ 'x-amz-checksum-algorithm': 'crc32', + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY))}) + self.assertOK(resp) + + def test_strm_unsgnd_pyld_trl_crc_header_algo_mismatch(self): + chunked_body = b'nonsense ignored' + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-sdk-checksum-algorithm': 'sha256', + 'x-amz-checksum-crc32': _crc32(TEST_BODY), + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY))}) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + self.assertIn(b'Value for x-amz-sdk-checksum-algorithm ' + b'header is invalid.', resp.content) + + def test_strm_unsgnd_pyld_trl_multiple_crc_header(self): + chunked_body = b'nonsense ignored' + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-checksum-crc32c': _crc32(TEST_BODY), + 'x-amz-checksum-crc32': _crc32(TEST_BODY), + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY))}) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + self.assertIn(b'Expecting a single x-amz-checksum- header. 
' + b'Multiple checksum Types are not allowed.', + resp.content) + + def test_strm_unsgnd_pyld_trl_crc_header_mismatch(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b'']) + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-sdk-checksum-algorithm': 'crc32', + 'x-amz-checksum-crc32': _crc32(b'not the test body'), + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY))}) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'BadDigest', resp.content) + self.assertIn(b'The CRC32 you specified did not match the ' + b'calculated checksum.', resp.content) + + def test_strm_unsgnd_pyld_trl_declared_algo_declared_no_trailer_sent(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b'']) + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-sdk-checksum-algorithm': 'crc32', + 'x-amz-trailer': 'x-amz-checksum-crc32', + 'x-amz-decoded-content-length': str(len(TEST_BODY))}) + self.assertMalformedTrailer(resp) + def test_strm_unsgnd_pyld_trl_declared_no_trailer_sent(self): chunked_body = b''.join( b'%x\r\n%s\r\n' % (len(chunk), chunk) @@ -1687,6 +2132,202 @@ class TestV4AuthHeaders(InputErrorsMixin, BaseS3TestCaseWithBucket): 'x-amz-trailer': 'x-amz-checksum-crc32'}) self.assertOK(resp) + def test_strm_unsgnd_pyld_trl_with_comma_in_trailer_ok(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b''])[:-2] + chunked_body += ''.join([ + f'x-amz-checksum-crc32: {_crc32(TEST_BODY)}\r\n', + ]).encode('ascii') + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + 
method='PUT', + body=chunked_body, + headers={ + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY)), + 'x-amz-trailer': 'x-amz-checksum-crc32,'}) + self.assertOK(resp) + + def test_strm_unsgnd_pyld_trl_with_commas_in_trailer_1(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b''])[:-2] + chunked_body += ''.join([ + f'x-amz-checksum-crc32: {_crc32(TEST_BODY)}\r\n', + ]).encode('ascii') + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY)), + 'x-amz-trailer': ', x-amz-checksum-crc32, ,'}) + self.assertUnsupportedTrailerHeader(resp) + + def test_strm_unsgnd_pyld_trl_with_commas_in_trailer_2(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b''])[:-2] + chunked_body += ''.join([ + f'x-amz-checksum-crc32: {_crc32(TEST_BODY)}\r\n', + ]).encode('ascii') + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY)), + 'x-amz-trailer': ', x-amz-checksum-crc32'}) + self.assertUnsupportedTrailerHeader(resp) + + def test_strm_unsgnd_pyld_trl_with_commas_in_trailer_3(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b''])[:-2] + chunked_body += ''.join([ + f'x-amz-checksum-crc32: {_crc32(TEST_BODY)}\r\n', + ]).encode('ascii') + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 
'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY)), + 'x-amz-trailer': ',x-amz-checksum-crc32'}) + self.assertUnsupportedTrailerHeader(resp) + + def test_strm_unsgnd_pyld_trl_with_commas_in_trailer_4(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b''])[:-2] + chunked_body += ''.join([ + f'x-amz-checksum-crc32: {_crc32(TEST_BODY)}\r\n', + ]).encode('ascii') + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY)), + 'x-amz-trailer': 'x-amz-checksum-crc32,,'}) + self.assertOK(resp) + + def test_strm_unsgnd_pyld_trl_with_commas_in_trailer_5(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b''])[:-2] + chunked_body += ''.join([ + f'x-amz-checksum-crc32: {_crc32(TEST_BODY)}\r\n', + ]).encode('ascii') + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY)), + 'x-amz-trailer': 'x-amz-checksum-crc32, ,'}) + self.assertUnsupportedTrailerHeader(resp) + + def test_strm_unsgnd_pyld_trl_with_commas_in_trailer_6(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b''])[:-2] + chunked_body += ''.join([ + f'x-amz-checksum-crc32: {_crc32(TEST_BODY)}\r\n', + ]).encode('ascii') + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY)), + 'x-amz-trailer': 
'x-amz-checksum-crc32, '}) + self.assertOK(resp) + + def test_strm_unsgnd_pyld_trl_with_trailer_checksum_mismatch(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b''])[:-2] + chunked_body += ''.join([ + f'x-amz-checksum-crc32: {_crc32(b"not the body")}\r\n', + ]).encode('ascii') + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY)), + 'x-amz-trailer': 'x-amz-checksum-crc32'}) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'BadDigest', resp.content) + self.assertIn(b'The CRC32 you specified did not match the ' + b'calculated checksum.', resp.content) + + def test_strm_unsgnd_pyld_trl_with_trailer_checksum_invalid(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b''])[:-2] + chunked_body += ''.join([ + f'x-amz-checksum-crc32: {"not=base-64"}\r\n', + ]).encode('ascii') + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY)), + 'x-amz-trailer': 'x-amz-checksum-crc32'}) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + self.assertIn(b'Value for x-amz-checksum-crc32 trailing ' + b'header is invalid.', resp.content) + + def test_strm_unsgnd_pyld_trl_content_sha256_in_trailer(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b''])[:-2] + chunked_body += ''.join([ + f'x-amz-content-sha256: {_sha256(TEST_BODY)}\r\n', + ]).encode('ascii') + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + 
method='PUT', + body=chunked_body, + headers={ + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY)), + 'x-amz-trailer': 'x-amz-content-sha256'}) + self.assertUnsupportedTrailerHeader(resp) + def test_strm_unsgnd_pyld_trl_with_trailer_no_cr(self): chunked_body = b''.join( b'%x\r\n%s\r\n' % (len(chunk), chunk) @@ -1826,7 +2467,7 @@ class TestV4AuthHeaders(InputErrorsMixin, BaseS3TestCaseWithBucket): 'x-amz-trailer': 'x-amz-checksum-crc32'}) self.assertOK(resp) - def test_strm_unsgnd_pyld_trl_wrong_trailer(self): + def test_strm_unsgnd_pyld_trl_mismatch_trailer(self): chunked_body = b''.join( b'%x\r\n%s\r\n' % (len(chunk), chunk) for chunk in [TEST_BODY, b''])[:-2] @@ -1845,6 +2486,115 @@ class TestV4AuthHeaders(InputErrorsMixin, BaseS3TestCaseWithBucket): 'x-amz-trailer': 'x-amz-checksum-crc32c'}) self.assertMalformedTrailer(resp) + def test_strm_unsgnd_pyld_trl_unsupported_trailer_sent(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b''])[:-2] + chunked_body += ''.join([ + f'x-amz-checksum-bad: {_crc32(TEST_BODY)}\r\n', + ]).encode('ascii') + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY)), + 'x-amz-trailer': 'x-amz-checksum-crc32c'}) + self.assertMalformedTrailer(resp) + + def test_strm_unsgnd_pyld_trl_non_checksum_trailer(self): + def do_test(trailer, value): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b''])[:-2] + chunked_body += ''.join([ + f'{trailer}: {value}\r\n', + ]).encode('ascii') + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-content-sha256': + 
'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY)), + 'x-amz-trailer': trailer}) + self.assertUnsupportedTrailerHeader(resp) + + do_test('foo', 'bar') + do_test('content-md5', _md5(TEST_BODY)) + do_test('x-amz-content-sha256', _sha256(TEST_BODY)) + + def test_strm_unsgnd_pyld_trl_unsupported_trailer_declared(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b''])[:-2] + chunked_body += ''.join([ + f'x-amz-checksum-crc32: {_crc32(TEST_BODY)}\r\n', + ]).encode('ascii') + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY)), + 'x-amz-trailer': 'x-amz-checksum-bad'}) + self.assertUnsupportedTrailerHeader(resp) + + def test_strm_unsgnd_pyld_trl_multiple_checksum_trailers(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b''])[:-2] + chunked_body += ''.join([ + f'x-amz-checksum-crc32: {_crc32(TEST_BODY)}\r\n', + f'x-amz-checksum-sha256: {_sha256(TEST_BODY)}\r\n', + ]).encode('ascii') + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY)), + 'x-amz-trailer': + 'x-amz-checksum-crc32, x-amz-checksum-sha256'}) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'InvalidRequest', resp.content) + self.assertIn(b'Expecting a single x-amz-checksum- header. 
' + b'Multiple checksum Types are not allowed.', + resp.content) + + def test_strm_unsgnd_pyld_trl_multiple_trailers_unsupported(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b''])[:-2] + chunked_body += ''.join([ + f'x-amz-checksum-crc32: {_crc32(TEST_BODY)}\r\n', + 'x-amz-foo: bar\r\n', + ]).encode('ascii') + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY)), + 'x-amz-trailer': + 'x-amz-checksum-crc32, x-amz-foo'}) + self.assertUnsupportedTrailerHeader(resp) + def test_strm_unsgnd_pyld_trl_extra_trailer(self): chunked_body = b''.join( b'%x\r\n%s\r\n' % (len(chunk), chunk) @@ -1885,6 +2635,29 @@ class TestV4AuthHeaders(InputErrorsMixin, BaseS3TestCaseWithBucket): 'x-amz-trailer': 'x-amz-checksum-crc32'}) self.assertOK(resp) + def test_strm_unsgnd_pyld_trl_good_then_bad_trailer(self): + chunked_body = b''.join( + b'%x\r\n%s\r\n' % (len(chunk), chunk) + for chunk in [TEST_BODY, b''])[:-2] + chunked_body += ''.join([ + f'x-amz-checksum-crc32: {_crc32(TEST_BODY)}\r\n', + f'x-amz-checksum-crc32: {_crc32(TEST_BODY[:-1])}\r\n', + ]).encode('ascii') + resp = self.conn.make_request( + self.bucket_name, + 'test-obj', + method='PUT', + body=chunked_body, + headers={ + 'x-amz-content-sha256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'content-encoding': 'aws-chunked', + 'x-amz-decoded-content-length': str(len(TEST_BODY)), + 'x-amz-trailer': 'x-amz-checksum-crc32'}) + self.assertEqual(resp.status_code, 400, resp.content) + self.assertIn(b'BadDigest', resp.content) + self.assertIn(b'The CRC32 you specified did not match the ' + b'calculated checksum.', resp.content) + def test_strm_unsgnd_pyld_trl_extra_line_then_trailer_ok(self): chunked_body = b''.join( b'%x\r\n%s\r\n' % (len(chunk), chunk) diff --git 
a/test/s3api/test_object_checksums.py b/test/s3api/test_object_checksums.py new file mode 100644 index 0000000000..dc7661cdf2 --- /dev/null +++ b/test/s3api/test_object_checksums.py @@ -0,0 +1,578 @@ +# Copyright (c) 2010-2023 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import binascii +import botocore +import hashlib +from unittest import SkipTest + +from swift.common.utils import base64_str +from swift.common.utils.checksum import crc32c +from test.s3api import BaseS3TestCaseWithBucket + +TEST_BODY = b'123456789' + + +def boto_at_least(*version): + return tuple(int(x) for x in botocore.__version__.split('.')) >= version + + +class ObjectChecksumMixin(object): + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.client = cls.get_s3_client(1) + cls.use_tls = cls.client._endpoint.host.startswith('https:') + cls.CHECKSUM_HDR = 'x-amz-checksum-' + cls.ALGORITHM.lower() + + def assert_error(self, resp, err_code, err_msg, obj_name, **extra): + self.assertEqual(400, resp['ResponseMetadata']['HTTPStatusCode']) + self.assertEqual(err_code, resp['Error']['Code']) + self.assertEqual(err_msg, resp['Error']['Message']) + self.assertEqual({k: resp['Error'].get(k) for k in extra}, extra) + + # Sanity check: object was not created + with self.assertRaises(botocore.exceptions.ClientError) as caught: + self.client.head_object(Bucket=self.bucket_name, Key=obj_name) + resp = caught.exception.response + self.assertEqual(404, 
resp['ResponseMetadata']['HTTPStatusCode']) + + def test_let_sdk_compute(self): + obj_name = self.create_name(self.ALGORITHM + '-sdk') + resp = self.client.put_object( + Bucket=self.bucket_name, + Key=obj_name, + Body=TEST_BODY, + ChecksumAlgorithm=self.ALGORITHM, + ) + self.assertEqual(200, resp['ResponseMetadata']['HTTPStatusCode']) + + def test_good_checksum(self): + obj_name = self.create_name(self.ALGORITHM + '-with-algo-header') + resp = self.client.put_object( + Bucket=self.bucket_name, + Key=obj_name, + Body=TEST_BODY, + ChecksumAlgorithm=self.ALGORITHM, + **{'Checksum' + self.ALGORITHM: self.EXPECTED} + ) + self.assertEqual(200, resp['ResponseMetadata']['HTTPStatusCode']) + + def test_good_checksum_no_algorithm_header(self): + obj_name = self.create_name(self.ALGORITHM + '-no-algo-header') + resp = self.client.put_object( + Bucket=self.bucket_name, + Key=obj_name, + Body=TEST_BODY, + **{'Checksum' + self.ALGORITHM: self.EXPECTED} + ) + self.assertEqual(200, resp['ResponseMetadata']['HTTPStatusCode']) + + def test_invalid_checksum(self): + obj_name = self.create_name(self.ALGORITHM + '-invalid') + with self.assertRaises(botocore.exceptions.ClientError) as caught: + self.client.put_object( + Bucket=self.bucket_name, + Key=obj_name, + Body=TEST_BODY, + ChecksumAlgorithm=self.ALGORITHM, + **{'Checksum' + self.ALGORITHM: self.INVALID} + ) + self.assert_error( + caught.exception.response, + 'InvalidRequest', + 'Value for %s header is invalid.' % self.CHECKSUM_HDR, + obj_name, + ) + + def test_bad_checksum(self): + obj_name = self.create_name(self.ALGORITHM + '-bad') + with self.assertRaises(botocore.exceptions.ClientError) as caught: + self.client.put_object( + Bucket=self.bucket_name, + Key=obj_name, + Body=TEST_BODY, + ChecksumAlgorithm=self.ALGORITHM, + **{'Checksum' + self.ALGORITHM: self.BAD} + ) + self.assert_error( + caught.exception.response, + 'BadDigest', + 'The %s you specified did not match the calculated checksum.' 
+ % self.ALGORITHM, + obj_name, + ) + + def test_mpu_upload_part_invalid_checksum(self): + obj_name = self.create_name( + self.ALGORITHM + '-mpu-upload-part-invalid-checksum') + create_mpu_resp = self.client.create_multipart_upload( + Bucket=self.bucket_name, Key=obj_name, + ChecksumAlgorithm=self.ALGORITHM) + self.assertEqual(200, create_mpu_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + upload_id = create_mpu_resp['UploadId'] + with self.assertRaises(botocore.exceptions.ClientError) as caught: + self.client.upload_part( + Bucket=self.bucket_name, + Key=obj_name, + UploadId=upload_id, + PartNumber=1, + Body=TEST_BODY, + **{'Checksum' + self.ALGORITHM: self.INVALID}, + ) + self.assert_error( + caught.exception.response, + 'InvalidRequest', + 'Value for %s header is invalid.' % self.CHECKSUM_HDR, + obj_name, + ) + + def test_mpu_upload_part_bad_checksum(self): + obj_name = self.create_name( + self.ALGORITHM + '-mpu-upload-part-bad-checksum') + create_mpu_resp = self.client.create_multipart_upload( + Bucket=self.bucket_name, Key=obj_name, + ChecksumAlgorithm=self.ALGORITHM) + self.assertEqual(200, create_mpu_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + upload_id = create_mpu_resp['UploadId'] + with self.assertRaises(botocore.exceptions.ClientError) as caught: + self.client.upload_part( + Bucket=self.bucket_name, + Key=obj_name, + UploadId=upload_id, + PartNumber=1, + Body=TEST_BODY, + **{'Checksum' + self.ALGORITHM: self.BAD}, + ) + self.assert_error( + caught.exception.response, + 'BadDigest', + 'The %s you specified did not match the calculated ' + 'checksum.' 
% self.ALGORITHM, + obj_name, + ) + + def test_mpu_upload_part_good_checksum(self): + obj_name = self.create_name(self.ALGORITHM + '-mpu-upload-part-good') + create_mpu_resp = self.client.create_multipart_upload( + Bucket=self.bucket_name, Key=obj_name, + ChecksumAlgorithm=self.ALGORITHM) + self.assertEqual(200, create_mpu_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + upload_id = create_mpu_resp['UploadId'] + part_resp = self.client.upload_part( + Bucket=self.bucket_name, + Key=obj_name, + UploadId=upload_id, + PartNumber=1, + Body=TEST_BODY, + **{'Checksum' + self.ALGORITHM: self.EXPECTED}, + ) + self.assertEqual(200, part_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + + def test_mpu_complete_good_checksum(self): + checksum_kwargs = { + 'ChecksumAlgorithm': self.ALGORITHM, + } + if boto_at_least(1, 36): + checksum_kwargs['ChecksumType'] = 'COMPOSITE' + + obj_name = self.create_name(self.ALGORITHM + '-mpu-complete-good') + create_mpu_resp = self.client.create_multipart_upload( + Bucket=self.bucket_name, Key=obj_name, + **checksum_kwargs) + self.assertEqual(200, create_mpu_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + upload_id = create_mpu_resp['UploadId'] + part_resp = self.client.upload_part( + Bucket=self.bucket_name, + Key=obj_name, + UploadId=upload_id, + PartNumber=1, + Body=TEST_BODY, + **{'Checksum' + self.ALGORITHM: self.EXPECTED}, + ) + self.assertEqual(200, part_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + complete_mpu_resp = self.client.complete_multipart_upload( + Bucket=self.bucket_name, Key=obj_name, + MultipartUpload={ + 'Parts': [ + { + 'ETag': part_resp['ETag'], + 'PartNumber': 1, + 'Checksum' + self.ALGORITHM: self.EXPECTED, + }, + ], + }, + UploadId=upload_id, + ) + self.assertEqual(200, complete_mpu_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + + +class TestObjectChecksumCRC32(ObjectChecksumMixin, BaseS3TestCaseWithBucket): + ALGORITHM = 'CRC32' + EXPECTED = 'y/Q5Jg==' + INVALID = 'y/Q5Jh==' + BAD = 'z/Q5Jg==' + + +class 
TestObjectChecksumCRC32C(ObjectChecksumMixin, BaseS3TestCaseWithBucket): + ALGORITHM = 'CRC32C' + EXPECTED = '4waSgw==' + INVALID = '4waSgx==' + BAD = '5waSgw==' + + @classmethod + def setUpClass(cls): + if not botocore.httpchecksum.HAS_CRT: + raise SkipTest('botocore cannot crc32c (run `pip install awscrt`)') + super().setUpClass() + + +class TestObjectChecksumSHA1(ObjectChecksumMixin, BaseS3TestCaseWithBucket): + ALGORITHM = 'SHA1' + EXPECTED = '98O8HYCOBHMq32eZZczDTKeuNEE=' + INVALID = '98O8HYCOBHMq32eZZczDTKeuNEF=' + BAD = '+8O8HYCOBHMq32eZZczDTKeuNEE=' + + +class TestObjectChecksumSHA256(ObjectChecksumMixin, BaseS3TestCaseWithBucket): + ALGORITHM = 'SHA256' + EXPECTED = 'FeKw08M4keuw8e9gnsQZQgwg4yDOlMZfvIwzEkSOsiU=' + INVALID = 'FeKw08M4keuw8e9gnsQZQgwg4yDOlMZfvIwzEkSOsiV=' + BAD = 'GeKw08M4keuw8e9gnsQZQgwg4yDOlMZfvIwzEkSOsiU=' + + +class TestObjectChecksums(BaseS3TestCaseWithBucket): + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.client = cls.get_s3_client(1) + cls.use_tls = cls.client._endpoint.host.startswith('https:') + + def test_multi_checksum(self): + with self.assertRaises(botocore.exceptions.ClientError) as caught: + self.client.put_object( + Bucket=self.bucket_name, + Key=self.create_name('multi-checksum'), + Body=TEST_BODY, + # Note: Both valid! Ought to be able to validate & store both + ChecksumCRC32='y/Q5Jg==', + ChecksumSHA1='98O8HYCOBHMq32eZZczDTKeuNEE=', + ) + resp = caught.exception.response + code = resp['ResponseMetadata']['HTTPStatusCode'] + self.assertEqual(400, code) + self.assertEqual('InvalidRequest', resp['Error']['Code']) + self.assertEqual( + resp['Error']['Message'], + 'Expecting a single x-amz-checksum- header. 
' + 'Multiple checksum Types are not allowed.') + + def test_different_checksum_requested(self): + with self.assertRaises(botocore.exceptions.ClientError) as caught: + self.client.put_object( + Bucket=self.bucket_name, + Key=self.create_name('different-checksum'), + Body=TEST_BODY, + ChecksumCRC32='y/Q5Jg==', + ChecksumAlgorithm='SHA1', + ) + resp = caught.exception.response + code = resp['ResponseMetadata']['HTTPStatusCode'] + self.assertEqual(400, code) + self.assertEqual('InvalidRequest', resp['Error']['Code']) + if boto_at_least(1, 36): + self.assertEqual( + resp['Error']['Message'], + 'Value for x-amz-sdk-checksum-algorithm header is invalid.') + else: + self.assertEqual( + resp['Error']['Message'], + 'Expecting a single x-amz-checksum- header') + + def assert_invalid(self, resp): + code = resp['ResponseMetadata']['HTTPStatusCode'] + self.assertEqual(400, code) + self.assertEqual('InvalidRequest', resp['Error']['Code']) + self.assertEqual( + resp['Error']['Message'], + 'Value for x-amz-checksum-crc32 header is invalid.') + + def test_invalid_base64_invalid_length(self): + put_kwargs = { + 'Bucket': self.bucket_name, + 'Key': self.create_name('invalid-bad-length'), + 'Body': TEST_BODY, + 'ChecksumCRC32': 'short===', # invalid length for base64 + } + with self.assertRaises(botocore.exceptions.ClientError) as caught: + self.client.put_object(**put_kwargs) + self.assert_invalid(caught.exception.response) + + def test_invalid_base64_too_short(self): + put_kwargs = { + 'Bucket': self.bucket_name, + 'Key': self.create_name('invalid-short'), + 'Body': TEST_BODY, + 'ChecksumCRC32': 'shrt', # only 3 bytes + } + with self.assertRaises(botocore.exceptions.ClientError) as caught: + self.client.put_object(**put_kwargs) + self.assert_invalid(caught.exception.response) + + def test_invalid_base64_too_long(self): + put_kwargs = { + 'Bucket': self.bucket_name, + 'Key': self.create_name('invalid-long'), + 'Body': TEST_BODY, + 'ChecksumCRC32': 'toolong=', # 5 bytes + } + with 
self.assertRaises(botocore.exceptions.ClientError) as caught: + self.client.put_object(**put_kwargs) + self.assert_invalid(caught.exception.response) + + def test_invalid_base64_all_invalid_chars(self): + put_kwargs = { + 'Bucket': self.bucket_name, + 'Key': self.create_name('purely-invalid'), + 'Body': TEST_BODY, + 'ChecksumCRC32': '^^^^^^==', # all invalid chars + } + with self.assertRaises(botocore.exceptions.ClientError) as caught: + self.client.put_object(**put_kwargs) + self.assert_invalid(caught.exception.response) + + def test_invalid_base64_includes_invalid_chars(self): + put_kwargs = { + 'Bucket': self.bucket_name, + 'Key': self.create_name('contains-invalid'), + 'Body': TEST_BODY, + 'ChecksumCRC32': 'y^/^Q5^J^g==', # spaced out with invalid chars + } + with self.assertRaises(botocore.exceptions.ClientError) as caught: + self.client.put_object(**put_kwargs) + self.assert_invalid(caught.exception.response) + + def test_mpu_no_checksum_upload_part_invalid_checksum(self): + obj_name = self.create_name('no-checksum-mpu') + create_mpu_resp = self.client.create_multipart_upload( + Bucket=self.bucket_name, Key=obj_name) + self.assertEqual(200, create_mpu_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + upload_id = create_mpu_resp['UploadId'] + with self.assertRaises(botocore.exceptions.ClientError) as caught: + self.client.upload_part( + Bucket=self.bucket_name, + Key=obj_name, + UploadId=upload_id, + PartNumber=1, + Body=TEST_BODY, + ChecksumCRC32=TestObjectChecksumCRC32.INVALID, + ) + self.assert_invalid(caught.exception.response) + + def test_mpu_has_no_checksum(self): + # Clients don't need to be thinking about checksums at all + obj_name = self.create_name('no-checksum-mpu') + create_mpu_resp = self.client.create_multipart_upload( + Bucket=self.bucket_name, Key=obj_name) + self.assertEqual(200, create_mpu_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + upload_id = create_mpu_resp['UploadId'] + part_resp = self.client.upload_part( + Bucket=self.bucket_name, 
+ Key=obj_name, + UploadId=upload_id, + PartNumber=1, + Body=TEST_BODY, + ) + complete_mpu_resp = self.client.complete_multipart_upload( + Bucket=self.bucket_name, Key=obj_name, + MultipartUpload={ + 'Parts': [ + { + 'ETag': part_resp['ETag'], + 'PartNumber': 1, + }, + ], + }, + UploadId=upload_id, + ) + self.assertEqual(200, complete_mpu_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + + head_resp = self.client.head_object( + Bucket=self.bucket_name, Key=obj_name) + self.assertFalse([k for k in head_resp + if k.startswith('Checksum')]) + + def test_mpu_upload_part_multi_checksum(self): + obj_name = self.create_name('multi-checksum-mpu') + create_mpu_resp = self.client.create_multipart_upload( + Bucket=self.bucket_name, Key=obj_name, + ChecksumAlgorithm='CRC32C') + self.assertEqual(200, create_mpu_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + upload_id = create_mpu_resp['UploadId'] + with self.assertRaises(botocore.exceptions.ClientError) as caught: + self.client.upload_part( + Bucket=self.bucket_name, + Key=obj_name, + UploadId=upload_id, + PartNumber=1, + Body=TEST_BODY, + # Both valid! + ChecksumCRC32=TestObjectChecksumCRC32.EXPECTED, + ChecksumCRC32C=TestObjectChecksumCRC32C.EXPECTED, + ) + resp = caught.exception.response + self.assertEqual(400, resp['ResponseMetadata']['HTTPStatusCode']) + self.assertEqual(resp['Error'], { + 'Code': 'InvalidRequest', + 'Message': ('Expecting a single x-amz-checksum- header. ' + 'Multiple checksum Types are not allowed.'), + }) + # You'd think we ought to be able to validate & store both... 
+ + def test_multipart_mpu(self): + obj_name = self.create_name('multipart-mpu') + create_mpu_resp = self.client.create_multipart_upload( + Bucket=self.bucket_name, Key=obj_name, + ChecksumAlgorithm='CRC32C') + self.assertEqual(200, create_mpu_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + upload_id = create_mpu_resp['UploadId'] + part_body = b'\x00' * 5 * 1024 * 1024 + part_crc32c = base64_str(crc32c(part_body).digest()) + + upload_part_resp = self.client.upload_part( + Bucket=self.bucket_name, + Key=obj_name, + UploadId=upload_id, + PartNumber=1, + Body=part_body, + ChecksumCRC32C=part_crc32c, + ) + self.assertEqual(200, upload_part_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + # then do another + upload_part_resp = self.client.upload_part( + Bucket=self.bucket_name, + Key=obj_name, + UploadId=upload_id, + PartNumber=2, + Body=part_body, + ChecksumCRC32C=part_crc32c, + ) + self.assertEqual(200, upload_part_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + + complete_mpu_resp = self.client.complete_multipart_upload( + Bucket=self.bucket_name, Key=obj_name, + MultipartUpload={ + 'Parts': [ + { + 'PartNumber': 1, + 'ETag': upload_part_resp['ETag'], + 'ChecksumCRC32C': part_crc32c, + }, + { + 'PartNumber': 2, + 'ETag': upload_part_resp['ETag'], + 'ChecksumCRC32C': part_crc32c, + }, + ], + }, + UploadId=upload_id, + ) + self.assertEqual(200, complete_mpu_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + mpu_etag = '"' + hashlib.md5(binascii.unhexlify( + upload_part_resp['ETag'].strip('"')) * 2).hexdigest() + '-2"' + self.assertEqual(mpu_etag, + complete_mpu_resp['ETag']) + + def test_multipart_mpu_no_etags(self): + obj_name = self.create_name('multipart-mpu') + create_mpu_resp = self.client.create_multipart_upload( + Bucket=self.bucket_name, Key=obj_name, + ChecksumAlgorithm='CRC32C') + self.assertEqual(200, create_mpu_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + upload_id = create_mpu_resp['UploadId'] + part_body = b'\x00' * 5 * 1024 * 1024 + part_crc32c = 
base64_str(crc32c(part_body).digest()) + + upload_part_resp = self.client.upload_part( + Bucket=self.bucket_name, + Key=obj_name, + UploadId=upload_id, + PartNumber=1, + Body=part_body, + ChecksumCRC32C=part_crc32c, + ) + self.assertEqual(200, upload_part_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + # then do another + upload_part_resp = self.client.upload_part( + Bucket=self.bucket_name, + Key=obj_name, + UploadId=upload_id, + PartNumber=2, + Body=part_body, + ChecksumCRC32C=part_crc32c, + ) + self.assertEqual(200, upload_part_resp[ + 'ResponseMetadata']['HTTPStatusCode']) + + with self.assertRaises(botocore.exceptions.ClientError) as caught: + self.client.complete_multipart_upload( + Bucket=self.bucket_name, Key=obj_name, + MultipartUpload={ + 'Parts': [ + { + 'PartNumber': 1, + 'ChecksumCRC32C': part_crc32c, + }, + { + 'PartNumber': 2, + 'ChecksumCRC32C': part_crc32c, + }, + ], + }, + UploadId=upload_id, + ) + resp = caught.exception.response + self.assertEqual(400, resp['ResponseMetadata']['HTTPStatusCode']) + self.assertEqual(resp['Error']['Code'], 'MalformedXML') + self.assertEqual( + resp['Error']['Message'], + 'The XML you provided was not well-formed or did not validate ' + 'against our published schema' + ) + abort_resp = self.client.abort_multipart_upload( + Bucket=self.bucket_name, Key=obj_name, + UploadId=upload_id, + ) + self.assertEqual(204, abort_resp[ + 'ResponseMetadata']['HTTPStatusCode']) diff --git a/test/unit/common/middleware/s3api/test_s3api.py b/test/unit/common/middleware/s3api/test_s3api.py index 840d7489a6..6b6ac082f9 100644 --- a/test/unit/common/middleware/s3api/test_s3api.py +++ b/test/unit/common/middleware/s3api/test_s3api.py @@ -15,6 +15,7 @@ # limitations under the License. 
import base64 +import io import unittest from unittest.mock import patch, MagicMock import calendar @@ -243,6 +244,17 @@ class TestS3ApiMiddleware(S3ApiTestCase): self.assertEqual([(b's3api.test-metric:1|c', ('1.2.3.4', 8125))], client.sendto_calls) + def test_init_logs_checksum_implementation(self): + with mock.patch('swift.common.middleware.s3api.s3api.get_logger', + return_value=self.logger), \ + mock.patch('swift.common.utils.checksum.crc32c_isal') \ + as mock_crc32c: + mock_crc32c.__name__ = 'crc32c_isal' + S3ApiMiddleware(None, {}) + self.assertEqual( + {'info': ['Using crc32c_isal implementation for CRC32C.']}, + self.logger.all_log_lines()) + def test_non_s3_request_passthrough(self): req = Request.blank('/something') status, headers, body = self.call_s3api(req) @@ -320,6 +332,7 @@ class TestS3ApiMiddleware(S3ApiTestCase): 'PATH_INFO': path, 'QUERY_STRING': query_string, 'HTTP_AUTHORIZATION': 'AWS X:Y:Z', + 'wsgi.input': io.BytesIO(), } for header, value in headers.items(): header = 'HTTP_' + header.replace('-', '_').upper() @@ -1263,6 +1276,7 @@ class TestS3ApiMiddleware(S3ApiTestCase): 'Credential=X:Y/20110909/us-east-1/s3/aws4_request, ' 'SignedHeaders=content-md5;content-type;date, ' 'Signature=x', + 'wsgi.input': io.BytesIO(), } fake_time = calendar.timegm((2011, 9, 9, 23, 36, 0)) env.update(environ) diff --git a/test/unit/common/middleware/s3api/test_s3request.py b/test/unit/common/middleware/s3api/test_s3request.py index 53cd5f9812..6d03e17655 100644 --- a/test/unit/common/middleware/s3api/test_s3request.py +++ b/test/unit/common/middleware/s3api/test_s3request.py @@ -12,7 +12,8 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import base64 +import io from datetime import timedelta import hashlib from unittest.mock import patch, MagicMock @@ -23,23 +24,25 @@ from io import BytesIO from swift.common import swob from swift.common.middleware.s3api import s3request, s3response, controllers +from swift.common.middleware.s3api.exception import S3InputChecksumMismatch from swift.common.swob import Request, HTTPNoContent from swift.common.middleware.s3api.utils import mktime, Config from swift.common.middleware.s3api.acl_handlers import get_acl_handler from swift.common.middleware.s3api.subresource import ACL, User, Owner, \ Grant, encode_acl -from test.unit.common.middleware.s3api.test_s3api import S3ApiTestCase from swift.common.middleware.s3api.s3request import S3Request, \ S3AclRequest, SigV4Request, SIGV4_X_AMZ_DATE_FORMAT, HashingInput, \ ChunkReader, StreamingInput, S3InputSHA256Mismatch, \ - S3InputChunkSignatureMismatch + S3InputChunkSignatureMismatch, _get_checksum_hasher from swift.common.middleware.s3api.s3response import InvalidArgument, \ NoSuchBucket, InternalError, ServiceUnavailable, \ AccessDenied, SignatureDoesNotMatch, RequestTimeTooSkewed, \ InvalidPartArgument, InvalidPartNumber, InvalidRequest, \ - XAmzContentSHA256Mismatch, ErrorResponse - + XAmzContentSHA256Mismatch, ErrorResponse, S3NotImplemented +from swift.common.utils import checksum from test.debug_logger import debug_logger +from test.unit import requires_crc32c +from test.unit.common.middleware.s3api.test_s3api import S3ApiTestCase Fake_ACL_MAP = { # HEAD Bucket @@ -1371,6 +1374,41 @@ class TestRequest(S3ApiTestCase): resp_body = sigv4_req.environ['wsgi.input'].read() self.assertEqual(body, resp_body) + def test_sig_v4_unsgnd_pyld_crc32_ok(self): + body = b'abcdefghijklmnopqrstuvwxyz\n' + crc = base64.b64encode(checksum.crc32(body).digest()) + sigv4_req = self._test_sig_v4_unsigned_payload( + body=body, + extra_headers={'X-Amz-Checksum-Crc32': crc} + ) + resp_body = sigv4_req.environ['wsgi.input'].read() + 
self.assertEqual(body, resp_body) + + def test_sig_v4_unsgnd_pyld_crc32_mismatch(self): + body = b'abcdefghijklmnopqrstuvwxyz\n' + crc = base64.b64encode(checksum.crc32(b'not the body').digest()) + sigv4_req = self._test_sig_v4_unsigned_payload( + body=body, + extra_headers={'X-Amz-Checksum-Crc32': crc} + ) + with self.assertRaises(S3InputChecksumMismatch): + sigv4_req.environ['wsgi.input'].read() + + @patch.object(S3Request, '_validate_dates', lambda *a: None) + def test_sig_v4_unsgnd_pyld_crc32_invalid(self): + req = self._make_sig_v4_unsigned_payload_req( + extra_headers={'X-Amz-Checksum-Crc32': 'not a crc'} + ) + with self.assertRaises(s3request.InvalidRequest): + SigV4Request(req.environ) + + @patch.object(S3Request, '_validate_dates', lambda *a: None) + def test_sig_v4_unsgnd_pyld_declares_crc32_trailer(self): + req = self._make_sig_v4_unsigned_payload_req( + extra_headers={'X-Amz-Trailer': 'x-amz-checksum-crc32'}) + with self.assertRaises(s3request.MalformedTrailerError): + SigV4Request(req.environ) + def _make_valid_v4_streaming_hmac_sha256_payload_request(self): environ = { 'HTTP_HOST': 's3.test.com', @@ -1579,7 +1617,7 @@ class TestRequest(S3ApiTestCase): sigv4_req = SigV4Request(req.environ) # Verify header signature self.assertTrue(sigv4_req.sig_checker.check_signature('secret')) - return req + return sigv4_req def test_check_sig_v4_streaming_aws_hmac_sha256_payload_trailer_ok(self): body = 'a;chunk-signature=c9dd07703599d3d0bd51c96193110756d4f7091d5a' \ @@ -1590,9 +1628,10 @@ class TestRequest(S3ApiTestCase): '873b4142cf9d815360abc0\r\nuvwxyz\n\r\n' \ '0;chunk-signature=b1ff1f86dccfbe9bcc80011e2b87b72e43e0c7f543' \ 'bb93612c06f9808ccb772e\r\n' \ - 'x-amz-checksum-sha256:foo\r\n' \ - 'x-amz-trailer-signature:347dd27b77f240eee9904e9aaaa10acb955a' \ - 'd1bd0d6dd2e2c64794195eb5535b\r\n' + 'x-amz-checksum-sha256:EBCn52FhCYCsWRNZyHH3JN4VDyNEDrtZWaxMBy' \ + 'TJHZE=\r\n' \ + 'x-amz-trailer-signature:1212d72cb487bf08ed25d1329dc93f65fde0' \ + 
'dcb21739a48f3182c86cfe79737b\r\n' req = self._test_sig_v4_streaming_aws_hmac_sha256_payload_trailer(body) self.assertEqual(b'abcdefghijklmnopqrstuvwxyz\n', req.environ['wsgi.input'].read()) @@ -1626,13 +1665,16 @@ class TestRequest(S3ApiTestCase): with self.assertRaises(s3request.S3InputChunkSignatureMismatch): req.environ['wsgi.input'].read(10) - @patch.object(S3Request, '_validate_dates', lambda *a: None) - def _test_sig_v4_streaming_unsigned_payload_trailer( - self, body, x_amz_trailer='x-amz-checksum-sha256'): + def _make_sig_v4_streaming_unsigned_payload_trailer_req( + self, body=None, wsgi_input=None, extra_headers=None): environ = { 'HTTP_HOST': 's3.test.com', 'REQUEST_METHOD': 'PUT', 'RAW_PATH_INFO': '/test/file'} + if body: + body = body.encode('utf8') + elif wsgi_input: + environ['wsgi.input'] = wsgi_input headers = { 'Authorization': 'AWS4-HMAC-SHA256 ' @@ -1648,50 +1690,62 @@ class TestRequest(S3ApiTestCase): 'X-Amz-Date': '20220330T095351Z', 'X-Amz-Decoded-Content-Length': '27', } - if x_amz_trailer is not None: - headers['X-Amz-Trailer'] = x_amz_trailer - req = Request.blank(environ['RAW_PATH_INFO'], environ=environ, - headers=headers, body=body.encode('utf8')) + if extra_headers: + headers.update(extra_headers) + return Request.blank(environ['RAW_PATH_INFO'], environ=environ, + headers=headers, body=body) + + @patch.object(S3Request, '_validate_dates', lambda *a: None) + def _test_sig_v4_streaming_unsigned_payload_trailer( + self, body=None, x_amz_trailer='x-amz-checksum-sha256'): + if x_amz_trailer is None: + headers = {} + else: + headers = {'X-Amz-Trailer': x_amz_trailer} + + req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( + body=body, extra_headers=headers) sigv4_req = SigV4Request(req.environ) # Verify header signature self.assertTrue(sigv4_req.sig_checker.check_signature('secret')) - return req + return sigv4_req - def test_check_sig_v4_streaming_unsigned_payload_trailer_ok(self): + def 
test_sig_v4_strm_unsgnd_pyld_trl_ok(self): body = 'a\r\nabcdefghij\r\n' \ 'a\r\nklmnopqrst\r\n' \ '7\r\nuvwxyz\n\r\n' \ '0\r\n' \ - 'x-amz-checksum-sha256:foo\r\n' - req = self._test_sig_v4_streaming_unsigned_payload_trailer(body) + 'x-amz-checksum-sha256:EBCn52FhCYCsWRNZyHH3JN4VDyNEDrtZWaxMB' \ + 'yTJHZE=\r\n' + s3req = self._test_sig_v4_streaming_unsigned_payload_trailer(body) self.assertEqual(b'abcdefghijklmnopqrstuvwxyz\n', - req.environ['wsgi.input'].read()) + s3req.environ['wsgi.input'].read()) - def test_check_sig_v4_streaming_unsigned_payload_trailer_none_ok(self): + def test_sig_v4_strm_unsgnd_pyld_trl_none_ok(self): # verify it's ok to not send any trailer body = 'a\r\nabcdefghij\r\n' \ 'a\r\nklmnopqrst\r\n' \ '7\r\nuvwxyz\n\r\n' \ '0\r\n' - req = self._test_sig_v4_streaming_unsigned_payload_trailer( + s3req = self._test_sig_v4_streaming_unsigned_payload_trailer( body, x_amz_trailer=None) self.assertEqual(b'abcdefghijklmnopqrstuvwxyz\n', - req.environ['wsgi.input'].read()) + s3req.environ['wsgi.input'].read()) - def test_check_sig_v4_streaming_unsigned_payload_trailer_undeclared(self): + def test_sig_v4_strm_unsgnd_pyld_trl_undeclared(self): body = 'a\r\nabcdefghij\r\n' \ 'a\r\nklmnopqrst\r\n' \ '7\r\nuvwxyz\n\r\n' \ '0\r\n' \ 'x-amz-checksum-sha256:undeclared\r\n' - req = self._test_sig_v4_streaming_unsigned_payload_trailer( + s3req = self._test_sig_v4_streaming_unsigned_payload_trailer( body, x_amz_trailer=None) self.assertEqual(b'abcdefghijklmnopqrst', - req.environ['wsgi.input'].read(20)) + s3req.environ['wsgi.input'].read(20)) with self.assertRaises(s3request.S3InputIncomplete): - req.environ['wsgi.input'].read() + s3req.environ['wsgi.input'].read() - def test_check_sig_v4_streaming_unsigned_payload_trailer_multiple(self): + def test_sig_v4_strm_unsgnd_pyld_trl_multiple(self): body = 'a\r\nabcdefghij\r\n' \ 'a\r\nklmnopqrst\r\n' \ '7\r\nuvwxyz\n\r\n' \ @@ -1702,7 +1756,56 @@ class TestRequest(S3ApiTestCase): body, 
x_amz_trailer='x-amz-checksum-sha256,x-amz-checksum-crc32') - def test_check_sig_v4_streaming_unsigned_payload_trailer_mismatch(self): + def test_sig_v4_strm_unsgnd_pyld_trl_with_commas_invalid(self): + body = 'a\r\nabcdefghij\r\n' \ + 'a\r\nklmnopqrst\r\n' \ + '7\r\nuvwxyz\n\r\n' \ + '0\r\n' \ + 'x-amz-checksum-sha256:undeclared\r\n' + with self.assertRaises(s3request.InvalidRequest): + self._test_sig_v4_streaming_unsigned_payload_trailer( + body, + x_amz_trailer=', x-amz-checksum-crc32, ,') + with self.assertRaises(s3request.InvalidRequest): + self._test_sig_v4_streaming_unsigned_payload_trailer( + body, + x_amz_trailer=', x-amz-checksum-crc32') + with self.assertRaises(s3request.InvalidRequest): + self._test_sig_v4_streaming_unsigned_payload_trailer( + body, + x_amz_trailer=',x-amz-checksum-crc32') + with self.assertRaises(s3request.InvalidRequest): + self._test_sig_v4_streaming_unsigned_payload_trailer( + body, + x_amz_trailer='x-amz-checksum-crc32, ,') + + def test_sig_v4_strm_unsgnd_pyld_trl_with_commas_ok(self): + body = 'a\r\nabcdefghij\r\n' \ + 'a\r\nklmnopqrst\r\n' \ + '7\r\nuvwxyz\n\r\n' \ + '0\r\n' \ + 'x-amz-checksum-sha256:EBCn52FhCYCsWRNZyHH3JN4VDyNEDrtZWaxMB' \ + 'yTJHZE=\r\n' + s3req = self._test_sig_v4_streaming_unsigned_payload_trailer( + body, x_amz_trailer='x-amz-checksum-sha256, ') + self.assertEqual(b'abcdefghijklmnopqrstuvwxyz\n', + s3req.environ['wsgi.input'].read()) + s3req = self._test_sig_v4_streaming_unsigned_payload_trailer( + body, x_amz_trailer='x-amz-checksum-sha256,,') + self.assertEqual(b'abcdefghijklmnopqrstuvwxyz\n', + s3req.environ['wsgi.input'].read()) + + def test_sig_v4_strm_unsgnd_pyld_trl_unrecognised(self): + body = 'a\r\nabcdefghij\r\n' \ + 'a\r\nklmnopqrst\r\n' \ + '7\r\nuvwxyz\n\r\n' \ + '0\r\n' + with self.assertRaises(s3request.InvalidRequest): + self._test_sig_v4_streaming_unsigned_payload_trailer( + body, + x_amz_trailer='x-amz-content-sha256') + + def test_sig_v4_strm_unsgnd_pyld_trl_mismatch(self): # the 
unexpected footer is detected before the incomplete line body = 'a\r\nabcdefghij\r\n' \ 'a\r\nklmnopqrst\r\n' \ @@ -1710,66 +1813,312 @@ class TestRequest(S3ApiTestCase): '0\r\n' \ 'x-amz-checksum-not-sha256:foo\r\n' \ 'x-' - req = self._test_sig_v4_streaming_unsigned_payload_trailer(body) + s3req = self._test_sig_v4_streaming_unsigned_payload_trailer(body) self.assertEqual(b'abcdefghijklmnopqrst', - req.environ['wsgi.input'].read(20)) + s3req.environ['wsgi.input'].read(20)) # trailers are read with penultimate chunk?? with self.assertRaises(s3request.S3InputMalformedTrailer): - req.environ['wsgi.input'].read() + s3req.environ['wsgi.input'].read() - def test_check_sig_v4_streaming_unsigned_payload_trailer_missing(self): + def test_sig_v4_strm_unsgnd_pyld_trl_missing(self): body = 'a\r\nabcdefghij\r\n' \ 'a\r\nklmnopqrst\r\n' \ '7\r\nuvwxyz\n\r\n' \ '0\r\n' \ '\r\n' - req = self._test_sig_v4_streaming_unsigned_payload_trailer(body) + s3req = self._test_sig_v4_streaming_unsigned_payload_trailer(body) self.assertEqual(b'abcdefghijklmnopqrst', - req.environ['wsgi.input'].read(20)) + s3req.environ['wsgi.input'].read(20)) # trailers are read with penultimate chunk?? with self.assertRaises(s3request.S3InputMalformedTrailer): - req.environ['wsgi.input'].read() + s3req.environ['wsgi.input'].read() - def test_check_sig_v4_streaming_unsigned_payload_trailer_extra(self): + def test_sig_v4_strm_unsgnd_pyld_trl_extra(self): body = 'a\r\nabcdefghij\r\n' \ 'a\r\nklmnopqrst\r\n' \ '7\r\nuvwxyz\n\r\n' \ '0\r\n' \ 'x-amz-checksum-crc32:foo\r\n' \ 'x-amz-checksum-sha32:foo\r\n' - req = self._test_sig_v4_streaming_unsigned_payload_trailer(body) + s3req = self._test_sig_v4_streaming_unsigned_payload_trailer(body) self.assertEqual(b'abcdefghijklmnopqrst', - req.environ['wsgi.input'].read(20)) + s3req.environ['wsgi.input'].read(20)) # trailers are read with penultimate chunk?? 
with self.assertRaises(s3request.S3InputMalformedTrailer): - req.environ['wsgi.input'].read() + s3req.environ['wsgi.input'].read() - def test_check_sig_v4_streaming_unsigned_payload_trailer_duplicate(self): + def test_sig_v4_strm_unsgnd_pyld_trl_duplicate(self): body = 'a\r\nabcdefghij\r\n' \ 'a\r\nklmnopqrst\r\n' \ '7\r\nuvwxyz\n\r\n' \ '0\r\n' \ 'x-amz-checksum-sha256:foo\r\n' \ - 'x-amz-checksum-sha256:bar\r\n' - req = self._test_sig_v4_streaming_unsigned_payload_trailer(body) + 'x-amz-checksum-sha256:EBCn52FhCYCsWRNZyHH3JN4VDyNEDrtZWaxMB' \ + 'yTJHZE=\r\n' + s3req = self._test_sig_v4_streaming_unsigned_payload_trailer(body) self.assertEqual(b'abcdefghijklmnopqrst', - req.environ['wsgi.input'].read(20)) + s3req.environ['wsgi.input'].read(20)) # Reading the rest succeeds! AWS would complain about the checksum, # but we aren't looking at it (yet) - req.environ['wsgi.input'].read() + s3req.environ['wsgi.input'].read() - def test_check_sig_v4_streaming_unsigned_payload_trailer_short(self): + def test_sig_v4_strm_unsgnd_pyld_trl_short(self): body = 'a\r\nabcdefghij\r\n' \ 'a\r\nklmnopqrst\r\n' \ '7\r\nuvwxyz\n\r\n' \ '0\r\n' \ 'x-amz-checksum-sha256' - req = self._test_sig_v4_streaming_unsigned_payload_trailer(body) + s3req = self._test_sig_v4_streaming_unsigned_payload_trailer(body) self.assertEqual(b'abcdefghijklmnopqrst', - req.environ['wsgi.input'].read(20)) + s3req.environ['wsgi.input'].read(20)) # trailers are read with penultimate chunk?? 
with self.assertRaises(s3request.S3InputIncomplete): - req.environ['wsgi.input'].read() + s3req.environ['wsgi.input'].read() + + def test_sig_v4_strm_unsgnd_pyld_trl_invalid(self): + body = 'a\r\nabcdefghij\r\n' \ + 'a\r\nklmnopqrst\r\n' \ + '7\r\nuvwxyz\n\r\n' \ + '0\r\n' \ + 'x-amz-checksum-sha256: not=base-64\r\n' + s3req = self._test_sig_v4_streaming_unsigned_payload_trailer(body) + self.assertEqual(b'abcdefghijklmnopqrst', + s3req.environ['wsgi.input'].read(20)) + with self.assertRaises(s3request.S3InputChecksumTrailerInvalid): + s3req.environ['wsgi.input'].read() + + # ...which in context gets translated to a 400 response + with self.assertRaises(s3response.InvalidRequest) as cm, \ + s3req.translate_read_errors(): + s3req.environ['wsgi.input'].read() + self.assertIn( + 'Value for x-amz-checksum-sha256 trailing header is invalid.', + str(cm.exception.body)) + + @patch.object(S3Request, '_validate_dates', lambda *a: None) + def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_sha256_ok(self): + # TODO: do we already have coverage for this? + body = 'a\r\nabcdefghij\r\n' \ + 'a\r\nklmnopqrst\r\n' \ + '7\r\nuvwxyz\n\r\n' \ + '0\r\n' + headers = { + 'x-amz-checksum-sha256': + 'EBCn52FhCYCsWRNZyHH3JN4VDyNEDrtZWaxMByTJHZE=', + } + req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( + body=body, + extra_headers=headers + ) + sigv4_req = SigV4Request(req.environ) + self.assertEqual(b'abcdefghijklmnopqrstuvwxyz\n', + sigv4_req.environ['wsgi.input'].read()) + + @patch.object(S3Request, '_validate_dates', lambda *a: None) + def test_sig_v4_strm_unsgnd_pyld_trl_checksum_sha256_mismatch(self): + # TODO: do we already have coverage for this? 
+ body = 'a\r\nabcdefghij\r\n' \ + 'a\r\nklmnopqrst\r\n' \ + '7\r\nuvwxyz\n\r\n' \ + '0\r\n' + headers = { + 'x-amz-sdk-checksum-algorithm': 'sha256', + 'x-amz-checksum-sha256': + 'BADBADBADBADWRNZyHH3JN4VDyNEDrtZWaxMByTJHZE=', + } + req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( + body=body, + extra_headers=headers + ) + sigv4_req = SigV4Request(req.environ) + with self.assertRaises(s3request.BadDigest) as cm, \ + sigv4_req.translate_read_errors(): + sigv4_req.environ['wsgi.input'].read() + self.assertIn('The SHA256 you specified did not match the calculated ' + 'checksum.', str(cm.exception.body)) + + @patch.object(S3Request, '_validate_dates', lambda *a: None) + def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_crc32_ok(self): + body = 'a\r\nabcdefghij\r\n' \ + 'a\r\nklmnopqrst\r\n' \ + '7\r\nuvwxyz\n\r\n' \ + '0\r\n' + crc = base64.b64encode( + checksum.crc32(b'abcdefghijklmnopqrstuvwxyz\n').digest()) + req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( + body=body, + extra_headers={'x-amz-checksum-crc32': crc} + ) + sigv4_req = SigV4Request(req.environ) + self.assertEqual(b'abcdefghijklmnopqrstuvwxyz\n', + sigv4_req.environ['wsgi.input'].read()) + + @patch.object(S3Request, '_validate_dates', lambda *a: None) + def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_crc32_mismatch(self): + body = 'a\r\nabcdefghij\r\n' \ + 'a\r\nklmnopqrst\r\n' \ + '7\r\nuvwxyz\n\r\n' \ + '0\r\n' + crc = base64.b64encode(checksum.crc32(b'not-the-body').digest()) + req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( + body=body, + extra_headers={'x-amz-checksum-crc32': crc} + ) + sigv4_req = SigV4Request(req.environ) + with self.assertRaises(S3InputChecksumMismatch): + sigv4_req.environ['wsgi.input'].read() + + @requires_crc32c + @patch.object(S3Request, '_validate_dates', lambda *a: None) + def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_crc32c_ok(self): + body = 'a\r\nabcdefghij\r\n' \ + 'a\r\nklmnopqrst\r\n' \ + '7\r\nuvwxyz\n\r\n' \ + 
'0\r\n' + crc = base64.b64encode( + checksum.crc32c(b'abcdefghijklmnopqrstuvwxyz\n').digest()) + req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( + body=body, + extra_headers={'x-amz-checksum-crc32c': crc} + ) + sigv4_req = SigV4Request(req.environ) + self.assertEqual(b'abcdefghijklmnopqrstuvwxyz\n', + sigv4_req.environ['wsgi.input'].read()) + + @requires_crc32c + @patch.object(S3Request, '_validate_dates', lambda *a: None) + def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_crc32c_mismatch(self): + body = 'a\r\nabcdefghij\r\n' \ + 'a\r\nklmnopqrst\r\n' \ + '7\r\nuvwxyz\n\r\n' \ + '0\r\n' + crc = base64.b64encode(checksum.crc32c(b'not-the-body').digest()) + req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( + body=body, + extra_headers={'x-amz-checksum-crc32c': crc} + ) + sigv4_req = SigV4Request(req.environ) + with self.assertRaises(S3InputChecksumMismatch): + sigv4_req.environ['wsgi.input'].read() + + @patch.object(S3Request, '_validate_dates', lambda *a: None) + def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_crc64nvme_valid(self): + # apparently valid value provokes the not implemented error + body = 'a\r\nabcdefghij\r\n' \ + 'a\r\nklmnopqrst\r\n' \ + '7\r\nuvwxyz\n\r\n' \ + '0\r\n' + crc = base64.b64encode(b'12345678') + req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( + body=body, + extra_headers={'x-amz-checksum-crc64nvme': crc} + ) + with self.assertRaises(S3NotImplemented) as cm: + SigV4Request(req.environ) + self.assertIn( + b'The x-amz-checksum-crc64nvme algorithm is not supported.', + cm.exception.body) + + @patch.object(S3Request, '_validate_dates', lambda *a: None) + def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_crc64nvme_invalid(self): + # the not implemented error is raised before the value is validated + body = 'a\r\nabcdefghij\r\n' \ + 'a\r\nklmnopqrst\r\n' \ + '7\r\nuvwxyz\n\r\n' \ + '0\r\n' + req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( + body=body, + 
extra_headers={'x-amz-checksum-crc64nvme': 'not-a-valid-crc'} + ) + with self.assertRaises(S3NotImplemented) as cm: + SigV4Request(req.environ) + self.assertIn( + b'The x-amz-checksum-crc64nvme algorithm is not supported.', + cm.exception.body) + + @patch.object(S3Request, '_validate_dates', lambda *a: None) + def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_sha1_ok(self): + body = 'a\r\nabcdefghij\r\n' \ + 'a\r\nklmnopqrst\r\n' \ + '7\r\nuvwxyz\n\r\n' \ + '0\r\n' + crc = base64.b64encode( + hashlib.sha1(b'abcdefghijklmnopqrstuvwxyz\n').digest()) + req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( + body=body, + extra_headers={'x-amz-checksum-sha1': crc} + ) + sigv4_req = SigV4Request(req.environ) + self.assertEqual(b'abcdefghijklmnopqrstuvwxyz\n', + sigv4_req.environ['wsgi.input'].read()) + + @patch.object(S3Request, '_validate_dates', lambda *a: None) + def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_sha1_mismatch(self): + body = 'a\r\nabcdefghij\r\n' \ + 'a\r\nklmnopqrst\r\n' \ + '7\r\nuvwxyz\n\r\n' \ + '0\r\n' + crc = base64.b64encode(hashlib.sha1(b'not-the-body').digest()) + req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( + body=body, + extra_headers={'x-amz-checksum-sha1': crc} + ) + sigv4_req = SigV4Request(req.environ) + with self.assertRaises(S3InputChecksumMismatch): + sigv4_req.environ['wsgi.input'].read() + + @patch.object(S3Request, '_validate_dates', lambda *a: None) + def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_unsupported(self): + body = 'a\r\nabcdefghij\r\n' \ + 'a\r\nklmnopqrst\r\n' \ + '7\r\nuvwxyz\n\r\n' \ + '0\r\n' + crc = base64.b64encode( + checksum.crc32c(b'abcdefghijklmnopqrstuvwxyz\n').digest()) + req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( + body=body, + extra_headers={'x-amz-checksum-crc32c': crc} + ) + with patch('swift.common.middleware.s3api.s3request.checksum.' 
+ '_select_crc32c_impl', side_effect=NotImplementedError): + with self.assertRaises(S3NotImplemented): + SigV4Request(req.environ) + + @patch.object(S3Request, '_validate_dates', lambda *a: None) + def test_sig_v4_strm_unsgnd_pyld_trl_checksum_hdr_and_trailer(self): + wsgi_input = io.BytesIO(b'123') + self.assertEqual(0, wsgi_input.tell()) + headers = { + 'x-amz-checksum-sha256': + 'EBCn52FhCYCsWRNZyHH3JN4VDyNEDrtZWaxMByTJHZE=', + 'x-amz-trailer': 'x-amz-checksum-sha256' + } + req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( + wsgi_input=wsgi_input, + extra_headers=headers + ) + with self.assertRaises(InvalidRequest) as cm: + SigV4Request(req.environ) + self.assertIn('Expecting a single x-amz-checksum- header', + str(cm.exception.body)) + + @patch.object(S3Request, '_validate_dates', lambda *a: None) + def test_sig_v4_strm_unsgnd_pyld_trl_checksum_algo_mismatch(self): + wsgi_input = io.BytesIO(b'123') + self.assertEqual(0, wsgi_input.tell()) + headers = { + 'x-amz-sdk-checksum-algorithm': 'crc32', + 'x-amz-checksum-sha256': + 'EBCn52FhCYCsWRNZyHH3JN4VDyNEDrtZWaxMByTJHZE=', + } + req = self._make_sig_v4_streaming_unsigned_payload_trailer_req( + wsgi_input=wsgi_input, + extra_headers=headers + ) + with self.assertRaises(InvalidRequest) as cm: + SigV4Request(req.environ) + self.assertIn('Value for x-amz-sdk-checksum-algorithm header is ' + 'invalid.', str(cm.exception.body)) class TestSigV4Request(S3ApiTestCase): @@ -2537,5 +2886,26 @@ class TestStreamingInput(S3ApiTestCase): do_test('skunk-cignature=ok', s3request.S3InputChunkSignatureMismatch) +class TestModuleFunctions(unittest.TestCase): + def test_get_checksum_hasher(self): + def do_test(crc): + hasher = _get_checksum_hasher('x-amz-checksum-%s' % crc) + self.assertEqual(crc, hasher.name) + + do_test('crc32') + do_test('crc32c') + do_test('sha1') + do_test('sha256') + + def test_get_checksum_hasher_invalid(self): + def do_test(crc): + with self.assertRaises(s3response.S3NotImplemented): + 
_get_checksum_hasher('x-amz-checksum-%s' % crc) + + do_test('crc64nvme') + do_test('nonsense') + do_test('') + + if __name__ == '__main__': unittest.main() diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index abb22dbd9d..cb5bdfbcf7 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -2445,6 +2445,28 @@ cluster_dfw1 = http://dfw1.host/v1/ self.fail('Invalid results from pure function:\n%s' % '\n'.join(failures)) + def test_strict_b64decode_allow_line_breaks(self): + with self.assertRaises(ValueError): + utils.strict_b64decode(b'AA\nA=') + self.assertEqual( + b'\x00\x00', + utils.strict_b64decode(b'AA\nA=', allow_line_breaks=True)) + + def test_strict_b64decode_exact_size(self): + self.assertEqual(b'\x00\x00', + utils.strict_b64decode(b'AAA=')) + self.assertEqual(b'\x00\x00', + utils.strict_b64decode(b'AAA=', exact_size=2)) + with self.assertRaises(ValueError): + utils.strict_b64decode(b'AAA=', exact_size=1) + with self.assertRaises(ValueError): + utils.strict_b64decode(b'AAA=', exact_size=3) + + def test_base64_str(self): + self.assertEqual('Zm9v', utils.base64_str(b'foo')) + self.assertEqual('Zm9vZA==', utils.base64_str(b'food')) + self.assertEqual('IGZvbw==', utils.base64_str(b' foo')) + def test_cap_length(self): self.assertEqual(utils.cap_length(None, 3), None) self.assertEqual(utils.cap_length('', 3), '')