From 01d7eef889c23e11a8befa2bd2fecbef9906c135 Mon Sep 17 00:00:00 2001 From: Shreeya Deshpande Date: Thu, 16 Jan 2025 13:40:43 -0500 Subject: [PATCH] Add labeled metrics to s3api Add checksum headers as labels Change-Id: I30418548dfe509d98d3673d3f8bd7b963faedb8b Signed-off-by: Shreeya Deshpande --- swift/common/middleware/s3api/s3api.py | 118 +++- swift/common/middleware/s3api/utils.py | 35 ++ swift/common/utils/__init__.py | 1 + test/unit/common/middleware/s3api/__init__.py | 6 +- .../common/middleware/s3api/test_s3api.py | 571 +++++++++++++++++- .../common/middleware/s3api/test_utils.py | 18 + 6 files changed, 729 insertions(+), 20 deletions(-) diff --git a/swift/common/middleware/s3api/s3api.py b/swift/common/middleware/s3api/s3api.py index 5d3f80f50a..bd928049e0 100644 --- a/swift/common/middleware/s3api/s3api.py +++ b/swift/common/middleware/s3api/s3api.py @@ -145,27 +145,61 @@ import json from paste.deploy import loadwsgi from urllib.parse import parse_qs +from swift.common import swob from swift.common.constraints import valid_api_version from swift.common.middleware.listing_formats import \ MAX_CONTAINER_LISTING_CONTENT_LENGTH from swift.common.request_helpers import append_log_info from swift.common.wsgi import PipelineWrapper, loadcontext, WSGIContext +from swift.common.statsd_client import get_labeled_statsd_client from swift.common.middleware import app_property from swift.common.middleware.s3api.exception import NotS3Request, \ InvalidSubresource -from swift.common.middleware.s3api.s3request import get_request_class +from swift.common.middleware.s3api import s3request from swift.common.middleware.s3api.s3response import ErrorResponse, \ InternalError, MethodNotAllowed, S3ResponseBase, S3NotImplemented from swift.common.utils import get_logger, config_true_value, \ config_positive_int_value, split_path, closing_if_possible, \ list_from_csv, parse_header, checksum -from swift.common.middleware.s3api.utils import Config +from 
# https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-auth-using-authorization-header.html
WELL_KNOWN_SPECIFIC_SHA256_VALUES = (
    'UNSIGNED-PAYLOAD',
    'STREAMING-UNSIGNED-PAYLOAD-TRAILER',
    'STREAMING-AWS4-HMAC-SHA256-PAYLOAD',
    'STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER',
    'STREAMING-AWS4-ECDSA-P256-SHA256-PAYLOAD',
    'STREAMING-AWS4-ECDSA-P256-SHA256-PAYLOAD-TRAILER'
)

# https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html
# https://docs.aws.amazon.com/AmazonS3/latest/API/API_Object.html#AmazonS3-Type-Object-ChecksumAlgorithm
# https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html
# docs are unclear whether the header value is the (un-)hyphenated form

# algorithms for x-amz-checksum-algorithm/ x-amz-sdk-checksum-algorithm
WELL_KNOWN_CHECKSUM_ALGORITHMS = (
    'CRC64NVME',
    'CRC32',
    'CRC32C',
    'SHA1',
    'SHA256'
)
WELL_KNOWN_CHECKSUM_HEADERS = (
    'x-amz-checksum-crc32',
    'x-amz-checksum-crc32c',
    'x-amz-checksum-sha1',
    'x-amz-checksum-sha256',
    'x-amz-checksum-crc64nvme'
)


def _make_req_header_labels(self, env):
    """
    Build metric labels describing the checksum-related request headers.

    Only the *kind* of each header value is recorded (a well-known
    keyword, or a coarse classification such as ``hash_64``/``b64_44``/
    ``unknown``), never the raw client-supplied value, except for values
    that match one of the well-known constants.

    :param env: the WSGI environment of the request.
    :returns: a dict mapping ``header_<name>`` label keys to label values;
              empty when none of the headers of interest is present.
    """
    labels = {}
    for hdr_key, hdr_val in swob.HeaderEnvironProxy(env).items():
        hdr_key = hdr_key.lower()
        label_val = None
        if hdr_key == 'content-encoding':
            if 'aws-chunked' in list_from_csv(hdr_val.lower()):
                label_val = 'aws-chunked'
        elif hdr_key == 'transfer-encoding':
            if 'chunked' in list_from_csv(hdr_val.lower()):
                label_val = 'chunked'
        elif hdr_key == 'x-amz-decoded-content-length':
            # presence is what matters, not the advertised length
            label_val = True
        elif hdr_key == 'x-amz-content-sha256':
            if hdr_val in WELL_KNOWN_SPECIFIC_SHA256_VALUES:
                label_val = hdr_val
            else:
                label_val = classify_checksum_header_value(hdr_val)
        elif (hdr_key == 'content-md5' or
                hdr_key in s3request.CHECKSUMS_BY_HEADER):
            label_val = classify_checksum_header_value(hdr_val)
        elif hdr_key == 'x-amz-trailer':
            # a trailer naming anything other than exactly one supported
            # checksum header (including comma-separated lists) is
            # reported as 'unknown'
            trailer = hdr_val.lower()
            if trailer in s3request.CHECKSUMS_BY_HEADER:
                label_val = trailer
            else:
                label_val = 'unknown'
        elif hdr_key in ('x-amz-checksum-algorithm',
                         'x-amz-sdk-checksum-algorithm'):
            # accept both hyphenated and un-hyphenated spellings
            algo = hdr_val.upper().replace('-', '')
            if algo in WELL_KNOWN_CHECKSUM_ALGORITHMS:
                label_val = algo
            else:
                label_val = 'unknown'

        if label_val is not None:
            # only build the label key for headers that are reported
            labels[make_header_label(hdr_key)] = label_val

    return labels


def _emit_response_header_stats(self, env, resp, labels):
    """
    Emit the ``swift_s3_checksum_algo_request`` counter for a request.

    Nothing is emitted when ``labels`` is empty, i.e. when the request
    carried none of the headers of interest.

    :param env: the WSGI environment of the request; ``REQUEST_METHOD``
                and ``swift.backend_path`` are read from it.
    :param resp: the response; its ``status_int`` becomes the ``status``
                 label.
    :param labels: header labels as returned by
                   ``_make_req_header_labels``; not modified.
    """
    if not labels:
        return

    # work on a copy so the caller's dict is left untouched
    labels = dict(labels)
    labels['status'] = resp.status_int
    labels['method'] = env.get('REQUEST_METHOD')
    labels['type'] = 'UNKNOWN'

    acc = con = obj = None
    swift_path = env.get('swift.backend_path')
    if swift_path:
        try:
            _, acc, con, obj = split_path(swift_path, 1, 4, True)
        except ValueError:
            # a malformed backend path must not turn metrics emission
            # into a request failure; report the type as UNKNOWN
            acc = con = obj = None

    if obj:
        labels['type'] = 'object'
    elif con:
        labels['type'] = 'container'
    elif acc:
        labels['type'] = 'account'
    if acc:
        labels['account'] = acc
    if con:
        labels['container'] = con

    self.statsd.increment("swift_s3_checksum_algo_request", labels=labels)
# characters permitted in a hexadecimal digest
_HEX_DIGITS = frozenset('0123456789abcdefABCDEF')

# hex string lengths reported as 'hash_<len>'; 40 is the length of a SHA-1
# hex digest
_HEX_LABEL_LENGTHS = (8, 16, 20, 32, 40, 64, 128, 256, 512)

# base64 string lengths reported as 'b64_<len>':
#   crc32  -> b64_8
#   crc64  -> b64_12
#   md5    -> b64_24
#   sha1   -> b64_28
#   sha256 -> b64_44
_B64_LABEL_LENGTHS = (8, 12, 24, 28, 44)


def make_header_label(header):
    """
    Return the metric label key for an HTTP header name.

    :param header: a header name, e.g. ``X-Amz-Content-SHA256``.
    :returns: the lower-cased name prefixed with ``header_`` and with
              hyphens replaced by underscores.
    """
    return 'header_' + header.lower().replace('-', '_')


def is_valid_base64(s):
    """
    Check whether ``s`` is a strictly valid base64 string.

    Decoding is done with ``validate=True`` so that characters outside the
    base64 alphabet cause rejection instead of being silently discarded.

    :param s: the value to check.
    :returns: True if ``s`` decodes as base64, False otherwise.
    """
    try:
        base64.b64decode(s, validate=True)
    except (TypeError, ValueError):
        # binascii.Error is a subclass of ValueError
        return False
    return True


def is_valid_hash(hash_string):
    """
    Check whether ``hash_string`` consists solely of hexadecimal digits.

    ``int(value, 16)`` is deliberately not used: it also accepts a ``0x``
    prefix, a sign, underscores and surrounding whitespace, none of which
    can appear in a hex digest.

    :param hash_string: the value to check.
    :returns: True for a non-empty string of hex digits, False otherwise.
    """
    if not isinstance(hash_string, str) or not hash_string:
        return False
    return all(c in _HEX_DIGITS for c in hash_string)


def classify_checksum_header_value(value):
    """
    Map a checksum header value to a coarse, low-cardinality label.

    :param value: the raw header value.
    :returns: ``hash_<len>`` for a hex string of a recognised length,
              ``b64_<len>`` for a non-hex base64 string of a recognised
              length, otherwise ``unknown``.
    """
    if is_valid_hash(value):
        # hex takes precedence; a hex string is not retried as base64
        if len(value) in _HEX_LABEL_LENGTHS:
            return 'hash_%d' % len(value)
    elif is_valid_base64(value):
        if len(value) in _B64_LABEL_LENGTHS:
            return 'b64_%d' % len(value)
    return 'unknown'
100644 --- a/test/unit/common/middleware/s3api/test_s3api.py +++ b/test/unit/common/middleware/s3api/test_s3api.py @@ -15,6 +15,7 @@ # limitations under the License. import base64 +import hashlib import io import unittest from unittest.mock import patch, MagicMock @@ -41,7 +42,8 @@ from swift.common.utils import md5, get_logger from keystonemiddleware.auth_token import AuthProtocol from keystoneauth1.access import AccessInfoV2 -from test.debug_logger import debug_logger, FakeStatsdClient +from test.debug_logger import debug_logger, FakeStatsdClient, \ + FakeLabeledStatsdClient from test.unit.common.middleware.s3api import S3ApiTestCase from test.unit.common.middleware.helpers import FakeSwift from test.unit.common.middleware.s3api.test_s3token import \ @@ -52,6 +54,9 @@ from swift.common.middleware.s3api.s3api import filter_factory, \ S3ApiMiddleware from swift.common.middleware.s3api.s3token import S3Token +SHA256_OF_EMPTY_STRING = \ + 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' + class TestListingMiddleware(S3ApiTestCase): def test_s3_etag_in_json(self): @@ -255,14 +260,42 @@ class TestS3ApiMiddleware(S3ApiTestCase): mock_crc64nvme.__name__ = 'crc64nvme_isal' S3ApiMiddleware(None, {}) self.assertEqual( - {'info': ['Using crc32c_isal implementation for CRC32C.', - 'Using crc64nvme_isal implementation for CRC64NVME.']}, + { + 'debug': [ + 'Labeled statsd mode: disabled (fake-swift)', + ], + 'info': [ + 'Using crc32c_isal implementation for CRC32C.', + 'Using crc64nvme_isal implementation for CRC64NVME.', + ], + }, self.logger.all_log_lines()) + def test_init_statsd_options_user_labels(self): + conf = { + 'log_statsd_host': 'example.com', + 'log_statsd_port': '1234', + 'statsd_label_mode': 'dogstatsd', + 'statsd_emit_legacy': False, + 'statsd_user_label_userdefined': 'whatever', + } + with mock.patch('swift.common.statsd_client.LabeledStatsdClient', + FakeLabeledStatsdClient): + s3api = S3ApiMiddleware(None, conf) + + statsd = s3api.statsd + 
self.assertIsInstance(statsd, FakeLabeledStatsdClient) + statsd.increment('baz', labels={'label_foo': 'foo'}) + self.assertEqual( + [(b'baz:1|c|#label_foo:foo,user_userdefined:whatever', + ('example.com', 1234))], + statsd.sendto_calls) + def test_non_s3_request_passthrough(self): req = Request.blank('/something') status, headers, body = self.call_s3api(req) self.assertEqual(body, b'FAKE APP') + self.assertFalse(self.statsd.calls['increment']) def test_bad_format_authorization(self): req = Request.blank('/something', @@ -699,30 +732,52 @@ class TestS3ApiMiddleware(S3ApiTestCase): get_log_info(req.environ)) def test_bucket_virtual_hosted_style(self): - req = Request.blank('/', - environ={'HTTP_HOST': 'bucket.localhost:80', - 'REQUEST_METHOD': 'HEAD', - 'HTTP_AUTHORIZATION': - 'AWS test:tester:hmac'}, - headers={'Date': self.get_date_header()}) + req = Request.blank( + '/', + environ={'HTTP_HOST': 'bucket.localhost:80', + 'REQUEST_METHOD': 'HEAD', + 'HTTP_AUTHORIZATION': + 'AWS test:tester:hmac'}, + headers={'Date': self.get_date_header(), + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING}) status, headers, body = self.call_s3api(req) self.assertEqual(status.split()[0], '200') self.assertIn('swift.backend_path', req.environ) self.assertEqual('/v1/AUTH_test/bucket', req.environ['swift.backend_path']) + exp_labels = {'account': 'AUTH_test', + 'container': 'bucket', + 'method': 'HEAD', + 'type': 'container', + 'status': 200, + 'header_x_amz_content_sha256': 'hash_64'} + self.assertEqual([(('swift_s3_checksum_algo_request',), + {'labels': exp_labels})], + self.statsd.calls['increment']) def test_object_virtual_hosted_style(self): - req = Request.blank('/object', - environ={'HTTP_HOST': 'bucket.localhost:80', - 'REQUEST_METHOD': 'HEAD', - 'HTTP_AUTHORIZATION': - 'AWS test:tester:hmac'}, - headers={'Date': self.get_date_header()}) + req = Request.blank( + '/object', + environ={'HTTP_HOST': 'bucket.localhost:80', + 'REQUEST_METHOD': 'HEAD', + 'HTTP_AUTHORIZATION': + 'AWS 
test:tester:hmac'}, + headers={'Date': self.get_date_header(), + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING}) status, headers, body = self.call_s3api(req) self.assertEqual(status.split()[0], '200') self.assertIn('swift.backend_path', req.environ) self.assertEqual('/v1/AUTH_test/bucket/object', req.environ['swift.backend_path']) + exp_labels = {'account': 'AUTH_test', + 'container': 'bucket', + 'method': 'HEAD', + 'type': 'object', + 'status': 200, + 'header_x_amz_content_sha256': 'hash_64'} + self.assertEqual([(('swift_s3_checksum_algo_request',), + {'labels': exp_labels})], + self.statsd.calls['increment']) def test_token_generation(self): self.swift.register('HEAD', '/v1/AUTH_test/bucket+segments/' @@ -1790,6 +1845,492 @@ class TestS3ApiMiddleware(S3ApiTestCase): ['HEAD /bucket/object s3:err:AccessDenied.invalid_credential'], self.logger.get_lines_for_level('info')) + def _do_test_emit_header_stats(self, extra_headers, + method='PUT', + path='/bucket/object'): + authz_header = 'AWS4-HMAC-SHA256 ' + ', '.join([ + 'Credential=test:tester/%s/us-east-1/s3/aws4_request' % + self.get_v4_amz_date_header().split('T', 1)[0], + 'SignedHeaders=host;x-amz-date', + 'Signature=X', + ]) + headers = { + 'Authorization': authz_header, + 'X-Amz-Date': self.get_v4_amz_date_header(), + 'Content-Type': 'text/plain', + 'Content-Length': '0', + } + headers.update(extra_headers) + req = Request.blank(path, headers=headers, body='') + req.method = method + self.statsd.clear() + + # verify that request headers are sampled before request is handled by + # mocking the controller to mutate the request headers + orig_get_response = S3Request.get_response + captured_envs = [] + + def mock_handler(req, *args, **kwargs): + # note: only requests that succeed in constructing an S3Request + # will reach this handler + captured_envs.append(req) + resp = orig_get_response(req, *args, **kwargs) + for k in extra_headers: + req.headers.pop(k, None) + return resp + + with 
mock.patch('swift.common.middleware.s3api.s3request.S3Request.' + 'get_response', mock_handler): + _, _, body = self.call_s3api(req) + self.assertEqual([(('swift_s3_checksum_algo_request',), mock.ANY)], + self.statsd.calls['increment']) + kwargs = self.statsd.calls['increment'][0][1] + self.assertIn('labels', kwargs) + return kwargs['labels'] + + def test_emit_stats_x_amx_content_sha256_real_hash(self): + headers = {'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'method': 'PUT', + 'type': 'object', + 'status': 200, + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + def test_emit_stats_x_amx_content_sha256_real_hash_GET(self): + # boto3 sends this header with GETs... + headers = {'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + resp_body = json.dumps([]).encode('ascii') + self.swift.register('GET', '/v1/AUTH_test', swob.HTTPOk, {}, resp_body) + labels = self._do_test_emit_header_stats(headers, + method='GET', + path='/') + self.assertEqual({'account': 'AUTH_test', + 'method': 'GET', + 'type': 'account', + 'status': 200, + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + self.swift.register('GET', '/v1/AUTH_test/bucket', swob.HTTPOk, {}, + resp_body) + labels = self._do_test_emit_header_stats(headers, + method='GET', + path='/bucket') + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'method': 'GET', + 'type': 'container', + 'status': 200, + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + labels = self._do_test_emit_header_stats(headers, + method='GET', + path='/bucket/object') + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'method': 'GET', + 'type': 'object', + 'status': 200, + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + self.swift.register('GET', '/v1/AUTH_test/bucket/object', + swob.HTTPNotFound, {}, "") + labels = self._do_test_emit_header_stats(headers, + 
method='GET', + path='/bucket/object') + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'method': 'GET', + 'type': 'object', + 'status': 404, + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + def test_emit_stats_x_amx_checksum_sha256_real_hash(self): + headers = {'X-Amz-Checksum-SHA256': base64.b64encode( + hashlib.sha256().digest())} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'method': 'PUT', + 'type': 'UNKNOWN', + 'status': 400, + 'header_x_amz_checksum_sha256': 'b64_44'}, + labels) + + def test_emit_stats_x_amx_content_sha256_supported_aliases(self): + def do_test(alias): + headers = {'X-Amz-Content-SHA256': alias} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'method': 'PUT', + 'type': 'object', + 'status': 200, + 'header_x_amz_content_sha256': alias}, + labels) + + do_test('UNSIGNED-PAYLOAD') + + def test_emit_stats_x_amx_content_sha256_supported_streaming_aliases(self): + def do_test(alias): + headers = {'X-Amz-Content-SHA256': alias, + 'x-amz-decoded-content-length': '0'} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'method': 'PUT', + 'type': 'object', + 'status': 400, # incomplete payload + 'header_x_amz_decoded_content_length': True, + 'header_x_amz_content_sha256': alias}, + labels) + + do_test('STREAMING-UNSIGNED-PAYLOAD-TRAILER') + do_test('STREAMING-AWS4-HMAC-SHA256-PAYLOAD') + do_test('STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER') + + def test_emit_stats_x_amx_content_sha256_unsupported_aliases(self): + def do_test(alias): + headers = {'X-Amz-Content-SHA256': alias, + 'x-amz-decoded-content-length': '0'} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'method': 'PUT', + 'type': 'UNKNOWN', + 'status': 501, + 'header_x_amz_decoded_content_length': True, + 'header_x_amz_content_sha256': alias}, + labels) + + 
do_test('STREAMING-AWS4-ECDSA-P256-SHA256-PAYLOAD') + do_test('STREAMING-AWS4-ECDSA-P256-SHA256-PAYLOAD-TRAILER') + + def test_emit_stats_x_amx_content_sha256_invalid(self): + def do_test(value): + headers = {'X-Amz-Content-SHA256': value} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'method': 'PUT', + 'type': 'UNKNOWN', + 'status': 400, + 'header_x_amz_content_sha256': 'unknown'}, + labels) + do_test('0' * 63) + do_test('UNSIGNED-NONSENSE') + + def test_emit_stats_content_md5(self): + headers = {'Content-MD5': base64.b64encode(md5(b'').digest()), + # X-Amz-Content-SHA256 is required + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'method': 'PUT', + 'type': 'object', + 'status': 200, + 'header_content_md5': 'b64_24', + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + headers = {'Content-MD5': 'nonsense', + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'method': 'PUT', + 'type': 'UNKNOWN', + 'status': 400, + 'header_content_md5': 'b64_8', + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + def test_emit_stats_content_encoding(self): + headers = {'Content-Encoding': 'aws-chunked', + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'method': 'PUT', + 'type': 'object', + 'status': 200, + 'header_content_encoding': 'aws-chunked', + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + headers = {'Content-Encoding': 'aws-chunked,gzip', + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'method': 'PUT', + 'type': 'object', + 'status': 200, + 'header_content_encoding': 'aws-chunked', + 
'header_x_amz_content_sha256': 'hash_64'}, + labels) + + # s3api sees 'aws-chunked' in 'not-aws-chunked' and treats the + # request as unsupported rather than ignoring 'not-aws-chunked' ! + headers = {'Content-Encoding': 'not-aws-chunked', + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'method': 'PUT', + 'type': 'object', + 'status': 200, + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + def test_emit_stats_transfer_encoding(self): + headers = {'Transfer-Encoding': 'chunked', + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'type': 'object', + 'method': 'PUT', + 'status': 200, + 'header_transfer_encoding': 'chunked', + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + headers = {'Transfer-Encoding': 'chunked,gzip', + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'type': 'object', + 'method': 'PUT', + 'status': 200, + 'header_transfer_encoding': 'chunked', + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + headers = {'Transfer-Encoding': 'aws-chunked', + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'type': 'object', + 'method': 'PUT', + 'status': 200, + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + def test_emit_stats_x_amz_decoded_content_length(self): + headers = {'X-Amz-Decoded-Content-Length': '123', + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'method': 'PUT', + 'type': 'object', + 'status': 200, + 
'header_x_amz_decoded_content_length': True, + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + def test_emit_stats_x_amz_checksum_crc32(self): + headers = {'X-Amz-Checksum-Crc32': base64.b64encode(b'1234'), + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'method': 'PUT', + 'type': 'object', + 'status': 400, # bad digest + 'header_x_amz_checksum_crc32': 'b64_8', + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + headers = {'X-Amz-Checksum-Crc32': base64.b64encode(b'123'), # bad + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'method': 'PUT', + 'type': 'UNKNOWN', + 'status': 400, + 'header_x_amz_checksum_crc32': 'unknown', + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + def test_emit_stats_x_amz_checksum_crc32c(self): + headers = {'X-Amz-Checksum-Crc32c': base64.b64encode(b'1234'), + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'method': 'PUT', + 'type': 'object', + 'status': 400, # bad digest + 'header_x_amz_checksum_crc32c': 'b64_8', + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + headers = {'X-Amz-Checksum-Crc32c': base64.b64encode(b'123'), # bad + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'method': 'PUT', + 'type': 'UNKNOWN', + 'status': 400, + 'header_x_amz_checksum_crc32c': 'unknown', + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + def test_emit_stats_x_amz_checksum_crc64nvme(self): + headers = {'X-Amz-Checksum-Crc32c': base64.b64encode(b'12345678'), + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'type': 'UNKNOWN', + 'method': 'PUT', + 
'status': 400, # bad digest + 'header_x_amz_checksum_crc32c': 'b64_12', + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + def test_emit_stats_x_amz_checksum_sha1(self): + headers = {'X-Amz-Checksum-SHA1': base64.b64encode(b'1234' * 5), + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'method': 'PUT', + 'type': 'object', + 'status': 400, # bad digest + 'header_x_amz_checksum_sha1': 'b64_28', + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + headers = {'X-Amz-Checksum-SHA1': base64.b64encode(b'123' * 5), # bad + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'method': 'PUT', + 'type': 'UNKNOWN', + 'status': 400, # invalid header value + 'header_x_amz_checksum_sha1': 'unknown', + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + def test_emit_stats_multiple_x_amz_checksums(self): + headers = {'X-Amz-Checksum-SHA1': base64.b64encode(b'1234' * 5), + 'X-Amz-Checksum-CRC32': base64.b64encode(b'1234'), + 'X-Amz-Content-SHA256': SHA256_OF_EMPTY_STRING} + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'type': 'UNKNOWN', + 'status': 400, + 'method': 'PUT', + 'header_x_amz_checksum_crc32': 'b64_8', + 'header_x_amz_checksum_sha1': 'b64_28', + 'header_x_amz_content_sha256': 'hash_64'}, + labels) + + def test_emit_stats_x_amz_trailer_unknown(self): + def do_test(header_value): + headers = { + 'X-Amz-Trailer': header_value, + 'X-Amz-Content-SHA256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'x-amz-decoded-content-length': '0' + } + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'method': 'PUT', + 'type': 'UNKNOWN', + 'status': 400, + 'header_x_amz_decoded_content_length': True, + 'header_x_amz_trailer': 'unknown', + 'header_x_amz_content_sha256': + 'STREAMING-UNSIGNED-PAYLOAD-TRAILER'}, + labels) + + 
do_test('content-md5') + do_test('x-amz-checksum-sha2') + do_test('content-md5,x-amz-checksum-sha256') + do_test('x-amz-checksum-sha256,x-amz-checksum-crc32') + + def test_emit_stats_x_amz_trailer(self): + def do_test(header_value): + headers = { + 'X-Amz-Trailer': header_value, + 'X-Amz-Content-SHA256': 'STREAMING-UNSIGNED-PAYLOAD-TRAILER', + 'x-amz-decoded-content-length': '0' + } + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'account': 'AUTH_test', + 'container': 'bucket', + 'method': 'PUT', + 'type': 'object', + 'status': 400, # IncompleteBody + 'header_x_amz_decoded_content_length': True, + 'header_x_amz_trailer': header_value, + 'header_x_amz_content_sha256': + 'STREAMING-UNSIGNED-PAYLOAD-TRAILER'}, + labels) + + do_test('x-amz-checksum-crc32') + do_test('x-amz-checksum-crc32c') + with mock.patch('swift.common.utils.checksum.crc64nvme_isal') \ + as mock_crc64nvme: + mock_crc64nvme.__name__ = 'crc64nvme_isal' + do_test('x-amz-checksum-crc64nvme') + do_test('x-amz-checksum-sha1') + do_test('x-amz-checksum-sha256') + + def test_emit_stats_x_amz_sdk_checksum_algorithm(self): + def do_test(algo): + headers = { + 'x-amz-sdk-checksum-algorithm': algo, + } + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'method': 'PUT', + 'type': 'UNKNOWN', + 'status': 400, + 'header_x_amz_sdk_checksum_algorithm': + algo.replace('-', '')}, + labels) + do_test('CRC32') + do_test('CRC32C') + do_test('CRC64NVME') + do_test('SHA1') + do_test('SHA256') + do_test('CRC-32') + do_test('CRC-32C') + do_test('CRC-64NVME') + do_test('SHA-1') + do_test('SHA-256') + + def test_emit_stats_x_amz_checksum_algorithm(self): + def do_test(algo): + headers = { + 'X-Amz-Checksum-Algorithm': algo, + } + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'method': 'PUT', + 'type': 'UNKNOWN', + 'status': 400, + 'header_x_amz_checksum_algorithm': + algo.replace('-', '')}, + labels) + do_test('CRC32') + do_test('CRC32C') + 
do_test('CRC64NVME') + do_test('SHA1') + do_test('SHA256') + do_test('CRC-32') + do_test('CRC-32C') + do_test('CRC-64NVME') + do_test('SHA-1') + do_test('SHA-256') + + def test_emit_stats_x_amz_checksum_algorithm_unknown(self): + headers = { + 'X-Amz-Checksum-Algorithm': 'CRC128', + } + labels = self._do_test_emit_header_stats(headers) + self.assertEqual({'method': 'PUT', + 'type': 'UNKNOWN', + 'status': 400, + 'header_x_amz_checksum_algorithm': 'unknown'}, + labels) + def test_access_user_id_logging(self): # verify that proxy logging gets access_user_id from S3 requests environ = {'REQUEST_METHOD': 'GET'} diff --git a/test/unit/common/middleware/s3api/test_utils.py b/test/unit/common/middleware/s3api/test_utils.py index 79a2233f24..ba6878a6ca 100644 --- a/test/unit/common/middleware/s3api/test_utils.py +++ b/test/unit/common/middleware/s3api/test_utils.py @@ -20,6 +20,7 @@ import unittest from swift.common.swob import Request from swift.common.middleware.s3api import utils, s3request from swift.common.middleware.s3api.exception import InvalidBucketNameParseError +from swift.common.middleware.s3api.utils import make_header_label strs = [ ('Owner', 'owner'), @@ -37,6 +38,23 @@ class TestS3ApiUtils(unittest.TestCase): for s1, s2 in strs: self.assertEqual(s1, utils.snake_to_camel(s2)) + def test_make_header_label(self): + self.assertEqual('header_aa_b_c', make_header_label('Aa-B-C')) + self.assertEqual('header_aa_b_c', make_header_label('AA_B_C')) + self.assertEqual('header_aa_b_c', make_header_label('aA-b-c')) + + def test_classify_checksum_header_value(self): + self.assertEqual( + utils.classify_checksum_header_value('00000000'), 'hash_8') + self.assertEqual( + utils.classify_checksum_header_value('a' * 64), 'hash_64') + self.assertEqual( + utils.classify_checksum_header_value('STUVWXYZ'), 'b64_8') + self.assertEqual( + utils.classify_checksum_header_value('abcdef&1'), 'unknown') + self.assertEqual( + utils.classify_checksum_header_value('z'), 'unknown') + def 
test_validate_bucket_name(self): # good cases self.assertTrue(utils.validate_bucket_name('bucket', True))