From 42b4cdc53844f153bb26498ca661d4d6e3e5fa99 Mon Sep 17 00:00:00 2001
From: Tim Burke
Date: Wed, 15 Aug 2018 23:59:16 +0000
Subject: [PATCH] s3api: Include '-' in S3 ETags of normal SLOs

Ordinary objects in S3 use the MD5 of the object as the ETag, just like
Swift. Multipart Uploads follow a different format, notably including a
dash followed by the number of segments. Several clients use this
difference to change their behavior based upon the presence of a dash
in object ETags, not only during object download but during upload and
listing, too. In particular, this can disable upload/download integrity
checks or cause the client to issue HEAD requests to see whether the
MD5 was stored in metadata on the object.

The goal of this patch is to get as many of the benefits of the dash as
we can, even for data that was uploaded via the Swift API or that
predates the related changes. To that end (and for S3 API requests
*only*), look for any indication that an object may be an SLO and tack
a literal '-N' onto the end of the ETag.

Why 'N'? Two main reasons:

- We don't necessarily know how many segments there are, and we don't
  want to use additional backend requests to find out (particularly
  when it would require *multiple* additional requests, as in a bucket
  listing).

- We don't want to provide an arbitrary number (as ProxyFS does [1])
  because it would look *too much* like an S3 ETag, and if any client
  actually cares about the *exact* ETag-generation algorithm, I'd
  prefer to have a way to distinguish between the two.

This modified ETag will be used in key GET/HEAD responses via the S3
API, where SLOs are always indicated with an X-Static-Large-Object
header. Either the modified or the original ETag may be used for
conditional requests via the S3 API. Bucket listings via the S3 API
*may* present the modified ETag, but only if the JSON container listing
includes an 'slo_etag' key for the object; see the related SLO patch
for when that started happening.

There should be no impact for the Swift API.
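For context only (this sketch is not part of the patch): under the
commonly documented S3 behavior, an ordinary ETag is the plain MD5 of
the body (as in Swift), while an AWS-style multipart ETag is the MD5 of
the concatenated binary part digests followed by a dash and the part
count. The helper names and sample data below are made up for
illustration.

    import hashlib

    def simple_etag(data):
        # Ordinary object: the ETag is just the MD5 of the body, as in Swift.
        return '"%s"' % hashlib.md5(data).hexdigest()

    def multipart_etag(parts):
        # Multipart upload: MD5 of the concatenated *binary* part digests,
        # followed by a dash and the number of parts.
        concatenated = b''.join(hashlib.md5(part).digest() for part in parts)
        return '"%s-%d"' % (hashlib.md5(concatenated).hexdigest(), len(parts))

    parts = [b'x' * (5 * 1024 * 1024), b'y' * 1024]
    print(simple_etag(b''.join(parts)))  # plain MD5, no dash
    print(multipart_etag(parts))         # same data uploaded in parts: ends in '-2'

An SLO uploaded through the Swift API has no AWS-style value stored for
it at all, which is why this patch falls back to the literal '-N'
suffix rather than a real part count.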
[1] https://github.com/swiftstack/ProxyFS/blob/1.6.4/pfs_middleware/pfs_middleware/middleware.py#L443-L455

Change-Id: If4c47d7b13dcb4b3d04c52bb08b15ca2069cd15c
Related-Change: Ibe68c44bef6c17605863e9084503e8f5dc577fab
Related-Change: I67478923619b00ec1a37d56b6fec6a218453dafc
---
 .../middleware/s3api/controllers/bucket.py    |  6 ++++
 .../middleware/s3api/controllers/obj.py       | 17 +++++++++--
 swift/common/middleware/s3api/s3response.py   |  6 ++++
 .../common/middleware/s3api/test_bucket.py    | 28 ++++++++++---------
 .../middleware/s3api/test_s3response.py       |  5 +++-
 5 files changed, 45 insertions(+), 17 deletions(-)

diff --git a/swift/common/middleware/s3api/controllers/bucket.py b/swift/common/middleware/s3api/controllers/bucket.py
index c996a5ee74..b4f662acb5 100644
--- a/swift/common/middleware/s3api/controllers/bucket.py
+++ b/swift/common/middleware/s3api/controllers/bucket.py
@@ -225,8 +225,14 @@ class BucketController(Controller):
                 if 's3_etag' in o:
                     # New-enough MUs are already in the right format
                     etag = o['s3_etag']
+                elif 'slo_etag' in o:
+                    # SLOs may be in something *close* to the MU format
+                    etag = '"%s-N"' % o['slo_etag'].strip('"')
                 else:
+                    # Normal objects just use the MD5
                     etag = '"%s"' % o['hash']
+                    # This also catches sufficiently-old SLOs, but we have
+                    # no way to identify those from container listings
                 SubElement(contents, 'ETag').text = etag
                 SubElement(contents, 'Size').text = str(o['bytes'])
                 if fetch_owner or listing_type != 'version-2':
diff --git a/swift/common/middleware/s3api/controllers/obj.py b/swift/common/middleware/s3api/controllers/obj.py
index e199482b14..0a5a341d50 100644
--- a/swift/common/middleware/s3api/controllers/obj.py
+++ b/swift/common/middleware/s3api/controllers/obj.py
@@ -16,7 +16,7 @@
 from swift.common.http import HTTP_OK, HTTP_PARTIAL_CONTENT, HTTP_NO_CONTENT
 from swift.common.request_helpers import update_etag_is_at_header
 from swift.common.swob import Range, content_range_header_value
-from swift.common.utils import public
+from swift.common.utils import public, list_from_csv

 from swift.common.middleware.s3api.utils import S3Timestamp, sysmeta_header
 from swift.common.middleware.s3api.controllers.base import Controller
@@ -62,8 +62,19 @@ class ObjectController(Controller):
         return resp

     def GETorHEAD(self, req):
-        if any(match_header in req.headers
-               for match_header in ('if-match', 'if-none-match')):
+        had_match = False
+        for match_header in ('if-match', 'if-none-match'):
+            if match_header not in req.headers:
+                continue
+            had_match = True
+            for value in list_from_csv(req.headers[match_header]):
+                if value.startswith('"') and value.endswith('"'):
+                    value = value[1:-1]
+                if value.endswith('-N'):
+                    # Deal with fake S3-like etags for SLOs uploaded via Swift
+                    req.headers[match_header] += ', ' + value[:-2]
+
+        if had_match:
             # Update where to look
             update_etag_is_at_header(req, sysmeta_header('object', 'etag'))

diff --git a/swift/common/middleware/s3api/s3response.py b/swift/common/middleware/s3api/s3response.py
index da79ce3b54..23192ef2b4 100644
--- a/swift/common/middleware/s3api/s3response.py
+++ b/swift/common/middleware/s3api/s3response.py
@@ -138,6 +138,12 @@ class S3Response(S3ResponseBase, swob.Response):
             # Multipart uploads in AWS have ETags like
             #   <MD5(part_etag1 || ... || part_etagN)>-<number of parts>
             headers['etag'] = override_etag
+        elif self.is_slo and 'etag' in headers:
+            # Many AWS clients use the presence of a '-' to decide whether
+            # to attempt client-side download validation, so even if we
+            # didn't store the AWS-style header, tack on a '-N'. (Use 'N'
+            # because we don't actually know how many parts there are.)
+            headers['etag'] += '-N'

         self.headers = headers

diff --git a/test/unit/common/middleware/s3api/test_bucket.py b/test/unit/common/middleware/s3api/test_bucket.py
index d48deca9a1..b53d936f17 100644
--- a/test/unit/common/middleware/s3api/test_bucket.py
+++ b/test/unit/common/middleware/s3api/test_bucket.py
@@ -42,6 +42,8 @@ class TestS3ApiBucket(S3ApiTestCase):
                         (u'lily-\u062a', '2011-01-05T02:19:14.275290', 0, 390),
                         ('mu', '2011-01-05T02:19:14.275290',
                          'md5-of-the-manifest; s3_etag=0', '3909'),
+                        ('slo', '2011-01-05T02:19:14.275290',
+                         'md5-of-the-manifest', '3909'),
                         ('with space', '2011-01-05T02:19:14.275290', 0, 390),
                         ('with%20space', '2011-01-05T02:19:14.275290', 0, 390))

@@ -49,6 +51,7 @@ class TestS3ApiBucket(S3ApiTestCase):
             {'name': item[0], 'last_modified': str(item[1]),
              'hash': str(item[2]), 'bytes': str(item[3])}
             for item in self.objects]
+        objects[5]['slo_etag'] = '"0"'
         object_list = json.dumps(objects)

         self.prefixes = ['rose', 'viola', 'lily']
@@ -159,15 +162,14 @@ class TestS3ApiBucket(S3ApiTestCase):

         objects = elem.iterchildren('Contents')

-        names = []
+        items = []
         for o in objects:
-            names.append(o.find('./Key').text)
+            items.append((o.find('./Key').text, o.find('./ETag').text))
             self.assertEqual('2011-01-05T02:19:14.275Z',
                              o.find('./LastModified').text)
-            self.assertEqual('"0"', o.find('./ETag').text)
-
-        self.assertEqual(
-            names, [obj[0].encode('utf-8') for obj in self.objects])
+        self.assertEqual(items, [
+            (i[0].encode('utf-8'), '"0-N"' if i[0] == 'slo' else '"0"')
+            for i in self.objects])

     def test_bucket_GET_url_encoded(self):
         bucket_name = 'junk'
@@ -184,16 +186,15 @@ class TestS3ApiBucket(S3ApiTestCase):

         objects = elem.iterchildren('Contents')

-        names = []
+        items = []
         for o in objects:
-            names.append(o.find('./Key').text)
+            items.append((o.find('./Key').text, o.find('./ETag').text))
             self.assertEqual('2011-01-05T02:19:14.275Z',
                              o.find('./LastModified').text)
-            self.assertEqual('"0"', o.find('./ETag').text)

-        self.assertEqual(len(names), len(self.objects))
-        for i in self.objects:
-            self.assertIn(quote(i[0].encode('utf-8')), names)
+        self.assertEqual(items, [
+            (quote(i[0].encode('utf-8')), '"0-N"' if i[0] == 'slo' else '"0"')
+            for i in self.objects])

     def test_bucket_GET_subdir(self):
         bucket_name = 'junk-subdir'
@@ -529,7 +530,8 @@ class TestS3ApiBucket(S3ApiTestCase):
         self.assertEqual([v.find('./LastModified').text for v in versions],
                          [v[1][:-3] + 'Z' for v in objects])
         self.assertEqual([v.find('./ETag').text for v in versions],
-                         ['"0"' for v in objects])
+                         ['"0-N"' if v[0] == 'slo' else '"0"'
+                          for v in objects])
         self.assertEqual([v.find('./Size').text for v in versions],
                          [str(v[3]) for v in objects])
         self.assertEqual([v.find('./Owner/ID').text for v in versions],
diff --git a/test/unit/common/middleware/s3api/test_s3response.py b/test/unit/common/middleware/s3api/test_s3response.py
index 2e2f7d825c..9ffdaf1db1 100644
--- a/test/unit/common/middleware/s3api/test_s3response.py
+++ b/test/unit/common/middleware/s3api/test_s3response.py
@@ -30,7 +30,10 @@ class TestResponse(unittest.TestCase):
                                          'Etag': 'theetag'})
             s3resp = S3Response.from_swift_resp(resp)
             self.assertEqual(expected, s3resp.is_slo)
-            self.assertEqual('"theetag"', s3resp.headers['ETag'])
+            if s3resp.is_slo:
+                self.assertEqual('"theetag-N"', s3resp.headers['ETag'])
+            else:
+                self.assertEqual('"theetag"', s3resp.headers['ETag'])

     def test_response_s3api_sysmeta_headers(self):
         for _server_type in ('object', 'container'):
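Usage sketch (also not part of the patch): the conditional-request
handling added to obj.py above means an S3 client can use the
advertised ETag directly in If-Match. The endpoint URL, credentials,
bucket, and key below are assumptions about a local test deployment;
per the commit message, the original un-suffixed ETag should be
accepted for conditional requests as well.

    import boto3

    # Placeholder endpoint and credentials; adjust for your deployment.
    s3 = boto3.client('s3', endpoint_url='http://127.0.0.1:8080',
                      aws_access_key_id='test:tester',
                      aws_secret_access_key='testing')

    # For an SLO, the advertised ETag now ends in '-N', e.g. '"<md5>-N"'.
    etag = s3.head_object(Bucket='mybucket', Key='my-slo')['ETag']

    # Conditional GET using that modified ETag; the s3api middleware also
    # adds the bare value (without '-N') as an acceptable match internally.
    resp = s3.get_object(Bucket='mybucket', Key='my-slo', IfMatch=etag)
    print(resp['ContentLength'])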