s3api: Include '-' in S3 ETags of normal SLOs
Ordinary objects in S3 use the MD5 of the object as the ETag, just like Swift. Multipart Uploads follow a different format, notably including a dash followed by the number of segments. Several clients use this difference to change their behavior based upon the presence of a dash in object ETags, not only during object download but during upload and listing, too. In particular, this can disable upload/download integrity checks or cause the client to issue HEAD requests to see whether the MD5 was stored in metadata on the object. The goal of this patch is to get as many of the benefits of the dash as we can, even for data that was uploaded via the Swift API or that predated the related-changes. To that end (and for S3 API requests *only*), look for any indication that an object may be an SLO and tack on a literal '-N' to the end of the ETag. Why 'N'? Two main reasons: - We don't necessarily know how many segments there are, and don't want to use additional backend requests to find out (particularly when it would require *multiple* additional requests as in a bucket listing). - We don't want to provide an arbitrary number (as ProxyFS does [1]) because it would look *too much* like an S3 ETag, and if any client actually cares about the *exact* ETag generation algorithm, I'd prefer to have a way to distinguish between the two. This modified ETag will be used in key GET/HEAD responses via the S3 API, where SLOs are always indicated with a X-Static-Large-Object header. Either the modified or original ETag may be used for conditional requests via the S3 API. Bucket listings via the S3 API *may* present the modified ETag, but only if the JSON container listing includes an 'slo_etag' key for the object; see the related SLO patch for when that started happening. There should be no impact for the Swift API. [1] https://github.com/swiftstack/ProxyFS/blob/1.6.4/pfs_middleware/pfs_middleware/middleware.py#L443-L455 Change-Id: If4c47d7b13dcb4b3d04c52bb08b15ca2069cd15c Related-Change: Ibe68c44bef6c17605863e9084503e8f5dc577fab Related-Change: I67478923619b00ec1a37d56b6fec6a218453dafc
This commit is contained in:
@@ -225,8 +225,14 @@ class BucketController(Controller):
|
||||
if 's3_etag' in o:
|
||||
# New-enough MUs are already in the right format
|
||||
etag = o['s3_etag']
|
||||
elif 'slo_etag' in o:
|
||||
# SLOs may be in something *close* to the MU format
|
||||
etag = '"%s-N"' % o['slo_etag'].strip('"')
|
||||
else:
|
||||
# Normal objects just use the MD5
|
||||
etag = '"%s"' % o['hash']
|
||||
# This also catches sufficiently-old SLOs, but we have
|
||||
# no way to identify those from container listings
|
||||
SubElement(contents, 'ETag').text = etag
|
||||
SubElement(contents, 'Size').text = str(o['bytes'])
|
||||
if fetch_owner or listing_type != 'version-2':
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
from swift.common.http import HTTP_OK, HTTP_PARTIAL_CONTENT, HTTP_NO_CONTENT
|
||||
from swift.common.request_helpers import update_etag_is_at_header
|
||||
from swift.common.swob import Range, content_range_header_value
|
||||
from swift.common.utils import public
|
||||
from swift.common.utils import public, list_from_csv
|
||||
|
||||
from swift.common.middleware.s3api.utils import S3Timestamp, sysmeta_header
|
||||
from swift.common.middleware.s3api.controllers.base import Controller
|
||||
@@ -62,8 +62,19 @@ class ObjectController(Controller):
|
||||
return resp
|
||||
|
||||
def GETorHEAD(self, req):
|
||||
if any(match_header in req.headers
|
||||
for match_header in ('if-match', 'if-none-match')):
|
||||
had_match = False
|
||||
for match_header in ('if-match', 'if-none-match'):
|
||||
if match_header not in req.headers:
|
||||
continue
|
||||
had_match = True
|
||||
for value in list_from_csv(req.headers[match_header]):
|
||||
if value.startswith('"') and value.endswith('"'):
|
||||
value = value[1:-1]
|
||||
if value.endswith('-N'):
|
||||
# Deal with fake S3-like etags for SLOs uploaded via Swift
|
||||
req.headers[match_header] += ', ' + value[:-2]
|
||||
|
||||
if had_match:
|
||||
# Update where to look
|
||||
update_etag_is_at_header(req, sysmeta_header('object', 'etag'))
|
||||
|
||||
|
||||
@@ -138,6 +138,12 @@ class S3Response(S3ResponseBase, swob.Response):
|
||||
# Multipart uploads in AWS have ETags like
|
||||
# <MD5(part_etag1 || ... || part_etagN)>-<number of parts>
|
||||
headers['etag'] = override_etag
|
||||
elif self.is_slo and 'etag' in headers:
|
||||
# Many AWS clients use the presence of a '-' to decide whether
|
||||
# to attempt client-side download validation, so even if we
|
||||
# didn't store the AWS-style header, tack on a '-N'. (Use 'N'
|
||||
# because we don't actually know how many parts there are.)
|
||||
headers['etag'] += '-N'
|
||||
|
||||
self.headers = headers
|
||||
|
||||
|
||||
@@ -42,6 +42,8 @@ class TestS3ApiBucket(S3ApiTestCase):
|
||||
(u'lily-\u062a', '2011-01-05T02:19:14.275290', 0, 390),
|
||||
('mu', '2011-01-05T02:19:14.275290',
|
||||
'md5-of-the-manifest; s3_etag=0', '3909'),
|
||||
('slo', '2011-01-05T02:19:14.275290',
|
||||
'md5-of-the-manifest', '3909'),
|
||||
('with space', '2011-01-05T02:19:14.275290', 0, 390),
|
||||
('with%20space', '2011-01-05T02:19:14.275290', 0, 390))
|
||||
|
||||
@@ -49,6 +51,7 @@ class TestS3ApiBucket(S3ApiTestCase):
|
||||
{'name': item[0], 'last_modified': str(item[1]),
|
||||
'hash': str(item[2]), 'bytes': str(item[3])}
|
||||
for item in self.objects]
|
||||
objects[5]['slo_etag'] = '"0"'
|
||||
object_list = json.dumps(objects)
|
||||
|
||||
self.prefixes = ['rose', 'viola', 'lily']
|
||||
@@ -159,15 +162,14 @@ class TestS3ApiBucket(S3ApiTestCase):
|
||||
|
||||
objects = elem.iterchildren('Contents')
|
||||
|
||||
names = []
|
||||
items = []
|
||||
for o in objects:
|
||||
names.append(o.find('./Key').text)
|
||||
items.append((o.find('./Key').text, o.find('./ETag').text))
|
||||
self.assertEqual('2011-01-05T02:19:14.275Z',
|
||||
o.find('./LastModified').text)
|
||||
self.assertEqual('"0"', o.find('./ETag').text)
|
||||
|
||||
self.assertEqual(
|
||||
names, [obj[0].encode('utf-8') for obj in self.objects])
|
||||
self.assertEqual(items, [
|
||||
(i[0].encode('utf-8'), '"0-N"' if i[0] == 'slo' else '"0"')
|
||||
for i in self.objects])
|
||||
|
||||
def test_bucket_GET_url_encoded(self):
|
||||
bucket_name = 'junk'
|
||||
@@ -184,16 +186,15 @@ class TestS3ApiBucket(S3ApiTestCase):
|
||||
|
||||
objects = elem.iterchildren('Contents')
|
||||
|
||||
names = []
|
||||
items = []
|
||||
for o in objects:
|
||||
names.append(o.find('./Key').text)
|
||||
items.append((o.find('./Key').text, o.find('./ETag').text))
|
||||
self.assertEqual('2011-01-05T02:19:14.275Z',
|
||||
o.find('./LastModified').text)
|
||||
self.assertEqual('"0"', o.find('./ETag').text)
|
||||
|
||||
self.assertEqual(len(names), len(self.objects))
|
||||
for i in self.objects:
|
||||
self.assertIn(quote(i[0].encode('utf-8')), names)
|
||||
self.assertEqual(items, [
|
||||
(quote(i[0].encode('utf-8')), '"0-N"' if i[0] == 'slo' else '"0"')
|
||||
for i in self.objects])
|
||||
|
||||
def test_bucket_GET_subdir(self):
|
||||
bucket_name = 'junk-subdir'
|
||||
@@ -529,7 +530,8 @@ class TestS3ApiBucket(S3ApiTestCase):
|
||||
self.assertEqual([v.find('./LastModified').text for v in versions],
|
||||
[v[1][:-3] + 'Z' for v in objects])
|
||||
self.assertEqual([v.find('./ETag').text for v in versions],
|
||||
['"0"' for v in objects])
|
||||
['"0-N"' if v[0] == 'slo' else '"0"'
|
||||
for v in objects])
|
||||
self.assertEqual([v.find('./Size').text for v in versions],
|
||||
[str(v[3]) for v in objects])
|
||||
self.assertEqual([v.find('./Owner/ID').text for v in versions],
|
||||
|
||||
@@ -30,7 +30,10 @@ class TestResponse(unittest.TestCase):
|
||||
'Etag': 'theetag'})
|
||||
s3resp = S3Response.from_swift_resp(resp)
|
||||
self.assertEqual(expected, s3resp.is_slo)
|
||||
self.assertEqual('"theetag"', s3resp.headers['ETag'])
|
||||
if s3resp.is_slo:
|
||||
self.assertEqual('"theetag-N"', s3resp.headers['ETag'])
|
||||
else:
|
||||
self.assertEqual('"theetag"', s3resp.headers['ETag'])
|
||||
|
||||
def test_response_s3api_sysmeta_headers(self):
|
||||
for _server_type in ('object', 'container'):
|
||||
|
||||
Reference in New Issue
Block a user