s3api: Include '-' in S3 ETags of normal SLOs

Ordinary objects in S3 use the MD5 of the object as the ETag, just like
Swift. Multipart Uploads follow a different format, notably including a
dash followed by the number of segments.

Several clients use this difference to change their behavior based upon
the presence of a dash in object ETags, not only during object download
but during upload and listing, too. In particular, this can disable
upload/download integrity checks or cause the client to issue HEAD
requests to see whether the MD5 was stored in metadata on the object.

The goal of this patch is to get as many of the benefits of the dash as
we can, even for data that was uploaded via the Swift API or that
predated the related changes. To that end (and for S3 API requests
*only*), look for any indication that an object may be an SLO and tack
on a literal '-N' to the end of the ETag. Why 'N'? Two main reasons:

 - We don't necessarily know how many segments there are, and don't want
   to use additional backend requests to find out (particularly when it
   would require *multiple* additional requests as in a bucket listing).
 - We don't want to provide an arbitrary number (as ProxyFS does [1])
   because it would look *too much* like an S3 ETag, and if any client
   actually cares about the *exact* ETag generation algorithm, I'd
   prefer to have a way to distinguish between the two.

This modified ETag will be used in key GET/HEAD responses via the S3
API, where SLOs are always indicated with an X-Static-Large-Object
header. Either the modified or original ETag may be used for conditional
requests via the S3 API. Bucket listings via the S3 API *may* present
the modified ETag, but only if the JSON container listing includes an
'slo_etag' key for the object; see the related SLO patch for when that
started happening.

There should be no impact for the Swift API.

[1] https://github.com/swiftstack/ProxyFS/blob/1.6.4/pfs_middleware/pfs_middleware/middleware.py#L443-L455

Change-Id: If4c47d7b13dcb4b3d04c52bb08b15ca2069cd15c
Related-Change: Ibe68c44bef6c17605863e9084503e8f5dc577fab
Related-Change: I67478923619b00ec1a37d56b6fec6a218453dafc
This commit is contained in:
Tim Burke
2018-08-15 23:59:16 +00:00
committed by Pete Zaitcev
parent 582f0585e8
commit 42b4cdc538
5 changed files with 45 additions and 17 deletions

View File

@@ -225,8 +225,14 @@ class BucketController(Controller):
if 's3_etag' in o:
# New-enough MUs are already in the right format
etag = o['s3_etag']
elif 'slo_etag' in o:
# SLOs may be in something *close* to the MU format
etag = '"%s-N"' % o['slo_etag'].strip('"')
else:
# Normal objects just use the MD5
etag = '"%s"' % o['hash']
# This also catches sufficiently-old SLOs, but we have
# no way to identify those from container listings
SubElement(contents, 'ETag').text = etag
SubElement(contents, 'Size').text = str(o['bytes'])
if fetch_owner or listing_type != 'version-2':

View File

@@ -16,7 +16,7 @@
from swift.common.http import HTTP_OK, HTTP_PARTIAL_CONTENT, HTTP_NO_CONTENT
from swift.common.request_helpers import update_etag_is_at_header
from swift.common.swob import Range, content_range_header_value
from swift.common.utils import public
from swift.common.utils import public, list_from_csv
from swift.common.middleware.s3api.utils import S3Timestamp, sysmeta_header
from swift.common.middleware.s3api.controllers.base import Controller
@@ -62,8 +62,19 @@ class ObjectController(Controller):
return resp
def GETorHEAD(self, req):
if any(match_header in req.headers
for match_header in ('if-match', 'if-none-match')):
had_match = False
for match_header in ('if-match', 'if-none-match'):
if match_header not in req.headers:
continue
had_match = True
for value in list_from_csv(req.headers[match_header]):
if value.startswith('"') and value.endswith('"'):
value = value[1:-1]
if value.endswith('-N'):
# Deal with fake S3-like etags for SLOs uploaded via Swift
req.headers[match_header] += ', ' + value[:-2]
if had_match:
# Update where to look
update_etag_is_at_header(req, sysmeta_header('object', 'etag'))

View File

@@ -138,6 +138,12 @@ class S3Response(S3ResponseBase, swob.Response):
# Multipart uploads in AWS have ETags like
# <MD5(part_etag1 || ... || part_etagN)>-<number of parts>
headers['etag'] = override_etag
elif self.is_slo and 'etag' in headers:
# Many AWS clients use the presence of a '-' to decide whether
# to attempt client-side download validation, so even if we
# didn't store the AWS-style header, tack on a '-N'. (Use 'N'
# because we don't actually know how many parts there are.)
headers['etag'] += '-N'
self.headers = headers

View File

@@ -42,6 +42,8 @@ class TestS3ApiBucket(S3ApiTestCase):
(u'lily-\u062a', '2011-01-05T02:19:14.275290', 0, 390),
('mu', '2011-01-05T02:19:14.275290',
'md5-of-the-manifest; s3_etag=0', '3909'),
('slo', '2011-01-05T02:19:14.275290',
'md5-of-the-manifest', '3909'),
('with space', '2011-01-05T02:19:14.275290', 0, 390),
('with%20space', '2011-01-05T02:19:14.275290', 0, 390))
@@ -49,6 +51,7 @@ class TestS3ApiBucket(S3ApiTestCase):
{'name': item[0], 'last_modified': str(item[1]),
'hash': str(item[2]), 'bytes': str(item[3])}
for item in self.objects]
objects[5]['slo_etag'] = '"0"'
object_list = json.dumps(objects)
self.prefixes = ['rose', 'viola', 'lily']
@@ -159,15 +162,14 @@ class TestS3ApiBucket(S3ApiTestCase):
objects = elem.iterchildren('Contents')
names = []
items = []
for o in objects:
names.append(o.find('./Key').text)
items.append((o.find('./Key').text, o.find('./ETag').text))
self.assertEqual('2011-01-05T02:19:14.275Z',
o.find('./LastModified').text)
self.assertEqual('"0"', o.find('./ETag').text)
self.assertEqual(
names, [obj[0].encode('utf-8') for obj in self.objects])
self.assertEqual(items, [
(i[0].encode('utf-8'), '"0-N"' if i[0] == 'slo' else '"0"')
for i in self.objects])
def test_bucket_GET_url_encoded(self):
bucket_name = 'junk'
@@ -184,16 +186,15 @@ class TestS3ApiBucket(S3ApiTestCase):
objects = elem.iterchildren('Contents')
names = []
items = []
for o in objects:
names.append(o.find('./Key').text)
items.append((o.find('./Key').text, o.find('./ETag').text))
self.assertEqual('2011-01-05T02:19:14.275Z',
o.find('./LastModified').text)
self.assertEqual('"0"', o.find('./ETag').text)
self.assertEqual(len(names), len(self.objects))
for i in self.objects:
self.assertIn(quote(i[0].encode('utf-8')), names)
self.assertEqual(items, [
(quote(i[0].encode('utf-8')), '"0-N"' if i[0] == 'slo' else '"0"')
for i in self.objects])
def test_bucket_GET_subdir(self):
bucket_name = 'junk-subdir'
@@ -529,7 +530,8 @@ class TestS3ApiBucket(S3ApiTestCase):
self.assertEqual([v.find('./LastModified').text for v in versions],
[v[1][:-3] + 'Z' for v in objects])
self.assertEqual([v.find('./ETag').text for v in versions],
['"0"' for v in objects])
['"0-N"' if v[0] == 'slo' else '"0"'
for v in objects])
self.assertEqual([v.find('./Size').text for v in versions],
[str(v[3]) for v in objects])
self.assertEqual([v.find('./Owner/ID').text for v in versions],

View File

@@ -30,7 +30,10 @@ class TestResponse(unittest.TestCase):
'Etag': 'theetag'})
s3resp = S3Response.from_swift_resp(resp)
self.assertEqual(expected, s3resp.is_slo)
self.assertEqual('"theetag"', s3resp.headers['ETag'])
if s3resp.is_slo:
self.assertEqual('"theetag-N"', s3resp.headers['ETag'])
else:
self.assertEqual('"theetag"', s3resp.headers['ETag'])
def test_response_s3api_sysmeta_headers(self):
for _server_type in ('object', 'container'):