Include SLO ETag in container updates

Container servers will store an etag like

   <MD5 of manifest on disk>; slo_etag=<MD5 of concatenated ETags>

which the SLO middleware will break out into separate

   "hash": "<MD5 of manifest on disk>",
   "slo_etag": "\"<MD5 of concatenated ETags>\"",

keys for JSON listings. Text and XML listings are unaffected.

If a middleware left of SLO already specified a container update
override, the slo_etag parameter will be appended. If the base header
value was blank, the MD5 of the manifest will be inserted.

SLOs that were created on previous versions of Swift will continue to
just have the MD5 of the manifest in container listings.

Closes-Bug: 1618573
Change-Id: I67478923619b00ec1a37d56b6fec6a218453dafc
This commit is contained in:
Tim Burke 2016-05-20 13:17:16 -07:00
parent 1ab691f637
commit c4c98eb64d
5 changed files with 149 additions and 61 deletions

View File

@ -313,6 +313,7 @@ metadata which can be used for stats and billing purposes.
"""
import base64
from cgi import parse_header
from collections import defaultdict
from datetime import datetime
import json
@ -322,6 +323,8 @@ import six
import time
from hashlib import md5
from swift.common.exceptions import ListingIterError, SegmentError
from swift.common.middleware.listing_formats import \
MAX_CONTAINER_LISTING_CONTENT_LENGTH
from swift.common.swob import Request, HTTPBadRequest, HTTPServerError, \
HTTPMethodNotAllowed, HTTPRequestEntityTooLarge, HTTPLengthRequired, \
HTTPOk, HTTPPreconditionFailed, HTTPException, HTTPNotFound, \
@ -1276,6 +1279,14 @@ class StaticLargeObject(object):
'Etag': md5(json_data).hexdigest(),
})
# Ensure container listings have both etags. However, if any
# middleware to the left of us touched the base value, trust them.
override_header = 'X-Object-Sysmeta-Container-Update-Override-Etag'
val, sep, params = req.headers.get(
override_header, '').partition(';')
req.headers[override_header] = '%s; slo_etag=%s' % (
(val or req.headers['Etag']) + sep + params, slo_etag)
env = req.environ
if not env.get('CONTENT_TYPE'):
guessed_type, _junk = mimetypes.guess_type(req.path_info)
@ -1408,6 +1419,30 @@ class StaticLargeObject(object):
out_content_type=out_content_type)
return resp
def handle_container_listing(self, req, start_response):
    """
    Split the ``slo_etag`` parameter out of ``hash`` in JSON listings.

    The request is forwarded to the wrapped app. When the response is a
    successful ``application/json`` listing of known, bounded size, every
    object entry whose ``hash`` carries a ``slo_etag`` parameter gets that
    value moved into a separate, quote-wrapped ``slo_etag`` key; any other
    parameters stay attached to ``hash``. All other responses are passed
    through untouched.

    :param req: the swob request for the container listing
    :param start_response: the WSGI start_response callable
    :returns: the WSGI response iterable produced by the (possibly
              rewritten) response
    """
    resp = req.get_response(self.app)
    if not resp.is_success or resp.content_type != 'application/json':
        return resp(req.environ, start_response)
    # Only rewrite bodies we can safely buffer and parse in memory.
    if resp.content_length is None or \
            resp.content_length > MAX_CONTAINER_LISTING_CONTENT_LENGTH:
        return resp(req.environ, start_response)
    try:
        listing = json.loads(resp.body)
    except ValueError:
        return resp(req.environ, start_response)
    # Valid JSON that is not a list is not a listing we understand; hand
    # it back unchanged instead of failing while iterating over it.
    if not isinstance(listing, list):
        return resp(req.environ, start_response)

    for item in listing:
        # Skip subdir placeholders and anything that is not an object
        # record at all.
        if not isinstance(item, dict) or 'subdir' in item:
            continue
        try:
            # A missing/empty hash parses to ('', {}) and is left alone.
            etag, params = parse_header(item.get('hash') or '')
        except TypeError:
            # Non-string hash value: leave the entry exactly as it came.
            continue
        if 'slo_etag' in params:
            item['slo_etag'] = '"%s"' % params.pop('slo_etag')
            item['hash'] = etag + ''.join(
                '; %s=%s' % kv for kv in params.items())

    resp.body = json.dumps(listing).encode('ascii')
    return resp(req.environ, start_response)
def __call__(self, env, start_response):
"""
WSGI entry point
@ -1417,10 +1452,15 @@ class StaticLargeObject(object):
req = Request(env)
try:
vrs, account, container, obj = req.split_path(4, 4, True)
vrs, account, container, obj = req.split_path(3, 4, True)
except ValueError:
return self.app(env, start_response)
if not obj:
if req.method == 'GET':
return self.handle_container_listing(req, start_response)
return self.app(env, start_response)
try:
if req.method == 'PUT' and \
req.params.get('multipart-manifest') == 'put':

View File

@ -830,7 +830,7 @@ class File(Base):
header_fields = self.header_fields(fields,
optional_fields=optional_fields)
header_fields['etag'] = header_fields['etag'].strip('"')
header_fields['etag'] = header_fields['etag']
return header_fields
def initialize(self, hdrs=None, parms=None):
@ -855,7 +855,7 @@ class File(Base):
if hdr[0].lower().startswith('x-object-meta-'):
self.metadata[hdr[0][14:]] = hdr[1]
if hdr[0].lower() == 'etag':
self.etag = hdr[1].strip('"')
self.etag = hdr[1]
if hdr[0].lower() == 'content-length':
self.size = int(hdr[1])
if hdr[0].lower() == 'last-modified':

View File

@ -271,14 +271,19 @@ class TestSlo(Base):
file_item.write(
json.dumps([self.env.seg_info['seg_a']]),
parms={'multipart-manifest': 'put'})
# The container listing has the etag of the actual manifest object
# contents which we get using multipart-manifest=get. Arguably this
# should be the etag that we get when NOT using multipart-manifest=get,
# to be consistent with size and content-type. But here we at least
# verify that it remains consistent when the object is updated with a
# POST.
# The container listing exposes BOTH the MD5 of the manifest content
# and the SLO MD5-of-MD5s by splitting the latter out into a separate
# key. These should remain consistent when the object is updated with
# a POST.
file_item.initialize(parms={'multipart-manifest': 'get'})
expected_etag = file_item.etag
manifest_etag = file_item.etag
self.assertFalse(manifest_etag.startswith('"'))
self.assertFalse(manifest_etag.endswith('"'))
file_item.initialize()
slo_etag = file_item.etag
self.assertTrue(slo_etag.startswith('"'))
self.assertTrue(slo_etag.endswith('"'))
listing = self.env.container.files(parms={'format': 'json'})
for f_dict in listing:
@ -286,7 +291,8 @@ class TestSlo(Base):
self.assertEqual(1024 * 1024, f_dict['bytes'])
self.assertEqual('application/octet-stream',
f_dict['content_type'])
self.assertEqual(expected_etag, f_dict['hash'])
self.assertEqual(manifest_etag, f_dict['hash'])
self.assertEqual(slo_etag, f_dict['slo_etag'])
break
else:
self.fail('Failed to find manifest file in container listing')
@ -304,7 +310,8 @@ class TestSlo(Base):
self.assertEqual(1024 * 1024, f_dict['bytes'])
self.assertEqual(file_item.content_type,
f_dict['content_type'])
self.assertEqual(expected_etag, f_dict['hash'])
self.assertEqual(manifest_etag, f_dict['hash'])
self.assertEqual(slo_etag, f_dict['slo_etag'])
break
else:
self.fail('Failed to find manifest file in container listing')
@ -322,7 +329,8 @@ class TestSlo(Base):
self.assertEqual(1024 * 1024, f_dict['bytes'])
self.assertEqual(file_item.content_type,
f_dict['content_type'])
self.assertEqual(expected_etag, f_dict['hash'])
self.assertEqual(manifest_etag, f_dict['hash'])
self.assertEqual(slo_etag, f_dict['slo_etag'])
break
else:
self.fail('Failed to find manifest file in container listing')
@ -456,13 +464,14 @@ class TestSlo(Base):
self.assertEqual('c', file_contents[-2])
self.assertEqual('d', file_contents[-1])
def test_slo_etag_is_hash_of_etags(self):
def test_slo_etag_is_quote_wrapped_hash_of_etags(self):
# we have this check in test_slo_get_simple_manifest, too,
# but verify that it holds for HEAD requests
file_item = self.env.container.file('manifest-abcde')
self.assertEqual(self.manifest_abcde_etag, file_item.info()['etag'])
self.assertEqual('"%s"' % self.manifest_abcde_etag,
file_item.info()['etag'])
def test_slo_etag_is_hash_of_etags_submanifests(self):
def test_slo_etag_is_quote_wrapped_hash_of_etags_submanifests(self):
def hd(x):
return hashlib.md5(x).hexdigest()
@ -474,7 +483,7 @@ class TestSlo(Base):
hd('e'))
file_item = self.env.container.file('manifest-abcde-submanifest')
self.assertEqual(expected_etag, file_item.info()['etag'])
self.assertEqual('"%s"' % expected_etag, file_item.info()['etag'])
def test_slo_etag_mismatch(self):
file_item = self.env.container.file("manifest-a-bad-etag")
@ -657,32 +666,34 @@ class TestSlo(Base):
def test_slo_copy_the_manifest(self):
source = self.env.container.file("manifest-abcde")
source.initialize(parms={'multipart-manifest': 'get'})
source_contents = source.read(parms={'multipart-manifest': 'get'})
source_json = json.loads(source_contents)
manifest_etag = hashlib.md5(source_contents).hexdigest()
self.assertEqual(manifest_etag, source.etag)
source.initialize()
self.assertEqual('application/octet-stream', source.content_type)
source.initialize(parms={'multipart-manifest': 'get'})
source_hash = hashlib.md5()
source_hash.update(source_contents)
self.assertEqual(source_hash.hexdigest(), source.etag)
self.assertNotEqual(manifest_etag, source.etag)
slo_etag = source.etag
self.assertTrue(source.copy(self.env.container.name,
"copied-abcde-manifest-only",
parms={'multipart-manifest': 'get'}))
copied = self.env.container.file("copied-abcde-manifest-only")
copied.initialize(parms={'multipart-manifest': 'get'})
copied_contents = copied.read(parms={'multipart-manifest': 'get'})
try:
copied_json = json.loads(copied_contents)
except ValueError:
self.fail("COPY didn't copy the manifest (invalid json on GET)")
self.assertEqual(source_json, copied_json)
self.assertEqual(manifest_etag, copied.etag)
copied.initialize()
self.assertEqual('application/octet-stream', copied.content_type)
copied.initialize(parms={'multipart-manifest': 'get'})
copied_hash = hashlib.md5()
copied_hash.update(copied_contents)
self.assertEqual(copied_hash.hexdigest(), copied.etag)
self.assertEqual(slo_etag, copied.etag)
# verify the listing metadata
listing = self.env.container.files(parms={'format': 'json'})
@ -696,13 +707,15 @@ class TestSlo(Base):
actual = names['manifest-abcde']
self.assertEqual(4 * 1024 * 1024 + 1, actual['bytes'])
self.assertEqual('application/octet-stream', actual['content_type'])
self.assertEqual(source.etag, actual['hash'])
self.assertEqual(manifest_etag, actual['hash'])
self.assertEqual(slo_etag, actual['slo_etag'])
self.assertIn('copied-abcde-manifest-only', names)
actual = names['copied-abcde-manifest-only']
self.assertEqual(4 * 1024 * 1024 + 1, actual['bytes'])
self.assertEqual('application/octet-stream', actual['content_type'])
self.assertEqual(copied.etag, actual['hash'])
self.assertEqual(manifest_etag, actual['hash'])
self.assertEqual(slo_etag, actual['slo_etag'])
# Test copy manifest including data segments
source = self.env.container.file("mixed-object-data-manifest")
@ -727,14 +740,16 @@ class TestSlo(Base):
source = self.env.container.file("manifest-abcde")
source.content_type = 'application/octet-stream'
source.sync_metadata({'test': 'original'})
source.initialize(parms={'multipart-manifest': 'get'})
source_contents = source.read(parms={'multipart-manifest': 'get'})
source_json = json.loads(source_contents)
manifest_etag = hashlib.md5(source_contents).hexdigest()
self.assertEqual(manifest_etag, source.etag)
source.initialize()
self.assertEqual('application/octet-stream', source.content_type)
source.initialize(parms={'multipart-manifest': 'get'})
source_hash = hashlib.md5()
source_hash.update(source_contents)
self.assertEqual(source_hash.hexdigest(), source.etag)
self.assertNotEqual(manifest_etag, source.etag)
slo_etag = source.etag
self.assertEqual(source.metadata['test'], 'original')
self.assertTrue(
@ -744,18 +759,18 @@ class TestSlo(Base):
'X-Object-Meta-Test': 'updated'}))
copied = self.env.container.file("copied-abcde-manifest-only")
copied.initialize(parms={'multipart-manifest': 'get'})
copied_contents = copied.read(parms={'multipart-manifest': 'get'})
try:
copied_json = json.loads(copied_contents)
except ValueError:
self.fail("COPY didn't copy the manifest (invalid json on GET)")
self.assertEqual(source_json, copied_json)
self.assertEqual(manifest_etag, copied.etag)
copied.initialize()
self.assertEqual('image/jpeg', copied.content_type)
copied.initialize(parms={'multipart-manifest': 'get'})
copied_hash = hashlib.md5()
copied_hash.update(copied_contents)
self.assertEqual(copied_hash.hexdigest(), copied.etag)
self.assertEqual(slo_etag, copied.etag)
self.assertEqual(copied.metadata['test'], 'updated')
# verify the listing metadata
@ -771,13 +786,15 @@ class TestSlo(Base):
self.assertEqual(4 * 1024 * 1024 + 1, actual['bytes'])
self.assertEqual('application/octet-stream', actual['content_type'])
# the container listing should have the etag of the manifest contents
self.assertEqual(source.etag, actual['hash'])
self.assertEqual(manifest_etag, actual['hash'])
self.assertEqual(slo_etag, actual['slo_etag'])
self.assertIn('copied-abcde-manifest-only', names)
actual = names['copied-abcde-manifest-only']
self.assertEqual(4 * 1024 * 1024 + 1, actual['bytes'])
self.assertEqual('image/jpeg', actual['content_type'])
self.assertEqual(copied.etag, actual['hash'])
self.assertEqual(manifest_etag, actual['hash'])
self.assertEqual(slo_etag, actual['slo_etag'])
def test_slo_copy_the_manifest_account(self):
acct = self.env.conn.account_name

View File

@ -1094,13 +1094,14 @@ class TestSymlinkToSloSegments(Base):
parms={'multipart-manifest': 'put'})
# The container listing has the etag of the actual manifest object
# contents which we get using multipart-manifest=get. Arguably this
# should be the etag that we get when NOT using multipart-manifest=get,
# to be consistent with size and content-type. But here we at least
# verify that it remains consistent when the object is updated with a
# POST.
# contents which we get using multipart-manifest=get. New enough swift
# also exposes the etag that we get when NOT using
# multipart-manifest=get. Verify that both remain consistent when the
# object is updated with a POST.
file_item.initialize()
slo_etag = file_item.etag
file_item.initialize(parms={'multipart-manifest': 'get'})
expected_etag = file_item.etag
manifest_etag = file_item.etag
listing = self.env.container.files(parms={'format': 'json'})
for f_dict in listing:
@ -1108,7 +1109,8 @@ class TestSymlinkToSloSegments(Base):
self.assertEqual(1024 * 1024, f_dict['bytes'])
self.assertEqual('application/octet-stream',
f_dict['content_type'])
self.assertEqual(expected_etag, f_dict['hash'])
self.assertEqual(manifest_etag, f_dict['hash'])
self.assertEqual(slo_etag, f_dict['slo_etag'])
break
else:
self.fail('Failed to find manifest file in container listing')
@ -1126,7 +1128,8 @@ class TestSymlinkToSloSegments(Base):
self.assertEqual(1024 * 1024, f_dict['bytes'])
self.assertEqual(file_item.content_type,
f_dict['content_type'])
self.assertEqual(expected_etag, f_dict['hash'])
self.assertEqual(manifest_etag, f_dict['hash'])
self.assertEqual(slo_etag, f_dict['slo_etag'])
break
else:
self.fail('Failed to find manifest file in container listing')
@ -1144,7 +1147,8 @@ class TestSymlinkToSloSegments(Base):
self.assertEqual(1024 * 1024, f_dict['bytes'])
self.assertEqual(file_item.content_type,
f_dict['content_type'])
self.assertEqual(expected_etag, f_dict['hash'])
self.assertEqual(manifest_etag, f_dict['hash'])
self.assertEqual(slo_etag, f_dict['slo_etag'])
break
else:
self.fail('Failed to find manifest file in container listing')
@ -1156,7 +1160,7 @@ class TestSymlinkToSloSegments(Base):
expected_etag = expected_hash.hexdigest()
file_item = self.env.container.file('manifest-linkto-ab')
self.assertEqual(expected_etag, file_item.info()['etag'])
self.assertEqual('"%s"' % expected_etag, file_item.info()['etag'])
def test_slo_copy(self):
file_item = self.env.container.file("manifest-linkto-ab")
@ -1171,12 +1175,16 @@ class TestSymlinkToSloSegments(Base):
source = self.env.container.file("manifest-linkto-ab")
source_contents = source.read(parms={'multipart-manifest': 'get'})
source_json = json.loads(source_contents)
manifest_etag = hashlib.md5(source_contents).hexdigest()
source.initialize()
slo_etag = source.etag
self.assertEqual('application/octet-stream', source.content_type)
source.initialize(parms={'multipart-manifest': 'get'})
source_hash = hashlib.md5()
source_hash.update(source_contents)
self.assertEqual(source_hash.hexdigest(), source.etag)
self.assertEqual(manifest_etag, source.etag)
self.assertEqual('application/json; charset=utf-8',
source.content_type)
# now, copy the manifest
self.assertTrue(source.copy(self.env.container.name,
@ -1193,12 +1201,14 @@ class TestSymlinkToSloSegments(Base):
# make sure content of copied manifest is the same as original man.
self.assertEqual(source_json, copied_json)
copied.initialize()
self.assertEqual(copied.etag, slo_etag)
self.assertEqual('application/octet-stream', copied.content_type)
copied.initialize(parms={'multipart-manifest': 'get'})
copied_hash = hashlib.md5()
copied_hash.update(copied_contents)
self.assertEqual(copied_hash.hexdigest(), copied.etag)
self.assertEqual(copied_hash.hexdigest(), source.etag)
self.assertEqual(source_contents, copied_contents)
self.assertEqual(copied.etag, manifest_etag)
self.assertEqual('application/json; charset=utf-8',
copied.content_type)
# verify the listing metadata
listing = self.env.container.files(parms={'format': 'json'})
@ -1212,13 +1222,15 @@ class TestSymlinkToSloSegments(Base):
actual = names['manifest-linkto-ab']
self.assertEqual(2 * 1024 * 1024, actual['bytes'])
self.assertEqual('application/octet-stream', actual['content_type'])
self.assertEqual(source.etag, actual['hash'])
self.assertEqual(manifest_etag, actual['hash'])
self.assertEqual(slo_etag, actual['slo_etag'])
self.assertIn('copied-ab-manifest-only', names)
actual = names['copied-ab-manifest-only']
self.assertEqual(2 * 1024 * 1024, actual['bytes'])
self.assertEqual('application/octet-stream', actual['content_type'])
self.assertEqual(copied.etag, actual['hash'])
self.assertEqual(manifest_etag, actual['hash'])
self.assertEqual(slo_etag, actual['slo_etag'])
class TestSymlinkDlo(Base):

View File

@ -418,12 +418,18 @@ class TestSloPutManifest(SloTestCase):
list(self.slo.handle_multipart_put(req, fake_start_response))
def test_handle_multipart_put_success(self):
override_header = 'X-Object-Sysmeta-Container-Update-Override-Etag'
headers = {
'Accept': 'test',
override_header: '; params=are important',
}
req = Request.blank(
'/v1/AUTH_test/c/man?multipart-manifest=put',
environ={'REQUEST_METHOD': 'PUT'}, headers={'Accept': 'test'},
environ={'REQUEST_METHOD': 'PUT'}, headers=headers,
body=test_json_data)
for h in ('X-Static-Large-Object', 'X-Object-Sysmeta-Slo-Etag',
'X-Object-Sysmeta-Slo-Size'):
# Sanity
self.assertNotIn(h, req.headers)
status, headers, body = self.call_slo(req)
@ -431,9 +437,16 @@ class TestSloPutManifest(SloTestCase):
self.assertIn(('Etag', gen_etag), headers)
self.assertIn('X-Static-Large-Object', req.headers)
self.assertEqual(req.headers['X-Static-Large-Object'], 'True')
self.assertIn('Etag', req.headers)
self.assertIn('X-Object-Sysmeta-Slo-Etag', req.headers)
self.assertIn('X-Object-Sysmeta-Container-Update-Override-Etag',
req.headers)
self.assertEqual(req.headers['X-Object-Sysmeta-Slo-Etag'],
md5hex('etagoftheobjectsegment'))
gen_etag.strip('"'))
self.assertEqual(
req.headers['X-Object-Sysmeta-Container-Update-Override-Etag'],
'%s; params=are important; slo_etag=%s' % (
req.headers['Etag'], gen_etag.strip('"')))
self.assertIn('X-Object-Sysmeta-Slo-Size', req.headers)
self.assertEqual(req.headers['X-Object-Sysmeta-Slo-Size'], '100')
self.assertIn('Content-Type', req.headers)
@ -968,13 +981,15 @@ class TestSloPutManifest(SloTestCase):
'size_bytes': None},
{'path': '/cont/object', 'etag': None,
'size_bytes': None, 'range': '10-40'}])
override_header = 'X-Object-Sysmeta-Container-Update-Override-Etag'
req = Request.blank(
'/v1/AUTH_test/checktest/man_3?multipart-manifest=put',
environ={'REQUEST_METHOD': 'PUT'}, body=good_data)
environ={'REQUEST_METHOD': 'PUT'}, body=good_data,
headers={override_header: 'my custom etag'})
status, headers, body = self.call_slo(req)
self.assertEqual(('201 Created', ''), (status, body))
expected_etag = '"%s"' % md5hex('ab:1-1;b:0-0;aetagoftheobjectsegment:'
'10-40;')
expected_etag = '"%s"' % md5hex(
'ab:1-1;b:0-0;aetagoftheobjectsegment:10-40;')
self.assertEqual(expected_etag, dict(headers)['Etag'])
self.assertEqual([
('HEAD', '/v1/AUTH_test/checktest/a_1'), # Only once!
@ -984,6 +999,9 @@ class TestSloPutManifest(SloTestCase):
self.assertEqual(
('PUT', '/v1/AUTH_test/checktest/man_3?multipart-manifest=put'),
self.app.calls[-1])
self.assertEqual(
'my custom etag; slo_etag=%s' % expected_etag.strip('"'),
self.app.headers[-1].get(override_header))
# Check that we still populated the manifest properly from our HEADs
req = Request.blank(
@ -3854,5 +3872,6 @@ class TestSwiftInfo(unittest.TestCase):
self.assertEqual(1, mware.concurrency)
self.assertEqual(3, mware.bulk_deleter.delete_concurrency)
if __name__ == '__main__':
unittest.main()