diff --git a/swift/common/middleware/slo.py b/swift/common/middleware/slo.py index 106c9dae89..39f18d593c 100644 --- a/swift/common/middleware/slo.py +++ b/swift/common/middleware/slo.py @@ -313,6 +313,7 @@ metadata which can be used for stats and billing purposes. """ import base64 +from cgi import parse_header from collections import defaultdict from datetime import datetime import json @@ -322,6 +323,8 @@ import six import time from hashlib import md5 from swift.common.exceptions import ListingIterError, SegmentError +from swift.common.middleware.listing_formats import \ + MAX_CONTAINER_LISTING_CONTENT_LENGTH from swift.common.swob import Request, HTTPBadRequest, HTTPServerError, \ HTTPMethodNotAllowed, HTTPRequestEntityTooLarge, HTTPLengthRequired, \ HTTPOk, HTTPPreconditionFailed, HTTPException, HTTPNotFound, \ @@ -1276,6 +1279,14 @@ class StaticLargeObject(object): 'Etag': md5(json_data).hexdigest(), }) + # Ensure container listings have both etags. However, if any + # middleware to the left of us touched the base value, trust them. + override_header = 'X-Object-Sysmeta-Container-Update-Override-Etag' + val, sep, params = req.headers.get( + override_header, '').partition(';') + req.headers[override_header] = '%s; slo_etag=%s' % ( + (val or req.headers['Etag']) + sep + params, slo_etag) + env = req.environ if not env.get('CONTENT_TYPE'): guessed_type, _junk = mimetypes.guess_type(req.path_info) @@ -1408,6 +1419,30 @@ class StaticLargeObject(object): out_content_type=out_content_type) return resp + def handle_container_listing(self, req, start_response): + resp = req.get_response(self.app) + if not resp.is_success or resp.content_type != 'application/json': + return resp(req.environ, start_response) + if resp.content_length is None or \ + resp.content_length > MAX_CONTAINER_LISTING_CONTENT_LENGTH: + return resp(req.environ, start_response) + try: + listing = json.loads(resp.body) + except ValueError: + return resp(req.environ, start_response) + + for item in listing: + if 'subdir' in item: + continue + etag, params = parse_header(item['hash']) + if 'slo_etag' in params: + item['slo_etag'] = '"%s"' % params.pop('slo_etag') + item['hash'] = etag + ''.join( + '; %s=%s' % kv for kv in params.items()) + + resp.body = json.dumps(listing).encode('ascii') + return resp(req.environ, start_response) + def __call__(self, env, start_response): """ WSGI entry point @@ -1417,10 +1452,15 @@ class StaticLargeObject(object): req = Request(env) try: - vrs, account, container, obj = req.split_path(4, 4, True) + vrs, account, container, obj = req.split_path(3, 4, True) except ValueError: return self.app(env, start_response) + if not obj: + if req.method == 'GET': + return self.handle_container_listing(req, start_response) + return self.app(env, start_response) + try: if req.method == 'PUT' and \ req.params.get('multipart-manifest') == 'put': diff --git a/test/functional/swift_test_client.py b/test/functional/swift_test_client.py index 06902b3f07..32a72f437e 100644 --- a/test/functional/swift_test_client.py +++ b/test/functional/swift_test_client.py @@ -830,7 +830,7 @@ class File(Base): header_fields = self.header_fields(fields, optional_fields=optional_fields) - header_fields['etag'] = header_fields['etag'].strip('"') + header_fields['etag'] = header_fields['etag'] return header_fields def initialize(self, hdrs=None, parms=None): @@ -855,7 +855,7 @@ class File(Base): if hdr[0].lower().startswith('x-object-meta-'): self.metadata[hdr[0][14:]] = hdr[1] if hdr[0].lower() == 'etag': - self.etag = hdr[1].strip('"') + self.etag = hdr[1] if hdr[0].lower() == 'content-length': self.size = int(hdr[1]) if hdr[0].lower() == 'last-modified': diff --git a/test/functional/test_slo.py b/test/functional/test_slo.py index 78d478e3c5..3dd5983dd4 100644 --- a/test/functional/test_slo.py +++ b/test/functional/test_slo.py @@ -271,14 +271,19 @@ class TestSlo(Base): file_item.write( json.dumps([self.env.seg_info['seg_a']]), parms={'multipart-manifest': 'put'}) - # The container listing has the etag of the actual manifest object - # contents which we get using multipart-manifest=get. Arguably this - # should be the etag that we get when NOT using multipart-manifest=get, - # to be consistent with size and content-type. But here we at least - # verify that it remains consistent when the object is updated with a - # POST. + # The container listing exposes BOTH the MD5 of the manifest content + # and the SLO MD5-of-MD5s by splitting the latter out into a separate + # key. These should remain consistent when the object is updated with + # a POST. file_item.initialize(parms={'multipart-manifest': 'get'}) - expected_etag = file_item.etag + manifest_etag = file_item.etag + self.assertFalse(manifest_etag.startswith('"')) + self.assertFalse(manifest_etag.endswith('"')) + + file_item.initialize() + slo_etag = file_item.etag + self.assertTrue(slo_etag.startswith('"')) + self.assertTrue(slo_etag.endswith('"')) listing = self.env.container.files(parms={'format': 'json'}) for f_dict in listing: @@ -286,7 +291,8 @@ class TestSlo(Base): self.assertEqual(1024 * 1024, f_dict['bytes']) self.assertEqual('application/octet-stream', f_dict['content_type']) - self.assertEqual(expected_etag, f_dict['hash']) + self.assertEqual(manifest_etag, f_dict['hash']) + self.assertEqual(slo_etag, f_dict['slo_etag']) break else: self.fail('Failed to find manifest file in container listing') @@ -304,7 +310,8 @@ class TestSlo(Base): self.assertEqual(1024 * 1024, f_dict['bytes']) self.assertEqual(file_item.content_type, f_dict['content_type']) - self.assertEqual(expected_etag, f_dict['hash']) + self.assertEqual(manifest_etag, f_dict['hash']) + self.assertEqual(slo_etag, f_dict['slo_etag']) break else: self.fail('Failed to find manifest file in container listing') @@ -322,7 +329,8 @@ class TestSlo(Base): self.assertEqual(1024 * 1024, f_dict['bytes']) self.assertEqual(file_item.content_type, f_dict['content_type']) - self.assertEqual(expected_etag, f_dict['hash']) + self.assertEqual(manifest_etag, f_dict['hash']) + self.assertEqual(slo_etag, f_dict['slo_etag']) break else: self.fail('Failed to find manifest file in container listing') @@ -456,13 +464,14 @@ class TestSlo(Base): self.assertEqual('c', file_contents[-2]) self.assertEqual('d', file_contents[-1]) - def test_slo_etag_is_hash_of_etags(self): + def test_slo_etag_is_quote_wrapped_hash_of_etags(self): # we have this check in test_slo_get_simple_manifest, too, # but verify that it holds for HEAD requests file_item = self.env.container.file('manifest-abcde') - self.assertEqual(self.manifest_abcde_etag, file_item.info()['etag']) + self.assertEqual('"%s"' % self.manifest_abcde_etag, + file_item.info()['etag']) - def test_slo_etag_is_hash_of_etags_submanifests(self): + def test_slo_etag_is_quote_wrapped_hash_of_etags_submanifests(self): def hd(x): return hashlib.md5(x).hexdigest() @@ -474,7 +483,7 @@ class TestSlo(Base): hd('e')) file_item = self.env.container.file('manifest-abcde-submanifest') - self.assertEqual(expected_etag, file_item.info()['etag']) + self.assertEqual('"%s"' % expected_etag, file_item.info()['etag']) def test_slo_etag_mismatch(self): file_item = self.env.container.file("manifest-a-bad-etag") @@ -657,32 +666,34 @@ class TestSlo(Base): def test_slo_copy_the_manifest(self): source = self.env.container.file("manifest-abcde") + source.initialize(parms={'multipart-manifest': 'get'}) source_contents = source.read(parms={'multipart-manifest': 'get'}) source_json = json.loads(source_contents) + manifest_etag = hashlib.md5(source_contents).hexdigest() + self.assertEqual(manifest_etag, source.etag) + source.initialize() self.assertEqual('application/octet-stream', source.content_type) - source.initialize(parms={'multipart-manifest': 'get'}) - source_hash = hashlib.md5() - source_hash.update(source_contents) - self.assertEqual(source_hash.hexdigest(), source.etag) + self.assertNotEqual(manifest_etag, source.etag) + slo_etag = source.etag self.assertTrue(source.copy(self.env.container.name, "copied-abcde-manifest-only", parms={'multipart-manifest': 'get'})) copied = self.env.container.file("copied-abcde-manifest-only") + copied.initialize(parms={'multipart-manifest': 'get'}) copied_contents = copied.read(parms={'multipart-manifest': 'get'}) try: copied_json = json.loads(copied_contents) except ValueError: self.fail("COPY didn't copy the manifest (invalid json on GET)") self.assertEqual(source_json, copied_json) + self.assertEqual(manifest_etag, copied.etag) + copied.initialize() self.assertEqual('application/octet-stream', copied.content_type) - copied.initialize(parms={'multipart-manifest': 'get'}) - copied_hash = hashlib.md5() - copied_hash.update(copied_contents) - self.assertEqual(copied_hash.hexdigest(), copied.etag) + self.assertEqual(slo_etag, copied.etag) # verify the listing metadata listing = self.env.container.files(parms={'format': 'json'}) @@ -696,13 +707,15 @@ class TestSlo(Base): actual = names['manifest-abcde'] self.assertEqual(4 * 1024 * 1024 + 1, actual['bytes']) self.assertEqual('application/octet-stream', actual['content_type']) - self.assertEqual(source.etag, actual['hash']) + self.assertEqual(manifest_etag, actual['hash']) + self.assertEqual(slo_etag, actual['slo_etag']) self.assertIn('copied-abcde-manifest-only', names) actual = names['copied-abcde-manifest-only'] self.assertEqual(4 * 1024 * 1024 + 1, actual['bytes']) self.assertEqual('application/octet-stream', actual['content_type']) - self.assertEqual(copied.etag, actual['hash']) + self.assertEqual(manifest_etag, actual['hash']) + self.assertEqual(slo_etag, actual['slo_etag']) # Test copy manifest including data segments source = self.env.container.file("mixed-object-data-manifest") @@ -727,14 +740,16 @@ class TestSlo(Base): source = self.env.container.file("manifest-abcde") source.content_type = 'application/octet-stream' source.sync_metadata({'test': 'original'}) + source.initialize(parms={'multipart-manifest': 'get'}) source_contents = source.read(parms={'multipart-manifest': 'get'}) source_json = json.loads(source_contents) + manifest_etag = hashlib.md5(source_contents).hexdigest() + self.assertEqual(manifest_etag, source.etag) + source.initialize() self.assertEqual('application/octet-stream', source.content_type) - source.initialize(parms={'multipart-manifest': 'get'}) - source_hash = hashlib.md5() - source_hash.update(source_contents) - self.assertEqual(source_hash.hexdigest(), source.etag) + self.assertNotEqual(manifest_etag, source.etag) + slo_etag = source.etag self.assertEqual(source.metadata['test'], 'original') self.assertTrue( @@ -744,18 +759,18 @@ class TestSlo(Base): 'X-Object-Meta-Test': 'updated'})) copied = self.env.container.file("copied-abcde-manifest-only") + copied.initialize(parms={'multipart-manifest': 'get'}) copied_contents = copied.read(parms={'multipart-manifest': 'get'}) try: copied_json = json.loads(copied_contents) except ValueError: self.fail("COPY didn't copy the manifest (invalid json on GET)") self.assertEqual(source_json, copied_json) + self.assertEqual(manifest_etag, copied.etag) + copied.initialize() self.assertEqual('image/jpeg', copied.content_type) - copied.initialize(parms={'multipart-manifest': 'get'}) - copied_hash = hashlib.md5() - copied_hash.update(copied_contents) - self.assertEqual(copied_hash.hexdigest(), copied.etag) + self.assertEqual(slo_etag, copied.etag) self.assertEqual(copied.metadata['test'], 'updated') # verify the listing metadata @@ -771,13 +786,15 @@ class TestSlo(Base): self.assertEqual(4 * 1024 * 1024 + 1, actual['bytes']) self.assertEqual('application/octet-stream', actual['content_type']) # the container listing should have the etag of the manifest contents - self.assertEqual(source.etag, actual['hash']) + self.assertEqual(manifest_etag, actual['hash']) + self.assertEqual(slo_etag, actual['slo_etag']) self.assertIn('copied-abcde-manifest-only', names) actual = names['copied-abcde-manifest-only'] self.assertEqual(4 * 1024 * 1024 + 1, actual['bytes']) self.assertEqual('image/jpeg', actual['content_type']) - self.assertEqual(copied.etag, actual['hash']) + self.assertEqual(manifest_etag, actual['hash']) + self.assertEqual(slo_etag, actual['slo_etag']) def test_slo_copy_the_manifest_account(self): acct = self.env.conn.account_name diff --git a/test/functional/test_symlink.py b/test/functional/test_symlink.py index abdcf4b7ab..8f82486a75 100755 --- a/test/functional/test_symlink.py +++ b/test/functional/test_symlink.py @@ -1094,13 +1094,14 @@ class TestSymlinkToSloSegments(Base): parms={'multipart-manifest': 'put'}) # The container listing has the etag of the actual manifest object - # contents which we get using multipart-manifest=get. Arguably this - # should be the etag that we get when NOT using multipart-manifest=get, - # to be consistent with size and content-type. But here we at least - # verify that it remains consistent when the object is updated with a - # POST. + # contents which we get using multipart-manifest=get. New enough swift + # also exposes the etag that we get when NOT using + # multipart-manifest=get. Verify that both remain consistent when the + # object is updated with a POST. + file_item.initialize() + slo_etag = file_item.etag file_item.initialize(parms={'multipart-manifest': 'get'}) - expected_etag = file_item.etag + manifest_etag = file_item.etag listing = self.env.container.files(parms={'format': 'json'}) for f_dict in listing: @@ -1108,7 +1109,8 @@ class TestSymlinkToSloSegments(Base): self.assertEqual(1024 * 1024, f_dict['bytes']) self.assertEqual('application/octet-stream', f_dict['content_type']) - self.assertEqual(expected_etag, f_dict['hash']) + self.assertEqual(manifest_etag, f_dict['hash']) + self.assertEqual(slo_etag, f_dict['slo_etag']) break else: self.fail('Failed to find manifest file in container listing') @@ -1126,7 +1128,8 @@ class TestSymlinkToSloSegments(Base): self.assertEqual(1024 * 1024, f_dict['bytes']) self.assertEqual(file_item.content_type, f_dict['content_type']) - self.assertEqual(expected_etag, f_dict['hash']) + self.assertEqual(manifest_etag, f_dict['hash']) + self.assertEqual(slo_etag, f_dict['slo_etag']) break else: self.fail('Failed to find manifest file in container listing') @@ -1144,7 +1147,8 @@ class TestSymlinkToSloSegments(Base): self.assertEqual(1024 * 1024, f_dict['bytes']) self.assertEqual(file_item.content_type, f_dict['content_type']) - self.assertEqual(expected_etag, f_dict['hash']) + self.assertEqual(manifest_etag, f_dict['hash']) + self.assertEqual(slo_etag, f_dict['slo_etag']) break else: self.fail('Failed to find manifest file in container listing') @@ -1156,7 +1160,7 @@ class TestSymlinkToSloSegments(Base): expected_etag = expected_hash.hexdigest() file_item = self.env.container.file('manifest-linkto-ab') - self.assertEqual(expected_etag, file_item.info()['etag']) + self.assertEqual('"%s"' % expected_etag, file_item.info()['etag']) def test_slo_copy(self): file_item = self.env.container.file("manifest-linkto-ab") @@ -1171,12 +1175,16 @@ class TestSymlinkToSloSegments(Base): source = self.env.container.file("manifest-linkto-ab") source_contents = source.read(parms={'multipart-manifest': 'get'}) source_json = json.loads(source_contents) + manifest_etag = hashlib.md5(source_contents).hexdigest() + source.initialize() + slo_etag = source.etag self.assertEqual('application/octet-stream', source.content_type) + source.initialize(parms={'multipart-manifest': 'get'}) - source_hash = hashlib.md5() - source_hash.update(source_contents) - self.assertEqual(source_hash.hexdigest(), source.etag) + self.assertEqual(manifest_etag, source.etag) + self.assertEqual('application/json; charset=utf-8', + source.content_type) # now, copy the manifest self.assertTrue(source.copy(self.env.container.name, @@ -1193,12 +1201,14 @@ class TestSymlinkToSloSegments(Base): # make sure content of copied manifest is the same as original man. self.assertEqual(source_json, copied_json) copied.initialize() + self.assertEqual(copied.etag, slo_etag) self.assertEqual('application/octet-stream', copied.content_type) + copied.initialize(parms={'multipart-manifest': 'get'}) - copied_hash = hashlib.md5() - copied_hash.update(copied_contents) - self.assertEqual(copied_hash.hexdigest(), copied.etag) - self.assertEqual(copied_hash.hexdigest(), source.etag) + self.assertEqual(source_contents, copied_contents) + self.assertEqual(copied.etag, manifest_etag) + self.assertEqual('application/json; charset=utf-8', + copied.content_type) # verify the listing metadata listing = self.env.container.files(parms={'format': 'json'}) @@ -1212,13 +1222,15 @@ class TestSymlinkToSloSegments(Base): actual = names['manifest-linkto-ab'] self.assertEqual(2 * 1024 * 1024, actual['bytes']) self.assertEqual('application/octet-stream', actual['content_type']) - self.assertEqual(source.etag, actual['hash']) + self.assertEqual(manifest_etag, actual['hash']) + self.assertEqual(slo_etag, actual['slo_etag']) self.assertIn('copied-ab-manifest-only', names) actual = names['copied-ab-manifest-only'] self.assertEqual(2 * 1024 * 1024, actual['bytes']) self.assertEqual('application/octet-stream', actual['content_type']) - self.assertEqual(copied.etag, actual['hash']) + self.assertEqual(manifest_etag, actual['hash']) + self.assertEqual(slo_etag, actual['slo_etag']) class TestSymlinkDlo(Base): diff --git a/test/unit/common/middleware/test_slo.py b/test/unit/common/middleware/test_slo.py index df880897f1..40f157e3dd 100644 --- a/test/unit/common/middleware/test_slo.py +++ b/test/unit/common/middleware/test_slo.py @@ -418,12 +418,18 @@ class TestSloPutManifest(SloTestCase): list(self.slo.handle_multipart_put(req, fake_start_response)) def test_handle_multipart_put_success(self): + override_header = 'X-Object-Sysmeta-Container-Update-Override-Etag' + headers = { + 'Accept': 'test', + override_header: '; params=are important', + } req = Request.blank( '/v1/AUTH_test/c/man?multipart-manifest=put', - environ={'REQUEST_METHOD': 'PUT'}, headers={'Accept': 'test'}, + environ={'REQUEST_METHOD': 'PUT'}, headers=headers, body=test_json_data) for h in ('X-Static-Large-Object', 'X-Object-Sysmeta-Slo-Etag', 'X-Object-Sysmeta-Slo-Size'): + # Sanity self.assertNotIn(h, req.headers) status, headers, body = self.call_slo(req) @@ -431,9 +437,16 @@ class TestSloPutManifest(SloTestCase): self.assertIn(('Etag', gen_etag), headers) self.assertIn('X-Static-Large-Object', req.headers) self.assertEqual(req.headers['X-Static-Large-Object'], 'True') + self.assertIn('Etag', req.headers) self.assertIn('X-Object-Sysmeta-Slo-Etag', req.headers) + self.assertIn('X-Object-Sysmeta-Container-Update-Override-Etag', + req.headers) self.assertEqual(req.headers['X-Object-Sysmeta-Slo-Etag'], - md5hex('etagoftheobjectsegment')) + gen_etag.strip('"')) + self.assertEqual( + req.headers['X-Object-Sysmeta-Container-Update-Override-Etag'], + '%s; params=are important; slo_etag=%s' % ( + req.headers['Etag'], gen_etag.strip('"'))) self.assertIn('X-Object-Sysmeta-Slo-Size', req.headers) self.assertEqual(req.headers['X-Object-Sysmeta-Slo-Size'], '100') self.assertIn('Content-Type', req.headers) @@ -968,13 +981,15 @@ class TestSloPutManifest(SloTestCase): 'size_bytes': None}, {'path': '/cont/object', 'etag': None, 'size_bytes': None, 'range': '10-40'}]) + override_header = 'X-Object-Sysmeta-Container-Update-Override-Etag' req = Request.blank( '/v1/AUTH_test/checktest/man_3?multipart-manifest=put', - environ={'REQUEST_METHOD': 'PUT'}, body=good_data) + environ={'REQUEST_METHOD': 'PUT'}, body=good_data, + headers={override_header: 'my custom etag'}) status, headers, body = self.call_slo(req) self.assertEqual(('201 Created', ''), (status, body)) - expected_etag = '"%s"' % md5hex('ab:1-1;b:0-0;aetagoftheobjectsegment:' - '10-40;') + expected_etag = '"%s"' % md5hex( + 'ab:1-1;b:0-0;aetagoftheobjectsegment:10-40;') self.assertEqual(expected_etag, dict(headers)['Etag']) self.assertEqual([ ('HEAD', '/v1/AUTH_test/checktest/a_1'), # Only once! @@ -984,6 +999,9 @@ class TestSloPutManifest(SloTestCase): self.assertEqual( ('PUT', '/v1/AUTH_test/checktest/man_3?multipart-manifest=put'), self.app.calls[-1]) + self.assertEqual( + 'my custom etag; slo_etag=%s' % expected_etag.strip('"'), + self.app.headers[-1].get(override_header)) # Check that we still populated the manifest properly from our HEADs req = Request.blank( @@ -3854,5 +3872,6 @@ class TestSwiftInfo(unittest.TestCase): self.assertEqual(1, mware.concurrency) self.assertEqual(3, mware.bulk_deleter.delete_concurrency) + if __name__ == '__main__': unittest.main()