From bb716573ab5c8455348ec013feb894421e0e1f1c Mon Sep 17 00:00:00 2001 From: Tim Burke Date: Wed, 20 May 2015 00:39:41 -0700 Subject: [PATCH] Allow SLO PUTs to forgo per-segment integrity checks While manifests still require 'etag' and 'size_bytes' fields for each segment (to catch user errors like 'etaf' or 'size_btyes'), an explicit null for either will skip that particular integrity check and instead use whatever value is retrieved when HEADing the segment. So, if a user uploads a manifest like: [{"path": "/con/obj_seg_1", "etag": null, "size_bytes": 1048576}, {"path": "/con/obj_seg_2", "etag": "etag2", "size_bytes": null}, {"path": "/con/obj_seg_3", "etag": null, "size_bytes": null}] then the etag will only be verified for the /con/obj_seg_2 segment, and the segment size will only be verified for the /con/obj_seg_1 segment. However, the manifest that's ultimately stored (and can be retrieved with a ?multipart-manifest=get query-string) will still look like: [{"name": "/con/obj_seg_1", "hash": "etag1", "bytes": 1048576, ...}, {"name": "/con/obj_seg_2", "hash": "etag2", "bytes": 1048576, ...}, {"name": "/con/obj_seg_3", "hash": "etag3", "bytes": 1234, ...}] This allows the middleware to continue performing integrity checks on object GET. Change-Id: I2c4e585221387dd02a8679a50398d6b614407b12 DocImpact --- swift/common/middleware/slo.py | 54 ++++++++++++----- test/functional/tests.py | 79 +++++++++++++++++++++++++ test/unit/common/middleware/test_slo.py | 55 +++++++++++++++++ 3 files changed, 172 insertions(+), 16 deletions(-) diff --git a/swift/common/middleware/slo.py b/swift/common/middleware/slo.py index d8df829981..3c3ad7feac 100644 --- a/swift/common/middleware/slo.py +++ b/swift/common/middleware/slo.py @@ -36,8 +36,8 @@ json data format. The data to be supplied for each segment is:: path: the path to the segment (not including account) /container/object_name - etag: the etag given back when the segment was PUT - size_bytes: the size of the segment in bytes + etag: the etag given back when the segment was PUT, or null + size_bytes: the size of the segment in bytes, or null The format of the list will be:: @@ -48,15 +48,25 @@ The format of the list will be:: The number of object segments is limited to a configurable amount, default 1000. Each segment, except for the final one, must be at least 1 megabyte -(configurable). On upload, the middleware will head every segment passed in and -verify the size and etag of each. If any of the objects do not match (not +(configurable). On upload, the middleware will head every segment passed in to +verify: + + 1. the segment exists (i.e. the HEAD was successful); + 2. the segment meets minimum size requirements (if not the last segment); + 3. if the user provided a non-null etag, the etag matches; and + 4. if the user provided a non-null size_bytes, the size_bytes matches. + +Note that the etag and size_bytes keys are still required; this acts as a guard +against user errors such as typos. If any of the objects fail to verify (not found, size/etag mismatch, below minimum size) then the user will receive a 4xx error response. If everything does match, the user will receive a 2xx response and the SLO object is ready for downloading. Behind the scenes, on success, a json manifest generated from the user input is sent to object servers with an extra "X-Static-Large-Object: True" header -and a modified Content-Type. The parameter: swift_bytes=$total_size will be +and a modified Content-Type. The items in this manifest will include the etag +and size_bytes for each segment, regardless of whether the client specified +them for verification. The parameter: swift_bytes=$total_size will be appended to the existing Content-Type, where total_size is the sum of all the included segments' size_bytes. This extra parameter will be hidden from the user. @@ -73,9 +83,11 @@ Retrieving a Large Object A GET request to the manifest object will return the concatenation of the objects from the manifest much like DLO. If any of the segments from the -manifest are not found or their Etag/Content Length no longer match the -connection will drop. In this case a 409 Conflict will be logged in the proxy -logs and the user will receive incomplete results. +manifest are not found or their Etag/Content Length have changed since upload, +the connection will drop. In this case a 409 Conflict will be logged in the +proxy logs and the user will receive incomplete results. Note that this will be +enforced regardless of whether the user perfomed per-segment validation during +upload. The headers from this GET or HEAD request will return the metadata attached to the manifest object itself with some exceptions:: @@ -594,8 +606,11 @@ class StaticLargeObject(object): try: seg_size = int(seg_dict['size_bytes']) except (ValueError, TypeError): - raise HTTPBadRequest('Invalid Manifest File') - if seg_size < self.min_segment_size and \ + if seg_dict['size_bytes'] is None: + seg_size = None + else: + raise HTTPBadRequest('Invalid Manifest File') + if seg_size is not None and seg_size < self.min_segment_size and \ index < len(parsed_data) - 1: raise HTTPBadRequest( 'Each segment, except the last, must be at least ' @@ -613,11 +628,18 @@ class StaticLargeObject(object): head_seg_resp = \ Request.blank(obj_path, new_env).get_response(self) if head_seg_resp.is_success: - total_size += seg_size - if seg_size != head_seg_resp.content_length: + if head_seg_resp.content_length < self.min_segment_size and \ + index < len(parsed_data) - 1: + raise HTTPBadRequest( + 'Each segment, except the last, must be at least ' + '%d bytes.' % self.min_segment_size) + total_size += head_seg_resp.content_length + if seg_size is not None and \ + seg_size != head_seg_resp.content_length: problem_segments.append([quote(obj_name), 'Size Mismatch']) - if seg_dict['etag'] == head_seg_resp.etag: - slo_etag.update(seg_dict['etag']) + if seg_dict['etag'] is None or \ + seg_dict['etag'] == head_seg_resp.etag: + slo_etag.update(head_seg_resp.etag) else: problem_segments.append([quote(obj_name), 'Etag Mismatch']) if head_seg_resp.last_modified: @@ -629,8 +651,8 @@ class StaticLargeObject(object): last_modified_formatted = \ last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f') seg_data = {'name': '/' + seg_dict['path'].lstrip('/'), - 'bytes': seg_size, - 'hash': seg_dict['etag'], + 'bytes': head_seg_resp.content_length, + 'hash': head_seg_resp.etag, 'content_type': head_seg_resp.content_type, 'last_modified': last_modified_formatted} if config_true_value( diff --git a/test/functional/tests.py b/test/functional/tests.py index 3fbbdd784e..df96e5c4da 100644 --- a/test/functional/tests.py +++ b/test/functional/tests.py @@ -2152,6 +2152,15 @@ class TestSloEnv(object): seg_info['seg_e']]), parms={'multipart-manifest': 'put'}) + file_item = cls.container.file("manifest-db") + file_item.write( + json.dumps([ + {'path': seg_info['seg_d']['path'], 'etag': None, + 'size_bytes': None}, + {'path': seg_info['seg_b']['path'], 'etag': None, + 'size_bytes': None}, + ]), parms={'multipart-manifest': 'put'}) + class TestSlo(Base): env = TestSloEnv @@ -2259,6 +2268,52 @@ class TestSlo(Base): else: self.fail("Expected ResponseError but didn't get it") + def test_slo_unspecified_etag(self): + file_item = self.env.container.file("manifest-a-unspecified-etag") + file_item.write( + json.dumps([{ + 'size_bytes': 1024 * 1024, + 'etag': None, + 'path': '/%s/%s' % (self.env.container.name, 'seg_a')}]), + parms={'multipart-manifest': 'put'}) + self.assert_status(201) + + def test_slo_unspecified_size(self): + file_item = self.env.container.file("manifest-a-unspecified-size") + file_item.write( + json.dumps([{ + 'size_bytes': None, + 'etag': hashlib.md5('a' * 1024 * 1024).hexdigest(), + 'path': '/%s/%s' % (self.env.container.name, 'seg_a')}]), + parms={'multipart-manifest': 'put'}) + self.assert_status(201) + + def test_slo_missing_etag(self): + file_item = self.env.container.file("manifest-a-missing-etag") + try: + file_item.write( + json.dumps([{ + 'size_bytes': 1024 * 1024, + 'path': '/%s/%s' % (self.env.container.name, 'seg_a')}]), + parms={'multipart-manifest': 'put'}) + except ResponseError as err: + self.assertEqual(400, err.status) + else: + self.fail("Expected ResponseError but didn't get it") + + def test_slo_missing_size(self): + file_item = self.env.container.file("manifest-a-missing-size") + try: + file_item.write( + json.dumps([{ + 'etag': hashlib.md5('a' * 1024 * 1024).hexdigest(), + 'path': '/%s/%s' % (self.env.container.name, 'seg_a')}]), + parms={'multipart-manifest': 'put'}) + except ResponseError as err: + self.assertEqual(400, err.status) + else: + self.fail("Expected ResponseError but didn't get it") + def test_slo_overwrite_segment_with_manifest(self): file_item = self.env.container.file("seg_b") try: @@ -2367,6 +2422,30 @@ class TestSlo(Base): except ValueError: self.fail("GET with multipart-manifest=get got invalid json") + def test_slo_get_the_manifest_with_details_from_server(self): + manifest = self.env.container.file("manifest-db") + got_body = manifest.read(parms={'multipart-manifest': 'get'}) + + self.assertEqual('application/json; charset=utf-8', + manifest.content_type) + try: + value = json.loads(got_body) + except ValueError: + self.fail("GET with multipart-manifest=get got invalid json") + + self.assertEqual(len(value), 2) + self.assertEqual(value[0]['bytes'], 1024 * 1024) + self.assertEqual(value[0]['hash'], + hashlib.md5('d' * 1024 * 1024).hexdigest()) + self.assertEqual(value[0]['name'], + '/%s/seg_d' % self.env.container.name.decode("utf-8")) + + self.assertEqual(value[1]['bytes'], 1024 * 1024) + self.assertEqual(value[1]['hash'], + hashlib.md5('b' * 1024 * 1024).hexdigest()) + self.assertEqual(value[1]['name'], + '/%s/seg_b' % self.env.container.name.decode("utf-8")) + def test_slo_head_the_manifest(self): manifest = self.env.container.file("manifest-abcde") got_info = manifest.info(parms={'multipart-manifest': 'get'}) diff --git a/test/unit/common/middleware/test_slo.py b/test/unit/common/middleware/test_slo.py index d70a25ccc4..86a11734d3 100644 --- a/test/unit/common/middleware/test_slo.py +++ b/test/unit/common/middleware/test_slo.py @@ -441,6 +441,61 @@ class TestSloPutManifest(SloTestCase): self.assertEqual(status, '409 Conflict') self.assertEqual(self.app.call_count, 1) + def test_handle_multipart_put_skip_size_check(self): + good_data = json.dumps( + [{'path': '/checktest/a_1', 'etag': 'a', 'size_bytes': None}, + {'path': '/checktest/b_2', 'etag': 'b', 'size_bytes': None}]) + req = Request.blank( + '/v1/AUTH_test/checktest/man_3?multipart-manifest=put', + environ={'REQUEST_METHOD': 'PUT'}, body=good_data) + status, headers, body = self.call_slo(req) + self.assertEquals(self.app.call_count, 3) + + # Check that we still populated the manifest properly from our HEADs + req = Request.blank( + # this string looks weird, but it's just an artifact + # of FakeSwift + '/v1/AUTH_test/checktest/man_3?multipart-manifest=put', + environ={'REQUEST_METHOD': 'GET'}) + status, headers, body = self.call_app(req) + manifest_data = json.loads(body) + self.assertEquals(1, manifest_data[0]['bytes']) + self.assertEquals(2, manifest_data[1]['bytes']) + + def test_handle_multipart_put_skip_size_check_still_uses_min_size(self): + with patch.object(self.slo, 'min_segment_size', 50): + test_json_data = json.dumps([{'path': '/cont/small_object', + 'etag': 'etagoftheobjectsegment', + 'size_bytes': None}, + {'path': '/cont/small_object', + 'etag': 'etagoftheobjectsegment', + 'size_bytes': 100}]) + req = Request.blank('/v1/AUTH_test/c/o', body=test_json_data) + with self.assertRaises(HTTPException) as cm: + self.slo.handle_multipart_put(req, fake_start_response) + self.assertEquals(cm.exception.status_int, 400) + + def test_handle_multipart_put_skip_etag_check(self): + good_data = json.dumps( + [{'path': '/checktest/a_1', 'etag': None, 'size_bytes': 1}, + {'path': '/checktest/b_2', 'etag': None, 'size_bytes': 2}]) + req = Request.blank( + '/v1/AUTH_test/checktest/man_3?multipart-manifest=put', + environ={'REQUEST_METHOD': 'PUT'}, body=good_data) + status, headers, body = self.call_slo(req) + self.assertEquals(self.app.call_count, 3) + + # Check that we still populated the manifest properly from our HEADs + req = Request.blank( + # this string looks weird, but it's just an artifact + # of FakeSwift + '/v1/AUTH_test/checktest/man_3?multipart-manifest=put', + environ={'REQUEST_METHOD': 'GET'}) + status, headers, body = self.call_app(req) + manifest_data = json.loads(body) + self.assertEquals('a', manifest_data[0]['hash']) + self.assertEquals('b', manifest_data[1]['hash']) + class TestSloDeleteManifest(SloTestCase):