Merge "Allow SLO PUTs to forgo per-segment integrity checks"

2015-05-27 16:07:29 +00:00 · 2015-05-27 16:07:29 +00:00 · 5bffeb29b8
commit 5bffeb29b8
parent ccb07cfd4d bb716573ab
3 changed files with 172 additions and 16 deletions
--- a/swift/common/middleware/slo.py
+++ b/swift/common/middleware/slo.py
@ -36,8 +36,8 @@ json data format. The data to be supplied for each segment is::
    path: the path to the segment (not including account)
          /container/object_name
-    etag: the etag given back when the segment was PUT
+    etag: the etag given back when the segment was PUT, or null
-    size_bytes: the size of the segment in bytes
+    size_bytes: the size of the segment in bytes, or null
 The format of the list will be::
@ -48,15 +48,25 @@ The format of the list will be::
 The number of object segments is limited to a configurable amount, default
 1000. Each segment, except for the final one, must be at least 1 megabyte
-(configurable). On upload, the middleware will head every segment passed in and
+(configurable). On upload, the middleware will head every segment passed in to
-verify the size and etag of each. If any of the objects do not match (not
+verify:
 1. the segment exists (i.e. the HEAD was successful);
 2. the segment meets minimum size requirements (if not the last segment);
 3. if the user provided a non-null etag, the etag matches; and
 4. if the user provided a non-null size_bytes, the size_bytes matches.
 Note that the etag and size_bytes keys are still required; this acts as a guard
 against user errors such as typos. If any of the objects fail to verify (not
 found, size/etag mismatch, below minimum size) then the user will receive a 4xx
 error response. If everything does match, the user will receive a 2xx response
 and the SLO object is ready for downloading.
 Behind the scenes, on success, a json manifest generated from the user input is
 sent to object servers with an extra "X-Static-Large-Object: True" header
-and a modified Content-Type. The parameter: swift_bytes=$total_size will be
+and a modified Content-Type. The items in this manifest will include the etag
 and size_bytes for each segment, regardless of whether the client specified
 them for verification. The parameter: swift_bytes=$total_size will be
 appended to the existing Content-Type, where total_size is the sum of all
 the included segments' size_bytes. This extra parameter will be hidden from
 the user.
@ -73,9 +83,11 @@ Retrieving a Large Object
 A GET request to the manifest object will return the concatenation of the
 objects from the manifest much like DLO. If any of the segments from the
-manifest are not found or their Etag/Content Length no longer match the
+manifest are not found or their Etag/Content Length have changed since upload,
-connection will drop. In this case a 409 Conflict will be logged in the proxy
+the connection will drop. In this case a 409 Conflict will be logged in the
-logs and the user will receive incomplete results.
+proxy logs and the user will receive incomplete results. Note that this will be
 enforced regardless of whether the user performed per-segment validation during
 upload.
 The headers from this GET or HEAD request will return the metadata attached
 to the manifest object itself with some exceptions::
@ -594,8 +606,11 @@ class StaticLargeObject(object):
            try:
                seg_size = int(seg_dict['size_bytes'])
            except (ValueError, TypeError):
                if seg_dict['size_bytes'] is None:
                    seg_size = None
                else:
                    raise HTTPBadRequest('Invalid Manifest File')
-            if seg_size < self.min_segment_size and \
+            if seg_size is not None and seg_size < self.min_segment_size and \
                    index < len(parsed_data) - 1:
                raise HTTPBadRequest(
                    'Each segment, except the last, must be at least '
@ -613,11 +628,18 @@ class StaticLargeObject(object):
            head_seg_resp = \
                Request.blank(obj_path, new_env).get_response(self)
            if head_seg_resp.is_success:
-                total_size += seg_size
+                if head_seg_resp.content_length < self.min_segment_size and \
-                if seg_size != head_seg_resp.content_length:
+                        index < len(parsed_data) - 1:
                    raise HTTPBadRequest(
                        'Each segment, except the last, must be at least '
                        '%d bytes.' % self.min_segment_size)
                total_size += head_seg_resp.content_length
                if seg_size is not None and \
                        seg_size != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_name), 'Size Mismatch'])
-                if seg_dict['etag'] == head_seg_resp.etag:
+                if seg_dict['etag'] is None or \
-                    slo_etag.update(seg_dict['etag'])
+                        seg_dict['etag'] == head_seg_resp.etag:
                    slo_etag.update(head_seg_resp.etag)
                else:
                    problem_segments.append([quote(obj_name), 'Etag Mismatch'])
                if head_seg_resp.last_modified:
@ -629,8 +651,8 @@ class StaticLargeObject(object):
                last_modified_formatted = \
                    last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
                seg_data = {'name': '/' + seg_dict['path'].lstrip('/'),
-                            'bytes': seg_size,
+                            'bytes': head_seg_resp.content_length,
-                            'hash': seg_dict['etag'],
+                            'hash': head_seg_resp.etag,
                            'content_type': head_seg_resp.content_type,
                            'last_modified': last_modified_formatted}
                if config_true_value(
--- a/test/functional/tests.py
+++ b/test/functional/tests.py
@ -2152,6 +2152,15 @@ class TestSloEnv(object):
                seg_info['seg_e']]),
            parms={'multipart-manifest': 'put'})
        file_item = cls.container.file("manifest-db")
        file_item.write(
            json.dumps([
                {'path': seg_info['seg_d']['path'], 'etag': None,
                 'size_bytes': None},
                {'path': seg_info['seg_b']['path'], 'etag': None,
                 'size_bytes': None},
            ]), parms={'multipart-manifest': 'put'})
 class TestSlo(Base):
    env = TestSloEnv
@ -2259,6 +2268,52 @@ class TestSlo(Base):
        else:
            self.fail("Expected ResponseError but didn't get it")
    def test_slo_unspecified_etag(self):
        """PUT a one-segment manifest whose etag is null; expect a 201.

        The segment entry still carries an explicit size_bytes; only the
        etag is given as None.
        """
        manifest = self.env.container.file("manifest-a-unspecified-etag")
        segment = {
            'size_bytes': 1024 * 1024,
            'etag': None,
            'path': '/%s/%s' % (self.env.container.name, 'seg_a')}
        payload = json.dumps([segment])
        manifest.write(payload, parms={'multipart-manifest': 'put'})
        self.assert_status(201)
    def test_slo_unspecified_size(self):
        """PUT a one-segment manifest whose size_bytes is null; expect a 201.

        The etag is still supplied: the md5 of one mebibyte of 'a'.
        """
        manifest = self.env.container.file("manifest-a-unspecified-size")
        segment = {
            'size_bytes': None,
            'etag': hashlib.md5('a' * 1024 * 1024).hexdigest(),
            'path': '/%s/%s' % (self.env.container.name, 'seg_a')}
        payload = json.dumps([segment])
        manifest.write(payload, parms={'multipart-manifest': 'put'})
        self.assert_status(201)
    def test_slo_missing_etag(self):
        """A segment entry with no 'etag' key at all must be rejected.

        The write is expected to raise ResponseError with status 400; the
        test fails if no ResponseError is raised.
        """
        manifest = self.env.container.file("manifest-a-missing-etag")
        # Deliberately omit the 'etag' key (as opposed to sending null).
        segment = {
            'size_bytes': 1024 * 1024,
            'path': '/%s/%s' % (self.env.container.name, 'seg_a')}
        payload = json.dumps([segment])
        try:
            manifest.write(payload, parms={'multipart-manifest': 'put'})
        except ResponseError as err:
            self.assertEqual(400, err.status)
        else:
            self.fail("Expected ResponseError but didn't get it")
    def test_slo_missing_size(self):
        """A segment entry with no 'size_bytes' key at all must be rejected.

        The write is expected to raise ResponseError with status 400; the
        test fails if no ResponseError is raised.
        """
        manifest = self.env.container.file("manifest-a-missing-size")
        # Deliberately omit the 'size_bytes' key (as opposed to sending null).
        segment = {
            'etag': hashlib.md5('a' * 1024 * 1024).hexdigest(),
            'path': '/%s/%s' % (self.env.container.name, 'seg_a')}
        payload = json.dumps([segment])
        try:
            manifest.write(payload, parms={'multipart-manifest': 'put'})
        except ResponseError as err:
            self.assertEqual(400, err.status)
        else:
            self.fail("Expected ResponseError but didn't get it")
    def test_slo_overwrite_segment_with_manifest(self):
        file_item = self.env.container.file("seg_b")
        try:
@ -2367,6 +2422,30 @@ class TestSlo(Base):
        except ValueError:
            self.fail("GET with multipart-manifest=get got invalid json")
    def test_slo_get_the_manifest_with_details_from_server(self):
        """Fetch "manifest-db" raw and check its per-segment details.

        Reads the manifest with multipart-manifest=get, checks the JSON
        content type, and verifies that both entries carry the expected
        bytes, hash and name for seg_d and seg_b (in that order).
        """
        manifest = self.env.container.file("manifest-db")
        got_body = manifest.read(parms={'multipart-manifest': 'get'})

        self.assertEqual('application/json; charset=utf-8',
                         manifest.content_type)
        try:
            value = json.loads(got_body)
        except ValueError:
            self.fail("GET with multipart-manifest=get got invalid json")

        self.assertEqual(len(value), 2)
        # (fill character, object name) for each expected segment, in order.
        expected_segments = (('d', 'seg_d'), ('b', 'seg_b'))
        for position, (fill_char, seg_name) in enumerate(expected_segments):
            entry = value[position]
            self.assertEqual(entry['bytes'], 1024 * 1024)
            self.assertEqual(entry['hash'],
                             hashlib.md5(fill_char * 1024 * 1024).hexdigest())
            self.assertEqual(
                entry['name'],
                '/%s/%s' % (self.env.container.name.decode("utf-8"),
                            seg_name))
    def test_slo_head_the_manifest(self):
        manifest = self.env.container.file("manifest-abcde")
        got_info = manifest.info(parms={'multipart-manifest': 'get'})
--- a/test/unit/common/middleware/test_slo.py
+++ b/test/unit/common/middleware/test_slo.py
@ -441,6 +441,61 @@ class TestSloPutManifest(SloTestCase):
        self.assertEqual(status, '409 Conflict')
        self.assertEqual(self.app.call_count, 1)
    def test_handle_multipart_put_skip_size_check(self):
        """A manifest PUT with null size_bytes still records real sizes.

        Both segments are submitted with size_bytes set to None. After the
        PUT, the stored manifest is read back and each entry's 'bytes' is
        expected to be populated (1 and 2 respectively).
        """
        good_data = json.dumps(
            [{'path': '/checktest/a_1', 'etag': 'a', 'size_bytes': None},
             {'path': '/checktest/b_2', 'etag': 'b', 'size_bytes': None}])
        req = Request.blank(
            '/v1/AUTH_test/checktest/man_3?multipart-manifest=put',
            environ={'REQUEST_METHOD': 'PUT'}, body=good_data)
        status, headers, body = self.call_slo(req)
        # presumably two segment HEADs plus the manifest PUT -- confirm
        # against the fake app's registration in setUp
        self.assertEqual(self.app.call_count, 3)

        # Check that we still populated the manifest properly from our HEADs
        req = Request.blank(
            # this string looks weird, but it's just an artifact
            # of FakeSwift
            '/v1/AUTH_test/checktest/man_3?multipart-manifest=put',
            environ={'REQUEST_METHOD': 'GET'})
        status, headers, body = self.call_app(req)
        manifest_data = json.loads(body)
        self.assertEqual(1, manifest_data[0]['bytes'])
        self.assertEqual(2, manifest_data[1]['bytes'])
    def test_handle_multipart_put_skip_size_check_still_uses_min_size(self):
        """Null size_bytes must not bypass the minimum segment size.

        With min_segment_size patched to 50, a manifest whose first
        (non-final) segment has size_bytes None is expected to make
        handle_multipart_put raise an HTTPException with status 400.
        """
        with patch.object(self.slo, 'min_segment_size', 50):
            # NOTE(review): relies on the fake backend reporting
            # /cont/small_object as shorter than 50 bytes -- confirm in setUp
            test_json_data = json.dumps([{'path': '/cont/small_object',
                                          'etag': 'etagoftheobjectsegment',
                                          'size_bytes': None},
                                         {'path': '/cont/small_object',
                                          'etag': 'etagoftheobjectsegment',
                                          'size_bytes': 100}])
            req = Request.blank('/v1/AUTH_test/c/o', body=test_json_data)
            with self.assertRaises(HTTPException) as cm:
                self.slo.handle_multipart_put(req, fake_start_response)
            self.assertEqual(cm.exception.status_int, 400)
    def test_handle_multipart_put_skip_etag_check(self):
        """A manifest PUT with null etags still records real hashes.

        Both segments are submitted with etag set to None. After the PUT,
        the stored manifest is read back and each entry's 'hash' is
        expected to be populated ('a' and 'b' respectively).
        """
        good_data = json.dumps(
            [{'path': '/checktest/a_1', 'etag': None, 'size_bytes': 1},
             {'path': '/checktest/b_2', 'etag': None, 'size_bytes': 2}])
        req = Request.blank(
            '/v1/AUTH_test/checktest/man_3?multipart-manifest=put',
            environ={'REQUEST_METHOD': 'PUT'}, body=good_data)
        status, headers, body = self.call_slo(req)
        # presumably two segment HEADs plus the manifest PUT -- confirm
        # against the fake app's registration in setUp
        self.assertEqual(self.app.call_count, 3)

        # Check that we still populated the manifest properly from our HEADs
        req = Request.blank(
            # this string looks weird, but it's just an artifact
            # of FakeSwift
            '/v1/AUTH_test/checktest/man_3?multipart-manifest=put',
            environ={'REQUEST_METHOD': 'GET'})
        status, headers, body = self.call_app(req)
        manifest_data = json.loads(body)
        self.assertEqual('a', manifest_data[0]['hash'])
        self.assertEqual('b', manifest_data[1]['hash'])
 class TestSloDeleteManifest(SloTestCase):