Merge "Allow SLO PUTs to forgo per-segment integrity checks"

2015-05-27 16:07:29 +00:00 · 2015-05-27 16:07:29 +00:00 · 5bffeb29b8
commit 5bffeb29b8
parent ccb07cfd4d bb716573ab
3 changed files with 172 additions and 16 deletions
--- a/swift/common/middleware/slo.py
+++ b/swift/common/middleware/slo.py
@ -36,8 +36,8 @@ json data format. The data to be supplied for each segment is::

    path: the path to the segment (not including account)
          /container/object_name
-    etag: the etag given back when the segment was PUT
-    size_bytes: the size of the segment in bytes
+    etag: the etag given back when the segment was PUT, or null
+    size_bytes: the size of the segment in bytes, or null

 The format of the list will be::

@ -48,15 +48,25 @@ The format of the list will be::

 The number of object segments is limited to a configurable amount, default
 1000. Each segment, except for the final one, must be at least 1 megabyte
-(configurable). On upload, the middleware will head every segment passed in and
-verify the size and etag of each. If any of the objects do not match (not
+(configurable). On upload, the middleware will head every segment passed in to
+verify:
+
+ 1. the segment exists (i.e. the HEAD was successful);
+ 2. the segment meets minimum size requirements (if not the last segment);
+ 3. if the user provided a non-null etag, the etag matches; and
+ 4. if the user provided a non-null size_bytes, the size_bytes matches.
+
+Note that the etag and size_bytes keys are still required; this acts as a guard
+against user errors such as typos. If any of the objects fail to verify (not
 found, size/etag mismatch, below minimum size) then the user will receive a 4xx
 error response. If everything does match, the user will receive a 2xx response
 and the SLO object is ready for downloading.

 Behind the scenes, on success, a json manifest generated from the user input is
 sent to object servers with an extra "X-Static-Large-Object: True" header
-and a modified Content-Type. The parameter: swift_bytes=$total_size will be
+and a modified Content-Type. The items in this manifest will include the etag
+and size_bytes for each segment, regardless of whether the client specified
+them for verification. The parameter: swift_bytes=$total_size will be
 appended to the existing Content-Type, where total_size is the sum of all
 the included segments' size_bytes. This extra parameter will be hidden from
 the user.
@ -73,9 +83,11 @@ Retrieving a Large Object

 A GET request to the manifest object will return the concatenation of the
 objects from the manifest much like DLO. If any of the segments from the
-manifest are not found or their Etag/Content Length no longer match the
-connection will drop. In this case a 409 Conflict will be logged in the proxy
-logs and the user will receive incomplete results.
+manifest are not found or their Etag/Content Length have changed since upload,
+the connection will drop. In this case a 409 Conflict will be logged in the
+proxy logs and the user will receive incomplete results. Note that this will be
+enforced regardless of whether the user perfomed per-segment validation during
+upload.

 The headers from this GET or HEAD request will return the metadata attached
 to the manifest object itself with some exceptions::
@ -594,8 +606,11 @@ class StaticLargeObject(object):
            try:
                seg_size = int(seg_dict['size_bytes'])
            except (ValueError, TypeError):
-                raise HTTPBadRequest('Invalid Manifest File')
-            if seg_size < self.min_segment_size and \
+                if seg_dict['size_bytes'] is None:
+                    seg_size = None
+                else:
+                    raise HTTPBadRequest('Invalid Manifest File')
+            if seg_size is not None and seg_size < self.min_segment_size and \
                    index < len(parsed_data) - 1:
                raise HTTPBadRequest(
                    'Each segment, except the last, must be at least '
@ -613,11 +628,18 @@ class StaticLargeObject(object):
            head_seg_resp = \
                Request.blank(obj_path, new_env).get_response(self)
            if head_seg_resp.is_success:
-                total_size += seg_size
-                if seg_size != head_seg_resp.content_length:
+                if head_seg_resp.content_length < self.min_segment_size and \
+                        index < len(parsed_data) - 1:
+                    raise HTTPBadRequest(
+                        'Each segment, except the last, must be at least '
+                        '%d bytes.' % self.min_segment_size)
+                total_size += head_seg_resp.content_length
+                if seg_size is not None and \
+                        seg_size != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_name), 'Size Mismatch'])
-                if seg_dict['etag'] == head_seg_resp.etag:
-                    slo_etag.update(seg_dict['etag'])
+                if seg_dict['etag'] is None or \
+                        seg_dict['etag'] == head_seg_resp.etag:
+                    slo_etag.update(head_seg_resp.etag)
                else:
                    problem_segments.append([quote(obj_name), 'Etag Mismatch'])
                if head_seg_resp.last_modified:
@ -629,8 +651,8 @@ class StaticLargeObject(object):
                last_modified_formatted = \
                    last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
                seg_data = {'name': '/' + seg_dict['path'].lstrip('/'),
-                            'bytes': seg_size,
-                            'hash': seg_dict['etag'],
+                            'bytes': head_seg_resp.content_length,
+                            'hash': head_seg_resp.etag,
                            'content_type': head_seg_resp.content_type,
                            'last_modified': last_modified_formatted}
                if config_true_value(
--- a/test/functional/tests.py
+++ b/test/functional/tests.py
@ -2152,6 +2152,15 @@ class TestSloEnv(object):
                seg_info['seg_e']]),
            parms={'multipart-manifest': 'put'})

+        file_item = cls.container.file("manifest-db")
+        file_item.write(
+            json.dumps([
+                {'path': seg_info['seg_d']['path'], 'etag': None,
+                 'size_bytes': None},
+                {'path': seg_info['seg_b']['path'], 'etag': None,
+                 'size_bytes': None},
+            ]), parms={'multipart-manifest': 'put'})
+

 class TestSlo(Base):
    env = TestSloEnv
@ -2259,6 +2268,52 @@ class TestSlo(Base):
        else:
            self.fail("Expected ResponseError but didn't get it")

+    def test_slo_unspecified_etag(self):
+        file_item = self.env.container.file("manifest-a-unspecified-etag")
+        file_item.write(
+            json.dumps([{
+                'size_bytes': 1024 * 1024,
+                'etag': None,
+                'path': '/%s/%s' % (self.env.container.name, 'seg_a')}]),
+            parms={'multipart-manifest': 'put'})
+        self.assert_status(201)
+
+    def test_slo_unspecified_size(self):
+        file_item = self.env.container.file("manifest-a-unspecified-size")
+        file_item.write(
+            json.dumps([{
+                'size_bytes': None,
+                'etag': hashlib.md5('a' * 1024 * 1024).hexdigest(),
+                'path': '/%s/%s' % (self.env.container.name, 'seg_a')}]),
+            parms={'multipart-manifest': 'put'})
+        self.assert_status(201)
+
+    def test_slo_missing_etag(self):
+        file_item = self.env.container.file("manifest-a-missing-etag")
+        try:
+            file_item.write(
+                json.dumps([{
+                    'size_bytes': 1024 * 1024,
+                    'path': '/%s/%s' % (self.env.container.name, 'seg_a')}]),
+                parms={'multipart-manifest': 'put'})
+        except ResponseError as err:
+            self.assertEqual(400, err.status)
+        else:
+            self.fail("Expected ResponseError but didn't get it")
+
+    def test_slo_missing_size(self):
+        file_item = self.env.container.file("manifest-a-missing-size")
+        try:
+            file_item.write(
+                json.dumps([{
+                    'etag': hashlib.md5('a' * 1024 * 1024).hexdigest(),
+                    'path': '/%s/%s' % (self.env.container.name, 'seg_a')}]),
+                parms={'multipart-manifest': 'put'})
+        except ResponseError as err:
+            self.assertEqual(400, err.status)
+        else:
+            self.fail("Expected ResponseError but didn't get it")
+
    def test_slo_overwrite_segment_with_manifest(self):
        file_item = self.env.container.file("seg_b")
        try:
@ -2367,6 +2422,30 @@ class TestSlo(Base):
        except ValueError:
            self.fail("GET with multipart-manifest=get got invalid json")

+    def test_slo_get_the_manifest_with_details_from_server(self):
+        manifest = self.env.container.file("manifest-db")
+        got_body = manifest.read(parms={'multipart-manifest': 'get'})
+
+        self.assertEqual('application/json; charset=utf-8',
+                         manifest.content_type)
+        try:
+            value = json.loads(got_body)
+        except ValueError:
+            self.fail("GET with multipart-manifest=get got invalid json")
+
+        self.assertEqual(len(value), 2)
+        self.assertEqual(value[0]['bytes'], 1024 * 1024)
+        self.assertEqual(value[0]['hash'],
+                         hashlib.md5('d' * 1024 * 1024).hexdigest())
+        self.assertEqual(value[0]['name'],
+                         '/%s/seg_d' % self.env.container.name.decode("utf-8"))
+
+        self.assertEqual(value[1]['bytes'], 1024 * 1024)
+        self.assertEqual(value[1]['hash'],
+                         hashlib.md5('b' * 1024 * 1024).hexdigest())
+        self.assertEqual(value[1]['name'],
+                         '/%s/seg_b' % self.env.container.name.decode("utf-8"))
+
    def test_slo_head_the_manifest(self):
        manifest = self.env.container.file("manifest-abcde")
        got_info = manifest.info(parms={'multipart-manifest': 'get'})
--- a/test/unit/common/middleware/test_slo.py
+++ b/test/unit/common/middleware/test_slo.py
@ -441,6 +441,61 @@ class TestSloPutManifest(SloTestCase):
        self.assertEqual(status, '409 Conflict')
        self.assertEqual(self.app.call_count, 1)

+    def test_handle_multipart_put_skip_size_check(self):
+        good_data = json.dumps(
+            [{'path': '/checktest/a_1', 'etag': 'a', 'size_bytes': None},
+             {'path': '/checktest/b_2', 'etag': 'b', 'size_bytes': None}])
+        req = Request.blank(
+            '/v1/AUTH_test/checktest/man_3?multipart-manifest=put',
+            environ={'REQUEST_METHOD': 'PUT'}, body=good_data)
+        status, headers, body = self.call_slo(req)
+        self.assertEquals(self.app.call_count, 3)
+
+        # Check that we still populated the manifest properly from our HEADs
+        req = Request.blank(
+            # this string looks weird, but it's just an artifact
+            # of FakeSwift
+            '/v1/AUTH_test/checktest/man_3?multipart-manifest=put',
+            environ={'REQUEST_METHOD': 'GET'})
+        status, headers, body = self.call_app(req)
+        manifest_data = json.loads(body)
+        self.assertEquals(1, manifest_data[0]['bytes'])
+        self.assertEquals(2, manifest_data[1]['bytes'])
+
+    def test_handle_multipart_put_skip_size_check_still_uses_min_size(self):
+        with patch.object(self.slo, 'min_segment_size', 50):
+            test_json_data = json.dumps([{'path': '/cont/small_object',
+                                          'etag': 'etagoftheobjectsegment',
+                                          'size_bytes': None},
+                                         {'path': '/cont/small_object',
+                                          'etag': 'etagoftheobjectsegment',
+                                          'size_bytes': 100}])
+            req = Request.blank('/v1/AUTH_test/c/o', body=test_json_data)
+            with self.assertRaises(HTTPException) as cm:
+                self.slo.handle_multipart_put(req, fake_start_response)
+            self.assertEquals(cm.exception.status_int, 400)
+
+    def test_handle_multipart_put_skip_etag_check(self):
+        good_data = json.dumps(
+            [{'path': '/checktest/a_1', 'etag': None, 'size_bytes': 1},
+             {'path': '/checktest/b_2', 'etag': None, 'size_bytes': 2}])
+        req = Request.blank(
+            '/v1/AUTH_test/checktest/man_3?multipart-manifest=put',
+            environ={'REQUEST_METHOD': 'PUT'}, body=good_data)
+        status, headers, body = self.call_slo(req)
+        self.assertEquals(self.app.call_count, 3)
+
+        # Check that we still populated the manifest properly from our HEADs
+        req = Request.blank(
+            # this string looks weird, but it's just an artifact
+            # of FakeSwift
+            '/v1/AUTH_test/checktest/man_3?multipart-manifest=put',
+            environ={'REQUEST_METHOD': 'GET'})
+        status, headers, body = self.call_app(req)
+        manifest_data = json.loads(body)
+        self.assertEquals('a', manifest_data[0]['hash'])
+        self.assertEquals('b', manifest_data[1]['hash'])
+

 class TestSloDeleteManifest(SloTestCase):