diff --git a/doc/source/api/large_objects.rst b/doc/source/api/large_objects.rst index 24bb4eeab2..739f5fea91 100644 --- a/doc/source/api/large_objects.rst +++ b/doc/source/api/large_objects.rst @@ -50,29 +50,31 @@ Static large objects To create a static large object, divide your content into pieces and create (upload) a segment object to contain each piece. -You must record the ``ETag`` response header that the **PUT** operation -returns. Alternatively, you can calculate the MD5 checksum of the -segment prior to uploading and include this in the ``ETag`` request -header. This ensures that the upload cannot corrupt your data. - -List the name of each segment object along with its size and MD5 -checksum in order. - Create a manifest object. Include the ``multipart-manifest=put`` query string at the end of the manifest object name to indicate that this is a manifest object. The body of the **PUT** request on the manifest object comprises a json -list, where each element contains the following attributes: +list, where each element is an object representing a segment. These objects +may contain the following attributes: -- ``path``. The container and object name in the format: +- ``path`` (required). The container and object name in the format: ``{container-name}/{object-name}`` -- ``etag``. The MD5 checksum of the content of the segment object. This - value must match the ``ETag`` of that object. +- ``etag`` (optional). If provided, this value must match the ``ETag`` + of the segment object. This was included in the response headers when + the segment was created. Generally, this will be the MD5 sum of the + segment. -- ``size_bytes``. The size of the segment object. This value must match - the ``Content-Length`` of that object. +- ``size_bytes`` (optional). The size of the segment object. If provided, + this value must match the ``Content-Length`` of that object. + +- ``range`` (optional). The subset of the referenced object that should + be used for segment data. 
This behaves similarly to the ``Range`` header. + If omitted, the entire object will be used. + +Providing the optional ``etag`` and ``size_bytes`` attributes for each +segment ensures that the upload cannot corrupt your data. **Example Static large object manifest list** diff --git a/swift/common/middleware/slo.py b/swift/common/middleware/slo.py index 24bc5a447b..5aedb91997 100644 --- a/swift/common/middleware/slo.py +++ b/swift/common/middleware/slo.py @@ -39,10 +39,10 @@ Key Description =========== ======================================================== path the path to the segment object (not including account) /container/object_name -etag the ETag given back when the segment object was PUT, - or null -size_bytes the size of the complete segment object in - bytes, or null +etag (optional) the ETag given back when the segment object + was PUT +size_bytes (optional) the size of the complete segment object in + bytes range (optional) the (inclusive) range within the object to use as a segment. If omitted, the entire object is used. =========== ======================================================== @@ -67,8 +67,8 @@ head every segment passed in to verify: 5. if the user provided a range, it is a singular, syntactically correct range that is satisfiable given the size of the object. -Note that the etag and size_bytes keys are still required; this acts as a guard -against user errors such as typos. If any of the objects fail to verify (not +Note that the etag and size_bytes keys are optional; if omitted, the +verification is not performed. If any of the objects fail to verify (not found, size/etag mismatch, below minimum size, invalid range) then the user will receive a 4xx error response. If everything does match, the user will receive a 2xx response and the SLO object is ready for downloading. @@ -106,12 +106,10 @@ If a user uploads this manifest: .. 
code:: - [{"path": "/con/obj_seg_1", "etag": null, "size_bytes": 2097152, - "range": "0-1048576"}, - {"path": "/con/obj_seg_2", "etag": null, "size_bytes": 2097152, + [{"path": "/con/obj_seg_1", "size_bytes": 2097152, "range": "0-1048576"}, + {"path": "/con/obj_seg_2", "size_bytes": 2097152, "range": "512-1550000"}, - {"path": "/con/obj_seg_1", "etag": null, "size_bytes": 2097152, - "range": "-2048"}] + {"path": "/con/obj_seg_1", "size_bytes": 2097152, "range": "-2048"}] The segment will consist of the first 1048576 bytes of /con/obj_seg_1, followed by bytes 513 through 1550000 (inclusive) of /con/obj_seg_2, and @@ -230,8 +228,8 @@ DEFAULT_MAX_MANIFEST_SEGMENTS = 1000 DEFAULT_MAX_MANIFEST_SIZE = 1024 * 1024 * 2 # 2 MiB -REQUIRED_SLO_KEYS = set(['path', 'etag', 'size_bytes']) -OPTIONAL_SLO_KEYS = set(['range']) +REQUIRED_SLO_KEYS = set(['path']) +OPTIONAL_SLO_KEYS = set(['range', 'etag', 'size_bytes']) ALLOWED_SLO_KEYS = REQUIRED_SLO_KEYS | OPTIONAL_SLO_KEYS SYSMETA_SLO_ETAG = get_sys_meta_prefix('object') + 'slo-etag' @@ -301,10 +299,10 @@ def parse_and_validate_input(req_body, req_path): if not isinstance(seg_dict['path'], six.string_types): errors.append("Index %d: \"path\" must be a string" % seg_index) continue - if not (seg_dict['etag'] is None or + if not (seg_dict.get('etag') is None or isinstance(seg_dict['etag'], six.string_types)): - errors.append( - "Index %d: \"etag\" must be a string or null" % seg_index) + errors.append('Index %d: "etag" must be a string or null ' + '(if provided)' % seg_index) continue if '/' not in seg_dict['path'].strip('/'): @@ -313,7 +311,7 @@ def parse_and_validate_input(req_body, req_path): "the form /container/object." 
% seg_index) continue - seg_size = seg_dict['size_bytes'] + seg_size = seg_dict.get('size_bytes') if seg_size is not None: try: seg_size = int(seg_size) @@ -932,10 +930,10 @@ class StaticLargeObject(object): problem_segments.append( [quote(obj_name), 'Too small; each segment must be at least 1 byte.']) - if seg_dict['size_bytes'] is not None and \ + if seg_dict.get('size_bytes') is not None and \ seg_dict['size_bytes'] != head_seg_resp.content_length: problem_segments.append([quote(obj_name), 'Size Mismatch']) - if seg_dict['etag'] is not None and \ + if seg_dict.get('etag') is not None and \ seg_dict['etag'] != head_seg_resp.etag: problem_segments.append([quote(obj_name), 'Etag Mismatch']) if head_seg_resp.last_modified: diff --git a/test/functional/test_slo.py b/test/functional/test_slo.py index b4ced42d4b..9e73020912 100644 --- a/test/functional/test_slo.py +++ b/test/functional/test_slo.py @@ -473,9 +473,36 @@ class TestSlo(Base): def test_slo_missing_etag(self): file_item = self.env.container.file("manifest-a-missing-etag") + file_item.write( + json.dumps([{ + 'size_bytes': 1024 * 1024, + 'path': '/%s/%s' % (self.env.container.name, 'seg_a')}]), + parms={'multipart-manifest': 'put'}) + self.assert_status(201) + + def test_slo_missing_size(self): + file_item = self.env.container.file("manifest-a-missing-size") + file_item.write( + json.dumps([{ + 'etag': hashlib.md5('a' * 1024 * 1024).hexdigest(), + 'path': '/%s/%s' % (self.env.container.name, 'seg_a')}]), + parms={'multipart-manifest': 'put'}) + self.assert_status(201) + + def test_slo_path_only(self): + file_item = self.env.container.file("manifest-a-path-only") + file_item.write( + json.dumps([{ + 'path': '/%s/%s' % (self.env.container.name, 'seg_a')}]), + parms={'multipart-manifest': 'put'}) + self.assert_status(201) + + def test_slo_typo_etag(self): + file_item = self.env.container.file("manifest-a-typo-etag") try: file_item.write( json.dumps([{ + 'teag': hashlib.md5('a' * 1024 * 1024).hexdigest(), 
'size_bytes': 1024 * 1024, 'path': '/%s/%s' % (self.env.container.name, 'seg_a')}]), parms={'multipart-manifest': 'put'}) @@ -484,12 +511,13 @@ class TestSlo(Base): else: self.fail("Expected ResponseError but didn't get it") - def test_slo_missing_size(self): - file_item = self.env.container.file("manifest-a-missing-size") + def test_slo_typo_size(self): + file_item = self.env.container.file("manifest-a-typo-size") try: file_item.write( json.dumps([{ 'etag': hashlib.md5('a' * 1024 * 1024).hexdigest(), + 'siz_bytes': 1024 * 1024, 'path': '/%s/%s' % (self.env.container.name, 'seg_a')}]), parms={'multipart-manifest': 'put'}) except ResponseError as err: diff --git a/test/unit/common/middleware/test_slo.py b/test/unit/common/middleware/test_slo.py index 6f428b74de..f932a1d5aa 100644 --- a/test/unit/common/middleware/test_slo.py +++ b/test/unit/common/middleware/test_slo.py @@ -168,6 +168,18 @@ class TestSloMiddleware(SloTestCase): 'size_bytes': 100, 'foo': 'bar', 'baz': 'quux'}]))) + # This also catches typos + self.assertEqual( + 'Index 0: extraneous keys "egat"\n', + self._put_bogus_slo(json.dumps( + [{'path': '/cont/object', 'egat': 'etagoftheobjectsegment', + 'size_bytes': 100}]))) + self.assertEqual( + 'Index 0: extraneous keys "siez_bytes"\n', + self._put_bogus_slo(json.dumps( + [{'path': '/cont/object', 'etag': 'etagoftheobjectsegment', + 'siez_bytes': 100}]))) + def test_bogus_input_ranges(self): self.assertEqual( "Index 0: invalid range\n", @@ -568,9 +580,11 @@ class TestSloPutManifest(SloTestCase): ], sorted(errors)) def test_handle_multipart_put_skip_size_check(self): - good_data = json.dumps( - [{'path': '/checktest/a_1', 'etag': 'a', 'size_bytes': None}, - {'path': '/checktest/b_2', 'etag': 'b', 'size_bytes': None}]) + good_data = json.dumps([ + # Explicit None will skip it + {'path': '/checktest/a_1', 'etag': 'a', 'size_bytes': None}, + # ...as will omitting it entirely + {'path': '/checktest/b_2', 'etag': 'b'}]) req = Request.blank( 
'/v1/AUTH_test/checktest/man_3?multipart-manifest=put', environ={'REQUEST_METHOD': 'PUT'}, body=good_data) @@ -618,9 +632,11 @@ class TestSloPutManifest(SloTestCase): self.assertIn('Etag Mismatch', cm.exception.body) def test_handle_multipart_put_skip_etag_check(self): - good_data = json.dumps( - [{'path': '/checktest/a_1', 'etag': None, 'size_bytes': 1}, - {'path': '/checktest/b_2', 'etag': None, 'size_bytes': 2}]) + good_data = json.dumps([ + # Explicit None will skip it + {'path': '/checktest/a_1', 'etag': None, 'size_bytes': 1}, + # ...as will omitting it entirely + {'path': '/checktest/b_2', 'size_bytes': 2}]) req = Request.blank( '/v1/AUTH_test/checktest/man_3?multipart-manifest=put', environ={'REQUEST_METHOD': 'PUT'}, body=good_data) @@ -686,6 +702,7 @@ class TestSloPutManifest(SloTestCase): '/v1/AUTH_test/checktest/man_3?multipart-manifest=put', environ={'REQUEST_METHOD': 'PUT'}, body=good_data) status, headers, body = self.call_slo(req) + self.assertEqual(('201 Created', ''), (status, body)) expected_etag = '"%s"' % md5hex('ab:1-1;b:0-0;aetagoftheobjectsegment:' '10-40;') self.assertEqual(expected_etag, dict(headers)['Etag'])