diff --git a/swift/common/middleware/slo.py b/swift/common/middleware/slo.py index d0d5263401..0cd80bc7e9 100644 --- a/swift/common/middleware/slo.py +++ b/swift/common/middleware/slo.py @@ -184,32 +184,132 @@ DEFAULT_MAX_MANIFEST_SEGMENTS = 1000 DEFAULT_MAX_MANIFEST_SIZE = 1024 * 1024 * 2 # 2 MiB -def parse_input(raw_data): +REQUIRED_SLO_KEYS = set(['path', 'etag', 'size_bytes']) +OPTIONAL_SLO_KEYS = set(['range']) +ALLOWED_SLO_KEYS = REQUIRED_SLO_KEYS | OPTIONAL_SLO_KEYS + + +def parse_and_validate_input(req_body, req_path, min_segment_size): """ - Given a request will parse the body and return a list of dictionaries - :raises: HTTPException on parse errors + Given a request body, parses it and returns a list of dictionaries. + + The output structure is nearly the same as the input structure, but it + is not an exact copy. Given a valid input dictionary `d_in`, its + corresponding output dictionary `d_out` will be as follows: + + * d_out['etag'] == d_in['etag'] + + * d_out['path'] == d_in['path'] + + * d_in['size_bytes'] can be a string ("12") or an integer (12), but + d_out['size_bytes'] is an integer. + + * (optional) d_in['range'] is a string of the form "M-N", "M-", or + "-N", where M and N are non-negative integers. d_out['range'] is the + corresponding swob.Range object. If d_in does not have a key + 'range', neither will d_out. + + :raises: HTTPException on parse errors or semantic errors (e.g. bogus + JSON structure, syntactically invalid ranges) + :returns: a list of dictionaries on success """ try: - parsed_data = json.loads(raw_data) + parsed_data = json.loads(req_body) except ValueError: - raise HTTPBadRequest("Manifest must be valid json.") + raise HTTPBadRequest("Manifest must be valid JSON.\n") - req_keys = set(['path', 'etag', 'size_bytes']) - opt_keys = set(['range']) - try: - for seg_dict in parsed_data: - if (not (req_keys <= set(seg_dict) <= req_keys | opt_keys) or - '/' not in seg_dict['path'].lstrip('/')): - raise HTTPBadRequest('Invalid SLO Manifest File') + if not isinstance(parsed_data, list): + raise HTTPBadRequest("Manifest must be a list.\n") - if seg_dict.get('range'): - try: - seg_dict['range'] = Range('bytes=%s' % seg_dict['range']) - except ValueError: - raise HTTPBadRequest('Invalid SLO Manifest File') - except (AttributeError, TypeError): - raise HTTPBadRequest('Invalid SLO Manifest File') + # If we got here, req_path refers to an object, so this won't ever raise + # ValueError. + vrs, account, _junk = split_path(req_path, 3, 3, True) + + errors = [] + num_segs = len(parsed_data) + for seg_index, seg_dict in enumerate(parsed_data): + if not isinstance(seg_dict, dict): + errors.append("Index %d: not a JSON object" % seg_index) + continue + + missing_keys = [k for k in REQUIRED_SLO_KEYS if k not in seg_dict] + if missing_keys: + errors.append( + "Index %d: missing keys %s" + % (seg_index, + ", ".join('"%s"' % (mk,) for mk in sorted(missing_keys)))) + continue + + extraneous_keys = [k for k in seg_dict if k not in ALLOWED_SLO_KEYS] + if extraneous_keys: + errors.append( + "Index %d: extraneous keys %s" + % (seg_index, + ", ".join('"%s"' % (ek,) + for ek in sorted(extraneous_keys)))) + continue + + if not isinstance(seg_dict['path'], basestring): + errors.append("Index %d: \"path\" must be a string" % seg_index) + continue + if not (seg_dict['etag'] is None or + isinstance(seg_dict['etag'], basestring)): + errors.append( + "Index %d: \"etag\" must be a string or null" % seg_index) + continue + + if '/' not in seg_dict['path'].strip('/'): + errors.append( + "Index %d: path does not refer to an object. Path must be of " + "the form /container/object." % seg_index) + continue + + seg_size = seg_dict['size_bytes'] + if seg_size is not None: + try: + seg_size = int(seg_size) + seg_dict['size_bytes'] = seg_size + except (TypeError, ValueError): + errors.append("Index %d: invalid size_bytes" % seg_index) + continue + if (seg_size < min_segment_size and seg_index < num_segs - 1): + errors.append("Index %d: too small; each segment, except " + "the last, must be at least %d bytes." + % (seg_index, min_segment_size)) + continue + + obj_path = '/'.join(['', vrs, account, seg_dict['path'].lstrip('/')]) + if req_path == quote(obj_path): + errors.append( + "Index %d: manifest must not include itself as a segment" + % seg_index) + continue + + if seg_dict.get('range'): + try: + seg_dict['range'] = Range('bytes=%s' % seg_dict['range']) + except ValueError: + errors.append("Index %d: invalid range" % seg_index) + continue + + if len(seg_dict['range'].ranges) > 1: + errors.append("Index %d: multiple ranges (only one allowed)" + % seg_index) + continue + + # If the user *told* us the object's size, we can check range + # satisfiability right now. If they lied about the size, we'll + # fail that validation later. + if (seg_size is not None and + len(seg_dict['range'].ranges_for_length(seg_size)) != 1): + errors.append("Index %d: unsatisfiable range" % seg_index) + continue + + if errors: + error_message = "".join(e + "\n" for e in errors) + raise HTTPBadRequest(error_message, + headers={"Content-Type": "text/plain"}) return parsed_data @@ -639,7 +739,9 @@ class StaticLargeObject(object): if req.content_length is None and \ req.headers.get('transfer-encoding', '').lower() != 'chunked': raise HTTPLengthRequired(request=req) - parsed_data = parse_input(req.body_file.read(self.max_manifest_size)) + parsed_data = parse_and_validate_input( + req.body_file.read(self.max_manifest_size), + req.path, self.min_segment_size) problem_segments = [] if len(parsed_data) > self.max_manifest_segments: @@ -658,23 +760,6 @@ class StaticLargeObject(object): if isinstance(obj_name, six.text_type): obj_name = obj_name.encode('utf-8') obj_path = '/'.join(['', vrs, account, obj_name.lstrip('/')]) - if req.path == quote(obj_path): - raise HTTPConflict( - 'Manifest object name "%s" ' - 'cannot be included in the manifest' - % obj_name) - try: - seg_size = int(seg_dict['size_bytes']) - except (ValueError, TypeError): - if seg_dict['size_bytes'] is None: - seg_size = None - else: - raise HTTPBadRequest('Invalid Manifest File') - if seg_size is not None and seg_size < self.min_segment_size and \ - index < len(parsed_data) - 1: - raise HTTPBadRequest( - 'Each segment, except the last, must be at least ' - '%d bytes.' % self.min_segment_size) new_env = req.environ.copy() new_env['PATH_INFO'] = obj_path @@ -693,34 +778,35 @@ class StaticLargeObject(object): if head_seg_resp.is_success: segment_length = head_seg_resp.content_length if seg_dict.get('range'): - # Since we now know the length, we can normalize the ranges + # Since we now know the length, we can normalize the + # range. We know that there is exactly one range + # requested since we checked that earlier in + # parse_and_validate_input(). ranges = seg_dict['range'].ranges_for_length( head_seg_resp.content_length) if not ranges: problem_segments.append([quote(obj_name), 'Unsatisfiable Range']) - elif len(ranges) > 1: - problem_segments.append([quote(obj_name), - 'Multiple Ranges']) elif ranges == [(0, head_seg_resp.content_length)]: # Just one range, and it exactly matches the object. # Why'd we do this again? - seg_dict['range'] = None + del seg_dict['range'] segment_length = head_seg_resp.content_length else: - range = ranges[0] - seg_dict['range'] = '%d-%d' % (range[0], range[1] - 1) - segment_length = range[1] - range[0] + rng = ranges[0] + seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1) + segment_length = rng[1] - rng[0] if segment_length < self.min_segment_size and \ index < len(parsed_data) - 1: - raise HTTPBadRequest( - 'Each segment, except the last, must be at least ' - '%d bytes.' % self.min_segment_size) + problem_segments.append( + [quote(obj_name), + 'Too small; each segment, except the last, must be ' + 'at least %d bytes.' % self.min_segment_size]) total_size += segment_length - if seg_size is not None and \ - seg_size != head_seg_resp.content_length: + if seg_dict['size_bytes'] is not None and \ + seg_dict['size_bytes'] != head_seg_resp.content_length: problem_segments.append([quote(obj_name), 'Size Mismatch']) if seg_dict['etag'] is None or \ seg_dict['etag'] == head_seg_resp.etag: diff --git a/test/functional/tests.py b/test/functional/tests.py index 8f7e2e8d17..6c8168213e 100644 --- a/test/functional/tests.py +++ b/test/functional/tests.py @@ -2440,7 +2440,7 @@ class TestSlo(Base): def test_slo_overwrite_segment_with_manifest(self): file_item = self.env.container.file("seg_b") - try: + with self.assertRaises(ResponseError) as catcher: file_item.write( json.dumps([ {'size_bytes': 1024 * 1024, @@ -2453,10 +2453,7 @@ class TestSlo(Base): 'etag': hashlib.md5('c' * 1024 * 1024).hexdigest(), 'path': '/%s/%s' % (self.env.container.name, 'seg_c')}]), parms={'multipart-manifest': 'put'}) - except ResponseError as err: - self.assertEqual(409, err.status) - else: - self.fail("Expected ResponseError but didn't get it") + self.assertEqual(400, catcher.exception.status) def test_slo_copy(self): file_item = self.env.container.file("manifest-abcde") diff --git a/test/unit/common/middleware/test_slo.py b/test/unit/common/middleware/test_slo.py index 59fb16b7f6..a82f3f977b 100644 --- a/test/unit/common/middleware/test_slo.py +++ b/test/unit/common/middleware/test_slo.py @@ -119,28 +119,171 @@ class TestSloMiddleware(SloTestCase): self.assertTrue( resp.startswith('X-Static-Large-Object is a reserved header')) - def test_parse_input(self): - self.assertRaises(HTTPException, slo.parse_input, 'some non json') - self.assertRaises(HTTPException, slo.parse_input, '[{}]') - self.assertRaises(HTTPException, slo.parse_input, json.dumps( - [{'path': '/cont/object', 'etag': 'etagoftheobjecitsegment', - 'size_bytes': 100, 'foo': 'bar'}])) - self.assertRaises(HTTPException, slo.parse_input, json.dumps( - [{'path': '/cont/object', 'etag': 'etagoftheobjecitsegment', - 'size_bytes': 100, 'range': 'non-range value'}])) + def _put_bogus_slo(self, manifest_text, + manifest_path='/v1/a/c/the-manifest', + min_segment_size=1): + with self.assertRaises(HTTPException) as catcher: + slo.parse_and_validate_input(manifest_text, manifest_path, + min_segment_size) + self.assertEqual(400, catcher.exception.status_int) + return catcher.exception.body + def _put_slo(self, manifest_text, manifest_path='/v1/a/c/the-manifest', + min_segment_size=1): + return slo.parse_and_validate_input(manifest_text, manifest_path, + min_segment_size) + + def test_bogus_input(self): + self.assertEqual('Manifest must be valid JSON.\n', + self._put_bogus_slo('some non json')) + + self.assertEqual('Manifest must be a list.\n', + self._put_bogus_slo('{}')) + + self.assertEqual('Index 0: not a JSON object\n', + self._put_bogus_slo('["zombocom"]')) + + def test_bogus_input_bad_keys(self): + self.assertEqual( + "Index 0: extraneous keys \"baz\", \"foo\"\n", + self._put_bogus_slo(json.dumps( + [{'path': '/cont/object', 'etag': 'etagoftheobjectsegment', + 'size_bytes': 100, + 'foo': 'bar', 'baz': 'quux'}]))) + + def test_bogus_input_ranges(self): + self.assertEqual( + "Index 0: invalid range\n", + self._put_bogus_slo(json.dumps( + [{'path': '/cont/object', 'etag': 'blah', + 'size_bytes': 100, 'range': 'non-range value'}]))) + + self.assertEqual( + "Index 0: multiple ranges (only one allowed)\n", + self._put_bogus_slo(json.dumps( + [{'path': '/cont/object', 'etag': 'blah', + 'size_bytes': 100, 'range': '1-20,30-40'}]))) + + def test_bogus_input_unsatisfiable_range(self): + self.assertEqual( + "Index 0: unsatisfiable range\n", + self._put_bogus_slo(json.dumps( + [{'path': '/cont/object', 'etag': 'blah', + 'size_bytes': 100, 'range': '8888-9999'}]))) + + # since size is optional, we have to be able to defer this check + segs = self._put_slo(json.dumps( + [{'path': '/cont/object', 'etag': 'blah', + 'size_bytes': None, 'range': '8888-9999'}])) + self.assertEqual(1, len(segs)) + + def test_bogus_input_path(self): + self.assertEqual( + "Index 0: path does not refer to an object. Path must be of the " + "form /container/object.\n" + "Index 1: path does not refer to an object. Path must be of the " + "form /container/object.\n", + self._put_bogus_slo(json.dumps( + [{'path': '/cont', 'etag': 'etagoftheobjectsegment', + 'size_bytes': 100}, + {'path': '/c-trailing-slash/', 'etag': 'e', + 'size_bytes': 100}, + {'path': '/con/obj', 'etag': 'e', + 'size_bytes': 100}, + {'path': '/con/obj-trailing-slash/', 'etag': 'e', + 'size_bytes': 100}, + {'path': '/con/obj/with/slashes', 'etag': 'e', + 'size_bytes': 100}]))) + + def test_bogus_input_multiple(self): + self.assertEqual( + "Index 0: invalid range\nIndex 1: not a JSON object\n", + self._put_bogus_slo(json.dumps( + [{'path': '/cont/object', 'etag': 'etagoftheobjectsegment', + 'size_bytes': 100, 'range': 'non-range value'}, + None]))) + + def test_bogus_input_size_bytes(self): + self.assertEqual( + "Index 0: invalid size_bytes\n", + self._put_bogus_slo(json.dumps( + [{'path': '/cont/object', 'etag': 'blah', 'size_bytes': "fht"}, + {'path': '/cont/object', 'etag': 'blah', 'size_bytes': None}, + {'path': '/cont/object', 'etag': 'blah', 'size_bytes': 100}], + ))) + + self.assertEqual( + "Index 0: invalid size_bytes\n", + self._put_bogus_slo(json.dumps( + [{'path': '/cont/object', 'etag': 'blah', 'size_bytes': []}], + ))) + + def test_bogus_input_self_referential(self): + self.assertEqual( + "Index 0: manifest must not include itself as a segment\n", + self._put_bogus_slo(json.dumps( + [{'path': '/c/the-manifest', 'etag': 'gate', + 'size_bytes': 100, 'range': 'non-range value'}]))) + + def test_bogus_input_self_referential_non_ascii(self): + self.assertEqual( + "Index 0: manifest must not include itself as a segment\n", + self._put_bogus_slo( + json.dumps([{'path': u'/c/あ_1', + 'etag': 'a', 'size_bytes': 1}]), + manifest_path=quote(u'/v1/a/c/あ_1'))) + + def test_bogus_input_self_referential_last_segment(self): + test_json_data = json.dumps([ + {'path': '/c/seg_1', 'etag': 'a', 'size_bytes': 1}, + {'path': '/c/seg_2', 'etag': 'a', 'size_bytes': 1}, + {'path': '/c/seg_3', 'etag': 'a', 'size_bytes': 1}, + {'path': '/c/the-manifest', 'etag': 'a', 'size_bytes': 1}, + ]) + self.assertEqual( + "Index 3: manifest must not include itself as a segment\n", + self._put_bogus_slo( + test_json_data, + manifest_path=quote('/v1/a/c/the-manifest'))) + + def test_bogus_input_undersize_segment(self): + self.assertEqual( + "Index 1: too small; each segment, except the last, " + "must be at least 1000 bytes.\n" + "Index 2: too small; each segment, except the last, " + "must be at least 1000 bytes.\n", + self._put_bogus_slo( + json.dumps([ + {'path': u'/c/s1', 'etag': 'a', 'size_bytes': 1000}, + {'path': u'/c/s2', 'etag': 'b', 'size_bytes': 999}, + {'path': u'/c/s3', 'etag': 'c', 'size_bytes': 998}, + # No error for this one since size_bytes is unspecified + {'path': u'/c/s4', 'etag': 'd', 'size_bytes': None}, + {'path': u'/c/s5', 'etag': 'e', 'size_bytes': 996}]), + min_segment_size=1000)) + + def test_valid_input(self): data = json.dumps( - [{'path': '/cont/object', 'etag': 'etagoftheobjecitsegment', + [{'path': '/cont/object', 'etag': 'etagoftheobjectsegment', 'size_bytes': 100}]) - self.assertEqual('/cont/object', - slo.parse_input(data)[0]['path']) + self.assertEqual( + '/cont/object', + slo.parse_and_validate_input(data, '/v1/a/cont/man', 1)[0]['path']) data = json.dumps( - [{'path': '/cont/object', 'etag': 'etagoftheobjecitsegment', - 'size_bytes': 100, 'range': '0-40,30-90'}]) - parsed = slo.parse_input(data) + [{'path': '/cont/object', 'etag': 'etagoftheobjectsegment', + 'size_bytes': 100, 'range': '0-40'}]) + parsed = slo.parse_and_validate_input(data, '/v1/a/cont/man', 1) self.assertEqual('/cont/object', parsed[0]['path']) - self.assertEqual([(0, 40), (30, 90)], parsed[0]['range'].ranges) + self.assertEqual([(0, 40)], parsed[0]['range'].ranges) + + data = json.dumps( + [{'path': '/cont/object', 'etag': 'etagoftheobjectsegment', + 'size_bytes': None, 'range': '0-40'}]) + parsed = slo.parse_and_validate_input(data, '/v1/a/cont/man', 1) + self.assertEqual('/cont/object', parsed[0]['path']) + self.assertEqual(None, parsed[0]['size_bytes']) + self.assertEqual([(0, 40)], parsed[0]['range'].ranges) class TestSloPutManifest(SloTestCase): @@ -331,7 +474,7 @@ class TestSloPutManifest(SloTestCase): environ={'REQUEST_METHOD': 'PUT'}, headers={'Accept': 'test'}, body=test_xml_data) no_xml = self.slo(req.environ, fake_start_response) - self.assertEqual(no_xml, ['Manifest must be valid json.']) + self.assertEqual(no_xml, ['Manifest must be valid JSON.\n']) def test_handle_multipart_put_bad_data(self): bad_data = json.dumps([{'path': '/cont/object', @@ -358,6 +501,7 @@ class TestSloPutManifest(SloTestCase): 'etag': 'etagoftheobj', 'size_bytes': 100}]), json.dumps([{'path': 12, 'size_bytes': 100}]), json.dumps([{'path': 12, 'size_bytes': 100}]), + json.dumps([{'path': '/c/o', 'etag': 123, 'size_bytes': 100}]), json.dumps([{'path': None, 'etag': 'etagoftheobj', 'size_bytes': 100}])]: req = Request.blank( @@ -421,46 +565,6 @@ class TestSloPutManifest(SloTestCase): self.assertEqual(errors[4][0], '/checktest/slob') self.assertEqual(errors[4][1], 'Etag Mismatch') - def test_handle_multipart_put_manifest_equal_slo(self): - test_json_data = json.dumps([{'path': '/cont/object', - 'etag': 'etagoftheobjectsegment', - 'size_bytes': 100}]) - req = Request.blank( - '/v1/AUTH_test/cont/object?multipart-manifest=put', - environ={'REQUEST_METHOD': 'PUT'}, headers={'Accept': 'test'}, - body=test_json_data) - status, headers, body = self.call_slo(req) - self.assertEqual(status, '409 Conflict') - self.assertEqual(self.app.call_count, 0) - - def test_handle_multipart_put_manifest_equal_slo_non_ascii(self): - test_json_data = json.dumps([{'path': u'/cont/あ_1', - 'etag': 'a', - 'size_bytes': 1}]) - path = quote(u'/v1/AUTH_test/cont/あ_1') - req = Request.blank( - path + '?multipart-manifest=put', - environ={'REQUEST_METHOD': 'PUT'}, headers={'Accept': 'test'}, - body=test_json_data) - status, headers, body = self.call_slo(req) - self.assertEqual(status, '409 Conflict') - self.assertEqual(self.app.call_count, 0) - - def test_handle_multipart_put_manifest_equal_last_segment(self): - test_json_data = json.dumps([{'path': '/cont/object', - 'etag': 'etagoftheobjectsegment', - 'size_bytes': 100}, - {'path': '/cont/object2', - 'etag': 'etagoftheobjectsegment', - 'size_bytes': 100}]) - req = Request.blank( - '/v1/AUTH_test/cont/object2?multipart-manifest=put', - environ={'REQUEST_METHOD': 'PUT'}, headers={'Accept': 'test'}, - body=test_json_data) - status, headers, body = self.call_slo(req) - self.assertEqual(status, '409 Conflict') - self.assertEqual(self.app.call_count, 1) - def test_handle_multipart_put_skip_size_check(self): good_data = json.dumps( [{'path': '/checktest/a_1', 'etag': 'a', 'size_bytes': None}, @@ -495,6 +599,24 @@ class TestSloPutManifest(SloTestCase): self.slo.handle_multipart_put(req, fake_start_response) self.assertEqual(cm.exception.status_int, 400) + def test_handle_multipart_put_skip_size_check_no_early_bailout(self): + with patch.object(self.slo, 'min_segment_size', 50): + # The first is too small (it's 10 bytes but min size is 50), and + # the second has a bad etag. Make sure both errors show up in + # the response. + test_json_data = json.dumps([{'path': '/cont/small_object', + 'etag': 'etagoftheobjectsegment', + 'size_bytes': None}, + {'path': '/cont/object2', + 'etag': 'wrong wrong wrong', + 'size_bytes': 100}]) + req = Request.blank('/v1/AUTH_test/c/o', body=test_json_data) + with self.assertRaises(HTTPException) as cm: + self.slo.handle_multipart_put(req, fake_start_response) + self.assertEqual(cm.exception.status_int, 400) + self.assertIn('at least 50 bytes', cm.exception.body) + self.assertIn('Etag Mismatch', cm.exception.body) + def test_handle_multipart_put_skip_etag_check(self): good_data = json.dumps( [{'path': '/checktest/a_1', 'etag': None, 'size_bytes': 1}, @@ -526,6 +648,7 @@ class TestSloPutManifest(SloTestCase): with self.assertRaises(HTTPException) as catcher: self.slo.handle_multipart_put(req, fake_start_response) self.assertEqual(400, catcher.exception.status_int) + self.assertIn("Unsatisfiable Range", catcher.exception.body) def test_handle_single_ranges(self): good_data = json.dumps( @@ -572,25 +695,6 @@ class TestSloPutManifest(SloTestCase): self.assertEqual('etagoftheobjectsegment', manifest_data[3]['hash']) self.assertEqual('10-40', manifest_data[3]['range']) - def test_handle_multiple_ranges_error(self): - good_data = json.dumps( - [{'path': '/checktest/a_1', 'etag': None, - 'size_bytes': 1, 'range': '0-100'}, - {'path': '/checktest/b_2', 'etag': None, - 'size_bytes': 2, 'range': '-1,0-0'}, - {'path': '/cont/object', 'etag': None, - 'size_bytes': None, 'range': '10-30,20-40'}]) - req = Request.blank( - '/v1/AUTH_test/checktest/man_3?multipart-manifest=put', - environ={'REQUEST_METHOD': 'PUT'}, body=good_data) - status, headers, body = self.call_slo(req) - self.assertEqual(status, '400 Bad Request') - self.assertEqual(self.app.call_count, 3) - self.assertEqual(body, '\n'.join([ - 'Errors:', - '/checktest/b_2, Multiple Ranges', - '/cont/object, Multiple Ranges'])) - class TestSloDeleteManifest(SloTestCase):