Fix crash when a SLO repeats a segment.

If a SLO manifest specified the same segment twice with no range on
the latter, we would crash while trying to coalesce the segment
specifications into a single request (for efficiency). Now we handle
that missing range and do the right thing.

Change-Id: If13af444ed301ebd8fd34a0d96a330ded648f0c4
This commit is contained in:
Samuel Merritt 2015-11-25 17:09:34 -08:00
parent caebea16d6
commit c28709f8f2
3 changed files with 84 additions and 12 deletions

View File

@ -307,8 +307,10 @@ class SegmentedIterable(object):
'ERROR: While processing manifest %s, '
'max LO GET time of %ds exceeded' %
(self.name, self.max_get_time))
# Make sure that the segment is a plain old object, not some
# flavor of large object, so that we can check its MD5.
# The "multipart-manifest=get" query param ensures that the
# segment is a plain old object, not some flavor of large
# object; therefore, its etag is its MD5sum and hence we can
# check it.
path = seg_path + '?multipart-manifest=get'
seg_req = make_subrequest(
self.req.environ, path=path, method='GET',
@ -317,21 +319,35 @@ class SegmentedIterable(object):
agent=('%(orig)s ' + self.ua_suffix),
swift_source=self.swift_source)
seg_req_rangeval = None
if first_byte != 0 or not go_to_end:
seg_req.headers['Range'] = "bytes=%s-%s" % (
seg_req_rangeval = "%s-%s" % (
first_byte, '' if go_to_end else last_byte)
seg_req.headers['Range'] = "bytes=" + seg_req_rangeval
# We can only coalesce if paths match and we know the segment
# size (so we can check that the ranges will be allowed)
if pending_req and pending_req.path == seg_req.path and \
seg_size is not None:
new_range = '%s,%s' % (
pending_req.headers.get('Range',
'bytes=0-%s' % (seg_size - 1)),
seg_req.headers['Range'].split('bytes=')[1])
if Range(new_range).ranges_for_length(seg_size):
# Make a new Range object so that we don't goof up the
# existing one in case of invalid ranges. Note that a
# range set with too many individual byteranges is
# invalid, so we can combine N valid byteranges and 1
# valid byterange and get an invalid range set.
if pending_req.range:
new_range_str = str(pending_req.range)
else:
new_range_str = "bytes=0-%d" % (seg_size - 1)
if seg_req.range:
new_range_str += "," + seg_req_rangeval
else:
new_range_str += ",0-%d" % (seg_size - 1)
if Range(new_range_str).ranges_for_length(seg_size):
# Good news! We can coalesce the requests
pending_req.headers['Range'] = new_range
pending_req.headers['Range'] = new_range_str
continue
# else, Too many ranges, or too much backtracking, or ...

View File

@ -541,14 +541,15 @@ class Range(object):
def __str__(self):
string = 'bytes='
for start, end in self.ranges:
for i, (start, end) in enumerate(self.ranges):
if start is not None:
string += str(start)
string += '-'
if end is not None:
string += str(end)
string += ','
return string.rstrip(',')
if i < len(self.ranges) - 1:
string += ','
return string
def ranges_for_length(self, length):
"""

View File

@ -1306,6 +1306,61 @@ class TestSloGetManifest(SloTestCase):
self.assertFalse(
"SLO MultipartGET" in first_ua)
def test_get_manifest_repeated_segments(self):
_aabbccdd_manifest_json = json.dumps(
[{'name': '/gettest/a_5', 'hash': md5hex("a" * 5),
'content_type': 'text/plain', 'bytes': '5'},
{'name': '/gettest/a_5', 'hash': md5hex("a" * 5),
'content_type': 'text/plain', 'bytes': '5'},
{'name': '/gettest/b_10', 'hash': md5hex("b" * 10),
'content_type': 'text/plain', 'bytes': '10'},
{'name': '/gettest/b_10', 'hash': md5hex("b" * 10),
'content_type': 'text/plain', 'bytes': '10'},
{'name': '/gettest/c_15', 'hash': md5hex("c" * 15),
'content_type': 'text/plain', 'bytes': '15'},
{'name': '/gettest/c_15', 'hash': md5hex("c" * 15),
'content_type': 'text/plain', 'bytes': '15'},
{'name': '/gettest/d_20', 'hash': md5hex("d" * 20),
'content_type': 'text/plain', 'bytes': '20'},
{'name': '/gettest/d_20', 'hash': md5hex("d" * 20),
'content_type': 'text/plain', 'bytes': '20'}])
self.app.register(
'GET', '/v1/AUTH_test/gettest/manifest-aabbccdd',
swob.HTTPOk, {'Content-Type': 'application/json',
'X-Static-Large-Object': 'true',
'Etag': md5(_aabbccdd_manifest_json).hexdigest()},
_aabbccdd_manifest_json)
req = Request.blank(
'/v1/AUTH_test/gettest/manifest-aabbccdd',
environ={'REQUEST_METHOD': 'GET'})
status, headers, body = self.call_slo(req)
headers = swob.HeaderKeyDict(headers)
self.assertEqual(status, '200 OK')
self.assertEqual(body, (
'aaaaaaaaaabbbbbbbbbbbbbbbbbbbbcccccccccccccccccccccccccccccc'
'dddddddddddddddddddddddddddddddddddddddd'))
self.assertEqual(self.app.calls, [
('GET', '/v1/AUTH_test/gettest/manifest-aabbccdd'),
('GET', '/v1/AUTH_test/gettest/a_5?multipart-manifest=get'),
('GET', '/v1/AUTH_test/gettest/b_10?multipart-manifest=get'),
('GET', '/v1/AUTH_test/gettest/c_15?multipart-manifest=get'),
('GET', '/v1/AUTH_test/gettest/d_20?multipart-manifest=get')])
ranges = [c[2].get('Range') for c in self.app.calls_with_headers]
self.assertEqual(ranges, [
None,
'bytes=0-4,0-4',
'bytes=0-9,0-9',
'bytes=0-14,0-14',
'bytes=0-19,0-19'])
def test_if_none_match_matches(self):
req = Request.blank(
'/v1/AUTH_test/gettest/manifest-abcd',