Tolerate RFC-compliant ETags

Since time immemorial, Swift has returned unquoted ETags for plain-old
Swift objects -- I hear tell that we once tried to change this, but
quickly backed it out when some clients broke.

However, some proxies (such as nginx) apparently may force the ETag to
adhere to the RFC, which states [1]:

    An entity-tag consists of an opaque *quoted* string

(emphasis mine). See the related bug for an instance of this happening.

Since we can still get the original ETag easily, we should tolerate the
more-compliant format.

[1] https://tools.ietf.org/html/rfc2616.html#section-3.11 or, if you
    prefer the new ones, https://tools.ietf.org/html/rfc7232#section-2.3

Change-Id: I7cfacab3f250a9443af4b67111ef8088d37d9171
Closes-Bug: 1681529
Related-Bug: 1678976
This commit is contained in:
Tim Burke 2017-04-10 18:09:53 -07:00
parent 058fb0323f
commit 64da481ccd
2 changed files with 42 additions and 21 deletions

View File

@ -377,10 +377,23 @@ class _SwiftReader(object):
self._actual_read = 0
self._content_length = None
self._actual_md5 = None
self._expected_etag = headers.get('etag')
self._expected_md5 = headers.get('etag', '')
if ('x-object-manifest' not in headers
and 'x-static-large-object' not in headers and checksum):
if len(self._expected_md5) > 1 and self._expected_md5[0] == '"' \
and self._expected_md5[-1] == '"':
self._expected_md5 = self._expected_md5[1:-1]
# Some headers indicate the MD5 of the response
# definitely *won't* match the ETag
bad_md5_headers = set([
'x-object-manifest',
'x-static-large-object',
])
if bad_md5_headers.intersection(headers):
# This isn't a useful checksum
self._expected_md5 = ''
if self._expected_md5 and checksum:
self._actual_md5 = md5()
if 'content-length' in headers:
@ -398,12 +411,12 @@ class _SwiftReader(object):
self._check_contents()
def _check_contents(self):
if self._actual_md5 and self._expected_etag:
if self._actual_md5 and self._expected_md5:
etag = self._actual_md5.hexdigest()
if etag != self._expected_etag:
if etag != self._expected_md5:
raise SwiftError('Error downloading {0}: md5sum != etag, '
'{1} != {2}'.format(
self._path, etag, self._expected_etag))
self._path, etag, self._expected_md5))
if (self._content_length is not None
and self._actual_read != self._content_length):

View File

@ -119,25 +119,26 @@ class TestSwiftReader(unittest.TestCase):
self.assertEqual(sr._path, 'path')
self.assertEqual(sr._body, 'body')
self.assertIsNone(sr._content_length)
self.assertIsNone(sr._expected_etag)
self.assertFalse(sr._expected_md5)
self.assertIsNotNone(sr._actual_md5)
self.assertIs(type(sr._actual_md5), self.md5_type)
self.assertIsNone(sr._actual_md5)
def test_create_with_large_object_headers(self):
# md5 should not be initialized if large object headers are present
sr = self.sr('path', 'body', {'x-object-manifest': 'test'})
sr = self.sr('path', 'body', {'x-object-manifest': 'test',
'etag': '"%s"' % ('0' * 32)})
self.assertEqual(sr._path, 'path')
self.assertEqual(sr._body, 'body')
self.assertIsNone(sr._content_length)
self.assertIsNone(sr._expected_etag)
self.assertFalse(sr._expected_md5)
self.assertIsNone(sr._actual_md5)
sr = self.sr('path', 'body', {'x-static-large-object': 'test'})
sr = self.sr('path', 'body', {'x-static-large-object': 'test',
'etag': '"%s"' % ('0' * 32)})
self.assertEqual(sr._path, 'path')
self.assertEqual(sr._body, 'body')
self.assertIsNone(sr._content_length)
self.assertIsNone(sr._expected_etag)
self.assertFalse(sr._expected_md5)
self.assertIsNone(sr._actual_md5)
def test_create_with_ignore_checksum(self):
@ -146,7 +147,7 @@ class TestSwiftReader(unittest.TestCase):
self.assertEqual(sr._path, 'path')
self.assertEqual(sr._body, 'body')
self.assertIsNone(sr._content_length)
self.assertIsNone(sr._expected_etag)
self.assertFalse(sr._expected_md5)
self.assertIsNone(sr._actual_md5)
def test_create_with_content_length(self):
@ -155,10 +156,9 @@ class TestSwiftReader(unittest.TestCase):
self.assertEqual(sr._path, 'path')
self.assertEqual(sr._body, 'body')
self.assertEqual(sr._content_length, 5)
self.assertIsNone(sr._expected_etag)
self.assertFalse(sr._expected_md5)
self.assertIsNotNone(sr._actual_md5)
self.assertIs(type(sr._actual_md5), self.md5_type)
self.assertIsNone(sr._actual_md5)
# Check Contentlength raises error if it isn't an integer
self.assertRaises(SwiftError, self.sr, 'path', 'body',
@ -175,11 +175,17 @@ class TestSwiftReader(unittest.TestCase):
# Check error is raised if expected etag doesn't match calculated md5.
# md5 for a SwiftReader that has done nothing is
# d41d8cd98f00b204e9800998ecf8427e i.e md5 of nothing
sr = self.sr('path', BytesIO(b'body'), {'etag': 'doesntmatch'})
sr = self.sr('path', BytesIO(b'body'),
{'etag': md5(b'doesntmatch').hexdigest()})
self.assertRaises(SwiftError, _consume, sr)
sr = self.sr('path', BytesIO(b'body'),
{'etag': '841a2d689ad86bd1611447453c22c6fc'})
{'etag': md5(b'body').hexdigest()})
_consume(sr)
# Should still work if etag was quoted
sr = self.sr('path', BytesIO(b'body'),
{'etag': '"%s"' % md5(b'body').hexdigest()})
_consume(sr)
# Check error is raised if SwiftReader doesn't read the same length
@ -191,11 +197,13 @@ class TestSwiftReader(unittest.TestCase):
_consume(sr)
# Check that the iterator generates expected length and etag values
sr = self.sr('path', ['abc'.encode()] * 3, {})
sr = self.sr('path', ['abc'.encode()] * 3,
{'content-length': 9,
'etag': md5('abc'.encode() * 3).hexdigest()})
_consume(sr)
self.assertEqual(sr._actual_read, 9)
self.assertEqual(sr._actual_md5.hexdigest(),
'97ac82a5b825239e782d0339e2d7b910')
md5('abc'.encode() * 3).hexdigest())
class _TestServiceBase(unittest.TestCase):