Tolerate RFC-compliant ETags

Since time immemorial, Swift has returned unquoted ETags for plain-old
Swift objects -- I hear tell that we once tried to change this, but
quickly backed it out when some clients broke.

However, some proxies (such as nginx) apparently may force the ETag to
adhere to the RFC, which states [1]:

    An entity-tag consists of an opaque *quoted* string

(emphasis mine). See the related bug for an instance of this happening.

Since we can still get the original ETag easily, we should tolerate the
more-compliant format.

[1] https://tools.ietf.org/html/rfc2616.html#section-3.11 or, if you
    prefer the new ones, https://tools.ietf.org/html/rfc7232#section-2.3

Change-Id: I7cfacab3f250a9443af4b67111ef8088d37d9171
Closes-Bug: 1681529
Related-Bug: 1678976
This commit is contained in:
Tim Burke
2017-04-10 18:09:53 -07:00
parent 058fb0323f
commit 64da481ccd
2 changed files with 42 additions and 21 deletions

View File

@@ -377,10 +377,23 @@ class _SwiftReader(object):
self._actual_read = 0 self._actual_read = 0
self._content_length = None self._content_length = None
self._actual_md5 = None self._actual_md5 = None
self._expected_etag = headers.get('etag') self._expected_md5 = headers.get('etag', '')
if ('x-object-manifest' not in headers if len(self._expected_md5) > 1 and self._expected_md5[0] == '"' \
and 'x-static-large-object' not in headers and checksum): and self._expected_md5[-1] == '"':
self._expected_md5 = self._expected_md5[1:-1]
# Some headers indicate the MD5 of the response
# definitely *won't* match the ETag
bad_md5_headers = set([
'x-object-manifest',
'x-static-large-object',
])
if bad_md5_headers.intersection(headers):
# This isn't a useful checksum
self._expected_md5 = ''
if self._expected_md5 and checksum:
self._actual_md5 = md5() self._actual_md5 = md5()
if 'content-length' in headers: if 'content-length' in headers:
@@ -398,12 +411,12 @@ class _SwiftReader(object):
self._check_contents() self._check_contents()
def _check_contents(self): def _check_contents(self):
if self._actual_md5 and self._expected_etag: if self._actual_md5 and self._expected_md5:
etag = self._actual_md5.hexdigest() etag = self._actual_md5.hexdigest()
if etag != self._expected_etag: if etag != self._expected_md5:
raise SwiftError('Error downloading {0}: md5sum != etag, ' raise SwiftError('Error downloading {0}: md5sum != etag, '
'{1} != {2}'.format( '{1} != {2}'.format(
self._path, etag, self._expected_etag)) self._path, etag, self._expected_md5))
if (self._content_length is not None if (self._content_length is not None
and self._actual_read != self._content_length): and self._actual_read != self._content_length):

View File

@@ -119,25 +119,26 @@ class TestSwiftReader(unittest.TestCase):
self.assertEqual(sr._path, 'path') self.assertEqual(sr._path, 'path')
self.assertEqual(sr._body, 'body') self.assertEqual(sr._body, 'body')
self.assertIsNone(sr._content_length) self.assertIsNone(sr._content_length)
self.assertIsNone(sr._expected_etag) self.assertFalse(sr._expected_md5)
self.assertIsNotNone(sr._actual_md5) self.assertIsNone(sr._actual_md5)
self.assertIs(type(sr._actual_md5), self.md5_type)
def test_create_with_large_object_headers(self): def test_create_with_large_object_headers(self):
# md5 should not be initialized if large object headers are present # md5 should not be initialized if large object headers are present
sr = self.sr('path', 'body', {'x-object-manifest': 'test'}) sr = self.sr('path', 'body', {'x-object-manifest': 'test',
'etag': '"%s"' % ('0' * 32)})
self.assertEqual(sr._path, 'path') self.assertEqual(sr._path, 'path')
self.assertEqual(sr._body, 'body') self.assertEqual(sr._body, 'body')
self.assertIsNone(sr._content_length) self.assertIsNone(sr._content_length)
self.assertIsNone(sr._expected_etag) self.assertFalse(sr._expected_md5)
self.assertIsNone(sr._actual_md5) self.assertIsNone(sr._actual_md5)
sr = self.sr('path', 'body', {'x-static-large-object': 'test'}) sr = self.sr('path', 'body', {'x-static-large-object': 'test',
'etag': '"%s"' % ('0' * 32)})
self.assertEqual(sr._path, 'path') self.assertEqual(sr._path, 'path')
self.assertEqual(sr._body, 'body') self.assertEqual(sr._body, 'body')
self.assertIsNone(sr._content_length) self.assertIsNone(sr._content_length)
self.assertIsNone(sr._expected_etag) self.assertFalse(sr._expected_md5)
self.assertIsNone(sr._actual_md5) self.assertIsNone(sr._actual_md5)
def test_create_with_ignore_checksum(self): def test_create_with_ignore_checksum(self):
@@ -146,7 +147,7 @@ class TestSwiftReader(unittest.TestCase):
self.assertEqual(sr._path, 'path') self.assertEqual(sr._path, 'path')
self.assertEqual(sr._body, 'body') self.assertEqual(sr._body, 'body')
self.assertIsNone(sr._content_length) self.assertIsNone(sr._content_length)
self.assertIsNone(sr._expected_etag) self.assertFalse(sr._expected_md5)
self.assertIsNone(sr._actual_md5) self.assertIsNone(sr._actual_md5)
def test_create_with_content_length(self): def test_create_with_content_length(self):
@@ -155,10 +156,9 @@ class TestSwiftReader(unittest.TestCase):
self.assertEqual(sr._path, 'path') self.assertEqual(sr._path, 'path')
self.assertEqual(sr._body, 'body') self.assertEqual(sr._body, 'body')
self.assertEqual(sr._content_length, 5) self.assertEqual(sr._content_length, 5)
self.assertIsNone(sr._expected_etag) self.assertFalse(sr._expected_md5)
self.assertIsNotNone(sr._actual_md5) self.assertIsNone(sr._actual_md5)
self.assertIs(type(sr._actual_md5), self.md5_type)
# Check Contentlength raises error if it isn't an integer # Check Contentlength raises error if it isn't an integer
self.assertRaises(SwiftError, self.sr, 'path', 'body', self.assertRaises(SwiftError, self.sr, 'path', 'body',
@@ -175,11 +175,17 @@ class TestSwiftReader(unittest.TestCase):
# Check error is raised if expected etag doesn't match calculated md5. # Check error is raised if expected etag doesn't match calculated md5.
# md5 for a SwiftReader that has done nothing is # md5 for a SwiftReader that has done nothing is
# d41d8cd98f00b204e9800998ecf8427e i.e md5 of nothing # d41d8cd98f00b204e9800998ecf8427e i.e md5 of nothing
sr = self.sr('path', BytesIO(b'body'), {'etag': 'doesntmatch'}) sr = self.sr('path', BytesIO(b'body'),
{'etag': md5(b'doesntmatch').hexdigest()})
self.assertRaises(SwiftError, _consume, sr) self.assertRaises(SwiftError, _consume, sr)
sr = self.sr('path', BytesIO(b'body'), sr = self.sr('path', BytesIO(b'body'),
{'etag': '841a2d689ad86bd1611447453c22c6fc'}) {'etag': md5(b'body').hexdigest()})
_consume(sr)
# Should still work if etag was quoted
sr = self.sr('path', BytesIO(b'body'),
{'etag': '"%s"' % md5(b'body').hexdigest()})
_consume(sr) _consume(sr)
# Check error is raised if SwiftReader doesn't read the same length # Check error is raised if SwiftReader doesn't read the same length
@@ -191,11 +197,13 @@ class TestSwiftReader(unittest.TestCase):
_consume(sr) _consume(sr)
# Check that the iterator generates expected length and etag values # Check that the iterator generates expected length and etag values
sr = self.sr('path', ['abc'.encode()] * 3, {}) sr = self.sr('path', ['abc'.encode()] * 3,
{'content-length': 9,
'etag': md5('abc'.encode() * 3).hexdigest()})
_consume(sr) _consume(sr)
self.assertEqual(sr._actual_read, 9) self.assertEqual(sr._actual_read, 9)
self.assertEqual(sr._actual_md5.hexdigest(), self.assertEqual(sr._actual_md5.hexdigest(),
'97ac82a5b825239e782d0339e2d7b910') md5('abc'.encode() * 3).hexdigest())
class _TestServiceBase(unittest.TestCase): class _TestServiceBase(unittest.TestCase):