Add improvements to MD5 validation.

With MD5Sum checking being added, a concern was brought up that It was
a change with no possibility of reverting to the old behaviour.
This change adds the flag '--ignore-checksum' to the upload subcommand
allowing the checks to be turned off.

Changed occurrences of the magic string for a null md5 to use a descriptive
constant instead.

Updated Error messages generated when validation fails. They should now be more descriptive
and not output a literal newline sequence.

Change-Id: Id1756cbb6700bb7e38f0ee0e75bc535e37f777ed
This commit is contained in:
Daniel Wakefield 2015-03-04 14:01:55 +00:00
parent 925c01ebfb
commit 13780f37c3
7 changed files with 107 additions and 55 deletions

@ -40,7 +40,7 @@ from swiftclient.command_helpers import (
stat_account, stat_container, stat_object
)
from swiftclient.utils import (
config_true_value, ReadableToIterable, LengthWrapper
config_true_value, ReadableToIterable, LengthWrapper, EMPTY_ETAG
)
from swiftclient.exceptions import ClientException
from swiftclient.multithreading import MultiThreadingManager
@ -174,7 +174,8 @@ _default_local_options = {
'delimiter': None,
'fail_fast': False,
'human': False,
'dir_marker': False
'dir_marker': False,
'checksum': True
}
POLICY = 'X-Storage-Policy'
@ -1410,16 +1411,16 @@ class SwiftService(object):
res['headers'] = put_headers
if options['changed']:
try:
_empty_string_etag = 'd41d8cd98f00b204e9800998ecf8427e'
headers = conn.head_object(container, obj)
ct = headers.get('content-type')
cl = int(headers.get('content-length'))
et = headers.get('etag')
mt = headers.get('x-object-meta-mtime')
if ct.split(';', 1)[0] == 'text/directory' and \
cl == 0 and \
et == _empty_string_etag and \
mt == put_headers['x-object-meta-mtime']:
if (ct.split(';', 1)[0] == 'text/directory' and
cl == 0 and
et == EMPTY_ETAG and
mt == put_headers['x-object-meta-mtime']):
res['success'] = True
return res
except ClientException as err:
@ -1467,17 +1468,19 @@ class SwiftService(object):
fp = open(path, 'rb')
fp.seek(segment_start)
contents = LengthWrapper(fp, segment_size, md5=True)
contents = LengthWrapper(fp, segment_size, md5=options['checksum'])
etag = conn.put_object(segment_container,
segment_name, contents,
content_length=segment_size,
response_dict=results_dict)
if etag and etag != contents.get_md5sum():
raise SwiftError('Segment upload failed: remote and local '
'object md5 did not match, {0} != {1}\n'
'remote segment has not been removed.'
.format(etag, contents.get_md5sum()))
if options['checksum'] and etag and etag != contents.get_md5sum():
raise SwiftError('Segment {0}: upload verification failed: '
'md5 mismatch, local {1} != remote {2} '
'(remote segment has not been removed)'
.format(segment_index,
contents.get_md5sum(),
etag))
res.update({
'success': True,
@ -1707,11 +1710,13 @@ class SwiftService(object):
obr = {}
if path is not None:
content_length = getsize(path)
contents = LengthWrapper(open(path, 'rb'), content_length,
md5=True)
contents = LengthWrapper(open(path, 'rb'),
content_length,
md5=options['checksum'])
else:
content_length = None
contents = ReadableToIterable(stream, md5=True)
contents = ReadableToIterable(stream,
md5=options['checksum'])
etag = conn.put_object(
container, obj, contents,
@ -1720,11 +1725,12 @@ class SwiftService(object):
)
res['response_dict'] = obr
if etag and etag != contents.get_md5sum():
raise SwiftError('Object upload failed: remote and local '
'object md5 did not match, {0} != {1}\n'
'remote object has not been removed.'
.format(etag, contents.get_md5sum()))
if (options['checksum'] and
etag and etag != contents.get_md5sum()):
raise SwiftError('Object upload verification failed: '
'md5 mismatch, local {0} != remote {1} '
'(remote object has not been removed)'
.format(contents.get_md5sum(), etag))
if old_manifest or old_slo_manifest_paths:
drs = []

@ -637,7 +637,7 @@ def st_post(parser, args, output_manager):
st_upload_options = '''[--changed] [--skip-identical] [--segment-size <size>]
[--segment-container <container>] [--leave-segments]
[--object-threads <thread>] [--segment-threads <threads>]
[--header <header>] [--use-slo]
[--header <header>] [--use-slo] [--ignore-checksum]
[--object-name <object-name>]
<container> <file_or_directory>
'''
@ -681,6 +681,7 @@ Optional arguments:
Upload file and name object to <object-name> or upload
dir and use <object-name> as object prefix instead of
folder name.
--ignore-checksum Turn off checksum validation for uploads.
'''.strip('\n')
@ -733,6 +734,9 @@ def st_upload(parser, args, output_manager):
'', '--object-name', dest='object_name',
help='Upload file and name object to <object-name> or upload dir and '
'use <object-name> as object prefix instead of folder name.')
parser.add_option(
'', '--ignore-checksum', dest='checksum', default=True,
action='store_false', help='Turn off checksum validation for uploads.')
(options, args) = parse_args(parser, args)
args = args[1:]
if len(args) < 2:
@ -848,7 +852,7 @@ def st_upload(parser, args, output_manager):
output_manager.error("%s" % error)
except SwiftError as e:
output_manager.error("%s" % e)
output_manager.error(e.value)
st_capabilities_options = "[<proxy_url>]"

@ -21,6 +21,7 @@ import time
import six
TRUE_VALUES = set(('true', '1', 'yes', 'on', 't', 'y'))
EMPTY_ETAG = 'd41d8cd98f00b204e9800998ecf8427e'
def config_true_value(value):

@ -563,8 +563,8 @@ class TestServiceUpload(testtools.TestCase):
if hasattr(self, 'assertDictEqual'):
self.assertDictEqual(a, b, m)
else:
self.assertTrue(isinstance(a, dict), m)
self.assertTrue(isinstance(b, dict), m)
self.assertIsInstance(a, dict, m)
self.assertIsInstance(b, dict, m)
self.assertEqual(len(a), len(b), m)
for k, v in a.items():
self.assertIn(k, b, m)
@ -605,7 +605,8 @@ class TestServiceUpload(testtools.TestCase):
segment_size=10,
segment_index=2,
obj_name='test_o',
options={'segment_container': None})
options={'segment_container': None,
'checksum': True})
self._assertDictEqual(r, expected_r)
@ -623,6 +624,45 @@ class TestServiceUpload(testtools.TestCase):
self.assertEqual(contents.read(), b'b' * 10)
self.assertEqual(contents.get_md5sum(), md5(b'b' * 10).hexdigest())
def test_etag_mismatch_with_ignore_checksum(self):
def _consuming_conn(*a, **kw):
contents = a[2]
contents.read() # Force md5 calculation
return 'badresponseetag'
with tempfile.NamedTemporaryFile() as f:
f.write(b'a' * 10)
f.write(b'b' * 10)
f.write(b'c' * 10)
f.flush()
mock_conn = mock.Mock()
mock_conn.put_object.side_effect = _consuming_conn
type(mock_conn).attempts = mock.PropertyMock(return_value=2)
s = SwiftService()
r = s._upload_segment_job(conn=mock_conn,
path=f.name,
container='test_c',
segment_name='test_s_1',
segment_start=10,
segment_size=10,
segment_index=2,
obj_name='test_o',
options={'segment_container': None,
'checksum': False})
self.assertNotIn('error', r)
self.assertEqual(mock_conn.put_object.call_count, 1)
mock_conn.put_object.assert_called_with('test_c_segments',
'test_s_1',
mock.ANY,
content_length=10,
response_dict={})
contents = mock_conn.put_object.call_args[0][2]
# Check that md5sum is not calculated.
self.assertEqual(contents.get_md5sum(), '')
def test_upload_segment_job_etag_mismatch(self):
def _consuming_conn(*a, **kw):
contents = a[2]
@ -648,10 +688,11 @@ class TestServiceUpload(testtools.TestCase):
segment_size=10,
segment_index=2,
obj_name='test_o',
options={'segment_container': None})
options={'segment_container': None,
'checksum': True})
self.assertIn('error', r)
self.assertTrue(r['error'].value.find('md5 did not match') >= 0)
self.assertIn('md5 mismatch', str(r['error']))
self.assertEqual(mock_conn.put_object.call_count, 1)
mock_conn.put_object.assert_called_with('test_c_segments',
@ -664,7 +705,7 @@ class TestServiceUpload(testtools.TestCase):
def test_upload_object_job_file(self):
# Uploading a file results in the file object being wrapped in a
# LengthWrapper. This test sets the options is such a way that much
# LengthWrapper. This test sets the options in such a way that much
# of _upload_object_job is skipped bringing the critical path down
# to around 60 lines to ease testing.
with tempfile.NamedTemporaryFile() as f:
@ -696,12 +737,11 @@ class TestServiceUpload(testtools.TestCase):
'skip_identical': False,
'leave_segments': True,
'header': '',
'segment_size': 0})
'segment_size': 0,
'checksum': True})
# Check for mtime and path separately as they are calculated
# from the temp file and will be different each time.
mtime = float(r['headers']['x-object-meta-mtime'])
self.assertAlmostEqual(mtime, expected_mtime, delta=1)
self.assertAlmostEqual(mtime, expected_mtime, delta=0.5)
del r['headers']['x-object-meta-mtime']
self.assertEqual(r['path'], f.name)
@ -718,7 +758,8 @@ class TestServiceUpload(testtools.TestCase):
self.assertIsInstance(contents, utils.LengthWrapper)
self.assertEqual(len(contents), 30)
# This read forces the LengthWrapper to calculate the md5
# for the read content.
# for the read content. This also checks that LengthWrapper was
# initialized with md5=True
self.assertEqual(contents.read(), b'a' * 30)
self.assertEqual(contents.get_md5sum(), md5(b'a' * 30).hexdigest())
@ -740,7 +781,7 @@ class TestServiceUpload(testtools.TestCase):
'success': True,
'path': None,
}
expected_mtime = round(time.time())
expected_mtime = float(time.time())
mock_conn = mock.Mock()
mock_conn.put_object.return_value = ''
@ -755,10 +796,11 @@ class TestServiceUpload(testtools.TestCase):
'skip_identical': False,
'leave_segments': True,
'header': '',
'segment_size': 0})
'segment_size': 0,
'checksum': True})
mtime = float(r['headers']['x-object-meta-mtime'])
self.assertAlmostEqual(mtime, expected_mtime, delta=10)
self.assertAlmostEqual(mtime, expected_mtime, delta=0.5)
del r['headers']['x-object-meta-mtime']
self._assertDictEqual(r, expected_r)
@ -771,7 +813,7 @@ class TestServiceUpload(testtools.TestCase):
contents = mock_conn.put_object.call_args[0][2]
self.assertIsInstance(contents, utils.ReadableToIterable)
self.assertEqual(contents.chunk_size, 65536)
# next retreives the first chunk of the stream or len(chunk_size)
# next retrieves the first chunk of the stream or len(chunk_size)
# or less, it also forces the md5 to be calculated.
self.assertEqual(next(contents), b'a' * 30)
self.assertEqual(contents.get_md5sum(), md5(b'a' * 30).hexdigest())
@ -801,11 +843,12 @@ class TestServiceUpload(testtools.TestCase):
'skip_identical': False,
'leave_segments': True,
'header': '',
'segment_size': 0})
'segment_size': 0,
'checksum': True})
self.assertEqual(r['success'], False)
self.assertIn('error', r)
self.assertTrue(r['error'].value.find('md5 did not match') >= 0)
self.assertIn('md5 mismatch', str(r['error']))
self.assertEqual(mock_conn.put_object.call_count, 1)
expected_headers = {'x-object-meta-mtime': mock.ANY}

@ -32,6 +32,7 @@ import swiftclient.utils
from os.path import basename, dirname
from tests.unit.test_swiftclient import MockHttpTest
from tests.unit.utils import CaptureOutput, fake_get_auth_keystone
from swiftclient.utils import EMPTY_ETAG
if six.PY2:
@ -314,7 +315,7 @@ class TestShell(unittest.TestCase):
'etag': '2cbbfe139a744d6abbe695e17f3c1991'},
objcontent),
({'content-type': 'text/plain',
'etag': 'd41d8cd98f00b204e9800998ecf8427e'},
'etag': EMPTY_ETAG},
'')
]
@ -370,7 +371,7 @@ class TestShell(unittest.TestCase):
@mock.patch('swiftclient.service.Connection')
def test_download_no_content_type(self, connection):
connection.return_value.get_object.return_value = [
{'etag': 'd41d8cd98f00b204e9800998ecf8427e'},
{'etag': EMPTY_ETAG},
'']
# Test downloading whole container
@ -400,8 +401,7 @@ class TestShell(unittest.TestCase):
def test_upload(self, connection, walk):
connection.return_value.head_object.return_value = {
'content-length': '0'}
connection.return_value.put_object.return_value = (
'd41d8cd98f00b204e9800998ecf8427e')
connection.return_value.put_object.return_value = EMPTY_ETAG
connection.return_value.attempts = 0
argv = ["", "upload", "container", self.tmpfile,
"-H", "X-Storage-Policy:one"]
@ -477,8 +477,7 @@ class TestShell(unittest.TestCase):
connection.return_value.get_object.return_value = ({}, json.dumps(
[{'name': 'container1/old_seg1'}, {'name': 'container2/old_seg2'}]
))
connection.return_value.put_object.return_value = (
'd41d8cd98f00b204e9800998ecf8427e')
connection.return_value.put_object.return_value = EMPTY_ETAG
swiftclient.shell.main(argv)
connection.return_value.put_object.assert_called_with(
'container',
@ -508,8 +507,7 @@ class TestShell(unittest.TestCase):
connection.return_value.head_object.return_value = {
'content-length': '0'}
connection.return_value.attempts = 0
connection.return_value.put_object.return_value = (
'd41d8cd98f00b204e9800998ecf8427e')
connection.return_value.put_object.return_value = EMPTY_ETAG
argv = ["", "upload", "container", self.tmpfile, "-S", "10",
"-C", "container"]
with open(self.tmpfile, "wb") as fh:
@ -1660,9 +1658,8 @@ class TestCrossAccountObjectAccess(TestBase, MockHttpTest):
def test_download_with_read_write_access(self):
req_handler = self._fake_cross_account_auth(True, True)
empty_str_etag = 'd41d8cd98f00b204e9800998ecf8427e'
fake_conn = self.fake_http_connection(403, on_request=req_handler,
etags=[empty_str_etag])
etags=[EMPTY_ETAG])
args, env = self._make_cmd('download', cmd_args=[self.cont,
self.obj.lstrip('/'),
@ -1683,9 +1680,8 @@ class TestCrossAccountObjectAccess(TestBase, MockHttpTest):
def test_download_with_read_only_access(self):
req_handler = self._fake_cross_account_auth(True, False)
empty_str_etag = 'd41d8cd98f00b204e9800998ecf8427e'
fake_conn = self.fake_http_connection(403, on_request=req_handler,
etags=[empty_str_etag])
etags=[EMPTY_ETAG])
args, env = self._make_cmd('download', cmd_args=[self.cont,
self.obj.lstrip('/'),

@ -32,6 +32,7 @@ from six.moves import reload_module
from .utils import MockHttpTest, fake_get_auth_keystone, StubResponse
from swiftclient.utils import EMPTY_ETAG
from swiftclient import client as c
import swiftclient.utils
import swiftclient
@ -102,8 +103,7 @@ class MockHttpResponse(object):
self.requests_params = None
self.verify = verify
self.md5sum = md5()
# zero byte hash
self.headers = {'etag': '"d41d8cd98f00b204e9800998ecf8427e"'}
self.headers = {'etag': '"%s"' % EMPTY_ETAG}
if headers:
self.headers.update(headers)
@ -806,6 +806,7 @@ class TestPutObject(MockHttpTest):
chunk_size=chunk_size)
self.assertNotEquals(etag, contents.get_md5sum())
self.assertEquals(etag, 'badresponseetag')
self.assertEquals(raw_data_md5, contents.get_md5sum())
def test_md5_match(self):

@ -25,6 +25,7 @@ from six.moves import reload_module
from six.moves.urllib.parse import urlparse, ParseResult
from swiftclient import client as c
from swiftclient import shell as s
from swiftclient.utils import EMPTY_ETAG
def fake_get_auth_keystone(expected_os_options=None, exc=None,
@ -127,7 +128,7 @@ def fake_http_connect(*code_iter, **kwargs):
'last-modified': self.timestamp,
'x-object-meta-test': 'testing',
'etag':
self.etag or '"d41d8cd98f00b204e9800998ecf8427e"',
self.etag or '"%s"' % EMPTY_ETAG,
'x-works': 'yes',
'x-account-container-count': 12345}
if not self.timestamp: