From 13780f37c3f5e5f18f10f131d1ef39c010457e87 Mon Sep 17 00:00:00 2001 From: Daniel Wakefield <daniel.wakefield@hp.com> Date: Wed, 4 Mar 2015 14:01:55 +0000 Subject: [PATCH] Add improvements to MD5 validation. With MD5Sum checking being added, a concern was brought up that It was a change with no possibility of reverting to the old behaviour. This change adds the flag '--ignore-checksum' to the upload subcommand allowing the checks to be turned off. Changed occurrences of the magic string for a null md5 to use a descriptive constant instead. Updated Error messages generated when validation fails. They should now be more descriptive and not output a literal newline sequence. Change-Id: Id1756cbb6700bb7e38f0ee0e75bc535e37f777ed --- swiftclient/service.py | 48 +++++++++++---------- swiftclient/shell.py | 8 +++- swiftclient/utils.py | 1 + tests/unit/test_service.py | 77 ++++++++++++++++++++++++++-------- tests/unit/test_shell.py | 20 ++++----- tests/unit/test_swiftclient.py | 5 ++- tests/unit/utils.py | 3 +- 7 files changed, 107 insertions(+), 55 deletions(-) diff --git a/swiftclient/service.py b/swiftclient/service.py index f24d4300..232e9c3d 100644 --- a/swiftclient/service.py +++ b/swiftclient/service.py @@ -40,7 +40,7 @@ from swiftclient.command_helpers import ( stat_account, stat_container, stat_object ) from swiftclient.utils import ( - config_true_value, ReadableToIterable, LengthWrapper + config_true_value, ReadableToIterable, LengthWrapper, EMPTY_ETAG ) from swiftclient.exceptions import ClientException from swiftclient.multithreading import MultiThreadingManager @@ -174,7 +174,8 @@ _default_local_options = { 'delimiter': None, 'fail_fast': False, 'human': False, - 'dir_marker': False + 'dir_marker': False, + 'checksum': True } POLICY = 'X-Storage-Policy' @@ -1410,16 +1411,16 @@ class SwiftService(object): res['headers'] = put_headers if options['changed']: try: - _empty_string_etag = 'd41d8cd98f00b204e9800998ecf8427e' headers = conn.head_object(container, obj) ct = headers.get('content-type') cl = int(headers.get('content-length')) et = headers.get('etag') mt = headers.get('x-object-meta-mtime') - if ct.split(';', 1)[0] == 'text/directory' and \ - cl == 0 and \ - et == _empty_string_etag and \ - mt == put_headers['x-object-meta-mtime']: + + if (ct.split(';', 1)[0] == 'text/directory' and + cl == 0 and + et == EMPTY_ETAG and + mt == put_headers['x-object-meta-mtime']): res['success'] = True return res except ClientException as err: @@ -1467,17 +1468,19 @@ class SwiftService(object): fp = open(path, 'rb') fp.seek(segment_start) - contents = LengthWrapper(fp, segment_size, md5=True) + contents = LengthWrapper(fp, segment_size, md5=options['checksum']) etag = conn.put_object(segment_container, segment_name, contents, content_length=segment_size, response_dict=results_dict) - if etag and etag != contents.get_md5sum(): - raise SwiftError('Segment upload failed: remote and local ' - 'object md5 did not match, {0} != {1}\n' - 'remote segment has not been removed.' - .format(etag, contents.get_md5sum())) + if options['checksum'] and etag and etag != contents.get_md5sum(): + raise SwiftError('Segment {0}: upload verification failed: ' + 'md5 mismatch, local {1} != remote {2} ' + '(remote segment has not been removed)' + .format(segment_index, + contents.get_md5sum(), + etag)) res.update({ 'success': True, @@ -1707,11 +1710,13 @@ class SwiftService(object): obr = {} if path is not None: content_length = getsize(path) - contents = LengthWrapper(open(path, 'rb'), content_length, - md5=True) + contents = LengthWrapper(open(path, 'rb'), + content_length, + md5=options['checksum']) else: content_length = None - contents = ReadableToIterable(stream, md5=True) + contents = ReadableToIterable(stream, + md5=options['checksum']) etag = conn.put_object( container, obj, contents, @@ -1720,11 +1725,12 @@ class SwiftService(object): ) res['response_dict'] = obr - if etag and etag != contents.get_md5sum(): - raise SwiftError('Object upload failed: remote and local ' - 'object md5 did not match, {0} != {1}\n' - 'remote object has not been removed.' - .format(etag, contents.get_md5sum())) + if (options['checksum'] and + etag and etag != contents.get_md5sum()): + raise SwiftError('Object upload verification failed: ' + 'md5 mismatch, local {0} != remote {1} ' + '(remote object has not been removed)' + .format(contents.get_md5sum(), etag)) if old_manifest or old_slo_manifest_paths: drs = [] diff --git a/swiftclient/shell.py b/swiftclient/shell.py index 439a92f3..3d3ef51a 100755 --- a/swiftclient/shell.py +++ b/swiftclient/shell.py @@ -637,7 +637,7 @@ def st_post(parser, args, output_manager): st_upload_options = '''[--changed] [--skip-identical] [--segment-size <size>] [--segment-container <container>] [--leave-segments] [--object-threads <thread>] [--segment-threads <threads>] - [--header <header>] [--use-slo] + [--header <header>] [--use-slo] [--ignore-checksum] [--object-name <object-name>] <container> <file_or_directory> ''' @@ -681,6 +681,7 @@ Optional arguments: Upload file and name object to <object-name> or upload dir and use <object-name> as object prefix instead of folder name. + --ignore-checksum Turn off checksum validation for uploads. '''.strip('\n') @@ -733,6 +734,9 @@ def st_upload(parser, args, output_manager): '', '--object-name', dest='object_name', help='Upload file and name object to <object-name> or upload dir and ' 'use <object-name> as object prefix instead of folder name.') + parser.add_option( + '', '--ignore-checksum', dest='checksum', default=True, + action='store_false', help='Turn off checksum validation for uploads.') (options, args) = parse_args(parser, args) args = args[1:] if len(args) < 2: @@ -848,7 +852,7 @@ def st_upload(parser, args, output_manager): output_manager.error("%s" % error) except SwiftError as e: - output_manager.error("%s" % e) + output_manager.error(e.value) st_capabilities_options = "[<proxy_url>]" diff --git a/swiftclient/utils.py b/swiftclient/utils.py index f0fcc019..2f4ec3f8 100644 --- a/swiftclient/utils.py +++ b/swiftclient/utils.py @@ -21,6 +21,7 @@ import time import six TRUE_VALUES = set(('true', '1', 'yes', 'on', 't', 'y')) +EMPTY_ETAG = 'd41d8cd98f00b204e9800998ecf8427e' def config_true_value(value): diff --git a/tests/unit/test_service.py b/tests/unit/test_service.py index 3309813f..000b0ba2 100644 --- a/tests/unit/test_service.py +++ b/tests/unit/test_service.py @@ -563,8 +563,8 @@ class TestServiceUpload(testtools.TestCase): if hasattr(self, 'assertDictEqual'): self.assertDictEqual(a, b, m) else: - self.assertTrue(isinstance(a, dict), m) - self.assertTrue(isinstance(b, dict), m) + self.assertIsInstance(a, dict, m) + self.assertIsInstance(b, dict, m) self.assertEqual(len(a), len(b), m) for k, v in a.items(): self.assertIn(k, b, m) @@ -605,7 +605,8 @@ class TestServiceUpload(testtools.TestCase): segment_size=10, segment_index=2, obj_name='test_o', - options={'segment_container': None}) + options={'segment_container': None, + 'checksum': True}) self._assertDictEqual(r, expected_r) @@ -623,6 +624,45 @@ class TestServiceUpload(testtools.TestCase): self.assertEqual(contents.read(), b'b' * 10) self.assertEqual(contents.get_md5sum(), md5(b'b' * 10).hexdigest()) + def test_etag_mismatch_with_ignore_checksum(self): + def _consuming_conn(*a, **kw): + contents = a[2] + contents.read() # Force md5 calculation + return 'badresponseetag' + + with tempfile.NamedTemporaryFile() as f: + f.write(b'a' * 10) + f.write(b'b' * 10) + f.write(b'c' * 10) + f.flush() + + mock_conn = mock.Mock() + mock_conn.put_object.side_effect = _consuming_conn + type(mock_conn).attempts = mock.PropertyMock(return_value=2) + + s = SwiftService() + r = s._upload_segment_job(conn=mock_conn, + path=f.name, + container='test_c', + segment_name='test_s_1', + segment_start=10, + segment_size=10, + segment_index=2, + obj_name='test_o', + options={'segment_container': None, + 'checksum': False}) + + self.assertNotIn('error', r) + self.assertEqual(mock_conn.put_object.call_count, 1) + mock_conn.put_object.assert_called_with('test_c_segments', + 'test_s_1', + mock.ANY, + content_length=10, + response_dict={}) + contents = mock_conn.put_object.call_args[0][2] + # Check that md5sum is not calculated. + self.assertEqual(contents.get_md5sum(), '') + def test_upload_segment_job_etag_mismatch(self): def _consuming_conn(*a, **kw): contents = a[2] @@ -648,10 +688,11 @@ class TestServiceUpload(testtools.TestCase): segment_size=10, segment_index=2, obj_name='test_o', - options={'segment_container': None}) + options={'segment_container': None, + 'checksum': True}) self.assertIn('error', r) - self.assertTrue(r['error'].value.find('md5 did not match') >= 0) + self.assertIn('md5 mismatch', str(r['error'])) self.assertEqual(mock_conn.put_object.call_count, 1) mock_conn.put_object.assert_called_with('test_c_segments', @@ -664,7 +705,7 @@ class TestServiceUpload(testtools.TestCase): def test_upload_object_job_file(self): # Uploading a file results in the file object being wrapped in a - # LengthWrapper. This test sets the options is such a way that much + # LengthWrapper. This test sets the options in such a way that much # of _upload_object_job is skipped bringing the critical path down # to around 60 lines to ease testing. with tempfile.NamedTemporaryFile() as f: @@ -696,12 +737,11 @@ class TestServiceUpload(testtools.TestCase): 'skip_identical': False, 'leave_segments': True, 'header': '', - 'segment_size': 0}) + 'segment_size': 0, + 'checksum': True}) - # Check for mtime and path separately as they are calculated - # from the temp file and will be different each time. mtime = float(r['headers']['x-object-meta-mtime']) - self.assertAlmostEqual(mtime, expected_mtime, delta=1) + self.assertAlmostEqual(mtime, expected_mtime, delta=0.5) del r['headers']['x-object-meta-mtime'] self.assertEqual(r['path'], f.name) @@ -718,7 +758,8 @@ class TestServiceUpload(testtools.TestCase): self.assertIsInstance(contents, utils.LengthWrapper) self.assertEqual(len(contents), 30) # This read forces the LengthWrapper to calculate the md5 - # for the read content. + # for the read content. This also checks that LengthWrapper was + # initialized with md5=True self.assertEqual(contents.read(), b'a' * 30) self.assertEqual(contents.get_md5sum(), md5(b'a' * 30).hexdigest()) @@ -740,7 +781,7 @@ class TestServiceUpload(testtools.TestCase): 'success': True, 'path': None, } - expected_mtime = round(time.time()) + expected_mtime = float(time.time()) mock_conn = mock.Mock() mock_conn.put_object.return_value = '' @@ -755,10 +796,11 @@ class TestServiceUpload(testtools.TestCase): 'skip_identical': False, 'leave_segments': True, 'header': '', - 'segment_size': 0}) + 'segment_size': 0, + 'checksum': True}) mtime = float(r['headers']['x-object-meta-mtime']) - self.assertAlmostEqual(mtime, expected_mtime, delta=10) + self.assertAlmostEqual(mtime, expected_mtime, delta=0.5) del r['headers']['x-object-meta-mtime'] self._assertDictEqual(r, expected_r) @@ -771,7 +813,7 @@ class TestServiceUpload(testtools.TestCase): contents = mock_conn.put_object.call_args[0][2] self.assertIsInstance(contents, utils.ReadableToIterable) self.assertEqual(contents.chunk_size, 65536) - # next retreives the first chunk of the stream or len(chunk_size) + # next retrieves the first chunk of the stream or len(chunk_size) # or less, it also forces the md5 to be calculated. self.assertEqual(next(contents), b'a' * 30) self.assertEqual(contents.get_md5sum(), md5(b'a' * 30).hexdigest()) @@ -801,11 +843,12 @@ class TestServiceUpload(testtools.TestCase): 'skip_identical': False, 'leave_segments': True, 'header': '', - 'segment_size': 0}) + 'segment_size': 0, + 'checksum': True}) self.assertEqual(r['success'], False) self.assertIn('error', r) - self.assertTrue(r['error'].value.find('md5 did not match') >= 0) + self.assertIn('md5 mismatch', str(r['error'])) self.assertEqual(mock_conn.put_object.call_count, 1) expected_headers = {'x-object-meta-mtime': mock.ANY} diff --git a/tests/unit/test_shell.py b/tests/unit/test_shell.py index 28aea7d6..dada0fa3 100644 --- a/tests/unit/test_shell.py +++ b/tests/unit/test_shell.py @@ -32,6 +32,7 @@ import swiftclient.utils from os.path import basename, dirname from tests.unit.test_swiftclient import MockHttpTest from tests.unit.utils import CaptureOutput, fake_get_auth_keystone +from swiftclient.utils import EMPTY_ETAG if six.PY2: @@ -314,7 +315,7 @@ class TestShell(unittest.TestCase): 'etag': '2cbbfe139a744d6abbe695e17f3c1991'}, objcontent), ({'content-type': 'text/plain', - 'etag': 'd41d8cd98f00b204e9800998ecf8427e'}, + 'etag': EMPTY_ETAG}, '') ] @@ -370,7 +371,7 @@ class TestShell(unittest.TestCase): @mock.patch('swiftclient.service.Connection') def test_download_no_content_type(self, connection): connection.return_value.get_object.return_value = [ - {'etag': 'd41d8cd98f00b204e9800998ecf8427e'}, + {'etag': EMPTY_ETAG}, ''] # Test downloading whole container @@ -400,8 +401,7 @@ class TestShell(unittest.TestCase): def test_upload(self, connection, walk): connection.return_value.head_object.return_value = { 'content-length': '0'} - connection.return_value.put_object.return_value = ( - 'd41d8cd98f00b204e9800998ecf8427e') + connection.return_value.put_object.return_value = EMPTY_ETAG connection.return_value.attempts = 0 argv = ["", "upload", "container", self.tmpfile, "-H", "X-Storage-Policy:one"] @@ -477,8 +477,7 @@ class TestShell(unittest.TestCase): connection.return_value.get_object.return_value = ({}, json.dumps( [{'name': 'container1/old_seg1'}, {'name': 'container2/old_seg2'}] )) - connection.return_value.put_object.return_value = ( - 'd41d8cd98f00b204e9800998ecf8427e') + connection.return_value.put_object.return_value = EMPTY_ETAG swiftclient.shell.main(argv) connection.return_value.put_object.assert_called_with( 'container', @@ -508,8 +507,7 @@ class TestShell(unittest.TestCase): connection.return_value.head_object.return_value = { 'content-length': '0'} connection.return_value.attempts = 0 - connection.return_value.put_object.return_value = ( - 'd41d8cd98f00b204e9800998ecf8427e') + connection.return_value.put_object.return_value = EMPTY_ETAG argv = ["", "upload", "container", self.tmpfile, "-S", "10", "-C", "container"] with open(self.tmpfile, "wb") as fh: @@ -1660,9 +1658,8 @@ class TestCrossAccountObjectAccess(TestBase, MockHttpTest): def test_download_with_read_write_access(self): req_handler = self._fake_cross_account_auth(True, True) - empty_str_etag = 'd41d8cd98f00b204e9800998ecf8427e' fake_conn = self.fake_http_connection(403, on_request=req_handler, - etags=[empty_str_etag]) + etags=[EMPTY_ETAG]) args, env = self._make_cmd('download', cmd_args=[self.cont, self.obj.lstrip('/'), @@ -1683,9 +1680,8 @@ class TestCrossAccountObjectAccess(TestBase, MockHttpTest): def test_download_with_read_only_access(self): req_handler = self._fake_cross_account_auth(True, False) - empty_str_etag = 'd41d8cd98f00b204e9800998ecf8427e' fake_conn = self.fake_http_connection(403, on_request=req_handler, - etags=[empty_str_etag]) + etags=[EMPTY_ETAG]) args, env = self._make_cmd('download', cmd_args=[self.cont, self.obj.lstrip('/'), diff --git a/tests/unit/test_swiftclient.py b/tests/unit/test_swiftclient.py index 9ebcff55..d1990506 100644 --- a/tests/unit/test_swiftclient.py +++ b/tests/unit/test_swiftclient.py @@ -32,6 +32,7 @@ from six.moves import reload_module from .utils import MockHttpTest, fake_get_auth_keystone, StubResponse +from swiftclient.utils import EMPTY_ETAG from swiftclient import client as c import swiftclient.utils import swiftclient @@ -102,8 +103,7 @@ class MockHttpResponse(object): self.requests_params = None self.verify = verify self.md5sum = md5() - # zero byte hash - self.headers = {'etag': '"d41d8cd98f00b204e9800998ecf8427e"'} + self.headers = {'etag': '"%s"' % EMPTY_ETAG} if headers: self.headers.update(headers) @@ -806,6 +806,7 @@ class TestPutObject(MockHttpTest): chunk_size=chunk_size) self.assertNotEquals(etag, contents.get_md5sum()) + self.assertEquals(etag, 'badresponseetag') self.assertEquals(raw_data_md5, contents.get_md5sum()) def test_md5_match(self): diff --git a/tests/unit/utils.py b/tests/unit/utils.py index 88d6d129..3e87ea28 100644 --- a/tests/unit/utils.py +++ b/tests/unit/utils.py @@ -25,6 +25,7 @@ from six.moves import reload_module from six.moves.urllib.parse import urlparse, ParseResult from swiftclient import client as c from swiftclient import shell as s +from swiftclient.utils import EMPTY_ETAG def fake_get_auth_keystone(expected_os_options=None, exc=None, @@ -127,7 +128,7 @@ def fake_http_connect(*code_iter, **kwargs): 'last-modified': self.timestamp, 'x-object-meta-test': 'testing', 'etag': - self.etag or '"d41d8cd98f00b204e9800998ecf8427e"', + self.etag or '"%s"' % EMPTY_ETAG, 'x-works': 'yes', 'x-account-container-count': 12345} if not self.timestamp: