From 13780f37c3f5e5f18f10f131d1ef39c010457e87 Mon Sep 17 00:00:00 2001
From: Daniel Wakefield <daniel.wakefield@hp.com>
Date: Wed, 4 Mar 2015 14:01:55 +0000
Subject: [PATCH] Add improvements to MD5 validation.

With MD5Sum checking being added, a concern was brought up that It was
a change with no possibility of reverting to the old behaviour.
This change adds the flag '--ignore-checksum' to the upload subcommand
allowing the checks to be turned off.

Changed occurrences of the magic string for a null md5 to use a descriptive
constant instead.

Updated Error messages generated when validation fails. They should now be more descriptive
and not output a literal newline sequence.

Change-Id: Id1756cbb6700bb7e38f0ee0e75bc535e37f777ed
---
 swiftclient/service.py         | 48 +++++++++++----------
 swiftclient/shell.py           |  8 +++-
 swiftclient/utils.py           |  1 +
 tests/unit/test_service.py     | 77 ++++++++++++++++++++++++++--------
 tests/unit/test_shell.py       | 20 ++++-----
 tests/unit/test_swiftclient.py |  5 ++-
 tests/unit/utils.py            |  3 +-
 7 files changed, 107 insertions(+), 55 deletions(-)

diff --git a/swiftclient/service.py b/swiftclient/service.py
index f24d4300..232e9c3d 100644
--- a/swiftclient/service.py
+++ b/swiftclient/service.py
@@ -40,7 +40,7 @@ from swiftclient.command_helpers import (
     stat_account, stat_container, stat_object
 )
 from swiftclient.utils import (
-    config_true_value, ReadableToIterable, LengthWrapper
+    config_true_value, ReadableToIterable, LengthWrapper, EMPTY_ETAG
 )
 from swiftclient.exceptions import ClientException
 from swiftclient.multithreading import MultiThreadingManager
@@ -174,7 +174,8 @@ _default_local_options = {
     'delimiter': None,
     'fail_fast': False,
     'human': False,
-    'dir_marker': False
+    'dir_marker': False,
+    'checksum': True
 }
 
 POLICY = 'X-Storage-Policy'
@@ -1410,16 +1411,16 @@ class SwiftService(object):
         res['headers'] = put_headers
         if options['changed']:
             try:
-                _empty_string_etag = 'd41d8cd98f00b204e9800998ecf8427e'
                 headers = conn.head_object(container, obj)
                 ct = headers.get('content-type')
                 cl = int(headers.get('content-length'))
                 et = headers.get('etag')
                 mt = headers.get('x-object-meta-mtime')
-                if ct.split(';', 1)[0] == 'text/directory' and \
-                        cl == 0 and \
-                        et == _empty_string_etag and \
-                        mt == put_headers['x-object-meta-mtime']:
+
+                if (ct.split(';', 1)[0] == 'text/directory' and
+                        cl == 0 and
+                        et == EMPTY_ETAG and
+                        mt == put_headers['x-object-meta-mtime']):
                     res['success'] = True
                     return res
             except ClientException as err:
@@ -1467,17 +1468,19 @@ class SwiftService(object):
             fp = open(path, 'rb')
             fp.seek(segment_start)
 
-            contents = LengthWrapper(fp, segment_size, md5=True)
+            contents = LengthWrapper(fp, segment_size, md5=options['checksum'])
             etag = conn.put_object(segment_container,
                                    segment_name, contents,
                                    content_length=segment_size,
                                    response_dict=results_dict)
 
-            if etag and etag != contents.get_md5sum():
-                raise SwiftError('Segment upload failed: remote and local '
-                                 'object md5 did not match, {0} != {1}\n'
-                                 'remote segment has not been removed.'
-                                 .format(etag, contents.get_md5sum()))
+            if options['checksum'] and etag and etag != contents.get_md5sum():
+                raise SwiftError('Segment {0}: upload verification failed: '
+                                 'md5 mismatch, local {1} != remote {2} '
+                                 '(remote segment has not been removed)'
+                                 .format(segment_index,
+                                         contents.get_md5sum(),
+                                         etag))
 
             res.update({
                 'success': True,
@@ -1707,11 +1710,13 @@ class SwiftService(object):
                 obr = {}
                 if path is not None:
                     content_length = getsize(path)
-                    contents = LengthWrapper(open(path, 'rb'), content_length,
-                                             md5=True)
+                    contents = LengthWrapper(open(path, 'rb'),
+                                             content_length,
+                                             md5=options['checksum'])
                 else:
                     content_length = None
-                    contents = ReadableToIterable(stream, md5=True)
+                    contents = ReadableToIterable(stream,
+                                                  md5=options['checksum'])
 
                 etag = conn.put_object(
                     container, obj, contents,
@@ -1720,11 +1725,12 @@ class SwiftService(object):
                 )
                 res['response_dict'] = obr
 
-                if etag and etag != contents.get_md5sum():
-                    raise SwiftError('Object upload failed: remote and local '
-                                     'object md5 did not match, {0} != {1}\n'
-                                     'remote object has not been removed.'
-                                     .format(etag, contents.get_md5sum()))
+                if (options['checksum'] and
+                        etag and etag != contents.get_md5sum()):
+                    raise SwiftError('Object upload verification failed: '
+                                     'md5 mismatch, local {0} != remote {1} '
+                                     '(remote object has not been removed)'
+                                     .format(contents.get_md5sum(), etag))
 
             if old_manifest or old_slo_manifest_paths:
                 drs = []
diff --git a/swiftclient/shell.py b/swiftclient/shell.py
index 439a92f3..3d3ef51a 100755
--- a/swiftclient/shell.py
+++ b/swiftclient/shell.py
@@ -637,7 +637,7 @@ def st_post(parser, args, output_manager):
 st_upload_options = '''[--changed] [--skip-identical] [--segment-size <size>]
                     [--segment-container <container>] [--leave-segments]
                     [--object-threads <thread>] [--segment-threads <threads>]
-                    [--header <header>] [--use-slo]
+                    [--header <header>] [--use-slo] [--ignore-checksum]
                     [--object-name <object-name>]
                     <container> <file_or_directory>
 '''
@@ -681,6 +681,7 @@ Optional arguments:
                         Upload file and name object to <object-name> or upload
                         dir and use <object-name> as object prefix instead of
                         folder name.
+  --ignore-checksum     Turn off checksum validation for uploads.
 '''.strip('\n')
 
 
@@ -733,6 +734,9 @@ def st_upload(parser, args, output_manager):
         '', '--object-name', dest='object_name',
         help='Upload file and name object to <object-name> or upload dir and '
         'use <object-name> as object prefix instead of folder name.')
+    parser.add_option(
+        '', '--ignore-checksum', dest='checksum', default=True,
+        action='store_false', help='Turn off checksum validation for uploads.')
     (options, args) = parse_args(parser, args)
     args = args[1:]
     if len(args) < 2:
@@ -848,7 +852,7 @@ def st_upload(parser, args, output_manager):
                         output_manager.error("%s" % error)
 
         except SwiftError as e:
-            output_manager.error("%s" % e)
+            output_manager.error(e.value)
 
 
 st_capabilities_options = "[<proxy_url>]"
diff --git a/swiftclient/utils.py b/swiftclient/utils.py
index f0fcc019..2f4ec3f8 100644
--- a/swiftclient/utils.py
+++ b/swiftclient/utils.py
@@ -21,6 +21,7 @@ import time
 import six
 
 TRUE_VALUES = set(('true', '1', 'yes', 'on', 't', 'y'))
+EMPTY_ETAG = 'd41d8cd98f00b204e9800998ecf8427e'
 
 
 def config_true_value(value):
diff --git a/tests/unit/test_service.py b/tests/unit/test_service.py
index 3309813f..000b0ba2 100644
--- a/tests/unit/test_service.py
+++ b/tests/unit/test_service.py
@@ -563,8 +563,8 @@ class TestServiceUpload(testtools.TestCase):
         if hasattr(self, 'assertDictEqual'):
             self.assertDictEqual(a, b, m)
         else:
-            self.assertTrue(isinstance(a, dict), m)
-            self.assertTrue(isinstance(b, dict), m)
+            self.assertIsInstance(a, dict, m)
+            self.assertIsInstance(b, dict, m)
             self.assertEqual(len(a), len(b), m)
             for k, v in a.items():
                 self.assertIn(k, b, m)
@@ -605,7 +605,8 @@ class TestServiceUpload(testtools.TestCase):
                                       segment_size=10,
                                       segment_index=2,
                                       obj_name='test_o',
-                                      options={'segment_container': None})
+                                      options={'segment_container': None,
+                                               'checksum': True})
 
             self._assertDictEqual(r, expected_r)
 
@@ -623,6 +624,45 @@ class TestServiceUpload(testtools.TestCase):
             self.assertEqual(contents.read(), b'b' * 10)
             self.assertEqual(contents.get_md5sum(), md5(b'b' * 10).hexdigest())
 
+    def test_etag_mismatch_with_ignore_checksum(self):
+        def _consuming_conn(*a, **kw):
+            contents = a[2]
+            contents.read()  # Force md5 calculation
+            return 'badresponseetag'
+
+        with tempfile.NamedTemporaryFile() as f:
+            f.write(b'a' * 10)
+            f.write(b'b' * 10)
+            f.write(b'c' * 10)
+            f.flush()
+
+            mock_conn = mock.Mock()
+            mock_conn.put_object.side_effect = _consuming_conn
+            type(mock_conn).attempts = mock.PropertyMock(return_value=2)
+
+            s = SwiftService()
+            r = s._upload_segment_job(conn=mock_conn,
+                                      path=f.name,
+                                      container='test_c',
+                                      segment_name='test_s_1',
+                                      segment_start=10,
+                                      segment_size=10,
+                                      segment_index=2,
+                                      obj_name='test_o',
+                                      options={'segment_container': None,
+                                               'checksum': False})
+
+            self.assertNotIn('error', r)
+            self.assertEqual(mock_conn.put_object.call_count, 1)
+            mock_conn.put_object.assert_called_with('test_c_segments',
+                                                    'test_s_1',
+                                                    mock.ANY,
+                                                    content_length=10,
+                                                    response_dict={})
+            contents = mock_conn.put_object.call_args[0][2]
+            # Check that md5sum is not calculated.
+            self.assertEqual(contents.get_md5sum(), '')
+
     def test_upload_segment_job_etag_mismatch(self):
         def _consuming_conn(*a, **kw):
             contents = a[2]
@@ -648,10 +688,11 @@ class TestServiceUpload(testtools.TestCase):
                                       segment_size=10,
                                       segment_index=2,
                                       obj_name='test_o',
-                                      options={'segment_container': None})
+                                      options={'segment_container': None,
+                                               'checksum': True})
 
             self.assertIn('error', r)
-            self.assertTrue(r['error'].value.find('md5 did not match') >= 0)
+            self.assertIn('md5 mismatch', str(r['error']))
 
             self.assertEqual(mock_conn.put_object.call_count, 1)
             mock_conn.put_object.assert_called_with('test_c_segments',
@@ -664,7 +705,7 @@ class TestServiceUpload(testtools.TestCase):
 
     def test_upload_object_job_file(self):
         # Uploading a file results in the file object being wrapped in a
-        # LengthWrapper. This test sets the options is such a way that much
+        # LengthWrapper. This test sets the options in such a way that much
         # of _upload_object_job is skipped bringing the critical path down
         # to around 60 lines to ease testing.
         with tempfile.NamedTemporaryFile() as f:
@@ -696,12 +737,11 @@ class TestServiceUpload(testtools.TestCase):
                                               'skip_identical': False,
                                               'leave_segments': True,
                                               'header': '',
-                                              'segment_size': 0})
+                                              'segment_size': 0,
+                                              'checksum': True})
 
-            # Check for mtime and path separately as they are calculated
-            # from the temp file and will be different each time.
             mtime = float(r['headers']['x-object-meta-mtime'])
-            self.assertAlmostEqual(mtime, expected_mtime, delta=1)
+            self.assertAlmostEqual(mtime, expected_mtime, delta=0.5)
             del r['headers']['x-object-meta-mtime']
 
             self.assertEqual(r['path'], f.name)
@@ -718,7 +758,8 @@ class TestServiceUpload(testtools.TestCase):
             self.assertIsInstance(contents, utils.LengthWrapper)
             self.assertEqual(len(contents), 30)
             # This read forces the LengthWrapper to calculate the md5
-            # for the read content.
+            # for the read content. This also checks that LengthWrapper was
+            # initialized with md5=True
             self.assertEqual(contents.read(), b'a' * 30)
             self.assertEqual(contents.get_md5sum(), md5(b'a' * 30).hexdigest())
 
@@ -740,7 +781,7 @@ class TestServiceUpload(testtools.TestCase):
                 'success': True,
                 'path': None,
             }
-            expected_mtime = round(time.time())
+            expected_mtime = float(time.time())
 
             mock_conn = mock.Mock()
             mock_conn.put_object.return_value = ''
@@ -755,10 +796,11 @@ class TestServiceUpload(testtools.TestCase):
                                               'skip_identical': False,
                                               'leave_segments': True,
                                               'header': '',
-                                              'segment_size': 0})
+                                              'segment_size': 0,
+                                              'checksum': True})
 
             mtime = float(r['headers']['x-object-meta-mtime'])
-            self.assertAlmostEqual(mtime, expected_mtime, delta=10)
+            self.assertAlmostEqual(mtime, expected_mtime, delta=0.5)
             del r['headers']['x-object-meta-mtime']
 
             self._assertDictEqual(r, expected_r)
@@ -771,7 +813,7 @@ class TestServiceUpload(testtools.TestCase):
             contents = mock_conn.put_object.call_args[0][2]
             self.assertIsInstance(contents, utils.ReadableToIterable)
             self.assertEqual(contents.chunk_size, 65536)
-            # next retreives the first chunk of the stream or len(chunk_size)
+            # next retrieves the first chunk of the stream or len(chunk_size)
             # or less, it also forces the md5 to be calculated.
             self.assertEqual(next(contents), b'a' * 30)
             self.assertEqual(contents.get_md5sum(), md5(b'a' * 30).hexdigest())
@@ -801,11 +843,12 @@ class TestServiceUpload(testtools.TestCase):
                                               'skip_identical': False,
                                               'leave_segments': True,
                                               'header': '',
-                                              'segment_size': 0})
+                                              'segment_size': 0,
+                                              'checksum': True})
 
             self.assertEqual(r['success'], False)
             self.assertIn('error', r)
-            self.assertTrue(r['error'].value.find('md5 did not match') >= 0)
+            self.assertIn('md5 mismatch', str(r['error']))
 
             self.assertEqual(mock_conn.put_object.call_count, 1)
             expected_headers = {'x-object-meta-mtime': mock.ANY}
diff --git a/tests/unit/test_shell.py b/tests/unit/test_shell.py
index 28aea7d6..dada0fa3 100644
--- a/tests/unit/test_shell.py
+++ b/tests/unit/test_shell.py
@@ -32,6 +32,7 @@ import swiftclient.utils
 from os.path import basename, dirname
 from tests.unit.test_swiftclient import MockHttpTest
 from tests.unit.utils import CaptureOutput, fake_get_auth_keystone
+from swiftclient.utils import EMPTY_ETAG
 
 
 if six.PY2:
@@ -314,7 +315,7 @@ class TestShell(unittest.TestCase):
               'etag': '2cbbfe139a744d6abbe695e17f3c1991'},
              objcontent),
             ({'content-type': 'text/plain',
-              'etag': 'd41d8cd98f00b204e9800998ecf8427e'},
+              'etag': EMPTY_ETAG},
              '')
         ]
 
@@ -370,7 +371,7 @@ class TestShell(unittest.TestCase):
     @mock.patch('swiftclient.service.Connection')
     def test_download_no_content_type(self, connection):
         connection.return_value.get_object.return_value = [
-            {'etag': 'd41d8cd98f00b204e9800998ecf8427e'},
+            {'etag': EMPTY_ETAG},
             '']
 
         # Test downloading whole container
@@ -400,8 +401,7 @@ class TestShell(unittest.TestCase):
     def test_upload(self, connection, walk):
         connection.return_value.head_object.return_value = {
             'content-length': '0'}
-        connection.return_value.put_object.return_value = (
-            'd41d8cd98f00b204e9800998ecf8427e')
+        connection.return_value.put_object.return_value = EMPTY_ETAG
         connection.return_value.attempts = 0
         argv = ["", "upload", "container", self.tmpfile,
                 "-H", "X-Storage-Policy:one"]
@@ -477,8 +477,7 @@ class TestShell(unittest.TestCase):
         connection.return_value.get_object.return_value = ({}, json.dumps(
             [{'name': 'container1/old_seg1'}, {'name': 'container2/old_seg2'}]
         ))
-        connection.return_value.put_object.return_value = (
-            'd41d8cd98f00b204e9800998ecf8427e')
+        connection.return_value.put_object.return_value = EMPTY_ETAG
         swiftclient.shell.main(argv)
         connection.return_value.put_object.assert_called_with(
             'container',
@@ -508,8 +507,7 @@ class TestShell(unittest.TestCase):
         connection.return_value.head_object.return_value = {
             'content-length': '0'}
         connection.return_value.attempts = 0
-        connection.return_value.put_object.return_value = (
-            'd41d8cd98f00b204e9800998ecf8427e')
+        connection.return_value.put_object.return_value = EMPTY_ETAG
         argv = ["", "upload", "container", self.tmpfile, "-S", "10",
                 "-C", "container"]
         with open(self.tmpfile, "wb") as fh:
@@ -1660,9 +1658,8 @@ class TestCrossAccountObjectAccess(TestBase, MockHttpTest):
 
     def test_download_with_read_write_access(self):
         req_handler = self._fake_cross_account_auth(True, True)
-        empty_str_etag = 'd41d8cd98f00b204e9800998ecf8427e'
         fake_conn = self.fake_http_connection(403, on_request=req_handler,
-                                              etags=[empty_str_etag])
+                                              etags=[EMPTY_ETAG])
 
         args, env = self._make_cmd('download', cmd_args=[self.cont,
                                                          self.obj.lstrip('/'),
@@ -1683,9 +1680,8 @@ class TestCrossAccountObjectAccess(TestBase, MockHttpTest):
 
     def test_download_with_read_only_access(self):
         req_handler = self._fake_cross_account_auth(True, False)
-        empty_str_etag = 'd41d8cd98f00b204e9800998ecf8427e'
         fake_conn = self.fake_http_connection(403, on_request=req_handler,
-                                              etags=[empty_str_etag])
+                                              etags=[EMPTY_ETAG])
 
         args, env = self._make_cmd('download', cmd_args=[self.cont,
                                                          self.obj.lstrip('/'),
diff --git a/tests/unit/test_swiftclient.py b/tests/unit/test_swiftclient.py
index 9ebcff55..d1990506 100644
--- a/tests/unit/test_swiftclient.py
+++ b/tests/unit/test_swiftclient.py
@@ -32,6 +32,7 @@ from six.moves import reload_module
 
 from .utils import MockHttpTest, fake_get_auth_keystone, StubResponse
 
+from swiftclient.utils import EMPTY_ETAG
 from swiftclient import client as c
 import swiftclient.utils
 import swiftclient
@@ -102,8 +103,7 @@ class MockHttpResponse(object):
         self.requests_params = None
         self.verify = verify
         self.md5sum = md5()
-        # zero byte hash
-        self.headers = {'etag': '"d41d8cd98f00b204e9800998ecf8427e"'}
+        self.headers = {'etag': '"%s"' % EMPTY_ETAG}
         if headers:
             self.headers.update(headers)
 
@@ -806,6 +806,7 @@ class TestPutObject(MockHttpTest):
                                 chunk_size=chunk_size)
 
             self.assertNotEquals(etag, contents.get_md5sum())
+            self.assertEquals(etag, 'badresponseetag')
             self.assertEquals(raw_data_md5, contents.get_md5sum())
 
     def test_md5_match(self):
diff --git a/tests/unit/utils.py b/tests/unit/utils.py
index 88d6d129..3e87ea28 100644
--- a/tests/unit/utils.py
+++ b/tests/unit/utils.py
@@ -25,6 +25,7 @@ from six.moves import reload_module
 from six.moves.urllib.parse import urlparse, ParseResult
 from swiftclient import client as c
 from swiftclient import shell as s
+from swiftclient.utils import EMPTY_ETAG
 
 
 def fake_get_auth_keystone(expected_os_options=None, exc=None,
@@ -127,7 +128,7 @@ def fake_http_connect(*code_iter, **kwargs):
                        'last-modified': self.timestamp,
                        'x-object-meta-test': 'testing',
                        'etag':
-                       self.etag or '"d41d8cd98f00b204e9800998ecf8427e"',
+                       self.etag or '"%s"' % EMPTY_ETAG,
                        'x-works': 'yes',
                        'x-account-container-count': 12345}
             if not self.timestamp: