diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py
index b119831521..82f3d10edc 100644
--- a/swift/obj/diskfile.py
+++ b/swift/obj/diskfile.py
@@ -341,6 +341,12 @@ def quarantine_renamer(device_path, corrupted_file_path):
     return to_dir
 
 
+def valid_suffix(value):
+    if not isinstance(value, str) or len(value) != 3:
+        return False
+    return all(c in '0123456789abcdef' for c in value)
+
+
 def read_hashes(partition_dir):
     """
     Read the existing hashes.pkl
@@ -365,9 +371,9 @@ def read_hashes(partition_dir):
             pass
 
     # Check for corrupted data that could break os.listdir()
-    for suffix in hashes.keys():
-        if not suffix.isalnum():
-            return {'valid': False}
+    if not all(valid_suffix(key) or key in ('valid', 'updated')
+               for key in hashes):
+        return {'valid': False}
 
     # hashes.pkl w/o valid updated key is "valid" but "forever old"
     hashes.setdefault('valid', True)
@@ -1292,6 +1298,8 @@ class BaseDiskFileManager(object):
             self.logger.debug('Run listdir on %s', partition_path)
         hashes.update((suffix, None) for suffix in recalculate)
         for suffix, hash_ in list(hashes.items()):
+            if suffix in ('valid', 'updated'):
+                continue
             if not hash_:
                 suffix_dir = join(partition_path, suffix)
                 try:
@@ -1539,12 +1547,14 @@ class BaseDiskFileManager(object):
         if not dev_path:
             raise DiskFileDeviceUnavailable()
         partition_path = get_part_path(dev_path, policy, partition)
-        if not os.path.exists(partition_path):
-            mkdirs(partition_path)
+        suffixes = [suf for suf in suffixes or [] if valid_suffix(suf)]
+
         if skip_rehash:
-            for suffix in suffixes or []:
-                invalidate_hash(os.path.join(partition_path, suffix))
-            return None
+            for suffix in suffixes:
+                self.invalidate_hash(os.path.join(partition_path, suffix))
+            hashes = None
+        elif not os.path.exists(partition_path):
+            hashes = {}
         else:
             _junk, hashes = tpool.execute(
                 self._get_hashes, device, partition, policy,
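To make the validation change concrete, here is a small standalone sketch (not part of the patch): `valid_suffix()` is copied from the hunk above, while the comparison loop is a throwaway harness of our own.

```python
def valid_suffix(value):
    if not isinstance(value, str) or len(value) != 3:
        return False
    return all(c in '0123456789abcdef' for c in value)

for key in ('a1b', 'fff', 'zzz', '12', 'valid', 'updated'):
    old_ok = key.isalnum()  # the pre-patch per-key test
    new_ok = valid_suffix(key) or key in ('valid', 'updated')
    print('%-8s old=%-5s new=%s' % (key, old_ok, new_ok))
```

Keys like `'zzz'` or `'12'` pass `isalnum()` but can never name a real suffix directory, so `read_hashes()` now reports such a pickle as invalid instead of handing junk keys to callers.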
diff --git a/test/unit/cli/test_relinker.py b/test/unit/cli/test_relinker.py
index d53ae2ab38..babf435f4d 100644
--- a/test/unit/cli/test_relinker.py
+++ b/test/unit/cli/test_relinker.py
@@ -453,6 +453,11 @@ class TestRelinker(unittest.TestCase):
         self.assertEqual('foo', hashes[self._hash[-3:]])
         self.assertFalse(os.path.exists(
             os.path.join(self.part_dir, 'hashes.invalid')))
+        # Check that only the dirty partition in upper half of next part power
+        # has been created and rehashed
+        other_next_part = self.next_part ^ 1
+        other_next_part_dir = os.path.join(self.objects, str(other_next_part))
+        self.assertFalse(os.path.exists(other_next_part_dir))
 
     def _do_link_test(self, command, old_file_specs, new_file_specs,
                       conflict_file_specs, exp_old_specs, exp_new_specs,
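For context on the `^ 1` above: during a partition power increase, objects in old partition `p` are relinked into next-power partition `2*p` or `2*p + 1`, so flipping the low bit of `next_part` yields the sibling partition, which the relinker is expected to leave uncreated. A sketch of the arithmetic, with an illustrative partition number:

```python
# Illustrative only: the sibling of a next-power partition differs
# exactly in the low bit, hence next_part ^ 1 in the test above.
old_part = 837
lower, upper = 2 * old_part, 2 * old_part + 1
assert lower ^ 1 == upper
assert upper ^ 1 == lower
```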
diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py
index d9a8788a3c..98fb3945f6 100644
--- a/test/unit/obj/test_diskfile.py
+++ b/test/unit/obj/test_diskfile.py
@@ -6743,6 +6743,15 @@ class TestSuffixHashes(unittest.TestCase):
             if suffix_dir != suffix_dir2:
                 return df2
 
+    def test_valid_suffix(self):
+        self.assertTrue(diskfile.valid_suffix('000'))
+        self.assertTrue(diskfile.valid_suffix('123'))
+        self.assertTrue(diskfile.valid_suffix('fff'))
+        self.assertFalse(diskfile.valid_suffix(list('123')))
+        self.assertFalse(diskfile.valid_suffix(123))
+        self.assertFalse(diskfile.valid_suffix(' 12'))
+        self.assertFalse(diskfile.valid_suffix('-00'))
+
     def check_cleanup_ondisk_files(self, policy, input_files, output_files):
         orig_unlink = os.unlink
         file_list = list(input_files)
@@ -7104,7 +7113,12 @@ class TestSuffixHashes(unittest.TestCase):
     def test_invalidate_hash_empty_file_exists(self):
         for policy in self.iter_policies():
             df_mgr = self.df_router[policy]
+            part_path = os.path.join(self.devices, 'sda1',
+                                     diskfile.get_data_dir(policy), '0')
+            mkdirs(part_path)
             hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            pkl_path = os.path.join(part_path, diskfile.HASH_FILE)
+            self.assertTrue(os.path.exists(pkl_path))
             self.assertEqual(hashes, {})
             # create something to hash
             df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
@@ -7127,6 +7141,7 @@ class TestSuffixHashes(unittest.TestCase):
             df_mgr = self.df_router[policy]
             part_path = os.path.join(self.devices, 'sda1',
                                      diskfile.get_data_dir(policy), '0')
+            mkdirs(part_path)
             inv_file = os.path.join(
                 part_path, diskfile.HASH_INVALIDATIONS_FILE)
             hash_file = os.path.join(
@@ -7162,9 +7177,14 @@ class TestSuffixHashes(unittest.TestCase):
         # get_hashes call
         for policy in self.iter_policies():
             df_mgr = self.df_router[policy]
+            part_path = os.path.join(self.devices, 'sda1',
+                                     diskfile.get_data_dir(policy), '0')
             if existing:
+                mkdirs(part_path)
                 # force hashes.pkl to exist
                 df_mgr.get_hashes('sda1', '0', [], policy)
+                self.assertTrue(os.path.exists(os.path.join(
+                    part_path, diskfile.HASH_FILE)))
             orig_listdir = os.listdir
             df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
                                      policy=policy)
@@ -7184,10 +7204,15 @@ class TestSuffixHashes(unittest.TestCase):
                     df2.delete(self.ts())
                 return result
 
+            if not existing:
+                self.assertFalse(os.path.exists(os.path.join(
+                    part_path, diskfile.HASH_FILE)))
             with mock.patch('swift.obj.diskfile.os.listdir',
                             mock_listdir):
-                # creates pkl file
+                # creates pkl file if not already there
                 hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertTrue(os.path.exists(os.path.join(
+                part_path, diskfile.HASH_FILE)))
 
             # second suffix added after directory listing, it's added later
             self.assertIn(suffix, hashes)
@@ -7216,7 +7241,12 @@ class TestSuffixHashes(unittest.TestCase):
             orig_hash_suffix = df_mgr._hash_suffix
             if existing:
                 # create hashes.pkl
+                part_path = os.path.join(self.devices, 'sda1',
+                                         diskfile.get_data_dir(policy), '0')
+                mkdirs(part_path)
                 df_mgr.get_hashes('sda1', '0', [], policy)
+                self.assertTrue(os.path.exists(os.path.join(
+                    part_path, diskfile.HASH_FILE)))
 
             df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
                                      policy=policy)
@@ -7257,7 +7287,7 @@ class TestSuffixHashes(unittest.TestCase):
                 return result
 
             with mock.patch.object(df_mgr, '_hash_suffix', mock_hash_suffix):
-                # creates pkl file and repeats listing when pkl modified
+                # repeats listing when pkl modified
                 hashes = df_mgr.get_hashes('sda1', '0', [], policy)
 
             # first get_hashes should complete with suffix1 state
@@ -7452,6 +7482,12 @@ class TestSuffixHashes(unittest.TestCase):
         # verify that if consolidate_hashes raises an exception then suffixes
         # are rehashed and a hashes.pkl is written
         for policy in self.iter_policies():
+            part_path = os.path.join(self.devices, 'sda1',
+                                     diskfile.get_data_dir(policy), '0')
+            hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
+            invalidations_file = os.path.join(
+                part_path, diskfile.HASH_INVALIDATIONS_FILE)
+
             self.logger.clear()
             df_mgr = self.df_router[policy]
             # create something to hash
@@ -7461,10 +7497,14 @@ class TestSuffixHashes(unittest.TestCase):
             # avoid getting O_TMPFILE warning in logs
             if not utils.o_tmpfile_in_tmpdir_supported():
                 df.manager.use_linkat = False
+
+            self.assertFalse(os.path.exists(part_path))
             df.delete(self.ts())
+            self.assertTrue(os.path.exists(invalidations_file))
             suffix_dir = os.path.dirname(df._datadir)
             suffix = os.path.basename(suffix_dir)
             # no pre-existing hashes.pkl
+            self.assertFalse(os.path.exists(hashes_file))
             with mock.patch.object(df_mgr, '_hash_suffix',
                                    return_value='fake hash'):
                 with mock.patch.object(df_mgr, 'consolidate_hashes',
@@ -7473,10 +7513,6 @@
             self.assertEqual({suffix: 'fake hash'}, hashes)
 
             # sanity check hashes file
-            part_path = os.path.join(self.devices, 'sda1',
-                                     diskfile.get_data_dir(policy), '0')
-            hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
-
             with open(hashes_file, 'rb') as f:
                 found_hashes = pickle.load(f)
                 found_hashes.pop('updated')
@@ -7497,9 +7533,6 @@
             self.assertEqual({suffix: 'new fake hash'}, hashes)
 
             # sanity check hashes file
-            part_path = os.path.join(self.devices, 'sda1',
-                                     diskfile.get_data_dir(policy), '0')
-            hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
             with open(hashes_file, 'rb') as f:
                 found_hashes = pickle.load(f)
                 found_hashes.pop('updated')
@@ -8185,6 +8218,9 @@ class TestSuffixHashes(unittest.TestCase):
     def test_hash_suffix_listdir_enoent(self):
         for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
+            part_path = os.path.join(self.devices, 'sda1',
+                                     diskfile.get_data_dir(policy), '0')
+            mkdirs(part_path)
             # ensure we'll bother writing a pkl at all
             orig_listdir = os.listdir
             listdir_calls = []
@@ -8203,9 +8239,6 @@ class TestSuffixHashes(unittest.TestCase):
                 # does not exist!
                 df_mgr.get_hashes('sda1', '0', ['123'], policy)
 
-            part_path = os.path.join(self.devices, 'sda1',
-                                     diskfile.get_data_dir(policy), '0')
-
             self.assertEqual(listdir_calls, [
                 # part path gets created automatically
                 (part_path, True),
@@ -8340,7 +8373,7 @@ class TestSuffixHashes(unittest.TestCase):
 
     # get_hashes tests - behaviors
 
-    def test_get_hashes_creates_partition_and_pkl(self):
+    def test_get_hashes_does_not_create_partition(self):
         for policy in self.iter_policies():
             df_mgr = self.df_router[policy]
             hashes = df_mgr.get_hashes(self.existing_device, '0', [],
@@ -8348,6 +8381,18 @@ class TestSuffixHashes(unittest.TestCase):
             self.assertEqual(hashes, {})
             part_path = os.path.join(
                 self.devices, 'sda1', diskfile.get_data_dir(policy), '0')
+            self.assertFalse(os.path.exists(part_path))
+
+    def test_get_hashes_creates_pkl(self):
+        # like above, but -- if the partition already exists, make the pickle
+        for policy in self.iter_policies():
+            part_path = os.path.join(
+                self.devices, 'sda1', diskfile.get_data_dir(policy), '0')
+            mkdirs(part_path)
+            df_mgr = self.df_router[policy]
+            hashes = df_mgr.get_hashes(self.existing_device, '0', [],
+                                       policy)
+            self.assertEqual(hashes, {})
             self.assertTrue(os.path.exists(part_path))
             hashes_file = os.path.join(part_path,
                                        diskfile.HASH_FILE)
@@ -8438,6 +8483,7 @@ class TestSuffixHashes(unittest.TestCase):
             # create an empty stale pickle
             part_path = os.path.join(
                 self.devices, 'sda1', diskfile.get_data_dir(policy), '0')
+            mkdirs(part_path)
             hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
             hashes = df_mgr.get_hashes(self.existing_device, '0', [],
                                        policy)
@@ -8641,6 +8687,7 @@ class TestSuffixHashes(unittest.TestCase):
             suffix2 = os.path.basename(os.path.dirname(df2._datadir))
             part_path = os.path.dirname(os.path.dirname(
                 os.path.join(df._datadir)))
+            mkdirs(part_path)
             hashfile_path = os.path.join(part_path, diskfile.HASH_FILE)
             # create hashes.pkl
             hashes = df_mgr.get_hashes(self.existing_device, '0', [],
@@ -8745,8 +8792,13 @@ class TestSuffixHashes(unittest.TestCase):
 
     def test_get_hashes_modified_recursive_retry(self):
         for policy in self.iter_policies():
             df_mgr = self.df_router[policy]
+            part_path = os.path.join(self.devices, self.existing_device,
+                                     diskfile.get_data_dir(policy), '0')
+            mkdirs(part_path)
             # first create an empty pickle
             df_mgr.get_hashes(self.existing_device, '0', [], policy)
+            self.assertTrue(os.path.exists(os.path.join(
+                part_path, diskfile.HASH_FILE)))
             non_local = {'suffix_count': 1}
             calls = []
@@ -8788,19 +8840,19 @@ class TestHashesHelpers(unittest.TestCase):
         rmtree(self.testdir, ignore_errors=1)
 
     def test_read_legacy_hashes(self):
-        hashes = {'stub': 'fake'}
+        hashes = {'fff': 'fake'}
         hashes_file = os.path.join(self.testdir, diskfile.HASH_FILE)
         with open(hashes_file, 'wb') as f:
             pickle.dump(hashes, f)
         expected = {
-            'stub': 'fake',
+            'fff': 'fake',
             'updated': -1,
             'valid': True,
         }
         self.assertEqual(expected, diskfile.read_hashes(self.testdir))
 
     def test_write_hashes_valid_updated(self):
-        hashes = {'stub': 'fake', 'valid': True}
+        hashes = {'888': 'fake', 'valid': True}
         now = time()
         with mock.patch('swift.obj.diskfile.time.time', return_value=now):
             diskfile.write_hashes(self.testdir, hashes)
@@ -8808,7 +8860,7 @@ class TestHashesHelpers(unittest.TestCase):
         with open(hashes_file, 'rb') as f:
             data = pickle.load(f)
         expected = {
-            'stub': 'fake',
+            '888': 'fake',
             'updated': now,
             'valid': True,
         }
@@ -8843,7 +8895,7 @@ class TestHashesHelpers(unittest.TestCase):
         self.assertEqual(expected, data)
 
     def test_read_write_valid_hashes_mutation_and_transative_equality(self):
-        hashes = {'stub': 'fake', 'valid': True}
+        hashes = {'000': 'fake', 'valid': True}
         diskfile.write_hashes(self.testdir, hashes)
         # write_hashes mutates the passed in hashes, it adds the updated key
         self.assertIn('updated', hashes)
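All of the test changes above pin down the same control-flow change in `BaseDiskFileManager.get_hashes`. Below is a condensed, self-contained sketch of that flow under a simplified signature; the `_stub` names are hypothetical stand-ins, not Swift APIs.

```python
import os

def valid_suffix(value):  # as in the diskfile.py hunk
    if not isinstance(value, str) or len(value) != 3:
        return False
    return all(c in '0123456789abcdef' for c in value)

def get_hashes_stub(partition_path, suffixes, skip_rehash,
                    invalidate_stub, rehash_stub):
    # bogus suffixes are dropped before any disk access
    suffixes = [suf for suf in suffixes or [] if valid_suffix(suf)]
    if skip_rehash:
        for suffix in suffixes:
            invalidate_stub(os.path.join(partition_path, suffix))
        return None
    elif not os.path.exists(partition_path):
        # pre-patch code called mkdirs(partition_path) at this point,
        # so any REPLICATE could conjure empty partition dirs; now a
        # missing partition simply reports "nothing hashed"
        return {}
    return rehash_stub(partition_path, suffixes)
```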
diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py
index 21e1a4a2ed..f85ef92c37 100644
--- a/test/unit/obj/test_server.py
+++ b/test/unit/obj/test_server.py
@@ -7158,11 +7158,11 @@ class TestObjectController(unittest.TestCase):
         def my_tpool_execute(func, *args, **kwargs):
             return func(*args, **kwargs)
 
-        was_get_hashes = diskfile.DiskFileManager._get_hashes
-        was_tpool_exe = tpool.execute
-        try:
-            diskfile.DiskFileManager._get_hashes = fake_get_hashes
-            tpool.execute = my_tpool_execute
+        with mock.patch.object(diskfile.DiskFileManager, '_get_hashes',
+                               fake_get_hashes), \
+                mock.patch.object(tpool, 'execute', my_tpool_execute), \
+                mock.patch('swift.obj.diskfile.os.path.exists',
+                           return_value=True):
             req = Request.blank('/sda1/p/',
                                 environ={'REQUEST_METHOD': 'REPLICATE'},
                                 headers={})
@@ -7170,9 +7170,6 @@
             self.assertEqual(resp.status_int, 200)
             p_data = pickle.loads(resp.body)
             self.assertEqual(p_data, {1: 2})
-        finally:
-            tpool.execute = was_tpool_exe
-            diskfile.DiskFileManager._get_hashes = was_get_hashes
 
     def test_REPLICATE_pickle_protocol(self):
 
@@ -7182,25 +7179,22 @@ class TestObjectController(unittest.TestCase):
         def my_tpool_execute(func, *args, **kwargs):
             return func(*args, **kwargs)
 
-        was_get_hashes = diskfile.DiskFileManager._get_hashes
-        was_tpool_exe = tpool.execute
-        try:
-            diskfile.DiskFileManager._get_hashes = fake_get_hashes
-            tpool.execute = my_tpool_execute
+        with mock.patch.object(diskfile.DiskFileManager, '_get_hashes',
+                               fake_get_hashes), \
+                mock.patch.object(tpool, 'execute', my_tpool_execute), \
+                mock.patch('swift.obj.server.pickle.dumps') as fake_pickle, \
+                mock.patch('swift.obj.diskfile.os.path.exists',
+                           return_value=True):
             req = Request.blank('/sda1/p/',
                                 environ={'REQUEST_METHOD': 'REPLICATE'},
                                 headers={})
-            with mock.patch('swift.obj.server.pickle.dumps') as fake_pickle:
-                fake_pickle.return_value = b''
-                req.get_response(self.object_controller)
-                # This is the key assertion: starting in Python 3.0, the
-                # default protocol version is 3, but such pickles can't be read
-                # on Python 2. As long as we may need to talk to a Python 2
-                # process, we need to cap our protocol version.
-                fake_pickle.assert_called_once_with({1: 2}, protocol=2)
-        finally:
-            tpool.execute = was_tpool_exe
-            diskfile.DiskFileManager._get_hashes = was_get_hashes
+            fake_pickle.return_value = b''
+            req.get_response(self.object_controller)
+            # This is the key assertion: starting in Python 3.0, the
+            # default protocol version is 3, but such pickles can't be read
+            # on Python 2. As long as we may need to talk to a Python 2
+            # process, we need to cap our protocol version.
+            fake_pickle.assert_called_once_with({1: 2}, protocol=2)
 
     def test_REPLICATE_timeout(self):
 
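The try/finally-to-context-manager conversions in these REPLICATE tests (including the timeout hunk below) follow the standard `mock.patch.object` pattern. A minimal sketch with a throwaway class, not Swift's `DiskFileManager`:

```python
from unittest import mock

class Manager(object):
    def _get_hashes(self):
        return 'real'

def fake_get_hashes(self):
    return 'fake'

with mock.patch.object(Manager, '_get_hashes', fake_get_hashes):
    assert Manager()._get_hashes() == 'fake'
# the attribute is restored even if the block raises, which is what
# the removed was_* / try / finally bookkeeping did by hand
assert Manager()._get_hashes() == 'real'
```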
@@ -7210,18 +7204,17 @@
         def my_tpool_execute(func, *args, **kwargs):
             return func(*args, **kwargs)
 
-        was_get_hashes = diskfile.DiskFileManager._get_hashes
-        was_tpool_exe = tpool.execute
-        try:
+        with mock.patch.object(diskfile.DiskFileManager, '_get_hashes',
+                               fake_get_hashes), \
+                mock.patch.object(tpool, 'execute', my_tpool_execute), \
+                mock.patch('swift.obj.diskfile.os.path.exists',
+                           return_value=True):
             diskfile.DiskFileManager._get_hashes = fake_get_hashes
             tpool.execute = my_tpool_execute
             req = Request.blank('/sda1/p/',
                                 environ={'REQUEST_METHOD': 'REPLICATE'},
                                 headers={})
             self.assertRaises(Timeout, self.object_controller.REPLICATE, req)
-        finally:
-            tpool.execute = was_tpool_exe
-            diskfile.DiskFileManager._get_hashes = was_get_hashes
 
     def test_REPLICATE_reclaims_tombstones(self):
         conf = {'devices': self.testdir, 'mount_check': False,