Merge "Optimize noop case for suffix rehash"

commit 4970277232
Author: Jenkins
Date:   2017-01-21 01:28:00 +00:00
Committed by: Gerrit Code Review

2 changed files with 52 additions and 4 deletions

swift/obj/diskfile.py

@@ -273,9 +273,11 @@ def consolidate_hashes(partition_dir):
        hashes = None
        modified = False
        found_invalidation_entry = False
        try:
            with open(invalidations_file, 'rb') as inv_fh:
                for line in inv_fh:
                    found_invalidation_entry = True
                    suffix = line.strip()
                    if hashes is not None and \
                            hashes.get(suffix, '') is not None:
@@ -290,12 +292,9 @@ def consolidate_hashes(partition_dir):
        # Now that all the invalidations are reflected in hashes.pkl, it's
        # safe to clear out the invalidations file.
        try:
            if found_invalidation_entry:
                with open(invalidations_file, 'wb') as inv_fh:
                    pass
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise

        return hashes
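Taken together, the two hunks above make consolidate_hashes skip the truncating rewrite of hashes.invalid whenever no invalidation entries were read, so rehashing an unchanged partition costs zero write IOPs. A minimal standalone sketch of the pattern, with simplified file handling (the consolidate function and dict-based hashes are illustrative, not Swift's API):

import errno


def consolidate(invalidations_file, hashes):
    # Illustrative sketch only: apply queued suffix invalidations,
    # then truncate the queue file just when it held any entries.
    found_invalidation_entry = False
    try:
        with open(invalidations_file, 'rb') as inv_fh:
            for line in inv_fh:
                found_invalidation_entry = True
                hashes[line.strip()] = None  # mark suffix for rehash
    except (IOError, OSError) as e:
        if e.errno != errno.ENOENT:
            raise
    if found_invalidation_entry:
        # opening with 'wb' truncates the file; skipping this in the
        # noop case is exactly the write IOP the commit saves
        with open(invalidations_file, 'wb'):
            pass
    return hashes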
@@ -1048,6 +1047,8 @@ class BaseDiskFileManager(object):
            try:
                hashes = self.consolidate_hashes(partition_path)
            except Exception:
                self.logger.warning('Unable to read %r', hashes_file,
                                    exc_info=True)
                do_listdir = True
                force_rewrite = True
            else:
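Note the warning above is emitted with exc_info=True, so the swallowed exception's traceback still reaches the log while the manager degrades to a full listdir and a forced rewrite of hashes.pkl. A self-contained sketch of that log-and-degrade pattern (load_consolidated and the returned rebuild flag are hypothetical names):

import logging

logger = logging.getLogger(__name__)


def load_hashes(path, load_consolidated):
    # Hypothetical helper: tolerate an unreadable/corrupt hashes file.
    try:
        return load_consolidated(path), False
    except Exception:
        # exc_info=True attaches the active traceback to the log
        # record, so the cause is kept even though the error is eaten
        logger.warning('Unable to read %r', path, exc_info=True)
        return None, True  # tell the caller to rebuild and rewrite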

test/unit/obj/test_diskfile.py

@@ -27,6 +27,7 @@ import tempfile
import uuid
import xattr
import re
import six
from collections import defaultdict
from random import shuffle, randint
from shutil import rmtree
@@ -6081,6 +6082,47 @@ class TestSuffixHashes(unittest.TestCase):
        hashes = df_mgr.get_hashes('sda1', '0', [], policy)
        self.assertIn(suffix, hashes)  # sanity

    def test_invalidate_hash_file_not_truncated_when_empty(self):
        orig_open = open

        def watch_open(*args, **kargs):
            name = os.path.basename(args[0])
            open_log[name].append(args[1])
            return orig_open(*args, **kargs)

        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            part_path = os.path.join(self.devices, 'sda1',
                                     diskfile.get_data_dir(policy), '0')
            inv_file = os.path.join(
                part_path, diskfile.HASH_INVALIDATIONS_FILE)
            hash_file = os.path.join(
                part_path, diskfile.HASH_FILE)

            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
            self.assertEqual(hashes, {})
            self.assertTrue(os.path.exists(hash_file))
            # create something to hash
            df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
                                     policy=policy)
            df.delete(self.ts())
            self.assertTrue(os.path.exists(inv_file))
            # invalidation file created, let's consolidate it
            df_mgr.get_hashes('sda1', '0', [], policy)

            open_log = defaultdict(list)
            open_loc = '__builtin__.open' if six.PY2 else 'builtins.open'
            with mock.patch(open_loc, watch_open):
                self.assertTrue(os.path.exists(inv_file))
                # no new suffixes get invalidated... so no write IOP
                df_mgr.get_hashes('sda1', '0', [], policy)
            # each file is opened once to read
            expected = {
                'hashes.pkl': ['rb'],
                'hashes.invalid': ['rb'],
            }
            self.assertEqual(open_log, expected)
    def test_invalidate_hash_consolidation(self):
        def assert_consolidation(suffixes):
            # verify that suffixes are invalidated after consolidation
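The new test above works by swapping the open builtin for a recording wrapper; the patch target differs between Python 2 (__builtin__.open) and Python 3 (builtins.open), hence the six.PY2 switch. A small self-contained sketch of the same technique on Python 3 (the temp-file usage is illustrative):

import os
import tempfile
from collections import defaultdict
from unittest import mock

open_log = defaultdict(list)
orig_open = open


def watch_open(*args, **kwargs):
    # record basename -> list of modes, then delegate to the real open
    mode = args[1] if len(args) > 1 else kwargs.get('mode', 'r')
    open_log[os.path.basename(args[0])].append(mode)
    return orig_open(*args, **kwargs)


# on Python 2 the patch target would be '__builtin__.open' instead
with mock.patch('builtins.open', watch_open):
    path = os.path.join(tempfile.mkdtemp(), 'example.txt')
    with open(path, 'wb') as f:
        f.write(b'hello')

assert open_log == {'example.txt': ['wb']}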
@@ -6166,6 +6208,7 @@ class TestSuffixHashes(unittest.TestCase):
        # verify that if consolidate_hashes raises an exception then suffixes
        # are rehashed and a hashes.pkl is written
        for policy in self.iter_policies():
            self.logger.clear()
            df_mgr = self.df_router[policy]
            # create something to hash
            df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
@@ -6189,6 +6232,10 @@ class TestSuffixHashes(unittest.TestCase):
            with open(hashes_file, 'rb') as f:
                self.assertEqual(hashes, pickle.load(f))
            # sanity check log warning
            warnings = self.logger.get_lines_for_level('warning')
            self.assertEqual(warnings, ["Unable to read %r" % hashes_file])
            # repeat with pre-existing hashes.pkl
            with mock.patch.object(df_mgr, '_hash_suffix',
                                   return_value='new fake hash'):
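Here mock.patch.object pins _hash_suffix to a canned return value so the forced-rewrite path can be asserted deterministically. A generic sketch of the same stubbing (the Hasher class is hypothetical):

from unittest import mock


class Hasher(object):
    def _hash_suffix(self, path):
        raise NotImplementedError  # real hashing elided

    def get_hashes(self, suffixes):
        return {s: self._hash_suffix(s) for s in suffixes}


hasher = Hasher()
with mock.patch.object(hasher, '_hash_suffix',
                       return_value='new fake hash'):
    # every suffix now hashes to the stub value, no disk I/O involved
    assert hasher.get_hashes(['abc']) == {'abc': 'new fake hash'}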