diskfile: Prevent get_hashes from creating missing partition dirs

The relinker currently blindly calls get_hashes on partitions in the
new, upper half of the partition space, which can cause extra handoffs
to be created if you're rerunning the relink step for safety.
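For context, the pre-change code created the partition directory as a side effect of any
hash request. A rough sketch of the removed behavior, condensed from the diskfile changes
below (not the verbatim original):

    # old get_hashes() flow (sketch): merely probing a partition created its directory
    partition_path = get_part_path(dev_path, policy, partition)
    if not os.path.exists(partition_path):
        mkdirs(partition_path)  # an empty handoff now exists on this device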

Even if the relinker were smarter, though, there's no reason that a
curious operator should end up creating empty handoffs just because
they're poking devices with REPLICATE requests.
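With this change, a missing partition directory is reported as empty rather than created.
A condensed sketch of the new get_hashes() control flow, pieced together from the diff
below (the trailing rehash call is elided here):

    suffixes = [suf for suf in suffixes or [] if valid_suffix(suf)]
    if skip_rehash:
        for suffix in suffixes:
            self.invalidate_hash(os.path.join(partition_path, suffix))
        hashes = None
    elif not os.path.exists(partition_path):
        hashes = {}  # nothing on disk: report it, don't create it
    else:
        # fall through to the usual rehash via tpool.execute(self._get_hashes, ...)
        ...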

Drive-by: be a little more strict about the "suffixes" we're willing to
put in hashes.invalid and read out of hashes.pkl.
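The stricter check is a small helper (included verbatim in the diff): a suffix must be a
three-character lowercase hex string. A few illustrative calls, assuming that
valid_suffix() helper:

    def valid_suffix(value):
        if not isinstance(value, str) or len(value) != 3:
            return False
        return all(c in '0123456789abcdef' for c in value)

    valid_suffix('07a')   # True
    valid_suffix('123f')  # False - wrong length
    valid_suffix('-00')   # False - not hex
    valid_suffix(123)     # False - not a string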

Change-Id: I9aace80088cd00d02c418fe4d782b662fb5c8bcf
Tim Burke 2021-03-19 13:32:49 -07:00
parent 38d5e1339b
commit ade3b28636
4 changed files with 116 additions and 56 deletions

View File

@ -341,6 +341,12 @@ def quarantine_renamer(device_path, corrupted_file_path):
return to_dir
def valid_suffix(value):
if not isinstance(value, str) or len(value) != 3:
return False
return all(c in '0123456789abcdef' for c in value)
def read_hashes(partition_dir):
"""
Read the existing hashes.pkl
@ -365,9 +371,9 @@ def read_hashes(partition_dir):
pass
# Check for corrupted data that could break os.listdir()
for suffix in hashes.keys():
if not suffix.isalnum():
return {'valid': False}
if not all(valid_suffix(key) or key in ('valid', 'updated')
for key in hashes):
return {'valid': False}
# hashes.pkl w/o valid updated key is "valid" but "forever old"
hashes.setdefault('valid', True)
@ -1301,6 +1307,8 @@ class BaseDiskFileManager(object):
self.logger.debug('Run listdir on %s', partition_path)
hashes.update((suffix, None) for suffix in recalculate)
for suffix, hash_ in list(hashes.items()):
if suffix in ('valid', 'updated'):
continue
if not hash_:
suffix_dir = join(partition_path, suffix)
try:
@ -1548,12 +1556,14 @@ class BaseDiskFileManager(object):
if not dev_path:
raise DiskFileDeviceUnavailable()
partition_path = get_part_path(dev_path, policy, partition)
if not os.path.exists(partition_path):
mkdirs(partition_path)
suffixes = [suf for suf in suffixes or [] if valid_suffix(suf)]
if skip_rehash:
for suffix in suffixes or []:
invalidate_hash(os.path.join(partition_path, suffix))
return None
for suffix in suffixes:
self.invalidate_hash(os.path.join(partition_path, suffix))
hashes = None
elif not os.path.exists(partition_path):
hashes = {}
else:
_junk, hashes = tpool.execute(
self._get_hashes, device, partition, policy,

View File

@ -422,6 +422,11 @@ class TestRelinker(unittest.TestCase):
self.assertEqual('foo', hashes[self._hash[-3:]])
self.assertFalse(os.path.exists(
os.path.join(self.part_dir, 'hashes.invalid')))
# Check that only the dirty partition in upper half of next part power
# has been created and rehashed
other_next_part = self.next_part ^ 1
other_next_part_dir = os.path.join(self.objects, str(other_next_part))
self.assertFalse(os.path.exists(other_next_part_dir))
def test_relink_link_already_exists(self):
self.rb.prepare_increase_partition_power()

View File

@ -6743,6 +6743,15 @@ class TestSuffixHashes(unittest.TestCase):
if suffix_dir != suffix_dir2:
return df2
def test_valid_suffix(self):
self.assertTrue(diskfile.valid_suffix('000'))
self.assertTrue(diskfile.valid_suffix('123'))
self.assertTrue(diskfile.valid_suffix('fff'))
self.assertFalse(diskfile.valid_suffix(list('123')))
self.assertFalse(diskfile.valid_suffix(123))
self.assertFalse(diskfile.valid_suffix(' 12'))
self.assertFalse(diskfile.valid_suffix('-00'))
def check_cleanup_ondisk_files(self, policy, input_files, output_files):
orig_unlink = os.unlink
file_list = list(input_files)
@ -7104,7 +7113,12 @@ class TestSuffixHashes(unittest.TestCase):
def test_invalidate_hash_empty_file_exists(self):
for policy in self.iter_policies():
df_mgr = self.df_router[policy]
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
hashes = df_mgr.get_hashes('sda1', '0', [], policy)
pkl_path = os.path.join(part_path, diskfile.HASH_FILE)
self.assertTrue(os.path.exists(pkl_path))
self.assertEqual(hashes, {})
# create something to hash
df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
@ -7127,6 +7141,7 @@ class TestSuffixHashes(unittest.TestCase):
df_mgr = self.df_router[policy]
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
inv_file = os.path.join(
part_path, diskfile.HASH_INVALIDATIONS_FILE)
hash_file = os.path.join(
@ -7162,9 +7177,14 @@ class TestSuffixHashes(unittest.TestCase):
# get_hashes call
for policy in self.iter_policies():
df_mgr = self.df_router[policy]
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
if existing:
mkdirs(part_path)
# force hashes.pkl to exist
df_mgr.get_hashes('sda1', '0', [], policy)
self.assertTrue(os.path.exists(os.path.join(
part_path, diskfile.HASH_FILE)))
orig_listdir = os.listdir
df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
policy=policy)
@ -7184,10 +7204,15 @@ class TestSuffixHashes(unittest.TestCase):
df2.delete(self.ts())
return result
if not existing:
self.assertFalse(os.path.exists(os.path.join(
part_path, diskfile.HASH_FILE)))
with mock.patch('swift.obj.diskfile.os.listdir',
mock_listdir):
# creates pkl file
# creates pkl file if not already there
hashes = df_mgr.get_hashes('sda1', '0', [], policy)
self.assertTrue(os.path.exists(os.path.join(
part_path, diskfile.HASH_FILE)))
# second suffix added after directory listing, it's added later
self.assertIn(suffix, hashes)
@ -7216,7 +7241,12 @@ class TestSuffixHashes(unittest.TestCase):
orig_hash_suffix = df_mgr._hash_suffix
if existing:
# create hashes.pkl
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
df_mgr.get_hashes('sda1', '0', [], policy)
self.assertTrue(os.path.exists(os.path.join(
part_path, diskfile.HASH_FILE)))
df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
policy=policy)
@ -7257,7 +7287,7 @@ class TestSuffixHashes(unittest.TestCase):
return result
with mock.patch.object(df_mgr, '_hash_suffix', mock_hash_suffix):
# creates pkl file and repeats listing when pkl modified
# repeats listing when pkl modified
hashes = df_mgr.get_hashes('sda1', '0', [], policy)
# first get_hashes should complete with suffix1 state
@ -7452,6 +7482,12 @@ class TestSuffixHashes(unittest.TestCase):
# verify that if consolidate_hashes raises an exception then suffixes
# are rehashed and a hashes.pkl is written
for policy in self.iter_policies():
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
invalidations_file = os.path.join(
part_path, diskfile.HASH_INVALIDATIONS_FILE)
self.logger.clear()
df_mgr = self.df_router[policy]
# create something to hash
@ -7461,10 +7497,14 @@ class TestSuffixHashes(unittest.TestCase):
# avoid getting O_TMPFILE warning in logs
if not utils.o_tmpfile_in_tmpdir_supported():
df.manager.use_linkat = False
self.assertFalse(os.path.exists(part_path))
df.delete(self.ts())
self.assertTrue(os.path.exists(invalidations_file))
suffix_dir = os.path.dirname(df._datadir)
suffix = os.path.basename(suffix_dir)
# no pre-existing hashes.pkl
self.assertFalse(os.path.exists(hashes_file))
with mock.patch.object(df_mgr, '_hash_suffix',
return_value='fake hash'):
with mock.patch.object(df_mgr, 'consolidate_hashes',
@ -7473,10 +7513,6 @@ class TestSuffixHashes(unittest.TestCase):
self.assertEqual({suffix: 'fake hash'}, hashes)
# sanity check hashes file
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
with open(hashes_file, 'rb') as f:
found_hashes = pickle.load(f)
found_hashes.pop('updated')
@ -7497,9 +7533,6 @@ class TestSuffixHashes(unittest.TestCase):
self.assertEqual({suffix: 'new fake hash'}, hashes)
# sanity check hashes file
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
with open(hashes_file, 'rb') as f:
found_hashes = pickle.load(f)
found_hashes.pop('updated')
@ -8185,6 +8218,9 @@ class TestSuffixHashes(unittest.TestCase):
def test_hash_suffix_listdir_enoent(self):
for policy in self.iter_policies():
df_mgr = self.df_router[policy]
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
mkdirs(part_path) # ensure we'll bother writing a pkl at all
orig_listdir = os.listdir
listdir_calls = []
@ -8203,9 +8239,6 @@ class TestSuffixHashes(unittest.TestCase):
# does not exist!
df_mgr.get_hashes('sda1', '0', ['123'], policy)
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
self.assertEqual(listdir_calls, [
# part path gets created automatically
(part_path, True),
@ -8340,7 +8373,7 @@ class TestSuffixHashes(unittest.TestCase):
# get_hashes tests - behaviors
def test_get_hashes_creates_partition_and_pkl(self):
def test_get_hashes_does_not_create_partition(self):
for policy in self.iter_policies():
df_mgr = self.df_router[policy]
hashes = df_mgr.get_hashes(self.existing_device, '0', [],
@ -8348,6 +8381,18 @@ class TestSuffixHashes(unittest.TestCase):
self.assertEqual(hashes, {})
part_path = os.path.join(
self.devices, 'sda1', diskfile.get_data_dir(policy), '0')
self.assertFalse(os.path.exists(part_path))
def test_get_hashes_creates_pkl(self):
# like above, but -- if the partition already exists, make the pickle
for policy in self.iter_policies():
part_path = os.path.join(
self.devices, 'sda1', diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
df_mgr = self.df_router[policy]
hashes = df_mgr.get_hashes(self.existing_device, '0', [],
policy)
self.assertEqual(hashes, {})
self.assertTrue(os.path.exists(part_path))
hashes_file = os.path.join(part_path,
diskfile.HASH_FILE)
@ -8438,6 +8483,7 @@ class TestSuffixHashes(unittest.TestCase):
# create an empty stale pickle
part_path = os.path.join(
self.devices, 'sda1', diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
hashes_file = os.path.join(part_path,
diskfile.HASH_FILE)
hashes = df_mgr.get_hashes(self.existing_device, '0', [], policy)
@ -8641,6 +8687,7 @@ class TestSuffixHashes(unittest.TestCase):
suffix2 = os.path.basename(os.path.dirname(df2._datadir))
part_path = os.path.dirname(os.path.dirname(
os.path.join(df._datadir)))
mkdirs(part_path)
hashfile_path = os.path.join(part_path, diskfile.HASH_FILE)
# create hashes.pkl
hashes = df_mgr.get_hashes(self.existing_device, '0', [],
@ -8745,8 +8792,13 @@ class TestSuffixHashes(unittest.TestCase):
def test_get_hashes_modified_recursive_retry(self):
for policy in self.iter_policies():
df_mgr = self.df_router[policy]
part_path = os.path.join(self.devices, self.existing_device,
diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
# first create an empty pickle
df_mgr.get_hashes(self.existing_device, '0', [], policy)
self.assertTrue(os.path.exists(os.path.join(
part_path, diskfile.HASH_FILE)))
non_local = {'suffix_count': 1}
calls = []
@ -8788,19 +8840,19 @@ class TestHashesHelpers(unittest.TestCase):
rmtree(self.testdir, ignore_errors=1)
def test_read_legacy_hashes(self):
hashes = {'stub': 'fake'}
hashes = {'fff': 'fake'}
hashes_file = os.path.join(self.testdir, diskfile.HASH_FILE)
with open(hashes_file, 'wb') as f:
pickle.dump(hashes, f)
expected = {
'stub': 'fake',
'fff': 'fake',
'updated': -1,
'valid': True,
}
self.assertEqual(expected, diskfile.read_hashes(self.testdir))
def test_write_hashes_valid_updated(self):
hashes = {'stub': 'fake', 'valid': True}
hashes = {'888': 'fake', 'valid': True}
now = time()
with mock.patch('swift.obj.diskfile.time.time', return_value=now):
diskfile.write_hashes(self.testdir, hashes)
@ -8808,7 +8860,7 @@ class TestHashesHelpers(unittest.TestCase):
with open(hashes_file, 'rb') as f:
data = pickle.load(f)
expected = {
'stub': 'fake',
'888': 'fake',
'updated': now,
'valid': True,
}
@ -8843,7 +8895,7 @@ class TestHashesHelpers(unittest.TestCase):
self.assertEqual(expected, data)
def test_read_write_valid_hashes_mutation_and_transative_equality(self):
hashes = {'stub': 'fake', 'valid': True}
hashes = {'000': 'fake', 'valid': True}
diskfile.write_hashes(self.testdir, hashes)
# write_hashes mutates the passed in hashes, it adds the updated key
self.assertIn('updated', hashes)

View File

@ -7158,11 +7158,11 @@ class TestObjectController(unittest.TestCase):
def my_tpool_execute(func, *args, **kwargs):
return func(*args, **kwargs)
was_get_hashes = diskfile.DiskFileManager._get_hashes
was_tpool_exe = tpool.execute
try:
diskfile.DiskFileManager._get_hashes = fake_get_hashes
tpool.execute = my_tpool_execute
with mock.patch.object(diskfile.DiskFileManager, '_get_hashes',
fake_get_hashes), \
mock.patch.object(tpool, 'execute', my_tpool_execute), \
mock.patch('swift.obj.diskfile.os.path.exists',
return_value=True):
req = Request.blank('/sda1/p/',
environ={'REQUEST_METHOD': 'REPLICATE'},
headers={})
@ -7170,9 +7170,6 @@ class TestObjectController(unittest.TestCase):
self.assertEqual(resp.status_int, 200)
p_data = pickle.loads(resp.body)
self.assertEqual(p_data, {1: 2})
finally:
tpool.execute = was_tpool_exe
diskfile.DiskFileManager._get_hashes = was_get_hashes
def test_REPLICATE_pickle_protocol(self):
@ -7182,25 +7179,22 @@ class TestObjectController(unittest.TestCase):
def my_tpool_execute(func, *args, **kwargs):
return func(*args, **kwargs)
was_get_hashes = diskfile.DiskFileManager._get_hashes
was_tpool_exe = tpool.execute
try:
diskfile.DiskFileManager._get_hashes = fake_get_hashes
tpool.execute = my_tpool_execute
with mock.patch.object(diskfile.DiskFileManager, '_get_hashes',
fake_get_hashes), \
mock.patch.object(tpool, 'execute', my_tpool_execute), \
mock.patch('swift.obj.server.pickle.dumps') as fake_pickle, \
mock.patch('swift.obj.diskfile.os.path.exists',
return_value=True):
req = Request.blank('/sda1/p/',
environ={'REQUEST_METHOD': 'REPLICATE'},
headers={})
with mock.patch('swift.obj.server.pickle.dumps') as fake_pickle:
fake_pickle.return_value = b''
req.get_response(self.object_controller)
# This is the key assertion: starting in Python 3.0, the
# default protocol version is 3, but such pickles can't be read
# on Python 2. As long as we may need to talk to a Python 2
# process, we need to cap our protocol version.
fake_pickle.assert_called_once_with({1: 2}, protocol=2)
finally:
tpool.execute = was_tpool_exe
diskfile.DiskFileManager._get_hashes = was_get_hashes
fake_pickle.return_value = b''
req.get_response(self.object_controller)
# This is the key assertion: starting in Python 3.0, the
# default protocol version is 3, but such pickles can't be read
# on Python 2. As long as we may need to talk to a Python 2
# process, we need to cap our protocol version.
fake_pickle.assert_called_once_with({1: 2}, protocol=2)
def test_REPLICATE_timeout(self):
@ -7210,18 +7204,17 @@ class TestObjectController(unittest.TestCase):
def my_tpool_execute(func, *args, **kwargs):
return func(*args, **kwargs)
was_get_hashes = diskfile.DiskFileManager._get_hashes
was_tpool_exe = tpool.execute
try:
with mock.patch.object(diskfile.DiskFileManager, '_get_hashes',
fake_get_hashes), \
mock.patch.object(tpool, 'execute', my_tpool_execute), \
mock.patch('swift.obj.diskfile.os.path.exists',
return_value=True):
diskfile.DiskFileManager._get_hashes = fake_get_hashes
tpool.execute = my_tpool_execute
req = Request.blank('/sda1/p/',
environ={'REQUEST_METHOD': 'REPLICATE'},
headers={})
self.assertRaises(Timeout, self.object_controller.REPLICATE, req)
finally:
tpool.execute = was_tpool_exe
diskfile.DiskFileManager._get_hashes = was_get_hashes
def test_REPLICATE_reclaims_tombstones(self):
conf = {'devices': self.testdir, 'mount_check': False,