Make the object replicator and auditor more robust to ENODATA

Currently when the replicator or auditor hits an ENODATA corruption
we bail.  This patch will allow them to skip and continue in an effort
to make progress on the work that needs to be done.

Change-Id: Id4c6e31332a93eb64a7660cec2a42ef686a84d22
This commit is contained in:
indianwhocodes 2022-06-07 14:24:44 -07:00 committed by Clay Gerrard
parent 93c432342b
commit d7c08d8ea7
4 changed files with 98 additions and 4 deletions

View File

@ -582,7 +582,7 @@ def object_audit_location_generator(devices, datadir, mount_check=True,
try:
suffixes = listdir(part_path)
except OSError as e:
if e.errno != errno.ENOTDIR:
if e.errno not in (errno.ENOTDIR, errno.ENODATA):
raise
continue
for asuffix in suffixes:
@ -590,7 +590,7 @@ def object_audit_location_generator(devices, datadir, mount_check=True,
try:
hashes = listdir(suff_path)
except OSError as e:
if e.errno != errno.ENOTDIR:
if e.errno not in (errno.ENOTDIR, errno.ENODATA):
raise
continue
for hsh in hashes:

View File

@ -608,8 +608,9 @@ class ObjectReplicator(Daemon):
try:
tpool.execute(shutil.rmtree, path)
except OSError as e:
if e.errno not in (errno.ENOENT, errno.ENOTEMPTY):
# If there was a race to create or delete, don't worry
if e.errno not in (errno.ENOENT, errno.ENOTEMPTY, errno.ENODATA):
# Don't worry if there was a race to create or delete,
# or some disk corruption that happened after the sync
raise
def delete_handoff_objs(self, job, delete_objs):

View File

@ -8560,6 +8560,59 @@ class TestSuffixHashes(unittest.TestCase):
)
self.assertTrue(os.path.exists(quarantine_path))
def test_auditor_hashdir_not_listable(self):
def list_locations(dirname, datadir):
return [(loc.path, loc.device, loc.partition, loc.policy)
for loc in diskfile.object_audit_location_generator(
devices=dirname, datadir=datadir, mount_check=False)]
real_listdir = os.listdir
def splode_if_endswith(suffix, err):
def sploder(path):
if path.endswith(suffix):
raise OSError(err, os.strerror(err))
else:
return real_listdir(path)
return sploder
with temptree([]) as tmpdir:
hashdir1 = os.path.join(tmpdir, "sdf", "objects", "2607", "b54",
"fe450ec990a88cc4b252b181bab04b54")
os.makedirs(hashdir1)
with open(os.path.join(hashdir1, '1656032666.98003.ts'), 'w'):
pass
hashdir2 = os.path.join(tmpdir, "sdf", "objects", "2809", "afd",
"7089ab48d955ab0851fc51cc17a34afd")
os.makedirs(hashdir2)
with open(os.path.join(hashdir2, '1656080624.31899.ts'), 'w'):
pass
expected = [(hashdir2, 'sdf', '2809', POLICIES[0])]
# Parts that look like files are just skipped
with mock.patch('os.listdir', splode_if_endswith(
"2607", errno.ENOTDIR)):
self.assertEqual(expected, list_locations(tmpdir, 'objects'))
diskfile.clear_auditor_status(tmpdir, 'objects')
# ENODATA on a suffix is ok
with mock.patch('os.listdir', splode_if_endswith(
"b54", errno.ENODATA)):
self.assertEqual(expected, list_locations(tmpdir, 'objects'))
diskfile.clear_auditor_status(tmpdir, 'objects')
# sanity the other way
expected = [(hashdir1, 'sdf', '2607', POLICIES[0])]
with mock.patch('os.listdir', splode_if_endswith(
"2809", errno.ENODATA)):
self.assertEqual(expected, list_locations(tmpdir, 'objects'))
diskfile.clear_auditor_status(tmpdir, 'objects')
with mock.patch('os.listdir', splode_if_endswith(
"afd", errno.ENOTDIR)):
self.assertEqual(expected, list_locations(tmpdir, 'objects'))
diskfile.clear_auditor_status(tmpdir, 'objects')
def test_hash_suffix_cleanup_ondisk_files_enodata_quarantined(self):
for policy in self.iter_policies():
df = self.df_router[policy].get_diskfile(

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import errno
import io
import json
import unittest
@ -1462,6 +1463,45 @@ class TestObjectReplicator(unittest.TestCase):
override_partitions=[1])
self.assertFalse(os.access(part_path, os.F_OK))
def _make_OSError(self, err):
return OSError(err, os.strerror(err))
def test_delete_partition_override_params_os_not_empty_error(self):
part_path = os.path.join(self.objects, '1')
with mock.patch('swift.obj.replicator.shutil.rmtree') as mockrmtree:
mockrmtree.side_effect = self._make_OSError(errno.ENOTEMPTY)
self.replicator.replicate(override_devices=['sda'],
override_partitions=[1],
override_policies=[0])
error_lines = self.replicator.logger.get_lines_for_level('error')
self.assertFalse(error_lines)
self.assertTrue(os.path.exists(part_path))
self.assertEqual([mock.call(part_path)], mockrmtree.call_args_list)
def test_delete_partition_ignores_os_no_entity_error(self):
part_path = os.path.join(self.objects, '1')
with mock.patch('swift.obj.replicator.shutil.rmtree') as mockrmtree:
mockrmtree.side_effect = self._make_OSError(errno.ENOENT)
self.replicator.replicate(override_devices=['sda'],
override_partitions=[1],
override_policies=[0])
error_lines = self.replicator.logger.get_lines_for_level('error')
self.assertFalse(error_lines)
self.assertTrue(os.path.exists(part_path))
self.assertEqual([mock.call(part_path)], mockrmtree.call_args_list)
def test_delete_partition_ignores_os_no_data_error(self):
part_path = os.path.join(self.objects, '1')
with mock.patch('swift.obj.replicator.shutil.rmtree') as mockrmtree:
mockrmtree.side_effect = self._make_OSError(errno.ENODATA)
self.replicator.replicate(override_devices=['sda'],
override_partitions=[1],
override_policies=[0])
error_lines = self.replicator.logger.get_lines_for_level('error')
self.assertFalse(error_lines)
self.assertTrue(os.path.exists(part_path))
self.assertEqual([mock.call(part_path)], mockrmtree.call_args_list)
def test_delete_policy_override_params(self):
df0 = self.df_mgr.get_diskfile('sda', '99', 'a', 'c', 'o',
policy=POLICIES.legacy)