Ignore auditor status files to prevent replicator report errors
Ignore `auditor_status_*.json` files while collecting jobs, so that the replicator does not treat these files as partition paths when looking for objects. Using these wrong paths raised an exception that increased the failure count in the replicator report. Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com> Co-Authored-By: Mark Kirkwood <mark.kirkwood@catalyst.net.nz> Change-Id: Ib15a0987288d9ee32432c1998aefe638ca3b223b Closes-Bug: #1583305
This commit is contained in:
@@ -357,12 +357,12 @@ class ObjectReplicator(Daemon):
|
|||||||
handoff_partition_deleted = True
|
handoff_partition_deleted = True
|
||||||
except (Exception, Timeout):
|
except (Exception, Timeout):
|
||||||
self.logger.exception(_("Error syncing handoff partition"))
|
self.logger.exception(_("Error syncing handoff partition"))
|
||||||
|
self._add_failure_stats(failure_devs_info)
|
||||||
finally:
|
finally:
|
||||||
target_devs_info = set([(target_dev['replication_ip'],
|
target_devs_info = set([(target_dev['replication_ip'],
|
||||||
target_dev['device'])
|
target_dev['device'])
|
||||||
for target_dev in job['nodes']])
|
for target_dev in job['nodes']])
|
||||||
self.stats['success'] += len(target_devs_info - failure_devs_info)
|
self.stats['success'] += len(target_devs_info - failure_devs_info)
|
||||||
self._add_failure_stats(failure_devs_info)
|
|
||||||
if not handoff_partition_deleted:
|
if not handoff_partition_deleted:
|
||||||
self.handoffs_remaining += 1
|
self.handoffs_remaining += 1
|
||||||
self.partition_times.append(time.time() - begin)
|
self.partition_times.append(time.time() - begin)
|
||||||
@@ -491,10 +491,10 @@ class ObjectReplicator(Daemon):
|
|||||||
self.suffix_count += len(local_hash)
|
self.suffix_count += len(local_hash)
|
||||||
except (Exception, Timeout):
|
except (Exception, Timeout):
|
||||||
failure_devs_info.update(target_devs_info)
|
failure_devs_info.update(target_devs_info)
|
||||||
|
self._add_failure_stats(failure_devs_info)
|
||||||
self.logger.exception(_("Error syncing partition"))
|
self.logger.exception(_("Error syncing partition"))
|
||||||
finally:
|
finally:
|
||||||
self.stats['success'] += len(target_devs_info - failure_devs_info)
|
self.stats['success'] += len(target_devs_info - failure_devs_info)
|
||||||
self._add_failure_stats(failure_devs_info)
|
|
||||||
self.partition_times.append(time.time() - begin)
|
self.partition_times.append(time.time() - begin)
|
||||||
self.logger.timing_since('partition.update.timing', begin)
|
self.logger.timing_since('partition.update.timing', begin)
|
||||||
|
|
||||||
@@ -613,6 +613,11 @@ class ObjectReplicator(Daemon):
|
|||||||
and partition not in override_partitions):
|
and partition not in override_partitions):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if (partition.startswith('auditor_status_') and
|
||||||
|
partition.endswith('.json')):
|
||||||
|
# ignore auditor status files
|
||||||
|
continue
|
||||||
|
|
||||||
part_nodes = None
|
part_nodes = None
|
||||||
try:
|
try:
|
||||||
job_path = join(obj_path, partition)
|
job_path = join(obj_path, partition)
|
||||||
|
@@ -235,7 +235,7 @@ class TestObjectReplicator(unittest.TestCase):
|
|||||||
config,
|
config,
|
||||||
))
|
))
|
||||||
|
|
||||||
def _write_disk_data(self, disk_name):
|
def _write_disk_data(self, disk_name, with_json=False):
|
||||||
os.mkdir(os.path.join(self.devices, disk_name))
|
os.mkdir(os.path.join(self.devices, disk_name))
|
||||||
objects = os.path.join(self.devices, disk_name,
|
objects = os.path.join(self.devices, disk_name,
|
||||||
diskfile.get_data_dir(POLICIES[0]))
|
diskfile.get_data_dir(POLICIES[0]))
|
||||||
@@ -251,6 +251,13 @@ class TestObjectReplicator(unittest.TestCase):
|
|||||||
parts_1[part] = os.path.join(objects_1, part)
|
parts_1[part] = os.path.join(objects_1, part)
|
||||||
os.mkdir(parts_1[part])
|
os.mkdir(parts_1[part])
|
||||||
|
|
||||||
|
if with_json:
|
||||||
|
for json_file in ['auditor_status_ZBF.json',
|
||||||
|
'auditor_status_ALL.json']:
|
||||||
|
for obj_dir in [objects, objects_1]:
|
||||||
|
with open(os.path.join(obj_dir, json_file), 'w'):
|
||||||
|
pass
|
||||||
|
|
||||||
return objects, objects_1, parts, parts_1
|
return objects, objects_1, parts, parts_1
|
||||||
|
|
||||||
def _create_replicator(self):
|
def _create_replicator(self):
|
||||||
@@ -418,6 +425,32 @@ class TestObjectReplicator(unittest.TestCase):
|
|||||||
self.assertEqual(jobs_by_pol_part[part]['path'],
|
self.assertEqual(jobs_by_pol_part[part]['path'],
|
||||||
os.path.join(self.objects_1, part[1:]))
|
os.path.join(self.objects_1, part[1:]))
|
||||||
|
|
||||||
|
def test_collect_jobs_failure_report_with_auditor_stats_json(self):
|
||||||
|
devs = [
|
||||||
|
{'id': 0, 'device': 'sda', 'zone': 0,
|
||||||
|
'region': 1, 'ip': '1.1.1.1', 'port': 1111,
|
||||||
|
'replication_ip': '127.0.0.0', 'replication_port': 6200},
|
||||||
|
{'id': 1, 'device': 'sdb', 'zone': 1,
|
||||||
|
'region': 1, 'ip': '1.1.1.1', 'port': 1111,
|
||||||
|
'replication_ip': '127.0.0.0', 'replication_port': 6200},
|
||||||
|
{'id': 2, 'device': 'sdc', 'zone': 2,
|
||||||
|
'region': 1, 'ip': '1.1.1.1', 'port': 1111,
|
||||||
|
'replication_ip': '127.0.0.1', 'replication_port': 6200},
|
||||||
|
{'id': 3, 'device': 'sdd', 'zone': 3,
|
||||||
|
'region': 1, 'ip': '1.1.1.1', 'port': 1111,
|
||||||
|
'replication_ip': '127.0.0.1', 'replication_port': 6200},
|
||||||
|
]
|
||||||
|
objects_sdb, objects_1_sdb, _, _ = \
|
||||||
|
self._write_disk_data('sdb', with_json=True)
|
||||||
|
objects_sdc, objects_1_sdc, _, _ = \
|
||||||
|
self._write_disk_data('sdc', with_json=True)
|
||||||
|
objects_sdd, objects_1_sdd, _, _ = \
|
||||||
|
self._write_disk_data('sdd', with_json=True)
|
||||||
|
_create_test_rings(self.testdir, devs)
|
||||||
|
|
||||||
|
self.replicator.collect_jobs()
|
||||||
|
self.assertEqual(self.replicator.stats['failure'], 0)
|
||||||
|
|
||||||
@mock.patch('swift.obj.replicator.random.shuffle', side_effect=lambda l: l)
|
@mock.patch('swift.obj.replicator.random.shuffle', side_effect=lambda l: l)
|
||||||
def test_collect_jobs_multi_disk(self, mock_shuffle):
|
def test_collect_jobs_multi_disk(self, mock_shuffle):
|
||||||
devs = [
|
devs = [
|
||||||
@@ -1599,7 +1632,7 @@ class TestObjectReplicator(unittest.TestCase):
|
|||||||
return 2, {'abc': 'def'}
|
return 2, {'abc': 'def'}
|
||||||
|
|
||||||
def fake_exc(tester, *args, **kwargs):
|
def fake_exc(tester, *args, **kwargs):
|
||||||
if 'Error syncing partition' in args[0]:
|
if 'Error syncing partition timeout' in args[0]:
|
||||||
tester.i_failed = True
|
tester.i_failed = True
|
||||||
|
|
||||||
self.i_failed = False
|
self.i_failed = False
|
||||||
|
Reference in New Issue
Block a user