Remove per-device reconstruction stats

Now that we're shuffling parts before going through them, those stats no
longer make sense -- device completion would always be 100%.

Also, always use delete_partition for cleanup, so we have one place to
make improvements. This means we'll properly clean up non-numeric
directories.

Also also, put more I/O in the tpool in delete_partition.

Change-Id: Ie06bb16c130d46ccf887c8fcb252b8d018072d68
Related-Change: I69e4c4baee64fd2192cbf5836b0803db1cc71705
This commit is contained in:
Tim Burke
2017-01-31 21:46:03 +00:00
parent 2f0ab78f9f
commit 8973ceb31a
2 changed files with 34 additions and 36 deletions

View File

@@ -350,32 +350,22 @@ class ObjectReconstructor(Daemon):
""" """
Logs various stats for the currently running reconstruction pass. Logs various stats for the currently running reconstruction pass.
""" """
if (self.device_count and self.part_count and if (self.device_count and self.part_count):
self.reconstruction_device_count):
elapsed = (time.time() - self.start) or 0.000001 elapsed = (time.time() - self.start) or 0.000001
rate = self.reconstruction_part_count / elapsed rate = self.reconstruction_part_count / elapsed
total_part_count = (1.0 * self.part_count *
self.device_count /
self.reconstruction_device_count)
self.logger.info( self.logger.info(
_("%(reconstructed)d/%(total)d (%(percentage).2f%%)" _("%(reconstructed)d/%(total)d (%(percentage).2f%%)"
" partitions of %(device)d/%(dtotal)d " " partitions reconstructed in %(time).2fs "
"(%(dpercentage).2f%%) devices"
" reconstructed in %(time).2fs "
"(%(rate).2f/sec, %(remaining)s remaining)"), "(%(rate).2f/sec, %(remaining)s remaining)"),
{'reconstructed': self.reconstruction_part_count, {'reconstructed': self.reconstruction_part_count,
'total': self.part_count, 'total': self.part_count,
'percentage': 'percentage':
self.reconstruction_part_count * 100.0 / self.part_count, self.reconstruction_part_count * 100.0 / self.part_count,
'device': self.reconstruction_device_count,
'dtotal': self.device_count,
'dpercentage':
self.reconstruction_device_count * 100.0 / self.device_count,
'time': time.time() - self.start, 'rate': rate, 'time': time.time() - self.start, 'rate': rate,
'remaining': '%d%s' % 'remaining': '%d%s' %
compute_eta(self.start, compute_eta(self.start,
self.reconstruction_part_count, self.reconstruction_part_count,
total_part_count)}) self.part_count)})
if self.suffix_count and self.partition_times: if self.suffix_count and self.partition_times:
self.logger.info( self.logger.info(
@@ -833,7 +823,6 @@ class ObjectReconstructor(Daemon):
for policy, local_devices in policy2devices.items(): for policy, local_devices in policy2devices.items():
df_mgr = self._df_router[policy] df_mgr = self._df_router[policy]
for local_dev in local_devices: for local_dev in local_devices:
self.reconstruction_device_count += 1
dev_path = df_mgr.get_dev_path(local_dev['device']) dev_path = df_mgr.get_dev_path(local_dev['device'])
if not dev_path: if not dev_path:
self.logger.warning(_('%s is not mounted'), self.logger.warning(_('%s is not mounted'),
@@ -867,7 +856,7 @@ class ObjectReconstructor(Daemon):
if not partition.isdigit(): if not partition.isdigit():
self.logger.warning( self.logger.warning(
'Unexpected entity in data dir: %r' % part_path) 'Unexpected entity in data dir: %r' % part_path)
remove_file(part_path) self.delete_partition(part_path)
self.reconstruction_part_count += 1 self.reconstruction_part_count += 1
continue continue
partition = int(partition) partition = int(partition)
@@ -907,14 +896,16 @@ class ObjectReconstructor(Daemon):
self.suffix_hash = 0 self.suffix_hash = 0
self.reconstruction_count = 0 self.reconstruction_count = 0
self.reconstruction_part_count = 0 self.reconstruction_part_count = 0
self.reconstruction_device_count = 0
self.last_reconstruction_count = -1 self.last_reconstruction_count = -1
def delete_partition(self, path): def delete_partition(self, path):
self.logger.info(_("Removing partition: %s"), path) def kill_it(path):
tpool.execute(shutil.rmtree, path, ignore_errors=True) shutil.rmtree(path, ignore_errors=True)
remove_file(path) remove_file(path)
self.logger.info(_("Removing partition: %s"), path)
tpool.execute(kill_it, path)
def reconstruct(self, **kwargs): def reconstruct(self, **kwargs):
"""Run a reconstruction pass""" """Run a reconstruction pass"""
self._reset_stats() self._reset_stats()

View File

@@ -620,9 +620,9 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
self._run_once(18, extra_devices) self._run_once(18, extra_devices)
stats_lines = set() stats_lines = set()
for line in self.logger.get_lines_for_level('info'): for line in self.logger.get_lines_for_level('info'):
if 'devices reconstructed in' not in line: if 'reconstructed in' not in line:
continue continue
stat_line = line.split('of', 1)[0].strip() stat_line = line.split('reconstructed', 1)[0].strip()
stats_lines.add(stat_line) stats_lines.add(stat_line)
acceptable = set([ acceptable = set([
'3/8 (37.50%) partitions', '3/8 (37.50%) partitions',
@@ -633,7 +633,6 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
self.assertEqual(matched, acceptable, self.assertEqual(matched, acceptable,
'missing some expected acceptable:\n%s' % ( 'missing some expected acceptable:\n%s' % (
'\n'.join(sorted(acceptable - matched)))) '\n'.join(sorted(acceptable - matched))))
self.assertEqual(self.reconstructor.reconstruction_device_count, 4)
self.assertEqual(self.reconstructor.reconstruction_part_count, 8) self.assertEqual(self.reconstructor.reconstruction_part_count, 8)
self.assertEqual(self.reconstructor.part_count, 8) self.assertEqual(self.reconstructor.part_count, 8)
@@ -647,9 +646,9 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
self._run_once(2, extra_devices, 'sdc1') self._run_once(2, extra_devices, 'sdc1')
stats_lines = set() stats_lines = set()
for line in self.logger.get_lines_for_level('info'): for line in self.logger.get_lines_for_level('info'):
if 'devices reconstructed in' not in line: if 'reconstructed in' not in line:
continue continue
stat_line = line.split('of', 1)[0].strip() stat_line = line.split('reconstructed', 1)[0].strip()
stats_lines.add(stat_line) stats_lines.add(stat_line)
acceptable = set([ acceptable = set([
'1/1 (100.00%) partitions', '1/1 (100.00%) partitions',
@@ -658,7 +657,6 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
self.assertEqual(matched, acceptable, self.assertEqual(matched, acceptable,
'missing some expected acceptable:\n%s' % ( 'missing some expected acceptable:\n%s' % (
'\n'.join(sorted(acceptable - matched)))) '\n'.join(sorted(acceptable - matched))))
self.assertEqual(self.reconstructor.reconstruction_device_count, 1)
self.assertEqual(self.reconstructor.reconstruction_part_count, 1) self.assertEqual(self.reconstructor.reconstruction_part_count, 1)
self.assertEqual(self.reconstructor.part_count, 1) self.assertEqual(self.reconstructor.part_count, 1)
@@ -1490,25 +1488,34 @@ class TestObjectReconstructor(unittest.TestCase):
self.assertTrue('Unable to list partitions' in line) self.assertTrue('Unable to list partitions' in line)
self.assertTrue(datadir_path in line) self.assertTrue(datadir_path in line)
def test_collect_parts_removes_non_partition_files(self): def test_reconstruct_removes_non_partition_files(self):
# create some junk next to partitions # create some junk next to partitions
datadir_path = os.path.join(self.devices, self.local_dev['device'], datadir_path = os.path.join(self.devices, self.local_dev['device'],
diskfile.get_data_dir(self.policy)) diskfile.get_data_dir(self.policy))
num_parts = 3 num_parts = 3
for part in range(num_parts): for part in range(num_parts):
utils.mkdirs(os.path.join(datadir_path, str(part))) utils.mkdirs(os.path.join(datadir_path, str(part)))
junk_file = os.path.join(datadir_path, 'junk')
# Add some clearly non-partition dentries
utils.mkdirs(os.path.join(datadir_path, 'not/a/partition'))
for junk_name in ('junk', '1234'):
junk_file = os.path.join(datadir_path, junk_name)
with open(junk_file, 'w') as f: with open(junk_file, 'w') as f:
f.write('junk') f.write('junk')
with mock.patch('swift.obj.reconstructor.whataremyips', with mock.patch('swift.obj.reconstructor.whataremyips',
return_value=[self.ip]): return_value=[self.ip]), \
part_infos = list(self.reconstructor.collect_parts()) mock.patch('swift.obj.reconstructor.'
# the file is not included in the part_infos map 'ObjectReconstructor.process_job'):
self.assertEqual(sorted(p['part_path'] for p in part_infos), self.reconstructor.reconstruct()
sorted([os.path.join(datadir_path, str(i))
for i in range(num_parts)])) # all the bad gets cleaned up
# and gets cleaned up errors = []
self.assertFalse(os.path.exists(junk_file)) for junk_name in ('junk', '1234', 'not'):
junk_file = os.path.join(datadir_path, junk_name)
if os.path.exists(junk_file):
errors.append('%s still exists!' % junk_file)
self.assertFalse(errors)
def test_collect_parts_overrides(self): def test_collect_parts_overrides(self):
# setup multiple devices, with multiple parts # setup multiple devices, with multiple parts