From b5509b1bee31e275ff4ff2f26f1a85aa1ec1e22f Mon Sep 17 00:00:00 2001 From: gholt Date: Thu, 25 Oct 2012 19:17:57 +0000 Subject: [PATCH] Db reclamation should remove empty suffix dirs When a db is reclaimed it removes the hash dir the db files are in, but it does not try to remove the parent suffix dir though it might be empty now. This eventually leads to a bunch of empty suffix dirs lying around. This patch fixes that by attempting to remove the parent suffix dir after a hash dir reclamation. Here's a quick script to see how bad a given drive might be: import os, os.path, sys if len(sys.argv) != 2: sys.exit('%s ' % sys.argv[0]) in_use = 0 empty = 0 containers = os.path.join(sys.argv[1], 'containers') for p in os.listdir(containers): partition = os.path.join(containers, p) for s in os.listdir(partition): suffix = os.path.join(partition, s) if os.listdir(suffix): in_use += 1 else: empty += 1 print in_use, 'in use,', empty, 'empty,', '%.02f%%' % ( 100.0 * empty / (in_use + empty)), 'empty' And here's a quick script to clean up a drive: NOTE THAT I HAVEN'T ACTUALLY RUN THIS ON A LIVE NODE YET! import errno, os, os.path, sys if len(sys.argv) != 2: sys.exit('%s ' % sys.argv[0]) containers = os.path.join(sys.argv[1], 'containers') for p in os.listdir(containers): partition = os.path.join(containers, p) for s in os.listdir(partition): suffix = os.path.join(partition, s) try: os.rmdir(suffix) except OSError, err: if err.errno not in (errno.ENOENT, errno.ENOTEMPTY): print err Change-Id: I2e6463a4cd40597fc236ebe3e73b4b31347f2309 --- swift/common/db_replicator.py | 10 ++++- test/unit/common/test_db_replicator.py | 55 ++++++++++++++++++++------ 2 files changed, 53 insertions(+), 12 deletions(-) diff --git a/swift/common/db_replicator.py b/swift/common/db_replicator.py index 9d3cb2dcfc..6f73f8470b 100644 --- a/swift/common/db_replicator.py +++ b/swift/common/db_replicator.py @@ -423,8 +423,16 @@ class Replicator(Daemon): self.logger.timing_since('timing', start_time) def delete_db(self, object_file): + hash_dir = os.path.dirname(object_file) + suf_dir = os.path.dirname(hash_dir) with lock_parent_directory(object_file): - shutil.rmtree(os.path.dirname(object_file), True) + shutil.rmtree(hash_dir, True) + try: + os.rmdir(suf_dir) + except OSError, err: + if err.errno not in (errno.ENOENT, errno.ENOTEMPTY): + self.logger.exception( + _('ERROR while trying to clean up %s') % suf_dir) self.stats['remove'] += 1 device_name = self.extract_device(object_file) self.logger.increment('removes.' + device_name) diff --git a/test/unit/common/test_db_replicator.py b/test/unit/common/test_db_replicator.py index 0f0d58270c..33b4cc47aa 100644 --- a/test/unit/common/test_db_replicator.py +++ b/test/unit/common/test_db_replicator.py @@ -18,6 +18,7 @@ from contextlib import contextmanager import os import logging import errno +from shutil import rmtree from tempfile import mkdtemp, NamedTemporaryFile from swift.common import db_replicator @@ -332,20 +333,52 @@ class TestDBReplicator(unittest.TestCase): replicator.logger = FakeLogger() temp_dir = mkdtemp() - temp_file = NamedTemporaryFile(dir=temp_dir, delete=False) + try: + temp_suf_dir = os.path.join(temp_dir, '16e') + os.mkdir(temp_suf_dir) + temp_hash_dir = os.path.join(temp_suf_dir, + '166e33924a08ede4204871468c11e16e') + os.mkdir(temp_hash_dir) + temp_file = NamedTemporaryFile(dir=temp_hash_dir, delete=False) + temp_hash_dir2 = os.path.join(temp_suf_dir, + '266e33924a08ede4204871468c11e16e') + os.mkdir(temp_hash_dir2) + temp_file2 = NamedTemporaryFile(dir=temp_hash_dir2, delete=False) - # sanity-checks - self.assertTrue(os.path.exists(temp_dir)) - self.assertTrue(os.path.exists(temp_file.name)) - self.assertEqual(0, replicator.stats['remove']) + # sanity-checks + self.assertTrue(os.path.exists(temp_dir)) + self.assertTrue(os.path.exists(temp_suf_dir)) + self.assertTrue(os.path.exists(temp_hash_dir)) + self.assertTrue(os.path.exists(temp_file.name)) + self.assertTrue(os.path.exists(temp_hash_dir2)) + self.assertTrue(os.path.exists(temp_file2.name)) + self.assertEqual(0, replicator.stats['remove']) - replicator.delete_db(temp_file.name) + replicator.delete_db(temp_file.name) - self.assertFalse(os.path.exists(temp_dir)) - self.assertFalse(os.path.exists(temp_file.name)) - self.assertEqual([(('removes.some_device',), {})], - replicator.logger.log_dict['increment']) - self.assertEqual(1, replicator.stats['remove']) + self.assertTrue(os.path.exists(temp_dir)) + self.assertTrue(os.path.exists(temp_suf_dir)) + self.assertFalse(os.path.exists(temp_hash_dir)) + self.assertFalse(os.path.exists(temp_file.name)) + self.assertTrue(os.path.exists(temp_hash_dir2)) + self.assertTrue(os.path.exists(temp_file2.name)) + self.assertEqual([(('removes.some_device',), {})], + replicator.logger.log_dict['increment']) + self.assertEqual(1, replicator.stats['remove']) + + replicator.delete_db(temp_file2.name) + + self.assertTrue(os.path.exists(temp_dir)) + self.assertFalse(os.path.exists(temp_suf_dir)) + self.assertFalse(os.path.exists(temp_hash_dir)) + self.assertFalse(os.path.exists(temp_file.name)) + self.assertFalse(os.path.exists(temp_hash_dir2)) + self.assertFalse(os.path.exists(temp_file2.name)) + self.assertEqual([(('removes.some_device',), {})] * 2, + replicator.logger.log_dict['increment']) + self.assertEqual(2, replicator.stats['remove']) + finally: + rmtree(temp_dir) def test_extract_device(self): replicator = TestReplicator({'devices': '/some/root'})