From d24e280bf4bccb4843337fabcb5f0e36f85cad52 Mon Sep 17 00:00:00 2001 From: David Goetz Date: Mon, 10 Sep 2012 11:39:10 -0700 Subject: [PATCH] obj replicator speed up Change-Id: If02b573353dedea9c2368ce4733fe97599229b2e --- swift/common/exceptions.py | 4 ++++ swift/obj/replicator.py | 26 +++++++++++++++++--------- test/unit/obj/test_replicator.py | 11 +++++++++++ 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/swift/common/exceptions.py b/swift/common/exceptions.py index a0df98c097..d377009698 100644 --- a/swift/common/exceptions.py +++ b/swift/common/exceptions.py @@ -46,6 +46,10 @@ class DiskFileNotExist(SwiftException): pass +class PathNotDir(OSError): + pass + + class AuthException(SwiftException): pass diff --git a/swift/obj/replicator.py b/swift/obj/replicator.py index d78e7457bd..2abf761dbd 100644 --- a/swift/obj/replicator.py +++ b/swift/obj/replicator.py @@ -37,6 +37,7 @@ from swift.common.utils import whataremyips, unlink_older_than, lock_path, \ from swift.common.bufferedhttp import http_connect from swift.common.daemon import Daemon from swift.common.http import HTTP_OK, HTTP_INSUFFICIENT_STORAGE +from swift.common.exceptions import PathNotDir hubs.use_hub('poll') @@ -75,9 +76,17 @@ def hash_suffix(path, reclaim_age): Performs reclamation and returns an md5 of all (remaining) files. :param reclaim_age: age in seconds at which to remove tombstones + :raises PathNotDir: if given path is not a valid directory + :raises OSError: for non-ENOTDIR errors """ md5 = hashlib.md5() - for hsh in sorted(os.listdir(path)): + try: + path_contents = sorted(os.listdir(path)) + except OSError, err: + if err.errno == errno.ENOTDIR: + raise PathNotDir() + raise + for hsh in path_contents: hsh_path = join(path, hsh) try: files = os.listdir(hsh_path) @@ -177,21 +186,20 @@ def get_hashes(partition_dir, recalculate=[], do_listdir=False, do_listdir = True if do_listdir: for suff in os.listdir(partition_dir): - if len(suff) == 3 and isdir(join(partition_dir, suff)): + if len(suff) == 3: hashes.setdefault(suff, None) modified = True hashes.update((hash_, None) for hash_ in recalculate) for suffix, hash_ in hashes.items(): if not hash_: suffix_dir = join(partition_dir, suffix) - if isdir(suffix_dir): - try: - hashes[suffix] = hash_suffix(suffix_dir, reclaim_age) - hashed += 1 - except OSError: - logging.exception(_('Error hashing suffix')) - else: + try: + hashes[suffix] = hash_suffix(suffix_dir, reclaim_age) + hashed += 1 + except PathNotDir: del hashes[suffix] + except OSError: + logging.exception(_('Error hashing suffix')) modified = True if modified: with lock_path(partition_dir): diff --git a/test/unit/obj/test_replicator.py b/test/unit/obj/test_replicator.py index 224017cf3c..136d35ef7b 100644 --- a/test/unit/obj/test_replicator.py +++ b/test/unit/obj/test_replicator.py @@ -210,6 +210,17 @@ class TestObjectReplicator(unittest.TestCase): self.assertEquals(hashed, 1) self.assert_('a83' in hashes) + def test_get_hashes_bad_dir(self): + df = DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o', FakeLogger()) + mkdirs(df.datadir) + with open(os.path.join(self.objects, '0', 'bad'), 'wb') as f: + f.write('1234567890') + part = os.path.join(self.objects, '0') + hashed, hashes = object_replicator.get_hashes(part) + self.assertEquals(hashed, 1) + self.assert_('a83' in hashes) + self.assert_('bad' not in hashes) + def test_get_hashes_unmodified(self): df = DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o', FakeLogger()) mkdirs(df.datadir)