Speed up the object replicator

Change-Id: If02b573353dedea9c2368ce4733fe97599229b2e
This commit is contained in:
David Goetz 2012-09-10 11:39:10 -07:00
parent 3482eb26f9
commit d24e280bf4
3 changed files with 32 additions and 9 deletions

View File

@ -46,6 +46,10 @@ class DiskFileNotExist(SwiftException):
pass pass
class PathNotDir(OSError):
    """Raised when a path that was expected to be a directory is not one."""
class AuthException(SwiftException): class AuthException(SwiftException):
pass pass

View File

@ -37,6 +37,7 @@ from swift.common.utils import whataremyips, unlink_older_than, lock_path, \
from swift.common.bufferedhttp import http_connect from swift.common.bufferedhttp import http_connect
from swift.common.daemon import Daemon from swift.common.daemon import Daemon
from swift.common.http import HTTP_OK, HTTP_INSUFFICIENT_STORAGE from swift.common.http import HTTP_OK, HTTP_INSUFFICIENT_STORAGE
from swift.common.exceptions import PathNotDir
hubs.use_hub('poll') hubs.use_hub('poll')
@ -75,9 +76,17 @@ def hash_suffix(path, reclaim_age):
Performs reclamation and returns an md5 of all (remaining) files. Performs reclamation and returns an md5 of all (remaining) files.
:param reclaim_age: age in seconds at which to remove tombstones :param reclaim_age: age in seconds at which to remove tombstones
:raises PathNotDir: if given path is not a valid directory
:raises OSError: for non-ENOTDIR errors
""" """
md5 = hashlib.md5() md5 = hashlib.md5()
for hsh in sorted(os.listdir(path)): try:
path_contents = sorted(os.listdir(path))
except OSError, err:
if err.errno == errno.ENOTDIR:
raise PathNotDir()
raise
for hsh in path_contents:
hsh_path = join(path, hsh) hsh_path = join(path, hsh)
try: try:
files = os.listdir(hsh_path) files = os.listdir(hsh_path)
@ -177,21 +186,20 @@ def get_hashes(partition_dir, recalculate=[], do_listdir=False,
do_listdir = True do_listdir = True
if do_listdir: if do_listdir:
for suff in os.listdir(partition_dir): for suff in os.listdir(partition_dir):
if len(suff) == 3 and isdir(join(partition_dir, suff)): if len(suff) == 3:
hashes.setdefault(suff, None) hashes.setdefault(suff, None)
modified = True modified = True
hashes.update((hash_, None) for hash_ in recalculate) hashes.update((hash_, None) for hash_ in recalculate)
for suffix, hash_ in hashes.items(): for suffix, hash_ in hashes.items():
if not hash_: if not hash_:
suffix_dir = join(partition_dir, suffix) suffix_dir = join(partition_dir, suffix)
if isdir(suffix_dir):
try: try:
hashes[suffix] = hash_suffix(suffix_dir, reclaim_age) hashes[suffix] = hash_suffix(suffix_dir, reclaim_age)
hashed += 1 hashed += 1
except PathNotDir:
del hashes[suffix]
except OSError: except OSError:
logging.exception(_('Error hashing suffix')) logging.exception(_('Error hashing suffix'))
else:
del hashes[suffix]
modified = True modified = True
if modified: if modified:
with lock_path(partition_dir): with lock_path(partition_dir):

View File

@ -210,6 +210,17 @@ class TestObjectReplicator(unittest.TestCase):
self.assertEquals(hashed, 1) self.assertEquals(hashed, 1)
self.assert_('a83' in hashes) self.assert_('a83' in hashes)
def test_get_hashes_bad_dir(self):
    # A plain file named 'bad' is dropped into the partition directory.
    # get_hashes must report only the real suffix ('a83') and must not
    # keep an entry for the non-directory.
    disk_file = DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o',
                         FakeLogger())
    mkdirs(disk_file.datadir)
    partition = os.path.join(self.objects, '0')
    stray_path = os.path.join(partition, 'bad')
    with open(stray_path, 'wb') as fp:
        fp.write('1234567890')
    hashed, hashes = object_replicator.get_hashes(partition)
    self.assertEquals(hashed, 1)
    self.assert_('a83' in hashes)
    self.assert_('bad' not in hashes)
def test_get_hashes_unmodified(self): def test_get_hashes_unmodified(self):
df = DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o', FakeLogger()) df = DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o', FakeLogger())
mkdirs(df.datadir) mkdirs(df.datadir)