From 4e5889d6ce997e706fbaf0b3134d057ad4d177a1 Mon Sep 17 00:00:00 2001 From: gholt Date: Wed, 14 Nov 2012 02:53:14 +0000 Subject: [PATCH] Refactor db_replicator's roundrobin_datadirs roundrobin_datadirs was returning any .db file at any depth in the accounts/containers structure. Since xfs corruption can cause such files to appear in odd places at times (only happened on one drive of ours so far, but still...), I've refactored this function to only return .db files at the proper depth. Change-Id: Id06ef6584941f8a572e286f69dfa3d96fe451355 --- swift/common/db_replicator.py | 68 ++++++----- test/unit/common/test_db_replicator.py | 155 +++++++++++++++++++++++++ 2 files changed, 197 insertions(+), 26 deletions(-) diff --git a/swift/common/db_replicator.py b/swift/common/db_replicator.py index 328e6638f6..ed16177ed5 100644 --- a/swift/common/db_replicator.py +++ b/swift/common/db_replicator.py @@ -65,6 +65,47 @@ def quarantine_db(object_file, server_type): renamer(object_dir, quarantine_dir) +def roundrobin_datadirs(datadirs): + """ + Generator to walk the data dirs in a round robin manner, evenly + hitting each device on the system, and yielding any .db files + found (in their proper places). The partitions within each data + dir are walked randomly, however. + + :param datadirs: a list of (path, node_id) to walk + :returns: A generator of (partition, path_to_db_file, node_id) + """ + + def walk_datadir(datadir, node_id): + partitions = os.listdir(datadir) + random.shuffle(partitions) + for partition in partitions: + part_dir = os.path.join(datadir, partition) + if not os.path.isdir(part_dir): + continue + suffixes = os.listdir(part_dir) + for suffix in suffixes: + suff_dir = os.path.join(part_dir, suffix) + if not os.path.isdir(suff_dir): + continue + hashes = os.listdir(suff_dir) + for hsh in hashes: + hash_dir = os.path.join(suff_dir, hsh) + if not os.path.isdir(hash_dir): + continue + object_file = os.path.join(hash_dir, hsh + '.db') + if os.path.exists(object_file): + yield (partition, object_file, node_id) + + its = [walk_datadir(datadir, node_id) for datadir, node_id in datadirs] + while its: + for it in its: + try: + yield it.next() + except StopIteration: + its.remove(it) + + class ReplConnection(BufferedHTTPConnection): """ Helper to simplify REPLICATEing to a remote server. @@ -458,31 +499,6 @@ class Replicator(Daemon): def report_up_to_date(self, full_info): return True - def roundrobin_datadirs(self, datadirs): - """ - Generator to walk the data dirs in a round robin manner, evenly - hitting each device on the system. 
- - :param datadirs: a list of paths to walk - """ - - def walk_datadir(datadir, node_id): - partitions = os.listdir(datadir) - random.shuffle(partitions) - for partition in partitions: - part_dir = os.path.join(datadir, partition) - for root, dirs, files in os.walk(part_dir, topdown=False): - for fname in (f for f in files if f.endswith('.db')): - object_file = os.path.join(root, fname) - yield (partition, object_file, node_id) - its = [walk_datadir(datadir, node_id) for datadir, node_id in datadirs] - while its: - for it in its: - try: - yield it.next() - except StopIteration: - its.remove(it) - def run_once(self, *args, **kwargs): """Run a replication pass once.""" self._zero_stats() @@ -505,7 +521,7 @@ class Replicator(Daemon): if os.path.isdir(datadir): dirs.append((datadir, node['id'])) self.logger.info(_('Beginning replication run')) - for part, object_file, node_id in self.roundrobin_datadirs(dirs): + for part, object_file, node_id in roundrobin_datadirs(dirs): self.cpool.spawn_n( self._replicate_object, part, object_file, node_id) self.cpool.waitall() diff --git a/test/unit/common/test_db_replicator.py b/test/unit/common/test_db_replicator.py index 33b4cc47aa..2e93205b67 100644 --- a/test/unit/common/test_db_replicator.py +++ b/test/unit/common/test_db_replicator.py @@ -420,6 +420,161 @@ class TestDBReplicator(unittest.TestCase): rpc.merge_syncs(fake_broker, args) self.assertEquals(fake_broker.args, (args[0],)) + def test_roundrobin_datadirs(self): + listdir_calls = [] + isdir_calls = [] + exists_calls = [] + shuffle_calls = [] + + def _listdir(path): + listdir_calls.append(path) + if not path.startswith('/srv/node/sda/containers') and \ + not path.startswith('/srv/node/sdb/containers'): + return [] + path = path[len('/srv/node/sdx/containers'):] + if path == '': + return ['123', '456', '789'] # 456 will pretend to be a file + elif path == '/123': + return ['abc', 'def.db'] # def.db will pretend to be a file + elif path == '/123/abc': + # 11111111111111111111111111111abc will pretend to be a file + return ['00000000000000000000000000000abc', + '11111111111111111111111111111abc'] + elif path == '/123/abc/00000000000000000000000000000abc': + return ['00000000000000000000000000000abc.db', + # This other.db isn't in the right place, so should be + # ignored later. 
+ '000000000000000000000000000other.db', + 'weird1'] # weird1 will pretend to be a dir, if asked + elif path == '/789': + return ['ghi', 'jkl'] # jkl will pretend to be a file + elif path == '/789/ghi': + # 33333333333333333333333333333ghi will pretend to be a file + return ['22222222222222222222222222222ghi', + '33333333333333333333333333333ghi'] + elif path == '/789/ghi/22222222222222222222222222222ghi': + return ['22222222222222222222222222222ghi.db', + 'weird2'] # weird2 will pretend to be a dir, if asked + return [] + + def _isdir(path): + isdir_calls.append(path) + if not path.startswith('/srv/node/sda/containers') and \ + not path.startswith('/srv/node/sdb/containers'): + return False + path = path[len('/srv/node/sdx/containers'):] + if path in ('/123', '/123/abc', + '/123/abc/00000000000000000000000000000abc', + '/123/abc/00000000000000000000000000000abc/weird1', + '/789', '/789/ghi', + '/789/ghi/22222222222222222222222222222ghi', + '/789/ghi/22222222222222222222222222222ghi/weird2'): + return True + return False + + def _exists(arg): + exists_calls.append(arg) + return True + + def _shuffle(arg): + shuffle_calls.append(arg) + + orig_listdir = db_replicator.os.listdir + orig_isdir = db_replicator.os.path.isdir + orig_exists = db_replicator.os.path.exists + orig_shuffle = db_replicator.random.shuffle + try: + db_replicator.os.listdir = _listdir + db_replicator.os.path.isdir = _isdir + db_replicator.os.path.exists = _exists + db_replicator.random.shuffle = _shuffle + datadirs = [('/srv/node/sda/containers', 1), + ('/srv/node/sdb/containers', 2)] + results = list(db_replicator.roundrobin_datadirs(datadirs)) + # The results show that the .db files are returned, the devices + # interleaved. + self.assertEquals(results, [ + ('123', '/srv/node/sda/containers/123/abc/' + '00000000000000000000000000000abc/' + '00000000000000000000000000000abc.db', 1), + ('123', '/srv/node/sdb/containers/123/abc/' + '00000000000000000000000000000abc/' + '00000000000000000000000000000abc.db', 2), + ('789', '/srv/node/sda/containers/789/ghi/' + '22222222222222222222222222222ghi/' + '22222222222222222222222222222ghi.db', 1), + ('789', '/srv/node/sdb/containers/789/ghi/' + '22222222222222222222222222222ghi/' + '22222222222222222222222222222ghi.db', 2)]) + # The listdir calls show that we only listdir the dirs + self.assertEquals(listdir_calls, [ + '/srv/node/sda/containers', + '/srv/node/sda/containers/123', + '/srv/node/sda/containers/123/abc', + '/srv/node/sdb/containers', + '/srv/node/sdb/containers/123', + '/srv/node/sdb/containers/123/abc', + '/srv/node/sda/containers/789', + '/srv/node/sda/containers/789/ghi', + '/srv/node/sdb/containers/789', + '/srv/node/sdb/containers/789/ghi']) + # The isdir calls show that we did ask about the things pretending + # to be files at various levels. 
+ self.assertEquals(isdir_calls, [ + '/srv/node/sda/containers/123', + '/srv/node/sda/containers/123/abc', + ('/srv/node/sda/containers/123/abc/' + '00000000000000000000000000000abc'), + '/srv/node/sdb/containers/123', + '/srv/node/sdb/containers/123/abc', + ('/srv/node/sdb/containers/123/abc/' + '00000000000000000000000000000abc'), + ('/srv/node/sda/containers/123/abc/' + '11111111111111111111111111111abc'), + '/srv/node/sda/containers/123/def.db', + '/srv/node/sda/containers/456', + '/srv/node/sda/containers/789', + '/srv/node/sda/containers/789/ghi', + ('/srv/node/sda/containers/789/ghi/' + '22222222222222222222222222222ghi'), + ('/srv/node/sdb/containers/123/abc/' + '11111111111111111111111111111abc'), + '/srv/node/sdb/containers/123/def.db', + '/srv/node/sdb/containers/456', + '/srv/node/sdb/containers/789', + '/srv/node/sdb/containers/789/ghi', + ('/srv/node/sdb/containers/789/ghi/' + '22222222222222222222222222222ghi'), + ('/srv/node/sda/containers/789/ghi/' + '33333333333333333333333333333ghi'), + '/srv/node/sda/containers/789/jkl', + ('/srv/node/sdb/containers/789/ghi/' + '33333333333333333333333333333ghi'), + '/srv/node/sdb/containers/789/jkl']) + # The exists calls are the .db files we looked for as we walked the + # structure. + self.assertEquals(exists_calls, [ + ('/srv/node/sda/containers/123/abc/' + '00000000000000000000000000000abc/' + '00000000000000000000000000000abc.db'), + ('/srv/node/sdb/containers/123/abc/' + '00000000000000000000000000000abc/' + '00000000000000000000000000000abc.db'), + ('/srv/node/sda/containers/789/ghi/' + '22222222222222222222222222222ghi/' + '22222222222222222222222222222ghi.db'), + ('/srv/node/sdb/containers/789/ghi/' + '22222222222222222222222222222ghi/' + '22222222222222222222222222222ghi.db')]) + # Shows that we called shuffle twice, once for each device. + self.assertEquals( + shuffle_calls, [['123', '456', '789'], ['123', '456', '789']]) + finally: + db_replicator.os.listdir = orig_listdir + db_replicator.os.path.isdir = orig_isdir + db_replicator.os.path.exists = orig_exists + db_replicator.random.shuffle = orig_shuffle + if __name__ == '__main__': unittest.main()
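
For illustration only (this is not part of the patch): a minimal, self-contained Python 3 sketch of the behavior the refactored roundrobin_datadirs() is described as having above, i.e. yielding only .db files that sit at the proper partition/suffix/hash depth while interleaving devices. The walk is re-implemented here purely for the example (the helper names walk_datadir/roundrobin and the throwaway temp tree are invented for this sketch), and it uses next(it) rather than the patch's Python 2 it.next().

import os
import random
import tempfile


def walk_datadir(datadir, node_id):
    # Descend exactly three levels: partition -> suffix -> hash dir,
    # and only yield the <hash>.db expected inside the hash dir.
    partitions = os.listdir(datadir)
    random.shuffle(partitions)
    for partition in partitions:
        part_dir = os.path.join(datadir, partition)
        if not os.path.isdir(part_dir):
            continue
        for suffix in os.listdir(part_dir):
            suff_dir = os.path.join(part_dir, suffix)
            if not os.path.isdir(suff_dir):
                continue
            for hsh in os.listdir(suff_dir):
                hash_dir = os.path.join(suff_dir, hsh)
                if not os.path.isdir(hash_dir):
                    continue
                db_file = os.path.join(hash_dir, hsh + '.db')
                if os.path.exists(db_file):
                    yield (partition, db_file, node_id)


def roundrobin(datadirs):
    # Interleave the per-device generators, dropping each one once it
    # is exhausted, so every device gets hit evenly per pass.
    its = [walk_datadir(path, node_id) for path, node_id in datadirs]
    while its:
        for it in list(its):
            try:
                yield next(it)
            except StopIteration:
                its.remove(it)


root = tempfile.mkdtemp()
for dev in ('sda', 'sdb'):
    hash_dir = os.path.join(root, dev, 'containers', '123', 'abc',
                            '00000000000000000000000000000abc')
    os.makedirs(hash_dir)
    open(os.path.join(hash_dir, '00000000000000000000000000000abc.db'),
         'w').close()
    # A stray .db at the wrong depth (the kind of thing the commit message
    # attributes to xfs corruption) is skipped by the depth-limited walk,
    # whereas the pre-patch os.walk() version would have yielded it.
    open(os.path.join(root, dev, 'containers', '123', 'stray.db'),
         'w').close()

dirs = [(os.path.join(root, 'sda', 'containers'), 1),
        (os.path.join(root, 'sdb', 'containers'), 2)]
for partition, db_file, node_id in roundrobin(dirs):
    print(partition, db_file, node_id)

Unlike the unit test above, which stubs out os.listdir, os.path.isdir, os.path.exists, and random.shuffle rather than touching the filesystem, this sketch builds a real throwaway tree; the round-robin interleaving is the piece that keeps a replication pass from draining one device before it ever touches the next.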