Object replicator now hands off replication only on 507 errors

This commit is contained in:
gholt
2010-10-18 18:52:15 +00:00
committed by Tarmac

View File

@@ -363,23 +363,29 @@ class ObjectReplicator(Daemon):
do_listdir=(self.replication_count % 10) == 0, do_listdir=(self.replication_count % 10) == 0,
reclaim_age=self.reclaim_age) reclaim_age=self.reclaim_age)
self.suffix_hash += hashed self.suffix_hash += hashed
successes = 0 attempts_left = self.object_ring.replica_count
nodes = itertools.chain(job['nodes'], nodes = itertools.chain(job['nodes'],
self.object_ring.get_more_nodes(int(job['partition']))) self.object_ring.get_more_nodes(int(job['partition'])))
while successes < (self.object_ring.replica_count - 1): while attempts_left > 0:
# If this throws StopIteration it will be caught way below
node = next(nodes) node = next(nodes)
attempts_left -= 1
try: try:
with Timeout(60): with Timeout(60):
resp = http_connect(node['ip'], node['port'], resp = http_connect(node['ip'], node['port'],
node['device'], job['partition'], 'REPLICATE', node['device'], job['partition'], 'REPLICATE',
'', headers={'Content-Length': '0'}).getresponse() '', headers={'Content-Length': '0'}).getresponse()
if resp.status == 507:
self.logger.error('%s/%s responded as unmounted' %
(node['ip'], node['device']))
attempts_left += 1
continue
if resp.status != 200: if resp.status != 200:
self.logger.error("Invalid response %s from %s" % self.logger.error("Invalid response %s from %s" %
(resp.status, node['ip'])) (resp.status, node['ip']))
continue continue
remote_hash = pickle.loads(resp.read()) remote_hash = pickle.loads(resp.read())
del resp del resp
successes += 1
suffixes = [suffix for suffix in local_hash suffixes = [suffix for suffix in local_hash
if local_hash[suffix] != if local_hash[suffix] !=
remote_hash.get(suffix, -1)] remote_hash.get(suffix, -1)]