From 7c484d9237c4cc7f42dfa72bb251f586bfc57f45 Mon Sep 17 00:00:00 2001 From: Michael Barton Date: Thu, 14 Oct 2010 19:23:26 +0000 Subject: [PATCH 1/3] object replicator only handoff for unmounted drives --- swift/obj/replicator.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/swift/obj/replicator.py b/swift/obj/replicator.py index 581bf7de2b..bb0ac20852 100644 --- a/swift/obj/replicator.py +++ b/swift/obj/replicator.py @@ -363,23 +363,25 @@ class ObjectReplicator(Daemon): do_listdir=(self.replication_count % 10) == 0, reclaim_age=self.reclaim_age) self.suffix_hash += hashed - successes = 0 + not_unmounted = 0 nodes = itertools.chain(job['nodes'], self.object_ring.get_more_nodes(int(job['partition']))) - while successes < (self.object_ring.replica_count - 1): + while not_unmounted < (self.object_ring.replica_count - 1): node = next(nodes) + not_unmounted += 1 try: with Timeout(60): resp = http_connect(node['ip'], node['port'], node['device'], job['partition'], 'REPLICATE', '', headers={'Content-Length': '0'}).getresponse() + if resp.status == 507: + not_unmounted -= 1 if resp.status != 200: self.logger.error("Invalid response %s from %s" % (resp.status, node['ip'])) continue remote_hash = pickle.loads(resp.read()) del resp - successes += 1 suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)] From 6bf591ee14b4efef39e5681f6763b57919624265 Mon Sep 17 00:00:00 2001 From: Michael Barton Date: Sun, 17 Oct 2010 01:20:22 +0000 Subject: [PATCH 2/3] clarify code by separating accumulator into two counters --- swift/obj/replicator.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/swift/obj/replicator.py b/swift/obj/replicator.py index bb0ac20852..47d032561c 100644 --- a/swift/obj/replicator.py +++ b/swift/obj/replicator.py @@ -363,22 +363,23 @@ class ObjectReplicator(Daemon): do_listdir=(self.replication_count % 10) == 0, reclaim_age=self.reclaim_age) self.suffix_hash += 
hashed - not_unmounted = 0 + unmounted = 0 + attempted = 0 nodes = itertools.chain(job['nodes'], self.object_ring.get_more_nodes(int(job['partition']))) - while not_unmounted < (self.object_ring.replica_count - 1): + while (attempted - unmounted) < (self.object_ring.replica_count - 1): node = next(nodes) - not_unmounted += 1 + attempted += 1 try: with Timeout(60): resp = http_connect(node['ip'], node['port'], node['device'], job['partition'], 'REPLICATE', '', headers={'Content-Length': '0'}).getresponse() - if resp.status == 507: - not_unmounted -= 1 if resp.status != 200: self.logger.error("Invalid response %s from %s" % (resp.status, node['ip'])) + if resp.status == 507: + unmounted += 1 continue remote_hash = pickle.loads(resp.read()) del resp From c13c7f560780efd60d77c91e0f76b4348e2d31be Mon Sep 17 00:00:00 2001 From: gholt Date: Mon, 18 Oct 2010 08:49:33 -0700 Subject: [PATCH 3/3] Changed logic to attempts_left; added comment for uncommon iterator usage. --- swift/obj/replicator.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/swift/obj/replicator.py b/swift/obj/replicator.py index 47d032561c..487724fc09 100644 --- a/swift/obj/replicator.py +++ b/swift/obj/replicator.py @@ -363,23 +363,26 @@ class ObjectReplicator(Daemon): do_listdir=(self.replication_count % 10) == 0, reclaim_age=self.reclaim_age) self.suffix_hash += hashed - unmounted = 0 - attempted = 0 + attempts_left = self.object_ring.replica_count nodes = itertools.chain(job['nodes'], self.object_ring.get_more_nodes(int(job['partition']))) - while (attempted - unmounted) < (self.object_ring.replica_count - 1): + while attempts_left > 0: + # If this throws StopIteration it will be caught way below node = next(nodes) - attempted += 1 + attempts_left -= 1 try: with Timeout(60): resp = http_connect(node['ip'], node['port'], node['device'], job['partition'], 'REPLICATE', '', headers={'Content-Length': '0'}).getresponse() + if resp.status == 507: + self.logger.error('%s/%s 
responded as unmounted' % + (node['ip'], node['device'])) + attempts_left += 1 + continue if resp.status != 200: self.logger.error("Invalid response %s from %s" % (resp.status, node['ip'])) - if resp.status == 507: - unmounted += 1 continue remote_hash = pickle.loads(resp.read()) del resp