Do not revert fragments to handoffs

We're already a handoff - just hold on to the fragment until we can ship
it to the right primary location.

If we time out talking to a couple of nodes (or, more likely, get rejected
by connection limits because of contention during a rebalance) we can
actually end up making *more* work if we move the part to another node.
I've seen clusters get stuck on a rebalance just passing parts around
between handoffs for *days*.
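
Concretely, the change below stops a REVERT job from chasing handoffs at
all. A minimal sketch of the before/after node selection, assuming the
reconstructor's job dict keys (`sync_to`, `policy`, `partition`) as they
appear in the diff; the helper itself is illustrative, not code from this
patch:

    import itertools

    def revert_destinations(job, chase_handoffs=False):
        # Illustrative only: which nodes a REVERT job would try to push to.
        if chase_handoffs:
            # old behaviour: primaries first, then an open-ended stream of
            # handoffs from the ring - which is how parts end up bouncing
            # between handoffs during a rebalance
            return itertools.chain(
                job['sync_to'],
                job['policy'].object_ring.get_more_nodes(job['partition']))
        # new behaviour: only the intended primaries; if they're unreachable,
        # keep the fragment where it is and retry on a later pass
        return iter(job['sync_to'])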

Known-Issues:

If we see a 507 from a primary and we're not in the handoff list (i.e.
we're an old primary post-rebalance), it probably wouldn't be so terrible
to try to revert the part to the first handoff, provided that handoff
isn't already holding a copy. But that's more work, and it sounds more
like lp bug #1510342.
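
For illustration only, a rough sketch of what that unimplemented fallback
could look like; the function name and the `holds_part` callable are
hypothetical, and nothing like this ships in this change:

    def pick_revert_fallback(primary_status, handoff_nodes, local_dev,
                             holds_part):
        # Hypothetical sketch of the Known-Issues idea above - NOT implemented.
        if primary_status != 507:
            # only worth considering when the primary reports 507 (unmounted)
            return None
        if any(node['id'] == local_dev['id'] for node in handoff_nodes):
            # we're already in the handoff list; just hold the data and wait
            return None
        for node in handoff_nodes:
            if not holds_part(node):
                # first handoff that isn't already holding a copy of the part
                return node
        return None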

Closes-Bug: #1653169

Change-Id: Ie351d8342fc8e589b143f981e95ce74e70e52784
Clay Gerrard, 2017-01-25 11:40:54 -08:00 (committed by Tim Burke)
parent 2914e04493
commit eadb01b8af

2 changed files with 5 additions and 22 deletions

swift/obj/reconstructor.py

@@ -643,21 +643,9 @@ class ObjectReconstructor(Daemon):
         """
         self.logger.increment(
             'partition.delete.count.%s' % (job['local_dev']['device'],))
-        # we'd desperately like to push this partition back to it's
-        # primary location, but if that node is down, the next best thing
-        # is one of the handoff locations - which *might* be us already!
-        dest_nodes = itertools.chain(
-            job['sync_to'],
-            job['policy'].object_ring.get_more_nodes(job['partition']),
-        )
         syncd_with = 0
         reverted_objs = {}
-        for node in dest_nodes:
-            if syncd_with >= len(job['sync_to']):
-                break
-            if node['id'] == job['local_dev']['id']:
-                # this is as good a place as any for this data for now
-                break
+        for node in job['sync_to']:
             success, in_sync_objs = ssync_sender(
                 self, node, job, job['suffixes'])()
             self.rehash_remote(node, job, job['suffixes'])

test/unit/obj/test_reconstructor.py

@@ -2287,14 +2287,13 @@ class TestObjectReconstructor(unittest.TestCase):
         self.assertEqual(call['node'], sync_to[0])
         self.assertEqual(set(call['suffixes']), set(['123', 'abc']))

-    def test_process_job_revert_to_handoff(self):
+    def test_process_job_will_not_revert_to_handoff(self):
         replicas = self.policy.object_ring.replicas
         frag_index = random.randint(0, replicas - 1)
         sync_to = [random.choice([n for n in self.policy.object_ring.devs
                                   if n != self.local_dev])]
         sync_to[0]['index'] = frag_index
         partition = 0
-        handoff = next(self.policy.object_ring.get_more_nodes(partition))

         stub_hashes = {
             '123': {frag_index: 'hash', None: 'hash'},
@@ -2326,7 +2325,7 @@ class TestObjectReconstructor(unittest.TestCase):

         expected_suffix_calls = set([
             (node['replication_ip'], '/%s/0/123-abc' % node['device'])
-            for node in (sync_to[0], handoff)
+            for node in (sync_to[0],)
         ])

         ssync_calls = []
@@ -2383,9 +2382,6 @@ class TestObjectReconstructor(unittest.TestCase):
         expected_suffix_calls = set([
             (sync_to[0]['replication_ip'],
              '/%s/0/123-abc' % sync_to[0]['device'])
-        ] + [
-            (node['replication_ip'], '/%s/0/123-abc' % node['device'])
-            for node in handoff_nodes[:-1]
         ])

         ssync_calls = []
@@ -2401,9 +2397,8 @@ class TestObjectReconstructor(unittest.TestCase):
                                  for r in request_log.requests)
         self.assertEqual(expected_suffix_calls, found_suffix_calls)

-        # this is ssync call to primary (which fails) plus the ssync call to
-        # all of the handoffs (except the last one - which is the local_dev)
-        self.assertEqual(len(ssync_calls), len(handoff_nodes))
+        # this is ssync call to primary (which fails) and nothing else!
+        self.assertEqual(len(ssync_calls), 1)
         call = ssync_calls[0]
         self.assertEqual(call['node'], sync_to[0])
         self.assertEqual(set(call['suffixes']), set(['123', 'abc']))