Do not revert fragments to handoffs

We're already a handoff - just hold on to the fragment until we can ship
it to the right primary location.

If we time out talking to a couple of nodes (or, more likely, get rejected
by connection limits because of contention during a rebalance) we can
actually end up making *more* work if we move the part to another node.
I've seen clusters get stuck on a rebalance just passing parts around
between handoffs for *days*.
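
Concretely, the change below stops a REVERT job from chasing handoffs at
all. A minimal sketch of the before/after node selection, assuming the
reconstructor's job dict keys (`sync_to`, `policy`, `partition`) as they
appear in the diff; the helper itself is illustrative, not code from this
patch:

    import itertools

    def revert_destinations(job, chase_handoffs=False):
        # Illustrative only: which nodes a REVERT job would try to push to.
        if chase_handoffs:
            # old behaviour: primaries first, then an open-ended stream of
            # handoffs from the ring - which is how parts end up bouncing
            # between handoffs during a rebalance
            return itertools.chain(
                job['sync_to'],
                job['policy'].object_ring.get_more_nodes(job['partition']))
        # new behaviour: only the intended primaries; if they're unreachable,
        # keep the fragment where it is and retry on a later pass
        return iter(job['sync_to'])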

Known-Issues:

If we see a 507 from a primary and we're not in the handoff list (i.e.
we're an old primary post-rebalance), it probably wouldn't be so terrible
to try to revert the part to the first handoff, provided that handoff
isn't already holding a copy. But that's more work, and it sounds more
like lp bug #1510342.
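
For illustration only, a rough sketch of what that unimplemented fallback
could look like; the function name and the `holds_part` callable are
hypothetical, and nothing like this ships in this change:

    def pick_revert_fallback(primary_status, handoff_nodes, local_dev,
                             holds_part):
        # Hypothetical sketch of the Known-Issues idea above - NOT implemented.
        if primary_status != 507:
            # only worth considering when the primary reports 507 (unmounted)
            return None
        if any(node['id'] == local_dev['id'] for node in handoff_nodes):
            # we're already in the handoff list; just hold the data and wait
            return None
        for node in handoff_nodes:
            if not holds_part(node):
                # first handoff that isn't already holding a copy of the part
                return node
        return None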

Closes-Bug: #1653169

Change-Id: Ie351d8342fc8e589b143f981e95ce74e70e52784
Clay Gerrard, 2017-01-25 11:40:54 -08:00 (committed by Tim Burke)
parent 2914e04493
commit eadb01b8af

2 changed files with 5 additions and 22 deletions

swift/obj/reconstructor.py

@@ -643,21 +643,9 @@ class ObjectReconstructor(Daemon):
         """
         self.logger.increment(
             'partition.delete.count.%s' % (job['local_dev']['device'],))
-        # we'd desperately like to push this partition back to it's
-        # primary location, but if that node is down, the next best thing
-        # is one of the handoff locations - which *might* be us already!
-        dest_nodes = itertools.chain(
-            job['sync_to'],
-            job['policy'].object_ring.get_more_nodes(job['partition']),
-        )
         syncd_with = 0
         reverted_objs = {}
-        for node in dest_nodes:
-            if syncd_with >= len(job['sync_to']):
-                break
-            if node['id'] == job['local_dev']['id']:
-                # this is as good a place as any for this data for now
-                break
+        for node in job['sync_to']:
             success, in_sync_objs = ssync_sender(
                 self, node, job, job['suffixes'])()
             self.rehash_remote(node, job, job['suffixes'])

test/unit/obj/test_reconstructor.py

@@ -2287,14 +2287,13 @@ class TestObjectReconstructor(unittest.TestCase):
         self.assertEqual(call['node'], sync_to[0])
         self.assertEqual(set(call['suffixes']), set(['123', 'abc']))

-    def test_process_job_revert_to_handoff(self):
+    def test_process_job_will_not_revert_to_handoff(self):
         replicas = self.policy.object_ring.replicas
         frag_index = random.randint(0, replicas - 1)
         sync_to = [random.choice([n for n in self.policy.object_ring.devs
                                   if n != self.local_dev])]
         sync_to[0]['index'] = frag_index
         partition = 0
-        handoff = next(self.policy.object_ring.get_more_nodes(partition))

         stub_hashes = {
             '123': {frag_index: 'hash', None: 'hash'},
@@ -2326,7 +2325,7 @@ class TestObjectReconstructor(unittest.TestCase):

         expected_suffix_calls = set([
             (node['replication_ip'], '/%s/0/123-abc' % node['device'])
-            for node in (sync_to[0], handoff)
+            for node in (sync_to[0],)
         ])

         ssync_calls = []
@@ -2383,9 +2382,6 @@ class TestObjectReconstructor(unittest.TestCase):
         expected_suffix_calls = set([
             (sync_to[0]['replication_ip'],
              '/%s/0/123-abc' % sync_to[0]['device'])
-        ] + [
-            (node['replication_ip'], '/%s/0/123-abc' % node['device'])
-            for node in handoff_nodes[:-1]
         ])

         ssync_calls = []
@@ -2401,9 +2397,8 @@ class TestObjectReconstructor(unittest.TestCase):
                                  for r in request_log.requests)
         self.assertEqual(expected_suffix_calls, found_suffix_calls)

-        # this is ssync call to primary (which fails) plus the ssync call to
-        # all of the handoffs (except the last one - which is the local_dev)
-        self.assertEqual(len(ssync_calls), len(handoff_nodes))
+        # this is ssync call to primary (which fails) and nothing else!
+        self.assertEqual(len(ssync_calls), 1)
         call = ssync_calls[0]
         self.assertEqual(call['node'], sync_to[0])
         self.assertEqual(set(call['suffixes']), set(['123', 'abc']))