Fix rebalancing when 2+ of a partition's replicas are on deleted devices.

RingBuilder._reassign_parts assumed that only one replica for a given
partition would move. This isn't necessarily true when a bunch of
devices have been removed, and the old code then left invalid entries
in _replica2part2dev and also caused validation to fail.
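To make the failure mode concrete, here is a hypothetical single-partition slice of the builder's state (illustrative only, not taken from the patch): _replica2part2dev holds one device-id array per replica, indexed by partition, and 0xffff marks a replica whose device has been removed.

    from array import array

    NONE_DEV = 0xffff  # sentinel: this replica currently has no device

    # Hypothetical slice: replicas 1 and 2 of this partition were on
    # devices that have since been removed.
    replica2part2dev = [array('H', [2]),         # replica 0 -> device 2, still valid
                        array('H', [NONE_DEV]),  # replica 1 -> device removed
                        array('H', [NONE_DEV])]  # replica 2 -> device removed

    # The old loop tracked a single `replace` index, so only one of the two
    # NONE_DEV slots got a new device; the other survived into the
    # rebalanced ring and later tripped validate().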

One easy way to reproduce this is to create a 3-replica, 3-zone,
6-device ring with 2 drives per zone (all of equal weight), rebalance,
and then remove one drive from each zone and rebalance again.
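In API terms, a minimal sketch of that recipe (it mirrors the new test_remove_a_lot below, and assumes RingBuilder is importable from the swift.common.ring package as in the test module):

    from swift.common import ring

    rb = ring.RingBuilder(3, 3, 1)   # part_power=3, 3 replicas, min_part_hours=1
    for dev_id in range(6):          # two equal-weight drives in each of 3 zones
        rb.add_dev({'id': dev_id, 'device': 'd%d' % dev_id,
                    'ip': '10.0.0.%d' % (dev_id % 3 + 1), 'port': 6002,
                    'weight': 1000.0, 'zone': dev_id % 3 + 1})
    rb.rebalance()
    rb.validate()

    for dev_id in (3, 4, 5):         # drop one drive from each zone
        rb.remove_dev(dev_id)
    rb.rebalance()
    rb.validate()                    # failed before this fix: leftover 0xffff entries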

Bug: 943493

Change-Id: I0d399bed5d733448ad877fa2823b542777d385a4
commit 7fe0c6c695 (parent 9316a8f876)
Author: Samuel Merritt
Date:   2012-02-28 18:25:14 -08:00

3 changed files with 55 additions and 20 deletions

AUTHORS

@@ -38,6 +38,7 @@ Dragos Manolescu (dragosm@hp.com)
 Juan J. Martinez (juan@memset.com)
 Donagh McCabe (donagh.mccabe@hp.com)
 Ewan Mellor (ewan.mellor@citrix.com)
+Samuel Merritt (spam@andcheese.org)
 Stephen Milton (milton@isomedia.com)
 Russ Nelson (russ@crynwr.com)
 Maru Newby (mnewby@internap.com)
swift/common/ring/builder.py

@@ -502,31 +502,35 @@ class RingBuilder(object):
                                 key=lambda x: x['sort_key'])
         for part in reassign_parts:
             other_zones = array('H')
-            replace = None
+            replace = []
             for replica in xrange(self.replicas):
                 if self._replica2part2dev[replica][part] == 0xffff:
-                    replace = replica
+                    replace.append(replica)
                 else:
                     other_zones.append(self.devs[
                         self._replica2part2dev[replica][part]]['zone'])
-            index = len(available_devs) - 1
-            while available_devs[index]['zone'] in other_zones:
-                index -= 1
-            dev = available_devs.pop(index)
-            self._replica2part2dev[replace][part] = dev['id']
-            dev['parts_wanted'] -= 1
-            dev['parts'] += 1
-            dev['sort_key'] = '%08x.%04x' % (self.parts + dev['parts_wanted'],
-                                             randint(0, 0xffff))
-            index = 0
-            end = len(available_devs)
-            while index < end:
-                mid = (index + end) // 2
-                if dev['sort_key'] < available_devs[mid]['sort_key']:
-                    end = mid
-                else:
-                    index = mid + 1
-            available_devs.insert(index, dev)
+            for replica in replace:
+                index = len(available_devs) - 1
+                while available_devs[index]['zone'] in other_zones:
+                    index -= 1
+                dev = available_devs.pop(index)
+                other_zones.append(dev['zone'])
+                self._replica2part2dev[replica][part] = dev['id']
+                dev['parts_wanted'] -= 1
+                dev['parts'] += 1
+                dev['sort_key'] = \
+                    '%08x.%04x' % (self.parts + dev['parts_wanted'],
+                                   randint(0, 0xffff))
+                index = 0
+                end = len(available_devs)
+                while index < end:
+                    mid = (index + end) // 2
+                    if dev['sort_key'] < available_devs[mid]['sort_key']:
+                        end = mid
+                    else:
+                        index = mid + 1
+                available_devs.insert(index, dev)
         for dev in self.devs:
             if dev is not None:
                 del dev['sort_key']
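For readers skimming the hunk, here is a hedged, standalone distillation of the new per-partition logic (simplified; the names reassign_partition and NONE_DEV are illustrative, not from the Swift source). The essential change is that every replica slot pointing at a removed device is collected first, and each newly chosen zone is appended to other_zones so two orphaned replicas of the same partition cannot land in the same zone.

    NONE_DEV = 0xffff  # sentinel meaning "this replica has no device"

    def reassign_partition(part, replica2part2dev, devs, available_devs, replicas):
        # available_devs is assumed sorted ascending by 'sort_key',
        # i.e. the most-wanted device sits at the end of the list.
        other_zones = []
        replace = []
        for replica in range(replicas):
            dev_id = replica2part2dev[replica][part]
            if dev_id == NONE_DEV:
                replace.append(replica)            # this replica lost its device
            else:
                other_zones.append(devs[dev_id]['zone'])

        for replica in replace:
            # walk back from the most-wanted device to one in an unused zone
            index = len(available_devs) - 1
            while available_devs[index]['zone'] in other_zones:
                index -= 1
            dev = available_devs.pop(index)
            other_zones.append(dev['zone'])        # the key fix: claim this zone too
            replica2part2dev[replica][part] = dev['id']
            dev['parts_wanted'] -= 1
            dev['parts'] += 1
            # (the real code also recomputes dev['sort_key'] and binary-inserts
            # dev back into available_devs to keep the list sorted)

The open-coded binary search in the patch behaves like a keyed bisect_right on sort_key; Python 2's bisect module has no key parameter, hence the manual loop.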

test/unit/common/ring/test_builder.py

@@ -126,6 +126,36 @@ class TestRingBuilder(unittest.TestCase):
             counts[dev_id] = counts.get(dev_id, 0) + 1
         self.assertEquals(counts, {0: 256, 2: 256, 3: 256})
 
+    def test_remove_a_lot(self):
+        rb = ring.RingBuilder(3, 3, 1)
+        rb.add_dev({'id': 0, 'device': 'd0', 'ip': '10.0.0.1',
+                    'port': 6002, 'weight': 1000.0, 'zone': 1})
+        rb.add_dev({'id': 1, 'device': 'd1', 'ip': '10.0.0.2',
+                    'port': 6002, 'weight': 1000.0, 'zone': 2})
+        rb.add_dev({'id': 2, 'device': 'd2', 'ip': '10.0.0.3',
+                    'port': 6002, 'weight': 1000.0, 'zone': 3})
+        rb.add_dev({'id': 3, 'device': 'd3', 'ip': '10.0.0.1',
+                    'port': 6002, 'weight': 1000.0, 'zone': 1})
+        rb.add_dev({'id': 4, 'device': 'd4', 'ip': '10.0.0.2',
+                    'port': 6002, 'weight': 1000.0, 'zone': 2})
+        rb.add_dev({'id': 5, 'device': 'd5', 'ip': '10.0.0.3',
+                    'port': 6002, 'weight': 1000.0, 'zone': 3})
+        rb.rebalance()
+        rb.validate()
+
+        # this has to put more than 1/3 of the partitions in the
+        # cluster on removed devices in order to ensure that at least
+        # one partition has multiple replicas that need to move.
+        #
+        # (for an N-replica ring, it's more than 1/N of the
+        # partitions, of course)
+        rb.remove_dev(3)
+        rb.remove_dev(4)
+        rb.remove_dev(5)
+        rb.rebalance()
+        rb.validate()
+
     def test_shuffled_gather(self):
         if self._shuffled_gather_helper() and \
                 self._shuffled_gather_helper():
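As a back-of-the-envelope check on the comment in test_remove_a_lot (illustrative arithmetic, not part of the test): with part_power 3 the ring has 2**3 = 8 partitions and 8 * 3 = 24 replica slots, so each of the six equal-weight devices holds 4 slots; removing three devices orphans 12 slots across only 8 partitions, so by pigeonhole at least one partition has two or more replicas to move.

    parts = 2 ** 3                # part_power=3 -> 8 partitions
    slots = parts * 3             # 3 replicas -> 24 (partition, replica) slots
    per_dev = slots // 6          # 6 equal-weight devices -> 4 slots each
    orphaned = per_dev * 3        # removing devices 3, 4, 5 orphans 12 slots
    assert orphaned > parts       # more orphaned slots than partitions, so some
                                  # partition must lose at least two replicas at once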