Save ring builder if dispersion changes
There are cases where a rebalance improves dispersion, but doesn't improve balance. This is because the balance of a ring builder is taken to be the balance of its least-balanced device, so if there's a device that has no partitions, wants some, but can't get them, then we'll never save the ring builder even if every other device in the ring got better. We can detect this situation by looking at the dispersion number; if it changes, then the rebalance needs to be saved in order to continue to make progress. Partial-Bug: #1697543 Change-Id: Ie239b958fc7e0547ffda2bebf61546bd4ef3d829
This commit is contained in:
parent
99b89aea10
commit
aa82d2cba8
@ -898,7 +898,9 @@ swift-ring-builder <builder_file> rebalance [options]
|
||||
min_part_seconds_left = builder.min_part_seconds_left
|
||||
try:
|
||||
last_balance = builder.get_balance()
|
||||
last_dispersion = builder.dispersion
|
||||
parts, balance, removed_devs = builder.rebalance(seed=get_seed(3))
|
||||
dispersion = builder.dispersion
|
||||
except exceptions.RingBuilderError as e:
|
||||
print('-' * 79)
|
||||
print("An error has occurred during ring validation. Common\n"
|
||||
@ -922,9 +924,25 @@ swift-ring-builder <builder_file> rebalance [options]
|
||||
# special value(MAX_BALANCE) until zero weighted device return all
|
||||
# its partitions. So we cannot check balance has changed.
|
||||
# Thus we need to check balance or last_balance is special value.
|
||||
if not options.force and \
|
||||
not devs_changed and abs(last_balance - balance) < 1 and \
|
||||
not (last_balance == MAX_BALANCE and balance == MAX_BALANCE):
|
||||
be_cowardly = True
|
||||
if options.force:
|
||||
# User said save it, so we save it.
|
||||
be_cowardly = False
|
||||
elif devs_changed:
|
||||
# We must save if a device changed; this could be something like
|
||||
# a changed IP address.
|
||||
be_cowardly = False
|
||||
else:
|
||||
# If balance or dispersion changed (presumably improved), then
|
||||
# we should save to get the improvement.
|
||||
balance_changed = (
|
||||
abs(last_balance - balance) >= 1 or
|
||||
(last_balance == MAX_BALANCE and balance == MAX_BALANCE))
|
||||
dispersion_changed = abs(last_dispersion - dispersion) >= 1
|
||||
if balance_changed or dispersion_changed:
|
||||
be_cowardly = False
|
||||
|
||||
if be_cowardly:
|
||||
print('Cowardly refusing to save rebalance as it did not change '
|
||||
'at least 1%.')
|
||||
exit(EXIT_WARNING)
|
||||
|
@ -1916,7 +1916,102 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin):
|
||||
ring.save(self.tmpfile)
|
||||
# Test No change to the device
|
||||
argv = ["", self.tmpfile, "rebalance", "3"]
|
||||
with mock.patch('swift.common.ring.RingBuilder.save') as mock_save:
|
||||
self.assertSystemExit(EXIT_WARNING, ringbuilder.main, argv)
|
||||
self.assertEqual(len(mock_save.calls), 0)
|
||||
|
||||
def test_rebalance_saves_dispersion_improvement(self):
|
||||
# We set up a situation where dispersion improves but balance
|
||||
# doesn't. We construct a ring with one zone, then add a second zone
|
||||
# concurrently with a new device in the first zone. That first
|
||||
# device won't acquire any partitions, so the ring's balance won't
|
||||
# change. However, dispersion will improve.
|
||||
|
||||
ring = RingBuilder(6, 5, 1)
|
||||
ring.add_dev({
|
||||
'region': 1, 'zone': 1,
|
||||
'ip': '10.0.0.1', 'port': 20001, 'weight': 1000,
|
||||
'device': 'sda'})
|
||||
ring.add_dev({
|
||||
'region': 1, 'zone': 1,
|
||||
'ip': '10.0.0.1', 'port': 20001, 'weight': 1000,
|
||||
'device': 'sdb'})
|
||||
ring.add_dev({
|
||||
'region': 1, 'zone': 1,
|
||||
'ip': '10.0.0.1', 'port': 20001, 'weight': 1000,
|
||||
'device': 'sdc'})
|
||||
ring.add_dev({
|
||||
'region': 1, 'zone': 1,
|
||||
'ip': '10.0.0.1', 'port': 20001, 'weight': 1000,
|
||||
'device': 'sdd'})
|
||||
ring.add_dev({
|
||||
'region': 1, 'zone': 1,
|
||||
'ip': '10.0.0.1', 'port': 20001, 'weight': 1000,
|
||||
'device': 'sde'})
|
||||
ring.rebalance()
|
||||
|
||||
# The last guy in zone 1
|
||||
ring.add_dev({
|
||||
'region': 1, 'zone': 1,
|
||||
'ip': '10.0.0.1', 'port': 20001, 'weight': 1000,
|
||||
'device': 'sdf'})
|
||||
|
||||
# Add zone 2 (same total weight as zone 1)
|
||||
ring.add_dev({
|
||||
'region': 1, 'zone': 2,
|
||||
'ip': '10.0.0.2', 'port': 20001, 'weight': 1000,
|
||||
'device': 'sda'})
|
||||
ring.add_dev({
|
||||
'region': 1, 'zone': 2,
|
||||
'ip': '10.0.0.2', 'port': 20001, 'weight': 1000,
|
||||
'device': 'sdb'})
|
||||
ring.add_dev({
|
||||
'region': 1, 'zone': 2,
|
||||
'ip': '10.0.0.2', 'port': 20001, 'weight': 1000,
|
||||
'device': 'sdc'})
|
||||
ring.add_dev({
|
||||
'region': 1, 'zone': 2,
|
||||
'ip': '10.0.0.2', 'port': 20001, 'weight': 1000,
|
||||
'device': 'sdd'})
|
||||
ring.add_dev({
|
||||
'region': 1, 'zone': 2,
|
||||
'ip': '10.0.0.2', 'port': 20001, 'weight': 1000,
|
||||
'device': 'sde'})
|
||||
ring.add_dev({
|
||||
'region': 1, 'zone': 2,
|
||||
'ip': '10.0.0.2', 'port': 20001, 'weight': 1000,
|
||||
'device': 'sdf'})
|
||||
ring.pretend_min_part_hours_passed()
|
||||
ring.save(self.tmpfile)
|
||||
del ring
|
||||
|
||||
# Rebalance once: this gets 1/5 replica into zone 2; the ring is
|
||||
# saved because devices changed.
|
||||
argv = ["", self.tmpfile, "rebalance", "5759339"]
|
||||
self.assertSystemExit(EXIT_WARNING, ringbuilder.main, argv)
|
||||
rb = RingBuilder.load(self.tmpfile)
|
||||
self.assertEqual(rb.dispersion, 100)
|
||||
self.assertEqual(rb.get_balance(), 100)
|
||||
self.run_srb('pretend_min_part_hours_passed')
|
||||
|
||||
# Rebalance again: this gets 2/5 replica into zone 2, but no devices
|
||||
# changed and the balance stays the same. The only improvement is
|
||||
# dispersion.
|
||||
|
||||
captured = {}
|
||||
|
||||
def capture_save(rb, path):
|
||||
captured['dispersion'] = rb.dispersion
|
||||
captured['balance'] = rb.get_balance()
|
||||
# The warning is benign; it's just telling the user to keep on
|
||||
# rebalancing. The important assertion is that the builder was
|
||||
# saved.
|
||||
with mock.patch('swift.common.ring.RingBuilder.save', capture_save):
|
||||
self.assertSystemExit(EXIT_WARNING, ringbuilder.main, argv)
|
||||
self.assertEqual(captured, {
|
||||
'dispersion': 0,
|
||||
'balance': 100,
|
||||
})
|
||||
|
||||
def test_rebalance_no_devices(self):
|
||||
# Test no devices
|
||||
|
Loading…
Reference in New Issue
Block a user