Save ring builder if dispersion changes

There are cases where a rebalance improves dispersion, but doesn't
improve balance. This is because the balance of a ring builder is
taken to be the balance of its least-balanced device, so if there's a
device that has no partitions, wants some, but can't get them, then
we'll never save the ring builder even if every other device in the
ring got better.

We can detect this situation by looking at the dispersion number; if it
changes, then the rebalance needs to be saved in order to continue to
make progress.

Partial-Bug: #1697543

Change-Id: Ie239b958fc7e0547ffda2bebf61546bd4ef3d829
This commit is contained in:
Samuel Merritt 2017-06-29 10:23:38 -07:00 committed by Clay Gerrard
parent 99b89aea10
commit aa82d2cba8
2 changed files with 116 additions and 3 deletions

View File

@ -898,7 +898,9 @@ swift-ring-builder <builder_file> rebalance [options]
min_part_seconds_left = builder.min_part_seconds_left
try:
last_balance = builder.get_balance()
last_dispersion = builder.dispersion
parts, balance, removed_devs = builder.rebalance(seed=get_seed(3))
dispersion = builder.dispersion
except exceptions.RingBuilderError as e:
print('-' * 79)
print("An error has occurred during ring validation. Common\n"
@ -922,9 +924,25 @@ swift-ring-builder <builder_file> rebalance [options]
# special value(MAX_BALANCE) until zero weighted device return all
# its partitions. So we cannot check balance has changed.
# Thus we need to check balance or last_balance is special value.
if not options.force and \
not devs_changed and abs(last_balance - balance) < 1 and \
not (last_balance == MAX_BALANCE and balance == MAX_BALANCE):
be_cowardly = True
if options.force:
# User said save it, so we save it.
be_cowardly = False
elif devs_changed:
# We must save if a device changed; this could be something like
# a changed IP address.
be_cowardly = False
else:
# If balance or dispersion changed (presumably improved), then
# we should save to get the improvement.
balance_changed = (
abs(last_balance - balance) >= 1 or
(last_balance == MAX_BALANCE and balance == MAX_BALANCE))
dispersion_changed = abs(last_dispersion - dispersion) >= 1
if balance_changed or dispersion_changed:
be_cowardly = False
if be_cowardly:
print('Cowardly refusing to save rebalance as it did not change '
'at least 1%.')
exit(EXIT_WARNING)

View File

@ -1916,7 +1916,102 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin):
ring.save(self.tmpfile)
# Test No change to the device
argv = ["", self.tmpfile, "rebalance", "3"]
with mock.patch('swift.common.ring.RingBuilder.save') as mock_save:
self.assertSystemExit(EXIT_WARNING, ringbuilder.main, argv)
self.assertEqual(len(mock_save.calls), 0)
def test_rebalance_saves_dispersion_improvement(self):
# We set up a situation where dispersion improves but balance
# doesn't. We construct a ring with one zone, then add a second zone
# concurrently with a new device in the first zone. That first
# device won't acquire any partitions, so the ring's balance won't
# change. However, dispersion will improve.
ring = RingBuilder(6, 5, 1)
ring.add_dev({
'region': 1, 'zone': 1,
'ip': '10.0.0.1', 'port': 20001, 'weight': 1000,
'device': 'sda'})
ring.add_dev({
'region': 1, 'zone': 1,
'ip': '10.0.0.1', 'port': 20001, 'weight': 1000,
'device': 'sdb'})
ring.add_dev({
'region': 1, 'zone': 1,
'ip': '10.0.0.1', 'port': 20001, 'weight': 1000,
'device': 'sdc'})
ring.add_dev({
'region': 1, 'zone': 1,
'ip': '10.0.0.1', 'port': 20001, 'weight': 1000,
'device': 'sdd'})
ring.add_dev({
'region': 1, 'zone': 1,
'ip': '10.0.0.1', 'port': 20001, 'weight': 1000,
'device': 'sde'})
ring.rebalance()
# The last guy in zone 1
ring.add_dev({
'region': 1, 'zone': 1,
'ip': '10.0.0.1', 'port': 20001, 'weight': 1000,
'device': 'sdf'})
# Add zone 2 (same total weight as zone 1)
ring.add_dev({
'region': 1, 'zone': 2,
'ip': '10.0.0.2', 'port': 20001, 'weight': 1000,
'device': 'sda'})
ring.add_dev({
'region': 1, 'zone': 2,
'ip': '10.0.0.2', 'port': 20001, 'weight': 1000,
'device': 'sdb'})
ring.add_dev({
'region': 1, 'zone': 2,
'ip': '10.0.0.2', 'port': 20001, 'weight': 1000,
'device': 'sdc'})
ring.add_dev({
'region': 1, 'zone': 2,
'ip': '10.0.0.2', 'port': 20001, 'weight': 1000,
'device': 'sdd'})
ring.add_dev({
'region': 1, 'zone': 2,
'ip': '10.0.0.2', 'port': 20001, 'weight': 1000,
'device': 'sde'})
ring.add_dev({
'region': 1, 'zone': 2,
'ip': '10.0.0.2', 'port': 20001, 'weight': 1000,
'device': 'sdf'})
ring.pretend_min_part_hours_passed()
ring.save(self.tmpfile)
del ring
# Rebalance once: this gets 1/5 replica into zone 2; the ring is
# saved because devices changed.
argv = ["", self.tmpfile, "rebalance", "5759339"]
self.assertSystemExit(EXIT_WARNING, ringbuilder.main, argv)
rb = RingBuilder.load(self.tmpfile)
self.assertEqual(rb.dispersion, 100)
self.assertEqual(rb.get_balance(), 100)
self.run_srb('pretend_min_part_hours_passed')
# Rebalance again: this gets 2/5 replica into zone 2, but no devices
# changed and the balance stays the same. The only improvement is
# dispersion.
captured = {}
def capture_save(rb, path):
captured['dispersion'] = rb.dispersion
captured['balance'] = rb.get_balance()
# The warning is benign; it's just telling the user to keep on
# rebalancing. The important assertion is that the builder was
# saved.
with mock.patch('swift.common.ring.RingBuilder.save', capture_save):
self.assertSystemExit(EXIT_WARNING, ringbuilder.main, argv)
self.assertEqual(captured, {
'dispersion': 0,
'balance': 100,
})
def test_rebalance_no_devices(self):
# Test no devices