Add some debug output to the ring builder

Sometimes, I get handed a builder file in a support ticket and a
question of the form "why is the balance [not] doing $thing?". When
that happens, I add a bunch of print statements to my local
swift/common/ring/builder.py, figure things out, and then delete the
print statements. This time, instead of deleting the print statements,
I turned them into debug() calls and added a "--debug" flag to the
rebalance command in hopes that someone else will find it useful.

Change-Id: I697af90984fa5b314ddf570280b4585ba0ba363c
This commit is contained in:
Samuel Merritt 2015-01-11 13:41:35 -08:00
parent cf21db4bc6
commit 8d3b3b2ee0
4 changed files with 93 additions and 8 deletions

View File

@ -88,6 +88,16 @@ attempting to write to or read the builder/ring files while operations are in
progress. This can be useful in environments where ring management has been
automated but the operator still needs to interact with the rings manually.
If the ring builder is not producing the balances that you are
expecting, you can gain visibility into what it's doing with the
``--debug`` flag.::
swift-ring-builder <builder-file> rebalance --debug
This produces a great deal of output that is mostly useful if you are
either (a) attempting to fix the ring builder, or (b) filing a bug
against the ring builder.
-----------------------
Scripting Ring Creation
-----------------------

View File

@ -14,12 +14,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from errno import EEXIST
from itertools import islice, izip
from operator import itemgetter
from os import mkdir
from os.path import basename, abspath, dirname, exists, join as pathjoin
from sys import argv as sys_argv, exit, stderr
from sys import argv as sys_argv, exit, stderr, stdout
from textwrap import wrap
from time import time
import optparse
@ -831,6 +833,8 @@ swift-ring-builder <builder_file> rebalance [options]
help='Force a rebalanced ring to save even '
'if < 1% of parts changed')
parser.add_option('-s', '--seed', help="seed to use for rebalance")
parser.add_option('-d', '--debug', action='store_true',
help="print debug information")
options, args = parser.parse_args(argv)
def get_seed(index):
@ -841,6 +845,14 @@ swift-ring-builder <builder_file> rebalance [options]
except IndexError:
pass
if options.debug:
logger = logging.getLogger("swift.ring.builder")
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler(stdout)
formatter = logging.Formatter("%(levelname)s: %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)
devs_changed = builder.devs_changed
try:
last_balance = builder.get_balance()
@ -889,11 +901,12 @@ swift-ring-builder <builder_file> rebalance [options]
status = EXIT_SUCCESS
if builder.dispersion > 0:
print '-' * 79
print('NOTE: Dispersion of %.06f indicates some parts are not\n'
' optimally dispersed.\n\n'
' You may want adjust some device weights, increase\n'
' the overload or review the dispersion report.' %
builder.dispersion)
print(
'NOTE: Dispersion of %.06f indicates some parts are not\n'
' optimally dispersed.\n\n'
' You may want to adjust some device weights, increase\n'
' the overload or review the dispersion report.' %
builder.dispersion)
status = EXIT_WARNING
print '-' * 79
elif balance > 5 and balance / 100.0 > builder.overload:

View File

@ -17,6 +17,7 @@ import bisect
import copy
import errno
import itertools
import logging
import math
import random
import cPickle as pickle
@ -33,6 +34,16 @@ from swift.common.ring.utils import tiers_for_dev, build_tier_tree, \
MAX_BALANCE = 999.99
try:
# python 2.7+
from logging import NullHandler
except ImportError:
# python 2.6
class NullHandler(logging.Handler):
def emit(self, *a, **kw):
pass
class RingBuilder(object):
"""
Used to build swift.common.ring.RingData instances to be written to disk
@ -96,6 +107,11 @@ class RingBuilder(object):
self._remove_devs = []
self._ring = None
self.logger = logging.getLogger("swift.ring.builder")
if not self.logger.handlers:
# silence "no handler for X" error messages
self.logger.addHandler(NullHandler())
def weight_of_one_part(self):
"""
Returns the weight of each partition as calculated from the
@ -355,6 +371,7 @@ class RingBuilder(object):
self._ring = None
if self._last_part_moves_epoch is None:
self.logger.debug("New builder; performing initial balance")
self._initial_balance()
self.devs_changed = False
self._build_dispersion_graph()
@ -363,16 +380,23 @@ class RingBuilder(object):
self._update_last_part_moves()
last_balance = 0
new_parts, removed_part_count = self._adjust_replica2part2dev_size()
self.logger.debug(
"%d new parts and %d removed parts from replica-count change",
len(new_parts), removed_part_count)
changed_parts += removed_part_count
self._set_parts_wanted()
self._reassign_parts(new_parts)
changed_parts += len(new_parts)
while True:
reassign_parts = self._gather_reassign_parts()
self._reassign_parts(reassign_parts)
changed_parts += len(reassign_parts)
self.logger.debug("Gathered %d parts", changed_parts)
self._reassign_parts(reassign_parts)
self.logger.debug("Assigned %d parts", changed_parts)
while self._remove_devs:
self.devs[self._remove_devs.pop()['id']] = None
remove_dev_id = self._remove_devs.pop()['id']
self.logger.debug("Removing dev %d", remove_dev_id)
self.devs[remove_dev_id] = None
balance = self.get_balance()
if balance < 1 or abs(last_balance - balance) < 1 or \
changed_parts == self.parts:
@ -786,6 +810,9 @@ class RingBuilder(object):
if dev_id in dev_ids:
self._last_part_moves[part] = 0
removed_dev_parts[part].append(replica)
self.logger.debug(
"Gathered %d/%d from dev %d [dev removed]",
part, replica, dev_id)
# Now we gather partitions that are "at risk" because they aren't
# currently sufficient spread out across the cluster.
@ -859,6 +886,9 @@ class RingBuilder(object):
dev['parts'] -= 1
removed_replica = True
moved_parts += 1
self.logger.debug(
"Gathered %d/%d from dev %d [dispersion]",
part, replica, dev['id'])
break
if removed_replica:
for tier in tfd[dev['id']]:
@ -894,6 +924,9 @@ class RingBuilder(object):
dev['parts_wanted'] += 1
dev['parts'] -= 1
reassign_parts[part].append(replica)
self.logger.debug(
"Gathered %d/%d from dev %d [weight]",
part, replica, dev['id'])
reassign_parts.update(spread_out_parts)
reassign_parts.update(removed_dev_parts)
@ -1121,6 +1154,8 @@ class RingBuilder(object):
new_index, new_last_sort_key)
self._replica2part2dev[replica][part] = dev['id']
self.logger.debug(
"Placed %d/%d onto dev %d", part, replica, dev['id'])
# Just to save memory and keep from accidental reuse.
for dev in self._iter_devs():

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import mock
import os
import StringIO
@ -1744,6 +1745,32 @@ class TestRebalanceCommand(unittest.TestCase, RunSwiftRingBuilderMixin):
raise
return (mock_stdout.getvalue(), mock_stderr.getvalue())
def test_debug(self):
# NB: getLogger(name) always returns the same object
rb_logger = logging.getLogger("swift.ring.builder")
try:
self.assertNotEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
self.run_srb("create", 8, 3, 1)
self.run_srb("add",
"r1z1-10.1.1.1:2345/sda", 100.0,
"r1z1-10.1.1.1:2345/sdb", 100.0,
"r1z1-10.1.1.1:2345/sdc", 100.0,
"r1z1-10.1.1.1:2345/sdd", 100.0)
self.run_srb("rebalance", "--debug")
self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
rb_logger.setLevel(logging.INFO)
self.run_srb("rebalance", "--debug", "123")
self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
rb_logger.setLevel(logging.INFO)
self.run_srb("rebalance", "123", "--debug")
self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
finally:
rb_logger.setLevel(logging.INFO) # silence other test cases
def test_rebalance_warning_appears(self):
self.run_srb("create", 8, 3, 24)
# all in one machine: totally balanceable