Add some debug output to the ring builder

Sometimes, I get handed a builder file in a support ticket and a
question of the form "why is the balance [not] doing $thing?". When
that happens, I add a bunch of print statements to my local
swift/common/ring/builder.py, figure things out, and then delete the
print statements. This time, instead of deleting the print statements,
I turned them into debug() calls and added a "--debug" flag to the
rebalance command in hopes that someone else will find it useful.

Change-Id: I697af90984fa5b314ddf570280b4585ba0ba363c
This commit is contained in:
Samuel Merritt 2015-01-11 13:41:35 -08:00
parent cf21db4bc6
commit 8d3b3b2ee0
4 changed files with 93 additions and 8 deletions

View File

@ -88,6 +88,16 @@ attempting to write to or read the builder/ring files while operations are in
progress. This can be useful in environments where ring management has been progress. This can be useful in environments where ring management has been
automated but the operator still needs to interact with the rings manually. automated but the operator still needs to interact with the rings manually.
If the ring builder is not producing the balances that you are
expecting, you can gain visibility into what it's doing with the
``--debug`` flag.::
swift-ring-builder <builder-file> rebalance --debug
This produces a great deal of output that is mostly useful if you are
either (a) attempting to fix the ring builder, or (b) filing a bug
against the ring builder.
----------------------- -----------------------
Scripting Ring Creation Scripting Ring Creation
----------------------- -----------------------

View File

@ -14,12 +14,14 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import logging
from errno import EEXIST from errno import EEXIST
from itertools import islice, izip from itertools import islice, izip
from operator import itemgetter from operator import itemgetter
from os import mkdir from os import mkdir
from os.path import basename, abspath, dirname, exists, join as pathjoin from os.path import basename, abspath, dirname, exists, join as pathjoin
from sys import argv as sys_argv, exit, stderr from sys import argv as sys_argv, exit, stderr, stdout
from textwrap import wrap from textwrap import wrap
from time import time from time import time
import optparse import optparse
@ -831,6 +833,8 @@ swift-ring-builder <builder_file> rebalance [options]
help='Force a rebalanced ring to save even ' help='Force a rebalanced ring to save even '
'if < 1% of parts changed') 'if < 1% of parts changed')
parser.add_option('-s', '--seed', help="seed to use for rebalance") parser.add_option('-s', '--seed', help="seed to use for rebalance")
parser.add_option('-d', '--debug', action='store_true',
help="print debug information")
options, args = parser.parse_args(argv) options, args = parser.parse_args(argv)
def get_seed(index): def get_seed(index):
@ -841,6 +845,14 @@ swift-ring-builder <builder_file> rebalance [options]
except IndexError: except IndexError:
pass pass
if options.debug:
logger = logging.getLogger("swift.ring.builder")
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler(stdout)
formatter = logging.Formatter("%(levelname)s: %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)
devs_changed = builder.devs_changed devs_changed = builder.devs_changed
try: try:
last_balance = builder.get_balance() last_balance = builder.get_balance()
@ -889,11 +901,12 @@ swift-ring-builder <builder_file> rebalance [options]
status = EXIT_SUCCESS status = EXIT_SUCCESS
if builder.dispersion > 0: if builder.dispersion > 0:
print '-' * 79 print '-' * 79
print('NOTE: Dispersion of %.06f indicates some parts are not\n' print(
' optimally dispersed.\n\n' 'NOTE: Dispersion of %.06f indicates some parts are not\n'
' You may want adjust some device weights, increase\n' ' optimally dispersed.\n\n'
' the overload or review the dispersion report.' % ' You may want to adjust some device weights, increase\n'
builder.dispersion) ' the overload or review the dispersion report.' %
builder.dispersion)
status = EXIT_WARNING status = EXIT_WARNING
print '-' * 79 print '-' * 79
elif balance > 5 and balance / 100.0 > builder.overload: elif balance > 5 and balance / 100.0 > builder.overload:

View File

@ -17,6 +17,7 @@ import bisect
import copy import copy
import errno import errno
import itertools import itertools
import logging
import math import math
import random import random
import cPickle as pickle import cPickle as pickle
@ -33,6 +34,16 @@ from swift.common.ring.utils import tiers_for_dev, build_tier_tree, \
MAX_BALANCE = 999.99 MAX_BALANCE = 999.99
try:
# python 2.7+
from logging import NullHandler
except ImportError:
# python 2.6
class NullHandler(logging.Handler):
def emit(self, *a, **kw):
pass
class RingBuilder(object): class RingBuilder(object):
""" """
Used to build swift.common.ring.RingData instances to be written to disk Used to build swift.common.ring.RingData instances to be written to disk
@ -96,6 +107,11 @@ class RingBuilder(object):
self._remove_devs = [] self._remove_devs = []
self._ring = None self._ring = None
self.logger = logging.getLogger("swift.ring.builder")
if not self.logger.handlers:
# silence "no handler for X" error messages
self.logger.addHandler(NullHandler())
def weight_of_one_part(self): def weight_of_one_part(self):
""" """
Returns the weight of each partition as calculated from the Returns the weight of each partition as calculated from the
@ -355,6 +371,7 @@ class RingBuilder(object):
self._ring = None self._ring = None
if self._last_part_moves_epoch is None: if self._last_part_moves_epoch is None:
self.logger.debug("New builder; performing initial balance")
self._initial_balance() self._initial_balance()
self.devs_changed = False self.devs_changed = False
self._build_dispersion_graph() self._build_dispersion_graph()
@ -363,16 +380,23 @@ class RingBuilder(object):
self._update_last_part_moves() self._update_last_part_moves()
last_balance = 0 last_balance = 0
new_parts, removed_part_count = self._adjust_replica2part2dev_size() new_parts, removed_part_count = self._adjust_replica2part2dev_size()
self.logger.debug(
"%d new parts and %d removed parts from replica-count change",
len(new_parts), removed_part_count)
changed_parts += removed_part_count changed_parts += removed_part_count
self._set_parts_wanted() self._set_parts_wanted()
self._reassign_parts(new_parts) self._reassign_parts(new_parts)
changed_parts += len(new_parts) changed_parts += len(new_parts)
while True: while True:
reassign_parts = self._gather_reassign_parts() reassign_parts = self._gather_reassign_parts()
self._reassign_parts(reassign_parts)
changed_parts += len(reassign_parts) changed_parts += len(reassign_parts)
self.logger.debug("Gathered %d parts", changed_parts)
self._reassign_parts(reassign_parts)
self.logger.debug("Assigned %d parts", changed_parts)
while self._remove_devs: while self._remove_devs:
self.devs[self._remove_devs.pop()['id']] = None remove_dev_id = self._remove_devs.pop()['id']
self.logger.debug("Removing dev %d", remove_dev_id)
self.devs[remove_dev_id] = None
balance = self.get_balance() balance = self.get_balance()
if balance < 1 or abs(last_balance - balance) < 1 or \ if balance < 1 or abs(last_balance - balance) < 1 or \
changed_parts == self.parts: changed_parts == self.parts:
@ -786,6 +810,9 @@ class RingBuilder(object):
if dev_id in dev_ids: if dev_id in dev_ids:
self._last_part_moves[part] = 0 self._last_part_moves[part] = 0
removed_dev_parts[part].append(replica) removed_dev_parts[part].append(replica)
self.logger.debug(
"Gathered %d/%d from dev %d [dev removed]",
part, replica, dev_id)
# Now we gather partitions that are "at risk" because they aren't # Now we gather partitions that are "at risk" because they aren't
# currently sufficient spread out across the cluster. # currently sufficient spread out across the cluster.
@ -859,6 +886,9 @@ class RingBuilder(object):
dev['parts'] -= 1 dev['parts'] -= 1
removed_replica = True removed_replica = True
moved_parts += 1 moved_parts += 1
self.logger.debug(
"Gathered %d/%d from dev %d [dispersion]",
part, replica, dev['id'])
break break
if removed_replica: if removed_replica:
for tier in tfd[dev['id']]: for tier in tfd[dev['id']]:
@ -894,6 +924,9 @@ class RingBuilder(object):
dev['parts_wanted'] += 1 dev['parts_wanted'] += 1
dev['parts'] -= 1 dev['parts'] -= 1
reassign_parts[part].append(replica) reassign_parts[part].append(replica)
self.logger.debug(
"Gathered %d/%d from dev %d [weight]",
part, replica, dev['id'])
reassign_parts.update(spread_out_parts) reassign_parts.update(spread_out_parts)
reassign_parts.update(removed_dev_parts) reassign_parts.update(removed_dev_parts)
@ -1121,6 +1154,8 @@ class RingBuilder(object):
new_index, new_last_sort_key) new_index, new_last_sort_key)
self._replica2part2dev[replica][part] = dev['id'] self._replica2part2dev[replica][part] = dev['id']
self.logger.debug(
"Placed %d/%d onto dev %d", part, replica, dev['id'])
# Just to save memory and keep from accidental reuse. # Just to save memory and keep from accidental reuse.
for dev in self._iter_devs(): for dev in self._iter_devs():

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import logging
import mock import mock
import os import os
import StringIO import StringIO
@ -1744,6 +1745,32 @@ class TestRebalanceCommand(unittest.TestCase, RunSwiftRingBuilderMixin):
raise raise
return (mock_stdout.getvalue(), mock_stderr.getvalue()) return (mock_stdout.getvalue(), mock_stderr.getvalue())
def test_debug(self):
# NB: getLogger(name) always returns the same object
rb_logger = logging.getLogger("swift.ring.builder")
try:
self.assertNotEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
self.run_srb("create", 8, 3, 1)
self.run_srb("add",
"r1z1-10.1.1.1:2345/sda", 100.0,
"r1z1-10.1.1.1:2345/sdb", 100.0,
"r1z1-10.1.1.1:2345/sdc", 100.0,
"r1z1-10.1.1.1:2345/sdd", 100.0)
self.run_srb("rebalance", "--debug")
self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
rb_logger.setLevel(logging.INFO)
self.run_srb("rebalance", "--debug", "123")
self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
rb_logger.setLevel(logging.INFO)
self.run_srb("rebalance", "123", "--debug")
self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
finally:
rb_logger.setLevel(logging.INFO) # silence other test cases
def test_rebalance_warning_appears(self): def test_rebalance_warning_appears(self):
self.run_srb("create", 8, 3, 24) self.run_srb("create", 8, 3, 24)
# all in one machine: totally balanceable # all in one machine: totally balanceable