Add some debug output to the ring builder
Sometimes, I get handed a builder file in a support ticket and a question of the form "why is the balance [not] doing $thing?". When that happens, I add a bunch of print statements to my local swift/common/ring/builder.py, figure things out, and then delete the print statements. This time, instead of deleting the print statements, I turned them into debug() calls and added a "--debug" flag to the rebalance command in hopes that someone else will find it useful. Change-Id: I697af90984fa5b314ddf570280b4585ba0ba363c
This commit is contained in:
parent
cf21db4bc6
commit
8d3b3b2ee0
|
@ -88,6 +88,16 @@ attempting to write to or read the builder/ring files while operations are in
|
||||||
progress. This can be useful in environments where ring management has been
|
progress. This can be useful in environments where ring management has been
|
||||||
automated but the operator still needs to interact with the rings manually.
|
automated but the operator still needs to interact with the rings manually.
|
||||||
|
|
||||||
|
If the ring builder is not producing the balances that you are
|
||||||
|
expecting, you can gain visibility into what it's doing with the
|
||||||
|
``--debug`` flag.::
|
||||||
|
|
||||||
|
swift-ring-builder <builder-file> rebalance --debug
|
||||||
|
|
||||||
|
This produces a great deal of output that is mostly useful if you are
|
||||||
|
either (a) attempting to fix the ring builder, or (b) filing a bug
|
||||||
|
against the ring builder.
|
||||||
|
|
||||||
-----------------------
|
-----------------------
|
||||||
Scripting Ring Creation
|
Scripting Ring Creation
|
||||||
-----------------------
|
-----------------------
|
||||||
|
|
|
@ -14,12 +14,14 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
from errno import EEXIST
|
from errno import EEXIST
|
||||||
from itertools import islice, izip
|
from itertools import islice, izip
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from os import mkdir
|
from os import mkdir
|
||||||
from os.path import basename, abspath, dirname, exists, join as pathjoin
|
from os.path import basename, abspath, dirname, exists, join as pathjoin
|
||||||
from sys import argv as sys_argv, exit, stderr
|
from sys import argv as sys_argv, exit, stderr, stdout
|
||||||
from textwrap import wrap
|
from textwrap import wrap
|
||||||
from time import time
|
from time import time
|
||||||
import optparse
|
import optparse
|
||||||
|
@ -831,6 +833,8 @@ swift-ring-builder <builder_file> rebalance [options]
|
||||||
help='Force a rebalanced ring to save even '
|
help='Force a rebalanced ring to save even '
|
||||||
'if < 1% of parts changed')
|
'if < 1% of parts changed')
|
||||||
parser.add_option('-s', '--seed', help="seed to use for rebalance")
|
parser.add_option('-s', '--seed', help="seed to use for rebalance")
|
||||||
|
parser.add_option('-d', '--debug', action='store_true',
|
||||||
|
help="print debug information")
|
||||||
options, args = parser.parse_args(argv)
|
options, args = parser.parse_args(argv)
|
||||||
|
|
||||||
def get_seed(index):
|
def get_seed(index):
|
||||||
|
@ -841,6 +845,14 @@ swift-ring-builder <builder_file> rebalance [options]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
if options.debug:
|
||||||
|
logger = logging.getLogger("swift.ring.builder")
|
||||||
|
logger.setLevel(logging.DEBUG)
|
||||||
|
handler = logging.StreamHandler(stdout)
|
||||||
|
formatter = logging.Formatter("%(levelname)s: %(message)s")
|
||||||
|
handler.setFormatter(formatter)
|
||||||
|
logger.addHandler(handler)
|
||||||
|
|
||||||
devs_changed = builder.devs_changed
|
devs_changed = builder.devs_changed
|
||||||
try:
|
try:
|
||||||
last_balance = builder.get_balance()
|
last_balance = builder.get_balance()
|
||||||
|
@ -889,11 +901,12 @@ swift-ring-builder <builder_file> rebalance [options]
|
||||||
status = EXIT_SUCCESS
|
status = EXIT_SUCCESS
|
||||||
if builder.dispersion > 0:
|
if builder.dispersion > 0:
|
||||||
print '-' * 79
|
print '-' * 79
|
||||||
print('NOTE: Dispersion of %.06f indicates some parts are not\n'
|
print(
|
||||||
' optimally dispersed.\n\n'
|
'NOTE: Dispersion of %.06f indicates some parts are not\n'
|
||||||
' You may want adjust some device weights, increase\n'
|
' optimally dispersed.\n\n'
|
||||||
' the overload or review the dispersion report.' %
|
' You may want to adjust some device weights, increase\n'
|
||||||
builder.dispersion)
|
' the overload or review the dispersion report.' %
|
||||||
|
builder.dispersion)
|
||||||
status = EXIT_WARNING
|
status = EXIT_WARNING
|
||||||
print '-' * 79
|
print '-' * 79
|
||||||
elif balance > 5 and balance / 100.0 > builder.overload:
|
elif balance > 5 and balance / 100.0 > builder.overload:
|
||||||
|
|
|
@ -17,6 +17,7 @@ import bisect
|
||||||
import copy
|
import copy
|
||||||
import errno
|
import errno
|
||||||
import itertools
|
import itertools
|
||||||
|
import logging
|
||||||
import math
|
import math
|
||||||
import random
|
import random
|
||||||
import cPickle as pickle
|
import cPickle as pickle
|
||||||
|
@ -33,6 +34,16 @@ from swift.common.ring.utils import tiers_for_dev, build_tier_tree, \
|
||||||
MAX_BALANCE = 999.99
|
MAX_BALANCE = 999.99
|
||||||
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
# python 2.7+
|
||||||
|
from logging import NullHandler
|
||||||
|
except ImportError:
|
||||||
|
# python 2.6
|
||||||
|
class NullHandler(logging.Handler):
|
||||||
|
def emit(self, *a, **kw):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class RingBuilder(object):
|
class RingBuilder(object):
|
||||||
"""
|
"""
|
||||||
Used to build swift.common.ring.RingData instances to be written to disk
|
Used to build swift.common.ring.RingData instances to be written to disk
|
||||||
|
@ -96,6 +107,11 @@ class RingBuilder(object):
|
||||||
self._remove_devs = []
|
self._remove_devs = []
|
||||||
self._ring = None
|
self._ring = None
|
||||||
|
|
||||||
|
self.logger = logging.getLogger("swift.ring.builder")
|
||||||
|
if not self.logger.handlers:
|
||||||
|
# silence "no handler for X" error messages
|
||||||
|
self.logger.addHandler(NullHandler())
|
||||||
|
|
||||||
def weight_of_one_part(self):
|
def weight_of_one_part(self):
|
||||||
"""
|
"""
|
||||||
Returns the weight of each partition as calculated from the
|
Returns the weight of each partition as calculated from the
|
||||||
|
@ -355,6 +371,7 @@ class RingBuilder(object):
|
||||||
|
|
||||||
self._ring = None
|
self._ring = None
|
||||||
if self._last_part_moves_epoch is None:
|
if self._last_part_moves_epoch is None:
|
||||||
|
self.logger.debug("New builder; performing initial balance")
|
||||||
self._initial_balance()
|
self._initial_balance()
|
||||||
self.devs_changed = False
|
self.devs_changed = False
|
||||||
self._build_dispersion_graph()
|
self._build_dispersion_graph()
|
||||||
|
@ -363,16 +380,23 @@ class RingBuilder(object):
|
||||||
self._update_last_part_moves()
|
self._update_last_part_moves()
|
||||||
last_balance = 0
|
last_balance = 0
|
||||||
new_parts, removed_part_count = self._adjust_replica2part2dev_size()
|
new_parts, removed_part_count = self._adjust_replica2part2dev_size()
|
||||||
|
self.logger.debug(
|
||||||
|
"%d new parts and %d removed parts from replica-count change",
|
||||||
|
len(new_parts), removed_part_count)
|
||||||
changed_parts += removed_part_count
|
changed_parts += removed_part_count
|
||||||
self._set_parts_wanted()
|
self._set_parts_wanted()
|
||||||
self._reassign_parts(new_parts)
|
self._reassign_parts(new_parts)
|
||||||
changed_parts += len(new_parts)
|
changed_parts += len(new_parts)
|
||||||
while True:
|
while True:
|
||||||
reassign_parts = self._gather_reassign_parts()
|
reassign_parts = self._gather_reassign_parts()
|
||||||
self._reassign_parts(reassign_parts)
|
|
||||||
changed_parts += len(reassign_parts)
|
changed_parts += len(reassign_parts)
|
||||||
|
self.logger.debug("Gathered %d parts", changed_parts)
|
||||||
|
self._reassign_parts(reassign_parts)
|
||||||
|
self.logger.debug("Assigned %d parts", changed_parts)
|
||||||
while self._remove_devs:
|
while self._remove_devs:
|
||||||
self.devs[self._remove_devs.pop()['id']] = None
|
remove_dev_id = self._remove_devs.pop()['id']
|
||||||
|
self.logger.debug("Removing dev %d", remove_dev_id)
|
||||||
|
self.devs[remove_dev_id] = None
|
||||||
balance = self.get_balance()
|
balance = self.get_balance()
|
||||||
if balance < 1 or abs(last_balance - balance) < 1 or \
|
if balance < 1 or abs(last_balance - balance) < 1 or \
|
||||||
changed_parts == self.parts:
|
changed_parts == self.parts:
|
||||||
|
@ -786,6 +810,9 @@ class RingBuilder(object):
|
||||||
if dev_id in dev_ids:
|
if dev_id in dev_ids:
|
||||||
self._last_part_moves[part] = 0
|
self._last_part_moves[part] = 0
|
||||||
removed_dev_parts[part].append(replica)
|
removed_dev_parts[part].append(replica)
|
||||||
|
self.logger.debug(
|
||||||
|
"Gathered %d/%d from dev %d [dev removed]",
|
||||||
|
part, replica, dev_id)
|
||||||
|
|
||||||
# Now we gather partitions that are "at risk" because they aren't
|
# Now we gather partitions that are "at risk" because they aren't
|
||||||
# currently sufficient spread out across the cluster.
|
# currently sufficient spread out across the cluster.
|
||||||
|
@ -859,6 +886,9 @@ class RingBuilder(object):
|
||||||
dev['parts'] -= 1
|
dev['parts'] -= 1
|
||||||
removed_replica = True
|
removed_replica = True
|
||||||
moved_parts += 1
|
moved_parts += 1
|
||||||
|
self.logger.debug(
|
||||||
|
"Gathered %d/%d from dev %d [dispersion]",
|
||||||
|
part, replica, dev['id'])
|
||||||
break
|
break
|
||||||
if removed_replica:
|
if removed_replica:
|
||||||
for tier in tfd[dev['id']]:
|
for tier in tfd[dev['id']]:
|
||||||
|
@ -894,6 +924,9 @@ class RingBuilder(object):
|
||||||
dev['parts_wanted'] += 1
|
dev['parts_wanted'] += 1
|
||||||
dev['parts'] -= 1
|
dev['parts'] -= 1
|
||||||
reassign_parts[part].append(replica)
|
reassign_parts[part].append(replica)
|
||||||
|
self.logger.debug(
|
||||||
|
"Gathered %d/%d from dev %d [weight]",
|
||||||
|
part, replica, dev['id'])
|
||||||
|
|
||||||
reassign_parts.update(spread_out_parts)
|
reassign_parts.update(spread_out_parts)
|
||||||
reassign_parts.update(removed_dev_parts)
|
reassign_parts.update(removed_dev_parts)
|
||||||
|
@ -1121,6 +1154,8 @@ class RingBuilder(object):
|
||||||
new_index, new_last_sort_key)
|
new_index, new_last_sort_key)
|
||||||
|
|
||||||
self._replica2part2dev[replica][part] = dev['id']
|
self._replica2part2dev[replica][part] = dev['id']
|
||||||
|
self.logger.debug(
|
||||||
|
"Placed %d/%d onto dev %d", part, replica, dev['id'])
|
||||||
|
|
||||||
# Just to save memory and keep from accidental reuse.
|
# Just to save memory and keep from accidental reuse.
|
||||||
for dev in self._iter_devs():
|
for dev in self._iter_devs():
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import logging
|
||||||
import mock
|
import mock
|
||||||
import os
|
import os
|
||||||
import StringIO
|
import StringIO
|
||||||
|
@ -1744,6 +1745,32 @@ class TestRebalanceCommand(unittest.TestCase, RunSwiftRingBuilderMixin):
|
||||||
raise
|
raise
|
||||||
return (mock_stdout.getvalue(), mock_stderr.getvalue())
|
return (mock_stdout.getvalue(), mock_stderr.getvalue())
|
||||||
|
|
||||||
|
def test_debug(self):
|
||||||
|
# NB: getLogger(name) always returns the same object
|
||||||
|
rb_logger = logging.getLogger("swift.ring.builder")
|
||||||
|
try:
|
||||||
|
self.assertNotEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
|
||||||
|
|
||||||
|
self.run_srb("create", 8, 3, 1)
|
||||||
|
self.run_srb("add",
|
||||||
|
"r1z1-10.1.1.1:2345/sda", 100.0,
|
||||||
|
"r1z1-10.1.1.1:2345/sdb", 100.0,
|
||||||
|
"r1z1-10.1.1.1:2345/sdc", 100.0,
|
||||||
|
"r1z1-10.1.1.1:2345/sdd", 100.0)
|
||||||
|
self.run_srb("rebalance", "--debug")
|
||||||
|
self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
|
||||||
|
|
||||||
|
rb_logger.setLevel(logging.INFO)
|
||||||
|
self.run_srb("rebalance", "--debug", "123")
|
||||||
|
self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
|
||||||
|
|
||||||
|
rb_logger.setLevel(logging.INFO)
|
||||||
|
self.run_srb("rebalance", "123", "--debug")
|
||||||
|
self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
rb_logger.setLevel(logging.INFO) # silence other test cases
|
||||||
|
|
||||||
def test_rebalance_warning_appears(self):
|
def test_rebalance_warning_appears(self):
|
||||||
self.run_srb("create", 8, 3, 24)
|
self.run_srb("create", 8, 3, 24)
|
||||||
# all in one machine: totally balanceable
|
# all in one machine: totally balanceable
|
||||||
|
|
Loading…
Reference in New Issue