Deprecate broken handoffs_first in favor of handoffs_only

The handoffs_first mode in the replicator has the useful behavior of
processing all handoff parts across all disks until there aren't any
handoffs remaining on the node [1], and then it seemingly drops back
into normal operation.  In practice I've only ever heard of
handoffs_first being used while rebalancing and turned off as soon as
the rebalance finishes - it's not recommended to run with
handoffs_first mode turned on, and a warning is emitted on startup if
the option is enabled.

The handoffs_first mode on the reconstructor doesn't work - it was
prioritizing handoffs *per-part* [2] - which is really unfortunate,
because during a rebalance it's often *much* more attractive for the
reconstructor, from a disk/network efficiency perspective, to revert a
partition from a handoff than it is to rebuild an entire partition
from another primary using the other EC fragments in the cluster.

This change deprecates handoffs_first in favor of handoffs_only in the
reconstructor, which is far more useful: just like handoffs_first mode
in the replicator, it gives the operator the option of forcing the
consistency engine to focus on rebalance.  The handoffs_only behavior
is somewhat consistent with the replicator's handoffs_first option
(any error on any handoff in the replicator will make it essentially
handoffs-only forever), but in the reconstructor the option does what
you want and is named correctly.
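
For example, during a rebalance an operator would temporarily enable
the option in the [object-reconstructor] section of the object server
config and remove it again once the handoffs are drained (a minimal
illustration; the option is documented in the sample config change
below):

    [object-reconstructor]
    handoffs_only = True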

For consistency with the replicator, the reconstructor will mostly
honor the handoffs_first option, but an explicit handoffs_only setting
in the config always takes precedence.  Having handoffs_first in your
config always results in a warning, and if handoffs_only is not set
while handoffs_first is true, the reconstructor will assume you want
handoffs_only and behave as such.
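
A minimal sketch of that precedence, mirroring the
ObjectReconstructor.__init__ change in the diff below (the local
config_true_value here is a simplified stand-in for
swift.common.utils.config_true_value):

    def config_true_value(value):
        # simplified stand-in; Swift also accepts '1', 'yes', 'on', etc.
        return value is True or str(value).lower() == 'true'

    def resolve_handoffs_only(conf):
        default_handoffs_only = False
        if 'handoffs_first' in conf:
            # the deprecated handoffs_first only supplies the default
            default_handoffs_only = config_true_value(conf['handoffs_first'])
        # an explicit handoffs_only in the config always wins
        return config_true_value(
            conf.get('handoffs_only', default_handoffs_only))

    assert resolve_handoffs_only({}) is False
    assert resolve_handoffs_only({'handoffs_first': 'true'}) is True
    assert resolve_handoffs_only({'handoffs_first': 'true',
                                  'handoffs_only': 'false'}) is False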

When running in handoffs_only mode the reconstructor will log a
warning on every cycle once it has finished reverting handoffs, as a
reminder that the option is still enabled.  You should be monitoring
on-disk partitions and disable the option as soon as the cluster
finishes the full rebalance cycle.
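
Concretely, once the handoffs are drained the object-reconstructor log
will show the new warning added in this change on every pass (the log
prefix, timestamp and wrapping will vary with your logging config):

    Handoffs only mode found no handoffs remaining. You should disable
    handoffs_only once all nodes are reporting no handoffs remaining.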

1. Ia324728d42c606e2f9e7d29b4ab5fcbff6e47aea fixed replicator
handoffs_first "mode"

2. Unlike replication, each partition in an EC policy can have a
different kind of job per frag_index, but the cardinality of jobs is
typically only one (either sync or revert) - unless there have been a
bunch of errors during writes, in which case a handoff partition may
hold a number of different fragments.
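
A purely illustrative sketch of that point (a hypothetical helper, not
the reconstructor's real job-building code; the job dicts and the
'revert' label are placeholders, and the only Swift detail relied on
is the #frag_index suffix in EC .data file names):

    def revert_jobs_for_handoff(data_files):
        # one revert-style job per distinct frag_index found in a
        # handoff partition; EC .data files are named
        # <timestamp>#<frag_index>.data
        frag_indexes = sorted({int(name.rsplit('#', 1)[1].split('.')[0])
                               for name in data_files})
        return [{'job_type': 'revert', 'frag_index': fi}
                for fi in frag_indexes]

    # the common case: one fragment index, one revert job
    print(revert_jobs_for_handoff(['1485378663.12345#2.data']))
    # after a messy write: several fragment indexes, several revert jobs
    print(revert_jobs_for_handoff(['1485378663.12345#2.data',
                                   '1485378670.67890#7.data']))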

Known-Issues:

handoffs_only is not documented outside of the example config, see lp
bug #1626290

Closes-Bug: #1653018

Change-Id: Idde4b6cf92fab6c45f2c0c2733277701eb436898
Clay Gerrard 2017-01-25 11:51:03 -08:00
parent 2f0ab78f9f
commit da557011ec
3 changed files with 145 additions and 16 deletions

etc/object-server.conf-sample

@@ -298,7 +298,18 @@ use = egg:swift#recon
 # lockup_timeout = 1800
 # ring_check_interval = 15
 # recon_cache_path = /var/cache/swift
-# handoffs_first = False
+# The handoffs_only mode option is for special case emergency situations during
+# rebalance such as disk full in the cluster. This option SHOULD NOT BE
+# CHANGED, except for extreme situations. When handoffs_only mode is enabled
+# the reconstructor will *only* revert rebalance fragments to primaries and not
+# attempt to sync any primary parts with neighbor primaries. This will force
+# the reconstructor to sync and delete handoffs fragments more quickly and
+# minimize the time of the rebalance by limiting the number of rebuilds. The
+# handoffs_only option is only for temporary use, it should be disabled as soon
+# as the emergency situation is resolved. When handoffs_only is not set, the
+# deprecated handoffs_first option will be honored as a synonym, but may be
+# ignored in a future release.
+# handoffs_only = False
 #
 # You can set scheduling priority of processes. Niceness values range from -20
 # (most favorable to the process) to 19 (least favorable to the process).

swift/obj/reconstructor.py

@@ -149,8 +149,24 @@ class ObjectReconstructor(Daemon):
         self.headers = {
             'Content-Length': '0',
             'user-agent': 'obj-reconstructor %s' % os.getpid()}
-        self.handoffs_first = config_true_value(conf.get('handoffs_first',
-                                                          False))
+        if 'handoffs_first' in conf:
+            self.logger.warning(
+                'The handoffs_first option is deprecated in favor '
+                'of handoffs_only. This option may be ignored in a '
+                'future release.')
+            # honor handoffs_first for backwards compatibility
+            default_handoffs_only = config_true_value(conf['handoffs_first'])
+        else:
+            default_handoffs_only = False
+        self.handoffs_only = config_true_value(
+            conf.get('handoffs_only', default_handoffs_only))
+        if self.handoffs_only:
+            self.logger.warning(
+                'Handoff only mode is not intended for normal '
+                'operation, use handoffs_only with care.')
+        elif default_handoffs_only:
+            self.logger.warning('Ignored handoffs_first option in favor '
+                                'of handoffs_only.')
         self._df_router = DiskFileRouter(conf, self.logger)
 
     def load_object_ring(self, policy):
@@ -668,6 +684,8 @@ class ObjectReconstructor(Daemon):
         if syncd_with >= len(job['sync_to']):
             self.delete_reverted_objs(
                 job, reverted_objs, job['frag_index'])
+        else:
+            self.handoffs_remaining += 1
         self.logger.timing_since('partition.delete.timing', begin)
 
     def _get_part_jobs(self, local_dev, part_path, partition, policy):
@@ -696,6 +714,9 @@ class ObjectReconstructor(Daemon):
         :param policy: the policy
         :returns: a list of dicts of job info
 
+        N.B. If this function ever returns an empty list of jobs the entire
+        partition will be deleted.
+
         """
         # find all the fi's in the part, and which suffixes have them
         try:
@@ -888,12 +909,12 @@ class ObjectReconstructor(Daemon):
         """
         Helper function for collect_jobs to build jobs for reconstruction
         using EC style storage policy
+
+        N.B. If this function ever returns an empty list of jobs the entire
+        partition will be deleted.
         """
         jobs = self._get_part_jobs(**part_info)
         random.shuffle(jobs)
-        if self.handoffs_first:
-            # Move the handoff revert jobs to the front of the list
-            jobs.sort(key=lambda job: job['job_type'], reverse=True)
         self.job_count += len(jobs)
         return jobs
@@ -909,6 +930,7 @@ class ObjectReconstructor(Daemon):
         self.reconstruction_part_count = 0
         self.reconstruction_device_count = 0
         self.last_reconstruction_count = -1
+        self.handoffs_remaining = 0
 
     def delete_partition(self, path):
         self.logger.info(_("Removing partition: %s"), path)
@@ -944,6 +966,11 @@ class ObjectReconstructor(Daemon):
                     self.run_pool.spawn(self.delete_partition,
                                         part_info['part_path'])
                 for job in jobs:
+                    if (self.handoffs_only and job['job_type'] != REVERT):
+                        self.logger.debug('Skipping %s job for %s '
+                                          'while in handoffs_only mode.',
+                                          job['job_type'], job['path'])
+                        continue
                     self.run_pool.spawn(self.process_job, job)
             with Timeout(self.lockup_timeout):
                 self.run_pool.waitall()
@@ -955,6 +982,16 @@ class ObjectReconstructor(Daemon):
             stats.kill()
             lockup_detector.kill()
             self.stats_line()
+            if self.handoffs_only:
+                if self.handoffs_remaining > 0:
+                    self.logger.info(_(
+                        "Handoffs only mode still has handoffs remaining. "
+                        "Next pass will continue to revert handoffs."))
+                else:
+                    self.logger.warning(_(
+                        "Handoffs only mode found no handoffs remaining. "
+                        "You should disable handoffs_only once all nodes "
+                        "are reporting no handoffs remaining."))
 
     def run_once(self, *args, **kwargs):
         start = time.time()

test/unit/obj/test_reconstructor.py

@@ -719,7 +719,6 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
             rmtree(testring, ignore_errors=1)
 
     def test_build_reconstruction_jobs(self):
-        self.reconstructor.handoffs_first = False
         self.reconstructor._reset_stats()
         for part_info in self.reconstructor.collect_parts():
             jobs = self.reconstructor.build_reconstruction_jobs(part_info)
@@ -728,13 +727,40 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
                              object_reconstructor.REVERT))
             self.assert_expected_jobs(part_info['partition'], jobs)
 
-        self.reconstructor.handoffs_first = True
-        self.reconstructor._reset_stats()
-        for part_info in self.reconstructor.collect_parts():
-            jobs = self.reconstructor.build_reconstruction_jobs(part_info)
-            self.assertTrue(jobs[0]['job_type'] ==
-                            object_reconstructor.REVERT)
-            self.assert_expected_jobs(part_info['partition'], jobs)
+    def test_handoffs_only(self):
+        self.reconstructor.handoffs_only = True
+
+        found_job_types = set()
+
+        def fake_process_job(job):
+            # increment failure counter
+            self.reconstructor.handoffs_remaining += 1
+            found_job_types.add(job['job_type'])
+
+        self.reconstructor.process_job = fake_process_job
+
+        # only revert jobs
+        self.reconstructor.reconstruct()
+        self.assertEqual(found_job_types, {object_reconstructor.REVERT})
+        # but failures keep handoffs remaining
+        msgs = self.reconstructor.logger.get_lines_for_level('info')
+        self.assertIn('Next pass will continue to revert handoffs', msgs[-1])
+
+        self.logger._clear()
+        found_job_types = set()
+
+        def fake_process_job(job):
+            # success does not increment failure counter
+            found_job_types.add(job['job_type'])
+
+        self.reconstructor.process_job = fake_process_job
+
+        # only revert jobs ... but all handoffs cleared out successfully
+        self.reconstructor.reconstruct()
+        self.assertEqual(found_job_types, {object_reconstructor.REVERT})
+        # it's time to turn off handoffs_only
+        msgs = self.reconstructor.logger.get_lines_for_level('warning')
+        self.assertIn('You should disable handoffs_only', msgs[-1])
 
     def test_get_partners(self):
         # we're going to perform an exhaustive test of every possible
@@ -1156,6 +1182,57 @@ class TestObjectReconstructor(unittest.TestCase):
     def ts(self):
         return next(self.ts_iter)
 
+    def test_handoffs_only_default(self):
+        # sanity neither option added to default conf
+        self.conf.pop('handoffs_only', None)
+        self.conf.pop('handoffs_first', None)
+        self.reconstructor = object_reconstructor.ObjectReconstructor(
+            self.conf, logger=self.logger)
+        self.assertFalse(self.reconstructor.handoffs_only)
+
+    def test_handoffs_first_enables_handoffs_only(self):
+        self.conf.pop('handoffs_only', None)  # sanity
+        self.conf['handoffs_first'] = True
+        self.reconstructor = object_reconstructor.ObjectReconstructor(
+            self.conf, logger=self.logger)
+        self.assertTrue(self.reconstructor.handoffs_only)
+        warnings = self.logger.get_lines_for_level('warning')
+        expected = [
+            'The handoffs_first option is deprecated in favor '
+            'of handoffs_only. This option may be ignored in a '
+            'future release.',
+            'Handoff only mode is not intended for normal operation, '
+            'use handoffs_only with care.',
+        ]
+        self.assertEqual(expected, warnings)
+
+    def test_handoffs_only_ignores_handoffs_first(self):
+        self.conf['handoffs_first'] = True
+        self.conf['handoffs_only'] = False
+        self.reconstructor = object_reconstructor.ObjectReconstructor(
+            self.conf, logger=self.logger)
+        self.assertFalse(self.reconstructor.handoffs_only)
+        warnings = self.logger.get_lines_for_level('warning')
+        expected = [
+            'The handoffs_first option is deprecated in favor of '
+            'handoffs_only. This option may be ignored in a future release.',
+            'Ignored handoffs_first option in favor of handoffs_only.',
+        ]
+        self.assertEqual(expected, warnings)
+
+    def test_handoffs_only_enabled(self):
+        self.conf.pop('handoffs_first', None)  # sanity
+        self.conf['handoffs_only'] = True
+        self.reconstructor = object_reconstructor.ObjectReconstructor(
+            self.conf, logger=self.logger)
+        self.assertTrue(self.reconstructor.handoffs_only)
+        warnings = self.logger.get_lines_for_level('warning')
+        expected = [
+            'Handoff only mode is not intended for normal operation, '
+            'use handoffs_only with care.',
+        ]
+        self.assertEqual(expected, warnings)
+
     def test_two_ec_policies(self):
         with patch_policies([
             StoragePolicy(0, name='zero', is_deprecated=True),
@@ -2346,7 +2423,7 @@ class TestObjectReconstructor(unittest.TestCase):
         self.assertEqual(call['node'], sync_to[0])
         self.assertEqual(set(call['suffixes']), set(['123', 'abc']))
 
-    def test_process_job_revert_is_handoff(self):
+    def test_process_job_revert_is_handoff_fails(self):
         replicas = self.policy.object_ring.replicas
         frag_index = random.randint(0, replicas - 1)
         sync_to = [random.choice([n for n in self.policy.object_ring.devs
@@ -2376,7 +2453,8 @@ class TestObjectReconstructor(unittest.TestCase):
 
         def ssync_response_callback(*args):
             # in this test ssync always fails, until we encounter ourselves in
-            # the list of possible handoff's to sync to
+            # the list of possible handoff's to sync to, so handoffs_remaining
+            # should increment
             return False, {}
 
         expected_suffix_calls = set([
@@ -2406,6 +2484,7 @@ class TestObjectReconstructor(unittest.TestCase):
         call = ssync_calls[0]
         self.assertEqual(call['node'], sync_to[0])
         self.assertEqual(set(call['suffixes']), set(['123', 'abc']))
+        self.assertEqual(self.reconstructor.handoffs_remaining, 1)
 
     def test_process_job_revert_cleanup(self):
         replicas = self.policy.object_ring.replicas
@@ -2451,6 +2530,7 @@ class TestObjectReconstructor(unittest.TestCase):
         }
 
         def ssync_response_callback(*args):
+            # success should not increment handoffs_remaining
            return True, {ohash: {'ts_data': ts}}
 
        ssync_calls = []
@@ -2474,6 +2554,7 @@ class TestObjectReconstructor(unittest.TestCase):
         df_mgr.get_hashes(self.local_dev['device'], str(partition), [],
                           self.policy)
         self.assertFalse(os.access(df._datadir, os.F_OK))
+        self.assertEqual(self.reconstructor.handoffs_remaining, 0)
 
     def test_process_job_revert_cleanup_tombstone(self):
         sync_to = [random.choice([n for n in self.policy.object_ring.devs