Deprecate broken handoffs_first in favor of handoffs_only

The handoffs_first mode in the replicator has the useful behavior of
processing all handoff parts across all disks until there aren't any
handoffs anymore on the node [1] and then it seemingly tries to drop
back into normal operation.  In practice I've only ever heard of
handoffs_first being used during a rebalance and turned off as soon as
the rebalance finishes - it's not recommended to run with handoffs_first
mode turned on, and the replicator emits a warning on startup if the
option is enabled.

The handoffs_first mode on the reconstructor doesn't work - it was
prioritizing handoffs *per-part* [2] - which is really unfortunate
because in the reconstructor during a rebalance it's often *much* more
attractive from an efficiency disk/network perspective to revert a
partition from a handoff than it is to rebuild an entire partition from
another primary using the other EC fragments in the cluster.
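
To see why, a back-of-the-envelope comparison (the numbers below are
illustrative, not from this change): reverting ships the one fragment a
handoff already holds, while rebuilding a fragment first requires
fetching ec_ndata other fragments from the cluster.

    # Illustrative network cost of revert vs rebuild for one EC fragment
    ec_ndata = 10                  # data fragments per object (illustrative)
    fragment_size = 1 * 2 ** 20    # 1 MiB fragment (illustrative)

    revert_bytes = fragment_size              # push the fragment we already have
    rebuild_bytes = ec_ndata * fragment_size  # fetch ndata fragments to rebuild
    print('rebuild moves %dx more data' % (rebuild_bytes // revert_bytes))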

This change deprecates handoffs_first in favor of handoffs_only in the
reconstructor which is far more useful - and just like handoffs_first
mode in the replicator - it gives the operator the option of forcing the
consistency engine to focus on rebalance.  The handoffs_only behavior is
somewhat consistent with the replicator's handoffs_first option (any
error on any handoff in the replicator will make it essentially
handoffs-only forever), but the option does what you want and is named
correctly in the reconstructor.

For consistency with the replicator the reconstructor will mostly honor
the handoffs_first option, but if you set handoffs_only in the config it
always takes precedence.  Having handoffs_first in your config always
results in a warning, but if handoffs_only is not set and handoffs_first
is true the reconstructor will assume you want handoffs_only and behave
as such, as sketched below.

When running in handoffs_only mode the reconstructor will log a warning
every cycle if the option is left enabled after it finishes reverting
handoffs.  However, you should be monitoring on-disk partitions yourself
(see the sketch below) and disable the option as soon as the cluster
finishes the full rebalance cycle.
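
One way to watch for remaining handoffs (a sketch only - the device
path, policy index, and local IPs are illustrative and would need to
match your deployment):

    # Count partition dirs on a local device that this node is not a
    # primary for; once this reaches zero on every node, handoffs_only
    # should be disabled.
    import os
    from swift.common.ring import Ring

    datadir = '/srv/node/sdb1/objects-1'   # illustrative: EC policy index 1
    local_ips = {'10.0.0.1'}               # illustrative: this node's IPs
    ring = Ring('/etc/swift', ring_name='object-1')

    handoffs = 0
    for name in os.listdir(datadir):
        if not name.isdigit():             # skip tmp and other non-partition entries
            continue
        primaries = {node['ip'] for node in ring.get_part_nodes(int(name))}
        if not primaries & local_ips:
            handoffs += 1
    print('handoff partitions remaining: %d' % handoffs)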

1. Ia324728d42c606e2f9e7d29b4ab5fcbff6e47aea fixed replicator
handoffs_first "mode"

2. Unlike replication, each partition in an EC policy can have a
different kind of job per frag_index, but the cardinality of jobs is
typically only one (either sync or revert) unless there have been a
bunch of errors during writes, in which case handoff partitions may
hold a number of different fragments, as illustrated below.
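
For illustration, a handoff partition holding two different fragment
indexes would yield two revert jobs, one per frag_index (the field
subset and values here are made up; REVERT is the job-type constant
used in the diff below):

    from swift.obj.reconstructor import REVERT  # job-type constant

    jobs = [
        {'job_type': REVERT, 'frag_index': 3, 'partition': 123,
         'path': '/srv/node/sdb1/objects-1/123'},
        {'job_type': REVERT, 'frag_index': 7, 'partition': 123,
         'path': '/srv/node/sdb1/objects-1/123'},
    ]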

Known-Issues:

handoffs_only is not documented outside of the example config, see lp
bug #1626290

Closes-Bug: #1653018

Change-Id: Idde4b6cf92fab6c45f2c0c2733277701eb436898
commit da557011ec (parent 2f0ab78f9f)
Author: Clay Gerrard
Date:   2017-01-25 11:51:03 -08:00
3 changed files with 145 additions and 16 deletions

etc/object-server.conf-sample

@@ -298,7 +298,18 @@ use = egg:swift#recon
# lockup_timeout = 1800
# ring_check_interval = 15
# recon_cache_path = /var/cache/swift
# handoffs_first = False
# The handoffs_only mode option is for special case emergency situations during
# rebalance such as disk full in the cluster. This option SHOULD NOT BE
# CHANGED, except for extreme situations. When handoffs_only mode is enabled
# the reconstructor will *only* revert rebalance fragments to primaries and not
# attempt to sync any primary parts with neighbor primaries. This will force
# the reconstructor to sync and delete handoffs fragments more quickly and
# minimize the time of the rebalance by limiting the number of rebuilds. The
# handoffs_only option is only for temporary use, it should be disabled as soon
# as the emergency situation is resolved. When handoffs_only is not set, the
# deprecated handoffs_first option will be honored as a synonym, but may be
# ignored in a future release.
# handoffs_only = False
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).

swift/obj/reconstructor.py

@@ -149,8 +149,24 @@ class ObjectReconstructor(Daemon):
self.headers = {
'Content-Length': '0',
'user-agent': 'obj-reconstructor %s' % os.getpid()}
self.handoffs_first = config_true_value(conf.get('handoffs_first',
False))
if 'handoffs_first' in conf:
self.logger.warning(
'The handoffs_first option is deprecated in favor '
'of handoffs_only. This option may be ignored in a '
'future release.')
# honor handoffs_first for backwards compatibility
default_handoffs_only = config_true_value(conf['handoffs_first'])
else:
default_handoffs_only = False
self.handoffs_only = config_true_value(
conf.get('handoffs_only', default_handoffs_only))
if self.handoffs_only:
self.logger.warning(
'Handoff only mode is not intended for normal '
'operation, use handoffs_only with care.')
elif default_handoffs_only:
self.logger.warning('Ignored handoffs_first option in favor '
'of handoffs_only.')
self._df_router = DiskFileRouter(conf, self.logger)
def load_object_ring(self, policy):
@@ -668,6 +684,8 @@ class ObjectReconstructor(Daemon):
if syncd_with >= len(job['sync_to']):
self.delete_reverted_objs(
job, reverted_objs, job['frag_index'])
else:
self.handoffs_remaining += 1
self.logger.timing_since('partition.delete.timing', begin)
def _get_part_jobs(self, local_dev, part_path, partition, policy):
@@ -696,6 +714,9 @@ class ObjectReconstructor(Daemon):
:param policy: the policy
:returns: a list of dicts of job info
N.B. If this function ever returns an empty list of jobs the entire
partition will be deleted.
"""
# find all the fi's in the part, and which suffixes have them
try:
@@ -888,12 +909,12 @@ class ObjectReconstructor(Daemon):
"""
Helper function for collect_jobs to build jobs for reconstruction
using EC style storage policy
N.B. If this function ever returns an empty list of jobs the entire
partition will be deleted.
"""
jobs = self._get_part_jobs(**part_info)
random.shuffle(jobs)
if self.handoffs_first:
# Move the handoff revert jobs to the front of the list
jobs.sort(key=lambda job: job['job_type'], reverse=True)
self.job_count += len(jobs)
return jobs
@@ -909,6 +930,7 @@ class ObjectReconstructor(Daemon):
self.reconstruction_part_count = 0
self.reconstruction_device_count = 0
self.last_reconstruction_count = -1
self.handoffs_remaining = 0
def delete_partition(self, path):
self.logger.info(_("Removing partition: %s"), path)
@@ -944,6 +966,11 @@ class ObjectReconstructor(Daemon):
self.run_pool.spawn(self.delete_partition,
part_info['part_path'])
for job in jobs:
if (self.handoffs_only and job['job_type'] != REVERT):
self.logger.debug('Skipping %s job for %s '
'while in handoffs_only mode.',
job['job_type'], job['path'])
continue
self.run_pool.spawn(self.process_job, job)
with Timeout(self.lockup_timeout):
self.run_pool.waitall()
@@ -955,6 +982,16 @@ class ObjectReconstructor(Daemon):
stats.kill()
lockup_detector.kill()
self.stats_line()
if self.handoffs_only:
if self.handoffs_remaining > 0:
self.logger.info(_(
"Handoffs only mode still has handoffs remaining. "
"Next pass will continue to revert handoffs."))
else:
self.logger.warning(_(
"Handoffs only mode found no handoffs remaining. "
"You should disable handoffs_only once all nodes "
"are reporting no handoffs remaining."))
def run_once(self, *args, **kwargs):
start = time.time()

test/unit/obj/test_reconstructor.py

@@ -719,7 +719,6 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
rmtree(testring, ignore_errors=1)
def test_build_reconstruction_jobs(self):
self.reconstructor.handoffs_first = False
self.reconstructor._reset_stats()
for part_info in self.reconstructor.collect_parts():
jobs = self.reconstructor.build_reconstruction_jobs(part_info)
@@ -728,13 +727,40 @@
object_reconstructor.REVERT))
self.assert_expected_jobs(part_info['partition'], jobs)
self.reconstructor.handoffs_first = True
self.reconstructor._reset_stats()
for part_info in self.reconstructor.collect_parts():
jobs = self.reconstructor.build_reconstruction_jobs(part_info)
self.assertTrue(jobs[0]['job_type'] ==
object_reconstructor.REVERT)
self.assert_expected_jobs(part_info['partition'], jobs)
def test_handoffs_only(self):
self.reconstructor.handoffs_only = True
found_job_types = set()
def fake_process_job(job):
# increment failure counter
self.reconstructor.handoffs_remaining += 1
found_job_types.add(job['job_type'])
self.reconstructor.process_job = fake_process_job
# only revert jobs
self.reconstructor.reconstruct()
self.assertEqual(found_job_types, {object_reconstructor.REVERT})
# but failures keep handoffs remaining
msgs = self.reconstructor.logger.get_lines_for_level('info')
self.assertIn('Next pass will continue to revert handoffs', msgs[-1])
self.logger._clear()
found_job_types = set()
def fake_process_job(job):
# success does not increment failure counter
found_job_types.add(job['job_type'])
self.reconstructor.process_job = fake_process_job
# only revert jobs ... but all handoffs cleared out successfully
self.reconstructor.reconstruct()
self.assertEqual(found_job_types, {object_reconstructor.REVERT})
# it's time to turn off handoffs_only
msgs = self.reconstructor.logger.get_lines_for_level('warning')
self.assertIn('You should disable handoffs_only', msgs[-1])
def test_get_partners(self):
# we're going to perform an exhaustive test of every possible
@@ -1156,6 +1182,57 @@ class TestObjectReconstructor(unittest.TestCase):
def ts(self):
return next(self.ts_iter)
def test_handoffs_only_default(self):
# sanity neither option added to default conf
self.conf.pop('handoffs_only', None)
self.conf.pop('handoffs_first', None)
self.reconstructor = object_reconstructor.ObjectReconstructor(
self.conf, logger=self.logger)
self.assertFalse(self.reconstructor.handoffs_only)
def test_handoffs_first_enables_handoffs_only(self):
self.conf.pop('handoffs_only', None) # sanity
self.conf['handoffs_first'] = True
self.reconstructor = object_reconstructor.ObjectReconstructor(
self.conf, logger=self.logger)
self.assertTrue(self.reconstructor.handoffs_only)
warnings = self.logger.get_lines_for_level('warning')
expected = [
'The handoffs_first option is deprecated in favor '
'of handoffs_only. This option may be ignored in a '
'future release.',
'Handoff only mode is not intended for normal operation, '
'use handoffs_only with care.',
]
self.assertEqual(expected, warnings)
def test_handoffs_only_ignores_handoffs_first(self):
self.conf['handoffs_first'] = True
self.conf['handoffs_only'] = False
self.reconstructor = object_reconstructor.ObjectReconstructor(
self.conf, logger=self.logger)
self.assertFalse(self.reconstructor.handoffs_only)
warnings = self.logger.get_lines_for_level('warning')
expected = [
'The handoffs_first option is deprecated in favor of '
'handoffs_only. This option may be ignored in a future release.',
'Ignored handoffs_first option in favor of handoffs_only.',
]
self.assertEqual(expected, warnings)
def test_handoffs_only_enabled(self):
self.conf.pop('handoffs_first', None) # sanity
self.conf['handoffs_only'] = True
self.reconstructor = object_reconstructor.ObjectReconstructor(
self.conf, logger=self.logger)
self.assertTrue(self.reconstructor.handoffs_only)
warnings = self.logger.get_lines_for_level('warning')
expected = [
'Handoff only mode is not intended for normal operation, '
'use handoffs_only with care.',
]
self.assertEqual(expected, warnings)
def test_two_ec_policies(self):
with patch_policies([
StoragePolicy(0, name='zero', is_deprecated=True),
@@ -2346,7 +2423,7 @@ class TestObjectReconstructor(unittest.TestCase):
self.assertEqual(call['node'], sync_to[0])
self.assertEqual(set(call['suffixes']), set(['123', 'abc']))
def test_process_job_revert_is_handoff(self):
def test_process_job_revert_is_handoff_fails(self):
replicas = self.policy.object_ring.replicas
frag_index = random.randint(0, replicas - 1)
sync_to = [random.choice([n for n in self.policy.object_ring.devs
@@ -2376,7 +2453,8 @@ class TestObjectReconstructor(unittest.TestCase):
def ssync_response_callback(*args):
# in this test ssync always fails, until we encounter ourselves in
# the list of possible handoff's to sync to
# the list of possible handoff's to sync to, so handoffs_remaining
# should increment
return False, {}
expected_suffix_calls = set([
@@ -2406,6 +2484,7 @@ class TestObjectReconstructor(unittest.TestCase):
call = ssync_calls[0]
self.assertEqual(call['node'], sync_to[0])
self.assertEqual(set(call['suffixes']), set(['123', 'abc']))
self.assertEqual(self.reconstructor.handoffs_remaining, 1)
def test_process_job_revert_cleanup(self):
replicas = self.policy.object_ring.replicas
@@ -2451,6 +2530,7 @@ class TestObjectReconstructor(unittest.TestCase):
}
def ssync_response_callback(*args):
# success should not increment handoffs_remaining
return True, {ohash: {'ts_data': ts}}
ssync_calls = []
@@ -2474,6 +2554,7 @@ class TestObjectReconstructor(unittest.TestCase):
df_mgr.get_hashes(self.local_dev['device'], str(partition), [],
self.policy)
self.assertFalse(os.access(df._datadir, os.F_OK))
self.assertEqual(self.reconstructor.handoffs_remaining, 0)
def test_process_job_revert_cleanup_tombstone(self):
sync_to = [random.choice([n for n in self.policy.object_ring.devs