From c0dbf5b8854a0887edba82a4591b0b1d0810acaf Mon Sep 17 00:00:00 2001
From: Tim Burke
Date: Thu, 31 Jan 2019 14:55:05 -0800
Subject: [PATCH] sharding: Make replicator logging less scary

When we abort the replication process because we've got shard ranges
and the sharder is now responsible for ensuring object-row durability,
we log a warning like "refusing to replicate objects", which sounds
scary. That's because it *is*, of course -- if the sharder isn't
running, whatever rows that DB has may only exist in that DB, meaning
we're one drive failure away from losing track of them entirely.

However, when the sharder *is* running and everything's happy, we reach
a steady state where the root containers are all sharded and none of
them have any object rows to lose. At that point, the warning does more
harm than good.

Only print the scary "refusing to replicate" warning if we're still
responsible for some object rows, whether deleted or not.

Change-Id: I35de08d6c1617b2e446e969a54b79b42e8cfafef
---
 swift/container/replicator.py | 21 +++++++++++++++------
 test/probe/test_sharder.py    |  4 ++--
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/swift/container/replicator.py b/swift/container/replicator.py
index e0e771c997..ea18fbd962 100644
--- a/swift/container/replicator.py
+++ b/swift/container/replicator.py
@@ -20,7 +20,7 @@ from collections import defaultdict
 from eventlet import Timeout
 
 from swift.container.sync_store import ContainerSyncStore
-from swift.container.backend import ContainerBroker, DATADIR
+from swift.container.backend import ContainerBroker, DATADIR, SHARDED
 from swift.container.reconciler import (
     MISPLACED_OBJECTS_ACCOUNT, incorrect_policy_index,
     get_reconciler_container_name, get_row_to_q_entry_translator)
@@ -113,11 +113,20 @@ class ContainerReplicator(db_replicator.Replicator):
                 'peer may need upgrading', broker.db_file,
                 '%(ip)s:%(port)s/%(device)s' % node)
         if broker.sharding_initiated():
-            self.logger.warning(
-                '%s is able to shard -- refusing to replicate objects to peer '
-                '%s; have shard ranges and will wait for cleaving',
-                broker.db_file,
-                '%(ip)s:%(port)s/%(device)s' % node)
+            if info['db_state'] == SHARDED and len(
+                    broker.get_objects(limit=1)) == 0:
+                self.logger.debug('%s is sharded and has nothing more to '
+                                  'replicate to peer %s',
+                                  broker.db_file,
+                                  '%(ip)s:%(port)s/%(device)s' % node)
+            else:
+                # Only print the scary warning if there was something that
+                # didn't get replicated
+                self.logger.warning(
+                    '%s is able to shard -- refusing to replicate objects to '
+                    'peer %s; have shard ranges and will wait for cleaving',
+                    broker.db_file,
+                    '%(ip)s:%(port)s/%(device)s' % node)
             self.stats['deferred'] += 1
             return shard_range_success
 
diff --git a/test/probe/test_sharder.py b/test/probe/test_sharder.py
index 94a057fcf1..0befd604f0 100644
--- a/test/probe/test_sharder.py
+++ b/test/probe/test_sharder.py
@@ -1902,7 +1902,7 @@ class TestContainerSharding(BaseTestContainerSharding):
         old_primary_dir, container_hash = self.get_storage_dir(
             self.brain.part, handoff_node)
         utils.mkdirs(os.path.dirname(old_primary_dir))
-        os.rename(new_primary_dir, old_primary_dir)
+        shutil.move(new_primary_dir, old_primary_dir)
 
         # make the cluster more or less "healthy" again
         self.brain.servers.start(number=new_primary_node_number)
@@ -2009,7 +2009,7 @@ class TestContainerSharding(BaseTestContainerSharding):
         old_primary_dir, container_hash = self.get_storage_dir(
             self.brain.part, handoff_node)
         utils.mkdirs(os.path.dirname(old_primary_dir))
-        os.rename(new_primary_dir, old_primary_dir)
+        shutil.move(new_primary_dir, old_primary_dir)
         self.assert_container_state(handoff_node, 'sharding', 3)
 
         # run replicator on handoff node to create a fresh db on new primary
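
For readers skimming the diff, the following is a minimal standalone sketch
(not part of the patch) of the decision the new branch makes. The helper name
and its simplified arguments are hypothetical; SHARDED here is a stand-in for
the constant imported from swift.container.backend, and object_row_count
mirrors the len(broker.get_objects(limit=1)) check above.

    # Illustrative sketch only -- not the replicator's actual API.
    SHARDED = 'sharded'  # stand-in for swift.container.backend.SHARDED

    def deferred_replication_log_level(db_state, object_row_count):
        """Pick the log level used when object replication is deferred."""
        if db_state == SHARDED and object_row_count == 0:
            # Steady state: the root container is fully sharded and holds
            # no object rows, so deferring replication cannot lose data.
            return 'debug'
        # Object rows (deleted or not) still live only in this DB until the
        # sharder cleaves them, so the scary warning is justified.
        return 'warning'

    # Example: a sharded root with no rows left only logs at debug level.
    assert deferred_replication_log_level('sharded', 0) == 'debug'
    assert deferred_replication_log_level('unsharded', 3) == 'warning'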