From c0dbf5b8854a0887edba82a4591b0b1d0810acaf Mon Sep 17 00:00:00 2001
From: Tim Burke
Date: Thu, 31 Jan 2019 14:55:05 -0800
Subject: [PATCH] sharding: Make replicator logging less scary

When we abort the replication process because we've got shard ranges
and the sharder is now responsible for ensuring object-row durability,
we log a warning like "refusing to replicate objects", which sounds
scary. That's because it *is*, of course -- if the sharder isn't
running, whatever rows that DB has may only exist in that DB, meaning
we're one drive failure away from losing track of them entirely.

However, when the sharder *is* running and everything's happy, we reach
a steady state where the root containers are all sharded and none of
them have any object rows to lose. At that point, the warning does more
harm than good.

Only print the scary "refusing to replicate" warning if we're still
responsible for some object rows, whether deleted or not.

Change-Id: I35de08d6c1617b2e446e969a54b79b42e8cfafef
---
 swift/container/replicator.py | 21 +++++++++++++++------
 test/probe/test_sharder.py    |  4 ++--
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/swift/container/replicator.py b/swift/container/replicator.py
index e0e771c997..ea18fbd962 100644
--- a/swift/container/replicator.py
+++ b/swift/container/replicator.py
@@ -20,7 +20,7 @@ from collections import defaultdict
 from eventlet import Timeout
 
 from swift.container.sync_store import ContainerSyncStore
-from swift.container.backend import ContainerBroker, DATADIR
+from swift.container.backend import ContainerBroker, DATADIR, SHARDED
 from swift.container.reconciler import (
     MISPLACED_OBJECTS_ACCOUNT, incorrect_policy_index,
     get_reconciler_container_name, get_row_to_q_entry_translator)
@@ -113,11 +113,20 @@ class ContainerReplicator(db_replicator.Replicator):
                 'peer may need upgrading', broker.db_file,
                 '%(ip)s:%(port)s/%(device)s' % node)
         if broker.sharding_initiated():
-            self.logger.warning(
-                '%s is able to shard -- refusing to replicate objects to peer '
-                '%s; have shard ranges and will wait for cleaving',
-                broker.db_file,
-                '%(ip)s:%(port)s/%(device)s' % node)
+            if info['db_state'] == SHARDED and len(
+                    broker.get_objects(limit=1)) == 0:
+                self.logger.debug('%s is sharded and has nothing more to '
+                                  'replicate to peer %s',
+                                  broker.db_file,
+                                  '%(ip)s:%(port)s/%(device)s' % node)
+            else:
+                # Only print the scary warning if there was something that
+                # didn't get replicated
+                self.logger.warning(
+                    '%s is able to shard -- refusing to replicate objects to '
+                    'peer %s; have shard ranges and will wait for cleaving',
+                    broker.db_file,
+                    '%(ip)s:%(port)s/%(device)s' % node)
             self.stats['deferred'] += 1
             return shard_range_success
 
diff --git a/test/probe/test_sharder.py b/test/probe/test_sharder.py
index 94a057fcf1..0befd604f0 100644
--- a/test/probe/test_sharder.py
+++ b/test/probe/test_sharder.py
@@ -1902,7 +1902,7 @@ class TestContainerSharding(BaseTestContainerSharding):
         old_primary_dir, container_hash = self.get_storage_dir(
             self.brain.part, handoff_node)
         utils.mkdirs(os.path.dirname(old_primary_dir))
-        os.rename(new_primary_dir, old_primary_dir)
+        shutil.move(new_primary_dir, old_primary_dir)
 
         # make the cluster more or less "healthy" again
         self.brain.servers.start(number=new_primary_node_number)
@@ -2009,7 +2009,7 @@ class TestContainerSharding(BaseTestContainerSharding):
         old_primary_dir, container_hash = self.get_storage_dir(
             self.brain.part, handoff_node)
         utils.mkdirs(os.path.dirname(old_primary_dir))
-        os.rename(new_primary_dir, old_primary_dir)
+        shutil.move(new_primary_dir, old_primary_dir)
         self.assert_container_state(handoff_node, 'sharding', 3)
 
         # run replicator on handoff node to create a fresh db on new primary
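
For readers skimming the diff, the following is a minimal standalone sketch
(not part of the patch) of the decision the new branch makes. The helper name
and its simplified arguments are hypothetical; SHARDED here is a stand-in for
the constant imported from swift.container.backend, and object_row_count
mirrors the len(broker.get_objects(limit=1)) check above.

    # Illustrative sketch only -- not the replicator's actual API.
    SHARDED = 'sharded'  # stand-in for swift.container.backend.SHARDED

    def deferred_replication_log_level(db_state, object_row_count):
        """Pick the log level used when object replication is deferred."""
        if db_state == SHARDED and object_row_count == 0:
            # Steady state: the root container is fully sharded and holds
            # no object rows, so deferring replication cannot lose data.
            return 'debug'
        # Object rows (deleted or not) still live only in this DB until the
        # sharder cleaves them, so the scary warning is justified.
        return 'warning'

    # Example: a sharded root with no rows left only logs at debug level.
    assert deferred_replication_log_level('sharded', 0) == 'debug'
    assert deferred_replication_log_level('unsharded', 3) == 'warning'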