sharding: Make replicator logging less scary

When we abort the replication process because we've got shard ranges and
the sharder is now responsible for ensuring object-row durability, we
log a warning like "refusing to replicate objects", which sounds scary.

That's because it *is*, of course -- if the sharder isn't running,
whatever rows that DB has may only exist in that DB, meaning we're one
drive failure away from losing track of them entirely.

However, when the sharder *is* running and everything's happy, we reach
a steady-state where the root containers are all sharded and none of
them have any object rows to lose. At that point, the warning does more
harm than good.

Only print the scary "refusing to replicate" warning if we're still
responsible for some object rows, whether deleted or not.
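
In sketch form, the new rule looks something like the helper below
(still_has_object_rows is a hypothetical name for illustration; SHARDED
and the broker's get_objects() are the real names the patch uses, and
get_objects(limit=1) is a cheap probe that fetches at most one row --
tombstones included -- rather than counting the whole table):

    from swift.container.backend import SHARDED

    def still_has_object_rows(broker, db_state):
        # Hypothetical helper: a sharded DB with zero remaining object
        # rows has nothing left to lose, so the warning would be noise;
        # anything else still holds rows that only the sharder will move.
        return not (db_state == SHARDED and
                    len(broker.get_objects(limit=1)) == 0)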

Change-Id: I35de08d6c1617b2e446e969a54b79b42e8cfafef
Author: Tim Burke
Date:   2019-01-31 14:55:05 -08:00
Parent: 0c316a134f
Commit: c0dbf5b885
2 changed files with 17 additions and 8 deletions

swift/container/replicator.py

@@ -20,7 +20,7 @@ from collections import defaultdict
 from eventlet import Timeout
 from swift.container.sync_store import ContainerSyncStore
-from swift.container.backend import ContainerBroker, DATADIR
+from swift.container.backend import ContainerBroker, DATADIR, SHARDED
 from swift.container.reconciler import (
     MISPLACED_OBJECTS_ACCOUNT, incorrect_policy_index,
     get_reconciler_container_name, get_row_to_q_entry_translator)
@@ -113,11 +113,20 @@ class ContainerReplicator(db_replicator.Replicator):
                 'peer may need upgrading', broker.db_file,
                 '%(ip)s:%(port)s/%(device)s' % node)
         if broker.sharding_initiated():
-            self.logger.warning(
-                '%s is able to shard -- refusing to replicate objects to peer '
-                '%s; have shard ranges and will wait for cleaving',
-                broker.db_file,
-                '%(ip)s:%(port)s/%(device)s' % node)
+            if info['db_state'] == SHARDED and len(
+                    broker.get_objects(limit=1)) == 0:
+                self.logger.debug('%s is sharded and has nothing more to '
+                                  'replicate to peer %s',
+                                  broker.db_file,
+                                  '%(ip)s:%(port)s/%(device)s' % node)
+            else:
+                # Only print the scary warning if there was something that
+                # didn't get replicated
+                self.logger.warning(
+                    '%s is able to shard -- refusing to replicate objects to '
+                    'peer %s; have shard ranges and will wait for cleaving',
+                    broker.db_file,
+                    '%(ip)s:%(port)s/%(device)s' % node)
             self.stats['deferred'] += 1
         return shard_range_success
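
(Note that stats['deferred'] is still bumped on both branches above: the
DB is deferred to the sharder either way; only the log level changes.
And per the commit message's "whether deleted or not", the emptiness
probe via get_objects(limit=1) should see tombstone rows as well as
live ones, so a DB that only holds deletes still gets the warning.)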

test/probe/test_sharder.py

@@ -1902,7 +1902,7 @@ class TestContainerSharding(BaseTestContainerSharding):
         old_primary_dir, container_hash = self.get_storage_dir(
             self.brain.part, handoff_node)
         utils.mkdirs(os.path.dirname(old_primary_dir))
-        os.rename(new_primary_dir, old_primary_dir)
+        shutil.move(new_primary_dir, old_primary_dir)
         # make the cluster more or less "healthy" again
         self.brain.servers.start(number=new_primary_node_number)
@@ -2009,7 +2009,7 @@ class TestContainerSharding(BaseTestContainerSharding):
         old_primary_dir, container_hash = self.get_storage_dir(
             self.brain.part, handoff_node)
         utils.mkdirs(os.path.dirname(old_primary_dir))
-        os.rename(new_primary_dir, old_primary_dir)
+        shutil.move(new_primary_dir, old_primary_dir)
         self.assert_container_state(handoff_node, 'sharding', 3)
         # run replicator on handoff node to create a fresh db on new primary
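
The os.rename() -> shutil.move() change in the probe test is presumably
about robustness to cross-device moves: os.rename() raises OSError
(errno EXDEV) when source and destination live on different filesystems,
while shutil.move() attempts the atomic rename first and falls back to
copy-then-delete. A minimal sketch of that distinction (move_db_dir is a
hypothetical wrapper, not part of the patch):

    import shutil

    def move_db_dir(src, dst):
        # os.rename(src, dst) fails with OSError (errno EXDEV) when src
        # and dst are on different mounts; shutil.move() tries the atomic
        # rename first, then falls back to copying and deleting.
        shutil.move(src, dst)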